134 files changed, 89167 insertions, 0 deletions
diff --git a/gfx/wr/webrender/Cargo.toml b/gfx/wr/webrender/Cargo.toml
new file mode 100644
index 0000000000..1895bb0dcc
--- /dev/null
+++ b/gfx/wr/webrender/Cargo.toml
@@ -0,0 +1,62 @@
+[package]
+name = "webrender"
+version = "0.62.0"
+authors = ["Glenn Watson <gw@intuitionlibrary.com>"]
+license = "MPL-2.0"
+repository = "https://github.com/servo/webrender"
+description = "A GPU accelerated 2D renderer for web content"
+build = "build.rs"
+edition = "2018"
+
+[features]
+default = ["static_freetype"]
+profiler = ["tracy-rs/enable_profiler"]
+capture = ["api/serialize", "ron", "serde", "smallvec/serde", "etagere/serialization", "glyph_rasterizer/capture"]
+replay = ["api/deserialize", "ron", "serde", "smallvec/serde", "etagere/serialization", "glyph_rasterizer/replay"]
+display_list_stats = ["api/display_list_stats"]
+serialize_program = ["serde", "webrender_build/serialize_program"]
+dynamic_freetype = ["glyph_rasterizer/dynamic_freetype"]
+static_freetype = ["glyph_rasterizer/static_freetype"]
+leak_checks = []
+gecko = ["fog", "glyph_rasterizer/gecko"]
+sw_compositor = ["swgl"]
+
+[build-dependencies]
+build-parallel = "0.1.2"
+glslopt = "0.1.9"
+webrender_build = { version = "0.0.2", path = "../webrender_build" }
+
+[dependencies]
+bincode = "1.0"
+bitflags = "1.2"
+byteorder = "1.0"
+euclid = { version = "0.22.0", features = ["serde"] }
+fxhash = "0.2.1"
+gleam = "0.13.1"
+lazy_static = "1"
+log = "0.4"
+malloc_size_of_derive = "0.1"
+num-traits = "0.2"
+plane-split = "0.18"
+png = { optional = true, version = "0.16" }
+rayon = "1"
+ron = { optional = true, version = "0.8" }
+serde = { optional = true, version = "1.0", features = ["serde_derive"] }
+smallvec = "1"
+time = "0.1"
+api = { version = "0.62.0", path = "../webrender_api", package = "webrender_api" }
+webrender_build = { version = "0.0.2", path = "../webrender_build" }
+malloc_size_of = { version = "0.0.2", path = "../wr_malloc_size_of", package = "wr_malloc_size_of" }
+glyph_rasterizer = { version = "0.1.0", path = "../wr_glyph_rasterizer", package = "wr_glyph_rasterizer", default-features = false }
+svg_fmt = "0.4"
+tracy-rs = "0.1.2"
+derive_more = { version = "0.99", default-features = false, features = ["add_assign"] }
+etagere = "0.2.6"
+glean = "51.8.2"
+fog = { version = "0.1.0", optional = true }
+swgl = { path = "../swgl", optional = true }
+topological-sort = "0.1"
+
+[dev-dependencies]
+mozangle = "0.3.3"
+rand = "0.4"
diff --git a/gfx/wr/webrender/build.rs b/gfx/wr/webrender/build.rs
new file mode 100644
index 0000000000..60b4a96c23
--- /dev/null
+++ b/gfx/wr/webrender/build.rs
@@ -0,0 +1,333 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+extern crate webrender_build;
+
+use std::borrow::Cow;
+use std::env;
+use std::fs::{canonicalize, read_dir, File};
+use std::io::prelude::*;
+use std::path::{Path, PathBuf};
+use std::collections::hash_map::DefaultHasher;
+use std::hash::Hasher;
+use webrender_build::shader::*;
+use webrender_build::shader_features::{ShaderFeatureFlags, get_shader_features};
+
+// glslopt is known to leak, but we don't particularly care.
+//
+// Exports the unmangled C symbol `__lsan_default_options`, which returns a
+// pointer to the static, NUL-terminated option string "detect_leaks=0".
+// NOTE(review): presumably LeakSanitizer looks this symbol up at startup so
+// that leak detection is disabled for the build script -- confirm against
+// the sanitizer documentation.
+#[no_mangle]
+pub extern "C" fn __lsan_default_options() -> *const u8 {
+    b"detect_leaks=0\0".as_ptr()
+}
+
+/// Turn `path` into the string that gets spliced into an `include_str!()`
+/// invocation in the generated file. Referring to the shader by path keeps
+/// the generated code far more compact than embedding the literal shader
+/// source would.
+///
+/// The path is canonicalized first; every occurrence of the `\\?\` sequence
+/// is then dropped and all remaining backslashes are turned into forward
+/// slashes.
+///
+/// Panics if the path cannot be canonicalized or is not valid UTF-8.
+///
+/// If someone is building on a network share, I'm sorry.
+fn escape_include_path(path: &Path) -> String {
+    let canonical = canonicalize(path).unwrap();
+    canonical
+        .as_os_str()
+        .to_str()
+        .unwrap()
+        .replace("\\\\?\\", "")
+        .replace("\\", "/")
+}
+
+/// Emit the `UNOPTIMIZED_SHADERS` map into `shader_file`.
+///
+/// One entry is generated per path in `glsl_files`, keyed by the file name
+/// with `.glsl` removed. Each entry pulls in the shader via `include_str!()`
+/// and carries a digest of the #include-expanded source that is computed
+/// here, at build time.
+///
+/// Returns any I/O error raised while writing to `shader_file`. Panics if an
+/// entry is not a regular file or its name is not valid UTF-8.
+fn write_unoptimized_shaders(mut glsl_files: Vec<PathBuf>, shader_file: &mut File) -> Result<(), std::io::Error> {
+    writeln!(
+        shader_file,
+        "  pub static ref UNOPTIMIZED_SHADERS: HashMap<&'static str, SourceWithDigest> = {{"
+    )?;
+    writeln!(shader_file, "    let mut shaders = HashMap::new();")?;
+
+    // Process the files in file-name order so that the generated shaders.rs
+    // is reproducible from one build to the next.
+    glsl_files.sort_by(|lhs, rhs| lhs.file_name().cmp(&rhs.file_name()));
+
+    for source_path in &glsl_files {
+        assert!(source_path.is_file());
+
+        // The map key is the file name without its ".glsl" part.
+        let name = source_path
+            .file_name()
+            .unwrap()
+            .to_str()
+            .unwrap()
+            .replace(".glsl", "");
+
+        // Included files are resolved relative to the shader's own directory.
+        let include_dir = source_path.parent().unwrap();
+        assert!(include_dir.is_dir());
+
+        // Hash the #include-expanded source now and store the result as a
+        // literal next to the source string, so that large strings never
+        // have to be hashed at runtime.
+        let mut hasher = DefaultHasher::new();
+        ShaderSourceParser::new().parse(
+            Cow::Owned(shader_source_from_file(source_path)),
+            &|include| {
+                let include_path = include_dir.join(&format!("{}.glsl", include));
+                Cow::Owned(shader_source_from_file(&include_path))
+            },
+            &mut |chunk| hasher.write(chunk.as_bytes()),
+        );
+        let digest: ProgramSourceDigest = hasher.into();
+
+        writeln!(
+            shader_file,
+            "    shaders.insert(\"{}\", SourceWithDigest {{ source: include_str!(\"{}\"), digest: \"{}\"}});",
+            name,
+            escape_include_path(source_path),
+            digest,
+        )?;
+    }
+
+    writeln!(shader_file, "    shaders")?;
+    writeln!(shader_file, "  }};")?;
+
+    Ok(())
+}
+
+/// One unit of work for the shader optimizer: a single shader built with a
+/// single feature configuration for a single GL flavour.
+#[derive(Clone, Debug)]
+struct ShaderOptimizationInput {
+    /// Base name of the shader, as returned by `get_shader_features`.
+    shader_name: &'static str,
+    /// Comma-separated list of features; may be empty.
+    config: String,
+    /// GL flavour the shader is optimized for.
+    gl_version: ShaderVersion,
+}
+
+/// Result of successfully optimizing one `ShaderOptimizationInput`.
+#[derive(Debug)]
+struct ShaderOptimizationOutput {
+    /// Shader name, followed by `_` and the feature list (commas replaced by
+    /// underscores) when the configuration is not empty.
+    full_shader_name: String,
+    /// GL flavour the shader was optimized for.
+    gl_version: ShaderVersion,
+    /// Location of the optimized vertex shader written to the output directory.
+    vert_file_path: PathBuf,
+    /// Location of the optimized fragment shader written to the output directory.
+    frag_file_path: PathBuf,
+    /// Digest computed over the contents of both optimized shader files.
+    digest: ProgramSourceDigest,
+}
+
+/// Failure to optimize a single shader configuration.
+#[derive(Debug)]
+struct ShaderOptimizationError {
+    /// The input that could not be optimized.
+    shader: ShaderOptimizationInput,
+    /// Log reported by glslopt for the failing stage.
+    message: String,
+}
+
+/// Dump `shader_src` to stdout with a line number in front of every line, so
+/// that glslopt diagnostics can be matched against the source.
+fn print_shader_source(shader_src: &str) {
+    // glsl-opt reports errors offset by 1 relative to the source string we
+    // hand it (for some reason), so emit an empty "line 0" and start the
+    // real numbering at 1.
+    println!("0\t|");
+    for (text, number) in shader_src.split('\n').zip(1usize..) {
+        println!("{}\t|{}", number, text);
+    }
+}
+
+/// Optimize every shader configuration with glslopt and emit the
+/// `OPTIMIZED_SHADERS` map into `shader_file`.
+///
+/// * `shader_dir` - directory containing the `.glsl` sources; includes are
+///   resolved as `<shader_dir>/<name>.glsl`.
+/// * `shader_file` - the generated Rust file the map is appended to.
+/// * `out_dir` - directory that receives the optimized `.vert` / `.frag`
+///   files, which the generated map references through `include_str!()`.
+///
+/// Returns any I/O error raised while writing to `shader_file`. Panics if
+/// glslopt rejects a shader (after printing the offending source) or if the
+/// parallel build itself fails.
+fn write_optimized_shaders(shader_dir: &Path, shader_file: &mut File, out_dir: &str) -> Result<(), std::io::Error> {
+    writeln!(
+        shader_file,
+        "  pub static ref OPTIMIZED_SHADERS: HashMap<(ShaderVersion, &'static str), OptimizedSourceWithDigest> = {{"
+    )?;
+    writeln!(shader_file, "    let mut shaders = HashMap::new();")?;
+
+    // Read the target OS once; it drives both the GL version selection and
+    // the feature filtering below.
+    let target_os = env::var("CARGO_CFG_TARGET_OS");
+    let target_os = target_os.as_ref().map(|s| &**s);
+    let is_android = matches!(target_os, Ok("android"));
+
+    // The full set of optimized shaders can be quite large, so only optimize
+    // for the GL version we expect to be used on the target platform. If a different GL
+    // version is used we will simply fall back to the unoptimized shaders.
+    let shader_versions = match target_os {
+        Ok("android") | Ok("windows") => [ShaderVersion::Gles],
+        _ => [ShaderVersion::Gl],
+    };
+
+    // Collect one optimization job per (shader, feature config, GL version).
+    let mut shaders = Vec::default();
+    for &gl_version in &shader_versions {
+        let mut flags = ShaderFeatureFlags::all();
+        if gl_version != ShaderVersion::Gl {
+            flags.remove(ShaderFeatureFlags::GL);
+        }
+        if gl_version != ShaderVersion::Gles {
+            flags.remove(ShaderFeatureFlags::GLES);
+            flags.remove(ShaderFeatureFlags::TEXTURE_EXTERNAL);
+        }
+        if !is_android {
+            flags.remove(ShaderFeatureFlags::TEXTURE_EXTERNAL_ESSL1);
+        }
+        flags.remove(ShaderFeatureFlags::DITHERING);
+
+        for (shader_name, configs) in get_shader_features(flags) {
+            for config in configs {
+                shaders.push(ShaderOptimizationInput {
+                    shader_name,
+                    config,
+                    gl_version,
+                });
+            }
+        }
+    }
+
+    let outputs = build_parallel::compile_objects(&|shader: &ShaderOptimizationInput| {
+        println!("Optimizing shader {:?}", shader);
+        let target = match shader.gl_version {
+            ShaderVersion::Gl => glslopt::Target::OpenGl,
+            ShaderVersion::Gles => glslopt::Target::OpenGles30,
+        };
+        let glslopt_ctx = glslopt::Context::new(target);
+
+        // An empty config yields a single empty item from split(); drop it.
+        let features = shader.config.split(",").filter(|f| !f.is_empty()).collect::<Vec<_>>();
+
+        let (vert_src, frag_src) = build_shader_strings(
+            shader.gl_version,
+            &features,
+            shader.shader_name,
+            &|f| Cow::Owned(shader_source_from_file(&shader_dir.join(&format!("{}.glsl", f)))),
+        );
+
+        let full_shader_name = if shader.config.is_empty() {
+            shader.shader_name.to_string()
+        } else {
+            format!("{}_{}", shader.shader_name, shader.config.replace(",", "_"))
+        };
+
+        // The sources are cloned because they are still needed for the
+        // diagnostic dump when optimization fails.
+        let vert = glslopt_ctx.optimize(glslopt::ShaderType::Vertex, vert_src.clone());
+        if !vert.get_status() {
+            print_shader_source(&vert_src);
+            return Err(ShaderOptimizationError {
+                shader: shader.clone(),
+                message: vert.get_log().to_string(),
+            });
+        }
+        let frag = glslopt_ctx.optimize(glslopt::ShaderType::Fragment, frag_src.clone());
+        if !frag.get_status() {
+            print_shader_source(&frag_src);
+            return Err(ShaderOptimizationError {
+                shader: shader.clone(),
+                message: frag.get_log().to_string(),
+            });
+        }
+
+        let vert_source = vert.get_output().unwrap();
+        let frag_source = frag.get_output().unwrap();
+
+        // Compute a digest of the optimized shader sources. We store this
+        // as a literal alongside the source string so that we don't need
+        // to hash large strings at runtime.
+        let mut hasher = DefaultHasher::new();
+
+        let vert_file_path = Path::new(out_dir)
+            .join(format!("{}_{:?}.vert", full_shader_name, shader.gl_version));
+        write_optimized_shader_file(&vert_file_path, vert_source, &shader.shader_name, &features, &mut hasher);
+
+        let frag_file_path = vert_file_path.with_extension("frag");
+        write_optimized_shader_file(&frag_file_path, frag_source, &shader.shader_name, &features, &mut hasher);
+
+        let digest: ProgramSourceDigest = hasher.into();
+
+        println!("Finished optimizing shader {:?}", shader);
+
+        Ok(ShaderOptimizationOutput {
+            full_shader_name,
+            gl_version: shader.gl_version,
+            vert_file_path,
+            frag_file_path,
+            digest,
+        })
+    }, &shaders);
+
+    match outputs {
+        Ok(mut outputs) => {
+            // Sort the shader list so that the shaders.rs file is filled
+            // deterministically. Compare by reference: cloning the name for
+            // every comparison would allocate needlessly.
+            outputs.sort_by(|a, b| {
+                a.gl_version
+                    .cmp(&b.gl_version)
+                    .then_with(|| a.full_shader_name.cmp(&b.full_shader_name))
+            });
+
+            for shader in outputs {
+                writeln!(
+                    shader_file,
+                    "    shaders.insert(({}, \"{}\"), OptimizedSourceWithDigest {{",
+                    shader.gl_version.variant_name(),
+                    shader.full_shader_name,
+                )?;
+                writeln!(
+                    shader_file,
+                    "        vert_source: include_str!(\"{}\"),",
+                    escape_include_path(&shader.vert_file_path),
+                )?;
+                writeln!(
+                    shader_file,
+                    "        frag_source: include_str!(\"{}\"),",
+                    escape_include_path(&shader.frag_file_path),
+                )?;
+                writeln!(shader_file, "        digest: \"{}\",", shader.digest)?;
+                writeln!(shader_file, "    }});")?;
+            }
+        }
+        Err(err) => match err {
+            build_parallel::Error::BuildError(err) => {
+                panic!("Error optimizing shader {:?}: {}", err.shader, err.message)
+            }
+            _ => panic!("Error optimizing shaders."),
+        }
+    }
+
+    writeln!(shader_file, "    shaders")?;
+    writeln!(shader_file, "  }};")?;
+
+    Ok(())
+}
+
+/// Write one optimized shader stage to `path` and feed exactly the bytes
+/// written into `hasher`.
+///
+/// * `path` - file to create (truncated if it already exists).
+/// * `source` - optimized shader source; written line by line, each line
+///   terminated with `\n`.
+/// * `shader_name`, `features` - embedded as a comment after the first line
+///   of the source to make debugging easier.
+/// * `hasher` - receives the same bytes that are written to the file.
+///
+/// The comment is only emitted when `source` has at least two lines.
+///
+/// Panics if the file cannot be created or written.
+fn write_optimized_shader_file(
+    path: &Path,
+    source: &str,
+    shader_name: &str,
+    features: &[&str],
+    hasher: &mut DefaultHasher,
+) {
+    // Buffer the output: the loop below issues several small writes per
+    // line, which would otherwise each go straight to the file.
+    let mut file = std::io::BufWriter::new(File::create(path).unwrap());
+    for (line_number, line) in source.lines().enumerate() {
+        // We embed the shader name and features as a comment in the
+        // source to make debugging easier.
+        // The #version directive must be on the first line so we insert
+        // the extra information on the next line.
+        if line_number == 1 {
+            let prelude = format!(
+                "// {}\n// features: {:?}\n\n",
+                shader_name, features
+            );
+            file.write_all(prelude.as_bytes()).unwrap();
+            hasher.write(prelude.as_bytes());
+        }
+        file.write_all(line.as_bytes()).unwrap();
+        file.write_all(b"\n").unwrap();
+        hasher.write(line.as_bytes());
+        hasher.write(b"\n");
+    }
+    // Flush explicitly: errors raised while flushing on drop are ignored.
+    file.flush().unwrap();
+}
+
+/// Build-script entry point: generates `shaders.rs` in the output directory.
+///
+/// Every `*.glsl` file directly inside `res/` is registered with cargo for
+/// change tracking and collected. The generated file then receives the
+/// `SourceWithDigest` / `OptimizedSourceWithDigest` struct definitions and a
+/// `lazy_static!` block holding the unoptimized and optimized shader maps.
+///
+/// Returns any I/O error hit while listing `res/` or writing `shaders.rs`.
+fn main() -> Result<(), std::io::Error> {
+    // Falls back to "out" when OUT_DIR is not set; the fallback string is
+    // only allocated when it is actually needed.
+    let out_dir = env::var("OUT_DIR").unwrap_or_else(|_| "out".to_owned());
+
+    let shaders_file_path = Path::new(&out_dir).join("shaders.rs");
+    let mut glsl_files = vec![];
+
+    println!("cargo:rerun-if-changed=res");
+    let res_dir = Path::new("res");
+    for entry in read_dir(res_dir)? {
+        let entry = entry?;
+        let path = entry.path();
+
+        // A name that is not valid UTF-8 cannot end in ".glsl" as far as we
+        // are concerned; skip it instead of aborting the whole build over a
+        // file that is not a shader.
+        let is_glsl = entry
+            .file_name()
+            .to_str()
+            .map_or(false, |name| name.ends_with(".glsl"));
+        if is_glsl {
+            println!("cargo:rerun-if-changed={}", path.display());
+            glsl_files.push(path);
+        }
+    }
+
+    let mut shader_file = File::create(shaders_file_path)?;
+
+    writeln!(shader_file, "/// AUTO GENERATED BY build.rs\n")?;
+    writeln!(shader_file, "use std::collections::HashMap;\n")?;
+    writeln!(shader_file, "use webrender_build::shader::ShaderVersion;\n")?;
+    writeln!(shader_file, "pub struct SourceWithDigest {{")?;
+    writeln!(shader_file, "    pub source: &'static str,")?;
+    writeln!(shader_file, "    pub digest: &'static str,")?;
+    writeln!(shader_file, "}}\n")?;
+    writeln!(shader_file, "pub struct OptimizedSourceWithDigest {{")?;
+    writeln!(shader_file, "    pub vert_source: &'static str,")?;
+    writeln!(shader_file, "    pub frag_source: &'static str,")?;
+    writeln!(shader_file, "    pub digest: &'static str,")?;
+    writeln!(shader_file, "}}\n")?;
+    writeln!(shader_file, "lazy_static! {{")?;
+
+    write_unoptimized_shaders(glsl_files, &mut shader_file)?;
+    writeln!(shader_file)?;
+    write_optimized_shaders(res_dir, &mut shader_file, &out_dir)?;
+    writeln!(shader_file, "}}")?;
+
+    Ok(())
+}
diff --git a/gfx/wr/webrender/doc/CLIPPING_AND_POSITIONING.md b/gfx/wr/webrender/doc/CLIPPING_AND_POSITIONING.md
new file mode 100644
index 0000000000..4aa8d0c684
--- /dev/null
+++ b/gfx/wr/webrender/doc/CLIPPING_AND_POSITIONING.md
@@ -0,0 +1,150 @@
+# Original Design
+
+To understand the current design for clipping and positioning (transformations
+and scrolling) in WebRender it can be useful to have a little background about
+the original design for these features. The most important thing to remember is
+that originally clipping, scrolling regions, and transformations were
+properties of stacking contexts and they were completely _hierarchical_. This
+goes a long way toward representing the majority of CSS content on the web, but
+fails when dealing with important edge cases and features including:
+ 1. Support for sticky positioned content
+ 2. Scrolling areas that include content that is ordered both above and below
+    intersecting content from outside the scroll area.
+ 3. Items in the same scrolling root, clipped by different clips one or more of
+    which are defined outside the scrolling root itself.
+ 4. Completely non-hierarchical clipping situations, such as when items are
+    clipped by some clips in the hierarchy, but not others.
+
+Design changes have been a step by step path from the original design to one
+that can handle all CSS content.
+
+# Current Design
+
+All positioning and clipping is handled by the `SpatialTree`. The name is a
+holdover from when this tree was a tree of `Layers` which handled both
+positioning and clipping. Currently the `SpatialTree` holds:
+ 1. A hierarchical collection of `SpatialNodes`, with the final screen
+    transformation of each node depending on the relative transformation of the
+    node combined with the transformations of all of its ancestors. These nodes
+    are responsible for positioning display list items and clips.
+ 2. A collection of `ClipNodes` which specify a rectangular clip and, optionally,
+    a set of rounded rectangle clips and a masking image.
+ 3. A collection of `ClipChains`. Each `ClipChain` is a list of `ClipNode`
+    elements. Every display list item has an assigned `ClipChain` which
+    specifies what `ClipNodes` are applied to that item.
+
+The `SpatialNode` of each clip applied to an item is completely independent of
+the `SpatialNode` applied to the item itself.
+
+One holdover from the previous design is that both `ClipNode` and `SpatialNodes`
+have a parent node, which is either a `SpatialNode` or a `ClipNode`.  From this
+node WebRender can determine both a parent `ClipNode` and a parent `SpatialNode`
+by finding the first ancestor of that type. This is handled by the
+`DisplayListFlattener`.
+
+## `SpatialNode`
+There are three types of `SpatialNodes`:
+  1. Reference frames which are created when content needs to apply
+     transformation or perspective properties to display list items. Reference
+     frames establish a new coordinate system, so internally all coordinates on
+     display list items are relative to the reference frame origin. Later
+     any non-reference frame positioning nodes that display list items belong
+     to can adjust this position relative to the reference frame origin.
+  2. Scrolling nodes are used to define scrolling areas. These nodes have scroll
+     offsets which are a 2D translation relative to ancestor nodes and, ultimately,
+     the reference frame origin.
+  3. Sticky frames are responsible for implementing position:sticky behavior.
+     This is also a 2D translation.
+
+`SpatialNodes` are defined as items in the display list. After scene building
+each node is traversed hierarchically during the `SpatialTree::update()` step.
+Once reference frame transforms and relative offsets are calculated, a
+to-screen-space transformation can be calculated for each `SpatialNode`. This transformation
+is added to the `TransformPalette` and becomes directly available to WebRender shaders.
+
+In addition to screen space transformation calculation, the `SpatialNode` tree
+is divided up into _compatible coordinate systems_. These are coordinate systems
+which differ only by 2D translations from their parent system. These compatible
+coordinate systems may even cross reference frame boundaries. The goal here is
+to allow the application of clipping rectangles from different compatible
+coordinate systems without generating mask images.
+
+## `ClipNode`
+
+Each clip node holds a clip rectangle along with an optional collection of
+rounded clip rectangles and a mask image. The fact that `ClipNodes` all have a
+clip rectangle is important because it means that all content clipped by a
+clip node has a bounding rectangle, which can be converted into a bounding
+screen space rectangle.  This rectangle is called the _outer rectangle_ of the
+clip. `ClipNodes` may also have an _inner rectangle_, which is an area within
+the boundaries of the _outer rectangle_ that is completely unclipped.
+
+These rectangles are calculated during the `SpatialTree::update()` phase. In
+addition, each `ClipNode` produces a template `ClipChainNode` used to build
+the `ClipChains` which use that node.
+
+## `ClipChains`
+
+There are two ways that `ClipChains` are defined in WebRender. The first is
+through using the API for manually specifying `ClipChains` via a parent
+`ClipChain` and a list of `ClipNodes`. The second is through the hierarchy of a
+`ClipNode` established by its parent node. Every `ClipNode` has a chain of
+ancestor `SpatialNodes` and `ClipNodes`. The creation of a `ClipNode`
+automatically defines a `ClipChain` for this hierarchy. This behavior is a
+compatibility feature with the old completely hierarchical clipping architecture
+and is still how Gecko and Servo create most of their `ClipChains`. These
+hierarchical `ClipChains` are constructed during the `ClipNode::update()` step.
+
+During `ClipChain` construction, WebRender tries to eliminate clips that will
+not affect rendering, by looking at the combined _outer rectangle_ and _inner
+rectangle_ of a `ClipChain` and the _outer rectangle_ and _inner rectangle_ of
+any `ClipNode` appended to the chain. An example of the goal of this process is
+to avoid having to render a mask for a large rounded rectangle when the rest of
+the clip chain constrains the content to an area completely inside that
+rectangle. Avoiding mask rasterization in this case and others has large
+performance impacts on WebRender.
+
+# Clipping and Positioning in the Display List
+
+Each non-structural WebRender display list item has
+ * A `SpatialId` of a `SpatialNode` for positioning
+ * A `ClipId` of a `ClipNode` or a `ClipChain` for clipping
+ * An item-specific rectangular clip rectangle
+
+The positioning node determines how that item is positioned. It's assumed that
+the positioning node and the item are children of the same reference frame. The
+clipping node determines how that item is clipped. This should be fully
+independent of how the node is positioned and items can be clipped by any
+`ClipChain` regardless of the reference frame of their member clips. Finally,
+the item-specific clipping rectangle is applied directly to the item and should
+never result in the creation of a clip mask itself.
+
+## Converting user-exposed `ClipId`/`SpatialId` to internal indices
+
+WebRender must access `ClipNodes` and `SpatialNodes` quite a bit when building
+scenes and frames, so it tries to convert `ClipId`/`SpatialId`, which are already
+per-pipeline indices, to global scene-wide indices.  Internally this is a
+conversion from `ClipId` into `ClipNodeIndex` or `ClipChainIndex`, and from
+`SpatialId` into `SpatialNodeIndex`. In order to make this conversion cheaper, the
+`DisplayListFlattener` assigns offsets for each pipeline and node type in the
+scene-wide `SpatialTree`.
+
+Nodes are added to their respective arrays sequentially as the display list is
+processed during scene building. When encountering an iframe, the
+`DisplayListFlattener` must start processing the nodes for that iframe's
+pipeline, meaning that nodes are now being added out of order to the node arrays
+of the `SpatialTree`. In this case, the `SpatialTree` fills in the gaps in
+the node arrays with placeholder nodes.
+
+# Hit Testing
+
+Hit testing is the responsibility of the `HitTester` data structure. This
+structure copies information necessary for hit testing from the
+`SpatialTree`. This is done so that hit testing can still take place while a
+new `SpatialTree` is under construction.
+
+# Ideas for the Future
+1. Expose the difference between `ClipId` and `ClipChainId` in the API.
+2. Prevent having to duplicate the `SpatialTree` for hit testing.
+3. Avoid having to create placeholder nodes in the `SpatialTree` while
+   processing iframes.
diff --git a/gfx/wr/webrender/doc/blob.md b/gfx/wr/webrender/doc/blob.md
new file mode 100644
index 0000000000..b910f6f76a
--- /dev/null
+++ b/gfx/wr/webrender/doc/blob.md
@@ -0,0 +1,43 @@
+# Blob images
+
+Blob images are a fallback mechanism for webrender that Gecko uses to render primitives that aren't currently supported by webrender. The main idea is to provide webrender with a custom handler that can take arbitrary drawing commands serialized as buffers of bytes (the blobs) and turn them into images that webrender internally will treat as regular images.
+
+At the API level, blob images are treated as other images. They are resources created and associated with image keys, and are used in the display list with regular image display items. 
+
+
+## Active area
+
+In order to support scrolling very large content, blob images don't necessarily have a finite size. They can grow in any direction. At any time they do have an "active area", also called "visible area" which defines the portion that has to be rasterized. Typically this active area moves along large blob images depending on the scroll position.
+The coordinate system of the active area *should* be the one of the blob's drawing commands (it is really up to the blob handler implementation to enforce that; Gecko does), and its scale should correspond to device pixels. The active area's coordinates can be negative.
+
+As far as positioning goes, the active area maps to the image display item's bounds. In other words the content at the top-left corner of the active area will be rendered on screen at the position of the top-left corner of the display item's local rect.
+
+In Gecko, the active area corresponds to the intersection of the fallback content's rect and the displayport.
+
+The terms "visible area" and "visible rect" are used a lot in the blobs code, unfortunately they collide with frame building's visibility/culling terminology. They don't correspond to what is visible to the user, but rather what is in the displayport.
+
+
+## Tiling
+
+Blob images can be either tiled or non-tiled. Non-tiled blob images support invalid rects while tiled blob images track validity only at the tile level. In Gecko all blobs are tiled with a tile size of 256x256.
+
+Just like regular tiled images, blob image tiles along the border of the image are shrunk to fit the remaining size. The only difference is that the tiling pattern always starts at the top-left corner for regular images (smaller boundary tiles only along the right and bottom edges), while it can be arbitrarily positioned for blob images (smaller boundary tiles potentially on all sides).
+
+The tiling logic is in webrender/src/image.rs.
+
+
+## Async rasterization
+
+Blobs are typically too slow to rasterize on the critical path. We try to avoid blocking frame building on blob image rasterization. In order to do that we rasterize blobs as part of scene building. Rather than rasterize tiles on demand from visibility information, we rasterize the entire active area during scene building. This means we potentially process a lot more content than will be displayed if the user doesn't scroll through all of the visible area.
+
+When the render backend receives a transaction, it looks for all new and updated blob images, and generates blob rasterization requests for all tiles of the blob images that overlap their active area. The requests are bundled with an `AsyncBlobImageRasterizer` object in the transaction that is sent to the scene builder thread. The async rasterizer is created by the `BlobImageHandler` at each transaction. It is a snapshot of the state of the blobs as well as external information such as fonts, and does the actual rasterization.
+
+While tiles are rasterized eagerly during scene building, their content is uploaded lazily to the texture cache depending on the result of the visibility pass during frame building.
+
+
+## Late rasterization
+
+In some cases we run into a missing blob image during frame building and have to rasterize it synchronously. This happens when a rasterized tile is uploaded to the texture cache (at which point the CPU side is discarded), the texture cache entry expires and after scrolling back into view the tile is needed again.
+We should really keep the rasterized blobs around just like we keep regular images in the cache. Hopefully this section will become obsolete eventually and we'll be able to remove late blob rasterization.
+
+The information needed for async rasterization corresponds to the state of blobs before scene building while late rasterization needs the state of blobs after the last complete scene build. This means we have to be careful about which version we manipulate in the resource cache.
diff --git a/gfx/wr/webrender/doc/swizzling.md b/gfx/wr/webrender/doc/swizzling.md
new file mode 100644
index 0000000000..4b38791940
--- /dev/null
+++ b/gfx/wr/webrender/doc/swizzling.md
@@ -0,0 +1,31 @@
+> It'd be great to have some (in-tree) docs describing the process you've worked through here, the overall motivation, how it works on different GPUs / platforms etc. Perhaps as a follow up?
+
+# Swizzling in WR
+
+## Problem statement
+
+The goal is to avoid the CPU conversion of data done by the driver on texture data uploads. It's slow and always done synchronously, hurting our "Renderer" thread CPU utilization.
+
+Gecko produces all of the image data in BGRA. Switching "imagelib" to RGBA is possible, but modifying Skia to follow is less trivial.
+OpenGL support for BGRA8 as an internal texture format is a complex story: it's officially supported in Angle and a fair share of Android devices, but it's not available on the desktop GL (and until recently wasn't available in the Android emulator). Unofficially, when textures are initialized with `glTexImage` (as opposed to `glTexStorage`) with RGBA internal format, the desktop GL drivers often prefer to store the data in BGRA8 format, actually.
+
+The only way to avoid the CPU conversion is to provide the data in exactly the same format that the driver is using internally for a texture. In this case, the driver does a straight `memcpy` into its CPU-visible memory, which is the best we can hope for with the OpenGL API.
+
+## Solution: swizzling
+
+https://phabricator.services.mozilla.com/D21965 is providing the solution to this problem. The main principles are:
+
+  1. Use `glTexStorage` whenever it's available. Doing so gives us full control of the internal format, and also lets us avoid allocating memory for mipmaps that we don't use.
+  2. Determine the shared texture cache format at init time, based on the GL device capabilities. For Angle and OpenGL ES this is BGRA8, for desktop this is RGBA8 (since desktop GL doesn't support BGRA internal formats). WebRender is now able to tell Gecko which color format it prefers the texture data to use.
+  3. If the data comes in a format that is different from our best case, we pretend that the data is actually in our best case format, and associate the allocated cache entry with the `Swizzle`. That swizzle configuration changes the way shaders sample from a texture, adjusting for the fact the data was provided in a different format.
+  4. The lifetime of a "swizzled" texture cache data is starting at the point the data is uploaded and ending at a point where any shader samples from this data. Any other operation on that data (copying or blitting) is not configurable by `Swizzle` and thus would produce incorrect results. To address this, the change enhances `cs_copy` shader to be used in place of blitting from the texture cache, where needed.
+  5. Swizzling becomes a part of the batch key per texture. Mixing draw calls whose texture data is swizzled differently therefore introduces batch breaks. This is a downside of the swizzling approach in general, but it's not clear to what extent this would affect Gecko.
+
+## Code paths
+
+Windows/Angle and Android:
+  - we use `glTexStorage` with BGRA8 internal format, no swizzling is needed in general case.
+
+Windows (non-Angle), Mac, Linux:
+  - if `glTexStorage` is available, we use it with RGBA8 internal format, swizzling everything on texture sampling.
+  - otherwise, we use RGBA unsized format with `glTexImage` and expect the data to come in BGRA format, no swizzling is involved.
diff --git a/gfx/wr/webrender/doc/text-rendering.md b/gfx/wr/webrender/doc/text-rendering.md
new file mode 100644
index 0000000000..b965562b99
--- /dev/null
+++ b/gfx/wr/webrender/doc/text-rendering.md
@@ -0,0 +1,720 @@
+# Text Rendering
+
+This document describes the details of how WebRender renders text, particularly the blending stage of text rendering.
+We will go into grayscale text blending, subpixel text blending, and "subpixel text with background color" blending.
+
+### Prerequisites
+
+The description below assumes you're familiar with regular rgba compositing, operator over,
+and the concept of premultiplied alpha.
+
+### Not covered in this document
+
+We are going to treat the origin of the text mask as a black box.
+We're also going to assume we can blend text in the device color space and will not go into the gamma correction and linear pre-blending that happens in some of the backends that produce the text masks.
+
+## Grayscale Text Blending
+
+Grayscale text blending is the simplest form of text blending. Our blending function has three inputs:
+
+ - The text color, as a premultiplied rgba color.
+ - The text mask, as a single-channel alpha texture.
+ - The existing contents of the framebuffer that we're rendering to, the "destination". This is also a premultiplied rgba buffer.
+
+Note: The word "grayscale" here does *not* mean that we can only draw gray text.
+It means that the mask only has a single alpha value per pixel, so we can visualize
+the mask in our minds as a grayscale image.
+
+### Deriving the math
+
+We want to mask our text color using the single-channel mask, and composite that to the destination.
+This compositing step uses operator "over", just like regular compositing of rgba images.
+
+I'll be using GLSL syntax to describe the blend equations, but please consider most of the code below pseudocode.
+
+We can express the blending described above as the following blend equation:
+
+```glsl
+vec4 textblend(vec4 text_color, vec4 mask, vec4 dest) {
+  return over(in(text_color, mask), dest);
+}
+```
+
+with `over` being the blend function for (premultiplied) operator "over":
+
+```glsl
+vec4 over(vec4 src, vec4 dest) {
+  return src + (1.0 - src.a) * dest;
+}
+```
+
+and `in` being the blend function for (premultiplied) operator "in", i.e. the masking operator:
+
+```glsl
+vec4 in(vec4 src, vec4 mask) {
+  return src * mask.a;
+}
+```
+
+So the complete blending function is:
+
+```glsl
+result.r = text_color.r * mask.a + (1.0 - text_color.a * mask.a) * dest.r;
+result.g = text_color.g * mask.a + (1.0 - text_color.a * mask.a) * dest.g;
+result.b = text_color.b * mask.a + (1.0 - text_color.a * mask.a) * dest.b;
+result.a = text_color.a * mask.a + (1.0 - text_color.a * mask.a) * dest.a;
+```
+
+### Rendering this with OpenGL
+
+In general, a fragment shader does not have access to the destination.
+So the full blend equation needs to be expressed in a way that the shader only computes values that are independent of the destination,
+and the parts of the equation that use the destination values need to be applied by the OpenGL blend pipeline itself.
+The OpenGL blend pipeline can be tweaked using the functions `glBlendEquation` and `glBlendFunc`.
+
+In our example, the fragment shader can output just `text_color * mask.a`:
+
+```glsl
+  oFragColor = text_color * mask.a;
+```
+
+and the OpenGL blend pipeline can be configured like so:
+
+```rust
+    pub fn set_blend_mode_premultiplied_alpha(&self) {
+        self.gl.blend_func(gl::ONE, gl::ONE_MINUS_SRC_ALPHA);
+        self.gl.blend_equation(gl::FUNC_ADD);
+    }
+```
+
+This results in an overall blend equation of
+
+```
+result.r = 1 * oFragColor.r + (1 - oFragColor.a) * dest.r;
+           ^                ^  ^^^^^^^^^^^^^^^^^
+           |                |         |
+           +--gl::ONE       |         +-- gl::ONE_MINUS_SRC_ALPHA
+                            |
+                            +-- gl::FUNC_ADD
+
+         = 1 * (text_color.r * mask.a) + (1 - (text_color.a * mask.a)) * dest.r
+         = text_color.r * mask.a + (1 - text_color.a * mask.a) * dest.r
+```
+
+which is exactly what we wanted.
+
+### Differences to the actual WebRender code
+
+There are two minor differences between the shader code above and the actual code in the text run shader in WebRender:
+
+```glsl
+oFragColor = text_color * mask.a;    // (shown above)
+// vs.
+oFragColor = vColor * mask * alpha;  // (actual webrender code)
+```
+
+`vColor` is set to the text color. The differences are:
+
+ - WebRender multiplies with all components of `mask` instead of just with `mask.a`.
+   However, our font rasterization code fills the rgb values of `mask` with the value of `mask.a`,
+   so this is completely equivalent.
+ - WebRender applies another alpha to the text. This is coming from the clip.
+   You can think of this alpha to be a pre-adjustment of the text color for that pixel, or as an
+   additional mask that gets applied to the mask.
+
+## Subpixel Text Blending
+
+Now that we have the blend equation for single-channel text blending, we can look at subpixel text blending.
+
+The main difference between subpixel text blending and grayscale text blending is the fact that,
+for subpixel text, the text mask contains a separate alpha value for each color component.
+
+### Component alpha
+
+Regular painting uses four values per pixel: three color values, and one alpha value. The alpha value applies to all components of the pixel equally.
+
+Imagine for a second a world in which you have *three alpha values per pixel*, one for each color component.
+
+ - Old world: Each pixel has four values: `color.r`, `color.g`, `color.b`, and `color.a`.
+ - New world: Each pixel has *six* values: `color.r`, `color.a_r`, `color.g`, `color.a_g`, `color.b`, and `color.a_b`.
+
+In such a world we can define a component-alpha-aware operator "over":
+
+```glsl
+vec6 over_comp(vec6 src, vec6 dest) {
+  vec6 result;
+  result.r = src.r + (1.0 - src.a_r) * dest.r;
+  result.g = src.g + (1.0 - src.a_g) * dest.g;
+  result.b = src.b + (1.0 - src.a_b) * dest.b;
+  result.a_r = src.a_r + (1.0 - src.a_r) * dest.a_r;
+  result.a_g = src.a_g + (1.0 - src.a_g) * dest.a_g;
+  result.a_b = src.a_b + (1.0 - src.a_b) * dest.a_b;
+  return result;
+}
+```
+
+and a component-alpha-aware operator "in":
+
+```glsl
+vec6 in_comp(vec6 src, vec6 mask) {
+  vec6 result;
+  result.r = src.r * mask.a_r;
+  result.g = src.g * mask.a_g;
+  result.b = src.b * mask.a_b;
+  result.a_r = src.a_r * mask.a_r;
+  result.a_g = src.a_g * mask.a_g;
+  result.a_b = src.a_b * mask.a_b;
+  return result;
+}
+```
+
+and even a component-alpha-aware version of `textblend`:
+
+```glsl
+vec6 textblend_comp(vec6 text_color, vec6 mask, vec6 dest) {
+  return over_comp(in_comp(text_color, mask), dest);
+}
+```
+
+This results in the following set of equations:
+
+```glsl
+result.r = text_color.r * mask.a_r + (1.0 - text_color.a_r * mask.a_r) * dest.r;
+result.g = text_color.g * mask.a_g + (1.0 - text_color.a_g * mask.a_g) * dest.g;
+result.b = text_color.b * mask.a_b + (1.0 - text_color.a_b * mask.a_b) * dest.b;
+result.a_r = text_color.a_r * mask.a_r + (1.0 - text_color.a_r * mask.a_r) * dest.a_r;
+result.a_g = text_color.a_g * mask.a_g + (1.0 - text_color.a_g * mask.a_g) * dest.a_g;
+result.a_b = text_color.a_b * mask.a_b + (1.0 - text_color.a_b * mask.a_b) * dest.a_b;
+```
+
+### Back to the real world
+
+If we want to transfer the component alpha blend equation into the real world, we need to make a few small changes:
+
+ - Our text color only needs one alpha value.
+   So we'll replace all instances of `text_color.a_r/g/b` with `text_color.a`.
+ - We're currently not making use of the mask's `r`, `g` and `b` values, only of the `a_r`, `a_g` and `a_b` values.
+   So in the real world, we can use the rgb channels of `mask` to store those component alphas and
+   replace `mask.a_r/g/b` with `mask.r/g/b`.
+
+These two changes give us:
+
+```glsl
+result.r = text_color.r * mask.r + (1.0 - text_color.a * mask.r) * dest.r;
+result.g = text_color.g * mask.g + (1.0 - text_color.a * mask.g) * dest.g;
+result.b = text_color.b * mask.b + (1.0 - text_color.a * mask.b) * dest.b;
+result.a_r = text_color.a * mask.r + (1.0 - text_color.a * mask.r) * dest.a_r;
+result.a_g = text_color.a * mask.g + (1.0 - text_color.a * mask.g) * dest.a_g;
+result.a_b = text_color.a * mask.b + (1.0 - text_color.a * mask.b) * dest.a_b;
+```
+
+There's a third change we need to make:
+
+ - We're rendering to a destination surface that only has one alpha channel instead of three.
+   So `dest.a_r/g/b` and `result.a_r/g/b` will need to become `dest.a` and `result.a`.
+
+This creates a problem: We're currently assigning different values to `result.a_r`, `result.a_g` and `result.a_b`.
+Which of them should we use to compute `result.a`?
+
+This question does not have an answer. One alpha value per pixel is simply not sufficient
+to express the same information as three alpha values.
+
+However, see what happens if the destination is already opaque:
+
+We have `dest.a_r == 1`, `dest.a_g == 1`, and `dest.a_b == 1`.
+
+```
+result.a_r = text_color.a * mask.r + (1 - text_color.a * mask.r) * dest.a_r
+           = text_color.a * mask.r + (1 - text_color.a * mask.r) * 1
+           = text_color.a * mask.r + 1 - text_color.a * mask.r
+           = 1
+same for result.a_g and result.a_b
+```
+
+In other words, for opaque destinations, it doesn't matter which channel of the mask we use when computing `result.a`, the result will always be completely opaque anyways. In WebRender we just pick `mask.g` (or rather,
+have font rasterization set `mask.a` to the value of `mask.g`) because it's as good as any.
+
+The takeaway here is: **Subpixel text blending is only supported for opaque destinations.** Attempting to render subpixel
+text into partially transparent destinations will result in bad alpha values. Or rather, it will result in alpha values which
+are not anticipated by the r, g, and b values in the same pixel, so that subsequent blend operations, which will mix r and a values
+from the same pixel, will produce incorrect colors.
+
+Here's the final subpixel blend function:
+
+```glsl
+vec4 subpixeltextblend(vec4 text_color, vec4 mask, vec4 dest) {
+  vec4 result;
+  result.r = text_color.r * mask.r + (1.0 - text_color.a * mask.r) * dest.r;
+  result.g = text_color.g * mask.g + (1.0 - text_color.a * mask.g) * dest.g;
+  result.b = text_color.b * mask.b + (1.0 - text_color.a * mask.b) * dest.b;
+  result.a = text_color.a * mask.a + (1.0 - text_color.a * mask.a) * dest.a;
+  return result;
+}
+```
+
+or for short:
+
+```glsl
+vec4 subpixeltextblend(vec4 text_color, vec4 mask, vec4 dest) {
+  return text_color * mask + (1.0 - text_color.a * mask) * dest;
+}
+```
+
+To recap, here's what we gained and lost by making the transition from the full-component-alpha world to the
+regular rgba world: All colors and textures now only need four values to be represented, we still use a
+component alpha mask, and the results are equivalent to the full-component-alpha result assuming that the
+destination is opaque. We lost the ability to draw to partially transparent destinations.
+
+### Making this work in OpenGL
+
+We have the complete subpixel blend function.
+Now we need to cut it into pieces and mix it with the OpenGL blend pipeline in such a way that
+the fragment shader does not need to know about the destination.
+
+Compare the equation for the red channel and the alpha channel between the two ways of text blending:
+
+```
+  single-channel alpha:
+    result.r = text_color.r * mask.a + (1.0 - text_color.a * mask.a) * dest.r
+    result.a = text_color.a * mask.a + (1.0 - text_color.a * mask.a) * dest.a
+
+  component alpha:
+    result.r = text_color.r * mask.r + (1.0 - text_color.a * mask.r) * dest.r
+    result.a = text_color.a * mask.a + (1.0 - text_color.a * mask.a) * dest.a
+```
+
+Notably, in the single-channel alpha case, all three destination color channels are multiplied with the same thing:
+`(1.0 - text_color.a * mask.a)`. This factor also happens to be "one minus `oFragColor.a`".
+So we were able to take advantage of OpenGL's `ONE_MINUS_SRC_ALPHA` blend func.
+
+In the component alpha case, we're not so lucky: Each destination color channel
+is multiplied with a different factor. We can use `ONE_MINUS_SRC_COLOR` instead,
+and output `text_color.a * mask` from our fragment shader.
+But then there's still the problem that the first summand of the computation for `result.r` uses
+`text_color.r * mask.r` and the second summand uses `text_color.a * mask.r`.
+
+There are multiple ways to deal with this. They are:
+
+ 1. Making use of `glBlendColor` and the `GL_CONSTANT_COLOR` blend func.
+ 2. Using a two-pass method.
+ 3. Using "dual source blending".
+
+Let's look at them in order.
+
+#### 1. Subpixel text blending in OpenGL using `glBlendColor`
+
+In this approach we return `text_color.a * mask` from the shader.
+Then we set the blend color to `text_color / text_color.a` and use `GL_CONSTANT_COLOR` as the source blendfunc.
+This results in the following blend equation:
+
+```
+result.r = (text_color.r / text_color.a) * oFragColor.r + (1 - oFragColor.r) * dest.r;
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^                ^  ^^^^^^^^^^^^^^^^^
+                         |                              |      |
+                         +--gl::CONSTANT_COLOR          |      +-- gl::ONE_MINUS_SRC_COLOR
+                                                        |
+                                                        +-- gl::FUNC_ADD
+
+         = (text_color.r / text_color.a) * (text_color.a * mask.r) + (1 - (text_color.a * mask.r)) * dest.r
+         = text_color.r * mask.r + (1 - text_color.a * mask.r) * dest.r
+```
+
+At the very beginning of this document, we defined `text_color` as the *premultiplied* text color.
+So instead of actually doing the calculation `text_color.r / text_color.a` when specifying the blend color,
+we really just want to use the *unpremultiplied* text color in that place.
+That's usually the representation we start with anyway.
+
+#### 2. Two-pass subpixel blending in OpenGL
+
+The `glBlendColor` method has the disadvantage that the text color is part of the OpenGL state.
+So if we want to draw text with different colors, we have to use separate batches / draw calls
+to draw the differently-colored parts of text.
+
+Alternatively, we can use a two-pass method which avoids the need to use the `GL_CONSTANT_COLOR` blend func:
+
+ - The first pass outputs `text_color.a * mask` from the fragment shader and
+   uses `gl::ZERO, gl::ONE_MINUS_SRC_COLOR` as the glBlendFuncs. This achieves:
+
+```
+oFragColor = text_color.a * mask;
+
+result_after_pass0.r = 0 * oFragColor.r + (1 - oFragColor.r) * dest.r
+                     = (1 - text_color.a * mask.r) * dest.r
+
+result_after_pass0.g = 0 * oFragColor.g + (1 - oFragColor.g) * dest.g
+                     = (1 - text_color.a * mask.g) * dest.g
+
+...
+```
+
+ - The second pass outputs `text_color * mask` from the fragment shader and uses
+   `gl::ONE, gl::ONE` as the glBlendFuncs. This results in the correct overall blend equation.
+
+```
+oFragColor = text_color * mask;
+
+result_after_pass1.r
+ = 1 * oFragColor.r + 1 * result_after_pass0.r
+ = text_color.r * mask.r + result_after_pass0.r
+ = text_color.r * mask.r + (1 - text_color.a * mask.r) * dest.r
+```
+
+#### 3. Dual source subpixel blending in OpenGL
+
+The third approach is similar to the second approach, but makes use of the [`ARB_blend_func_extended`](https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_blend_func_extended.txt) extension
+in order to fold the two passes into one:
+Instead of outputting the two different colors in two separate passes, we output them from the same pass,
+as two separate fragment shader outputs.
+Those outputs can then be treated as two different sources in the blend equation.
+
+## Subpixel Text Rendering to Transparent Destinations with a Background Color Hint
+
+### Motivation
+
+As we've seen in the previous section, subpixel text drawing has the limitation that it only works on opaque destinations.
+
+In other words, if you use the `subpixeltextblend` function to draw something to a transparent surface,
+and then composite that surface onto an opaque background,
+the result will generally be different from drawing the text directly onto the opaque background.
+
+Let's express that inequality in code.
+
+```
+ - vec4 text_color
+ - vec4 mask
+ - vec4 transparency = vec4(0.0, 0.0, 0.0, 0.0)
+ - vec4 background with background.a == 1.0
+
+over(subpixeltextblend(text_color, mask, transparency), background).rgb
+ is, in general, not equal to
+subpixeltextblend(text_color, mask, background).rgb
+```
+
+However, one interesting observation is that if the background is black, the two *are* equal:
+
+```
+vec4 black = vec4(0.0, 0.0, 0.0, 1.0);
+
+over(subpixeltextblend(text_color, mask, transparency), black).r
+ = subpixeltextblend(text_color, mask, transparency).r +
+     (1 - subpixeltextblend(text_color, mask, transparency).a) * black.r
+ = subpixeltextblend(text_color, mask, transparency).r +
+     (1 - subpixeltextblend(text_color, mask, transparency).a) * 0
+ = subpixeltextblend(text_color, mask, transparency).r
+ = text_color.r * mask.r + (1 - text_color.a * mask.r) * transparency.r
+ = text_color.r * mask.r + (1 - text_color.a * mask.r) * 0
+ = text_color.r * mask.r + (1 - text_color.a * mask.r) * black.r
+ = subpixeltextblend(text_color, mask, black).r
+```
+
+So it works out for black backgrounds. The further your *actual* background color gets away from black,
+the more incorrect your result will be.
+
+If it works for black, is there a way to make it work for other colors?
+This is the motivating question for this third way of text blending:
+
+We want to be able to specify an *estimated background color*, and have a blending function
+`vec4 subpixeltextblend_withbgcolor(vec4 text_color, vec4 mask, vec4 bg_color, vec4 dest)`,
+in such a way that the error we get by using an intermediate surface is somehow in relation
+to the error we made when estimating the background color. In particular, if we estimated
+the background color perfectly, we want the intermediate surface to go unnoticed.
+
+Expressed as code:
+
+```
+over(subpixeltextblend_withbgcolor(text_color, mask, bg_color, transparency), bg_color)
+ should always be equal to
+subpixeltextblend(text_color, mask, bg_color)
+```
+
+This is one of three constraints we'd like `subpixeltextblend_withbgcolor` to satisfy.
+
+The next constraint is the following: If `dest` is already opaque, `subpixeltextblend_withbgcolor`
+should have the same results as `subpixeltextblend`, and the background color hint should be ignored.
+
+```
+ If dest.a == 1.0,
+subpixeltextblend_withbgcolor(text_color, mask, bg_color, dest)
+ should always be equal to
+subpixeltextblend(text_color, mask, dest)
+```
+
+And there's a third condition we'd like it to fulfill:
+In places where the mask is zero, the destination should be unaffected.
+
+```
+subpixeltextblend_withbgcolor(text_color, transparency, bg_color, dest)
+ should always be equal to
+dest
+```
+
+### Use cases
+
+The primary use case for such a blend method is text on top of vibrant areas of a window on macOS.
+
+Vibrant backgrounds with behind-window blending are computed by the window server, and they are tinted
+in a color that's based on the chosen vibrancy type.
+
+The window's rgba buffer is transparent in the vibrant areas. Window contents, even text, are drawn onto
+that transparent rgba buffer. Then the window server composites the window onto an opaque backdrop.
+So the results on the screen are computed as follows:
+
+```glsl
+window_buffer_pixel = subpixeltextblend_withbgcolor(text_color, mask, bg_color, transparency);
+screen_pixel = over(window_buffer_pixel, window_backdrop);
+```
+
+### Prior art
+
+Apple has implemented such a method of text blending in CoreGraphics, specifically for rendering text onto vibrant backgrounds.
+It's hidden behind the private API `CGContextSetFontSmoothingBackgroundColor` and is called by AppKit internally before
+calling the `-[NSView drawRect:]` method of your `NSVisualEffectView`, with the appropriate font smoothing background color
+for the vibrancy type of that view.
+
+I'm not aware of any public documentation of this way of text blending.
+It seems to be considered an implementation detail by Apple, and is probably hidden by default because it can be a footgun:
+If the font smoothing background color you specify is very different from the actual background that our surface is placed
+on top of, the text will look glitchy.
+
+### Deriving the blending function from first principles
+
+Before we dive into the math, let's repeat our goal once more.
+
+We want to create a blending function of the form
+`vec4 subpixeltextblend_withbgcolor(vec4 text_color, vec4 mask, vec4 bg_color, vec4 dest)`
+(with `bg_color` being an opaque color)
+which satisfies the following three constraints:
+
+```
+Constraint I:
+  over(subpixeltextblend_withbgcolor(text_color, mask, bg_color, transparency), bg_color)
+   should always be equal to
+  subpixeltextblend(text_color, mask, bg_color)
+
+Constraint II:
+   If dest.a == 1.0,
+  subpixeltextblend_withbgcolor(text_color, mask, bg_color, dest)
+   should always be equal to
+  subpixeltextblend(text_color, mask, dest)
+
+Constraint III:
+  subpixeltextblend_withbgcolor(text_color, transparency, bg_color, dest)
+   should always be equal to
+  dest
+```
+
+Constraint I and constraint II are about what happens depending on the destination's alpha.
+In particular: If the destination is completely transparent, we should blend into the
+estimated background color, and if it's completely opaque, we should blend into the destination color.
+In fact, we really want to blend into `over(dest, bg_color)`: we want `bg_color` to be used
+as a backdrop *behind* the current destination. So let's combine constraints I and II into a new
+constraint IV:
+
+```
+Constraint IV:
+  over(subpixeltextblend_withbgcolor(text_color, mask, bg_color, dest), bg_color)
+   should always be equal to
+  subpixeltextblend(text_color, mask, over(dest, bg_color))
+```
+
+Let's look at just the left side of that equation and rejiggle it a bit:
+
+```
+over(subpixeltextblend_withbgcolor(text_color, mask, bg_color, dest), bg_color).r
+ = subpixeltextblend_withbgcolor(text_color, mask, bg_color, dest).r +
+   (1 - subpixeltextblend_withbgcolor(text_color, mask, bg_color, dest).a) * bg_color.r
+
+<=>
+
+over(subpixeltextblend_withbgcolor(text_color, mask, bg_color, dest), bg_color).r -
+(1 - subpixeltextblend_withbgcolor(text_color, mask, bg_color, dest).a) * bg_color.r
+ = subpixeltextblend_withbgcolor(text_color, mask, bg_color, dest).r
+```
+
+Now insert the right side of constraint IV:
+
+```
+subpixeltextblend_withbgcolor(text_color, mask, bg_color, dest).r
+ = over(subpixeltextblend_withbgcolor(text_color, mask, bg_color, dest), bg_color).r -
+   (1 - subpixeltextblend_withbgcolor(text_color, mask, bg_color, dest).a) * bg_color.r
+ = subpixeltextblend(text_color, mask, over(dest, bg_color)).r -
+   (1 - subpixeltextblend_withbgcolor(text_color, mask, bg_color, dest).a) * bg_color.r
+```
+
+Our blend function is almost finished. We just need to select an alpha for our result.
+Constraints I, II and IV don't really care about the alpha value. But constraint III requires that:
+
+```
+  subpixeltextblend_withbgcolor(text_color, transparency, bg_color, dest).a
+   should always be equal to
+  dest.a
+```
+
+so the computation of the alpha value somehow needs to take into account the mask.
+
+Let's say we have an unknown function `make_alpha(text_color.a, mask)` which returns
+a number between 0 and 1 and which is 0 if the mask is entirely zero, and let's defer
+the actual implementation of that function until later.
+
+Now we can define the alpha of our overall function using the `over` function:
+
+```
+subpixeltextblend_withbgcolor(text_color, mask, bg_color, dest).a
+ := make_alpha(text_color.a, mask) + (1 - make_alpha(text_color.a, mask)) * dest.a
+```
+
+We can plug this in to our previous result:
+
+```
+subpixeltextblend_withbgcolor(text_color, mask, bg_color, dest).r
+ = subpixeltextblend(text_color, mask, over(dest, bg_color)).r
+   - (1 - subpixeltextblend_withbgcolor(text_color, mask, bg_color, dest).a) * bg_color.r
+ = subpixeltextblend(text_color, mask, over(dest, bg_color)).r
+   - (1 - (make_alpha(text_color.a, mask) +
+           (1 - make_alpha(text_color.a, mask)) * dest.a)) * bg_color.r
+ = text_color.r * mask.r + (1 - text_color.a * mask.r) * over(dest, bg_color).r
+   - (1 - (make_alpha(text_color.a, mask)
+           + (1 - make_alpha(text_color.a, mask)) * dest.a)) * bg_color.r
+ = text_color.r * mask.r
+   + (1 - text_color.a * mask.r) * (dest.r + (1 - dest.a) * bg_color.r)
+   - (1 - (make_alpha(text_color.a, mask)
+           + (1 - make_alpha(text_color.a, mask)) * dest.a)) * bg_color.r
+ = text_color.r * mask.r
+   + (1 - text_color.a * mask.r) * (dest.r + (1 - dest.a) * bg_color.r)
+   - (1 - (make_alpha(text_color.a, mask)
+           + (1 - make_alpha(text_color.a, mask)) * dest.a)) * bg_color.r
+ = text_color.r * mask.r
+   + (dest.r + (1 - dest.a) * bg_color.r)
+   - (text_color.a * mask.r) * (dest.r + (1 - dest.a) * bg_color.r)
+   - (1 - make_alpha(text_color.a, mask)
+      - (1 - make_alpha(text_color.a, mask)) * dest.a) * bg_color.r
+ = text_color.r * mask.r
+   + dest.r + (1 - dest.a) * bg_color.r
+   - text_color.a * mask.r * dest.r
+   - text_color.a * mask.r * (1 - dest.a) * bg_color.r
+   - (1 - make_alpha(text_color.a, mask)
+      - (1 - make_alpha(text_color.a, mask)) * dest.a) * bg_color.r
+ = text_color.r * mask.r
+   + dest.r + (1 - dest.a) * bg_color.r
+   - text_color.a * mask.r * dest.r
+   - text_color.a * mask.r * (1 - dest.a) * bg_color.r
+   - ((1 - make_alpha(text_color.a, mask)) * 1
+      - (1 - make_alpha(text_color.a, mask)) * dest.a) * bg_color.r
+ = text_color.r * mask.r
+   + dest.r + (1 - dest.a) * bg_color.r
+   - text_color.a * mask.r * dest.r
+   - text_color.a * mask.r * (1 - dest.a) * bg_color.r
+   - ((1 - make_alpha(text_color.a, mask)) * (1 - dest.a)) * bg_color.r
+ = text_color.r * mask.r
+   + dest.r - text_color.a * mask.r * dest.r
+   + (1 - dest.a) * bg_color.r
+   - text_color.a * mask.r * (1 - dest.a) * bg_color.r
+   - (1 - make_alpha(text_color.a, mask)) * (1 - dest.a) * bg_color.r
+ = text_color.r * mask.r
+   + (1 - text_color.a * mask.r) * dest.r
+   + (1 - dest.a) * bg_color.r
+   - text_color.a * mask.r * (1 - dest.a) * bg_color.r
+   - (1 - make_alpha(text_color.a, mask)) * (1 - dest.a) * bg_color.r
+ = text_color.r * mask.r
+   + (1 - text_color.a * mask.r) * dest.r
+   + (1 - text_color.a * mask.r) * (1 - dest.a) * bg_color.r
+   - (1 - make_alpha(text_color.a, mask)) * (1 - dest.a) * bg_color.r
+ = text_color.r * mask.r
+   + (1 - text_color.a * mask.r) * dest.r
+   + ((1 - text_color.a * mask.r)
+      - (1 - make_alpha(text_color.a, mask))) * (1 - dest.a) * bg_color.r
+ = text_color.r * mask.r
+   + (1 - text_color.a * mask.r) * dest.r
+   + (1 - text_color.a * mask.r
+      - 1 + make_alpha(text_color.a, mask)) * (1 - dest.a) * bg_color.r
+ = text_color.r * mask.r
+   + (1 - text_color.a * mask.r) * dest.r
+   + (make_alpha(text_color.a, mask) - text_color.a * mask.r) * (1 - dest.a) * bg_color.r
+```
+
+We now have a term of the form `A + B + C`, with `A` and `B` being guaranteed to
+be between zero and one.
+
+We also want `C` to be between zero and one.
+We can use this restriction to help us decide on an implementation of `make_alpha`.
+
+If we define `make_alpha` as
+
+```glsl
+float make_alpha(text_color_a, mask) {
+  float max_rgb = max(max(mask.r, mask.g), mask.b);
+  return text_color_a * max_rgb;
+}
+```
+
+, then `(make_alpha(text_color.a, mask) - text_color.a * mask.r)` becomes
+`(text_color.a * max(max(mask.r, mask.g), mask.b) - text_color.a * mask.r)`, which is
+`text_color.a * (max(max(mask.r, mask.g), mask.b) - mask.r)`, and the subtraction will
+always yield something that's greater than or equal to zero for r, g, and b,
+because we will subtract each channel from the maximum of the channels.
+
+Putting this all together, we have:
+
+```glsl
+vec4 subpixeltextblend_withbgcolor(vec4 text_color, vec4 mask, vec4 bg_color, vec4 dest) {
+  float max_rgb = max(max(mask.r, mask.g), mask.b);
+  vec4 result;
+  result.r = text_color.r * mask.r + (1 - text_color.a * mask.r) * dest.r +
+             text_color.a * bg_color.r * (max_rgb - mask.r) * (1 - dest.a);
+  result.g = text_color.g * mask.g + (1 - text_color.a * mask.g) * dest.g +
+             text_color.a * bg_color.g * (max_rgb - mask.g) * (1 - dest.a);
+  result.b = text_color.b * mask.b + (1 - text_color.a * mask.b) * dest.b +
+             text_color.a * bg_color.b * (max_rgb - mask.b) * (1 - dest.a);
+  result.a = text_color.a * max_rgb + (1 - text_color.a * max_rgb) * dest.a;
+  return result;
+}
+```
+
+This is the final form of this blend function. It satisfies all four constraints.
+
+### Implementing it with OpenGL
+
+Our color channel equations consist of three pieces:
+
+ - `text_color.r * mask.r`, which simply gets added to the rest.
+ - `(1 - text_color.a * mask.r) * dest.r`, a factor which gets multiplied with the destination color.
+ - `text_color.a * bg_color.r * (max_rgb - mask.r) * (1 - dest.a)`, a factor which gets multiplied
+   with "one minus destination alpha".
+
+We will need three passes. Each pass modifies the color channels in the destination.
+This means that the part that uses `dest.r` needs to be applied first.
+Then we can apply the part that uses `1 - dest.a`.
+(This means that the first pass needs to leave `dest.a` untouched.)
+And the final pass can apply the `result.a` equation and modify `dest.a`.
+
+```
+pub fn set_blend_mode_subpixel_with_bg_color_pass0(&self) {
+    self.gl.blend_func_separate(gl::ZERO, gl::ONE_MINUS_SRC_COLOR, gl::ZERO, gl::ONE);
+}
+pub fn set_blend_mode_subpixel_with_bg_color_pass1(&self) {
+    self.gl.blend_func_separate(gl::ONE_MINUS_DST_ALPHA, gl::ONE, gl::ZERO, gl::ONE);
+}
+pub fn set_blend_mode_subpixel_with_bg_color_pass2(&self) {
+    self.gl.blend_func_separate(gl::ONE, gl::ONE, gl::ONE, gl::ONE_MINUS_SRC_ALPHA);
+}
+
+Pass0:
+    oFragColor = vec4(text.color.a) * mask;
+Pass1:
+    oFragColor = vec4(text.color.a) * text.bg_color * (vec4(mask.a) - mask);
+Pass2:
+    oFragColor = text.color * mask;
+
+result_after_pass0.r = 0 * (text_color.a * mask.r) + (1 - text_color.a * mask.r) * dest.r
+result_after_pass0.a = 0 * (text_color.a * mask.a) + 1 * dest.a
+
+result_after_pass1.r = (1 - result_after_pass0.a) * (text_color.a * (mask.max_rgb - mask.r) * bg_color.r) + 1 * result_after_pass0.r
+result_after_pass1.a = 0 * (text_color.a * (mask.max_rgb - mask.a) * bg_color.a) + 1 * result_after_pass0.a
+
+result_after_pass2.r = 1 * (text_color.r * mask.r) + 1 * result_after_pass1.r
+result_after_pass2.a = 1 * (text_color.a * mask.max_rgb) + (1 - text_color.a * mask.max_rgb) * result_after_pass1.a
+```
+
+Instead of computing `max_rgb` in the shader, we can just require the font rasterization code to fill
+`mask.a` with the `max_rgb` value.
+
diff --git a/gfx/wr/webrender/res/Proggy.ttf b/gfx/wr/webrender/res/Proggy.ttf
new file mode 100644
index 0000000000..308d3e1ac9
--- /dev/null
+++ b/gfx/wr/webrender/res/Proggy.ttf
diff --git a/gfx/wr/webrender/res/area-lut.tga b/gfx/wr/webrender/res/area-lut.tga
new file mode 100644
index 0000000000..5edcddc3d1
--- /dev/null
+++ b/gfx/wr/webrender/res/area-lut.tga
diff --git a/gfx/wr/webrender/res/base.glsl b/gfx/wr/webrender/res/base.glsl
new file mode 100644
index 0000000000..e381ff6ca9
--- /dev/null
+++ b/gfx/wr/webrender/res/base.glsl
@@ -0,0 +1,70 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#if defined(GL_ES)
+    #if GL_ES == 1
+        // Sampler default precision is lowp on mobile GPUs.
+        // This causes RGBA32F texture data to be clamped to 16 bit floats on some GPUs (e.g. Mali-T880).
+        // Define highp precision macro to allow lossless FLOAT texture sampling.
+        #define HIGHP_SAMPLER_FLOAT highp
+
+        // Default int precision in GLES 3 is highp (32 bits) in vertex shaders
+        // and mediump (16 bits) in fragment shaders. If an int is used as a
+        // texel address in a fragment shader, and therefore requires > 16 bits,
+        // it must be qualified with this.
+        #define HIGHP_FS_ADDRESS highp
+
+        // texelFetchOffset is buggy on some Android GPUs (see issue #1694).
+        // Fall back to texelFetch with a manually added offset on mobile GPUs.
+        #define TEXEL_FETCH(sampler, position, lod, offset) texelFetch(sampler, position + offset, lod)
+    #else
+        #define HIGHP_SAMPLER_FLOAT
+        #define HIGHP_FS_ADDRESS
+        #define TEXEL_FETCH(sampler, position, lod, offset) texelFetchOffset(sampler, position, lod, offset)
+    #endif // GL_ES == 1
+#else
+    #define HIGHP_SAMPLER_FLOAT
+    #define HIGHP_FS_ADDRESS
+    #if defined(PLATFORM_MACOS) && !defined(SWGL)
+        // texelFetchOffset introduces a variety of shader compilation bugs on macOS Intel so avoid it.
+        #define TEXEL_FETCH(sampler, position, lod, offset) texelFetch(sampler, position + offset, lod)
+    #else
+        #define TEXEL_FETCH(sampler, position, lod, offset) texelFetchOffset(sampler, position, lod, offset)
+    #endif
+#endif // defined(GL_ES)
+
+#ifdef SWGL // Feature macros that are only defined when compiling for SWGL.
+    #define SWGL_DRAW_SPAN
+    #define SWGL_CLIP_MASK
+    #define SWGL_ANTIALIAS
+    #define SWGL_BLEND
+    #define SWGL_CLIP_DIST
+#endif // SWGL
+
+#ifdef WR_VERTEX_SHADER
+    #ifdef SWGL
+        // Annotate a vertex attribute as being flat per each drawn primitive instance.
+        // SWGL can use this information to avoid redundantly loading the attribute in all SIMD lanes.
+        #define PER_INSTANCE flat
+    #else
+        #define PER_INSTANCE
+    #endif
+
+    #if __VERSION__ != 100 // In vertex shaders, `varying` becomes `out` and `attribute` becomes `in`.
+        #define varying out
+        #define attribute in
+    #endif
+#endif // WR_VERTEX_SHADER
+
+#ifdef WR_FRAGMENT_SHADER
+    precision highp float; // Default float precision for fragment shaders.
+    #if __VERSION__ != 100 // In fragment shaders, `varying` becomes `in`.
+        #define varying in
+    #endif
+#endif // WR_FRAGMENT_SHADER
+
+// Flat interpolation is not supported on ESSL 1, so make the `flat` qualifier expand to nothing there.
+#if __VERSION__ == 100
+    #define flat
+#endif
diff --git a/gfx/wr/webrender/res/blend.glsl b/gfx/wr/webrender/res/blend.glsl
new file mode 100644
index 0000000000..2deed01143
--- /dev/null
+++ b/gfx/wr/webrender/res/blend.glsl
@@ -0,0 +1,238 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#define COMPONENT_TRANSFER_IDENTITY 0
+#define COMPONENT_TRANSFER_TABLE 1
+#define COMPONENT_TRANSFER_DISCRETE 2
+#define COMPONENT_TRANSFER_LINEAR 3
+#define COMPONENT_TRANSFER_GAMMA 4
+
+// Must be kept in sync with `Filter::as_int` in internal_types.rs.
+// Not all filters are defined here because some filters use different shaders.
+#define FILTER_CONTRAST            0
+#define FILTER_GRAYSCALE           1
+#define FILTER_HUE_ROTATE          2
+#define FILTER_INVERT              3
+#define FILTER_SATURATE            4
+#define FILTER_SEPIA               5
+#define FILTER_BRIGHTNESS          6
+#define FILTER_COLOR_MATRIX        7
+#define FILTER_SRGB_TO_LINEAR      8
+#define FILTER_LINEAR_TO_SRGB      9
+#define FILTER_FLOOD               10
+#define FILTER_COMPONENT_TRANSFER  11
+
+#ifdef WR_VERTEX_SHADER
+void SetupFilterParams( // Fills in the color matrix / offset / table address that filter `op` needs.
+    int op, // One of the FILTER_* constants.
+    float amount, // Filter strength; for FILTER_HUE_ROTATE it is fed to cos()/sin() as an angle.
+    int gpu_data_address, // GPU cache address read by the color-matrix, component-transfer and flood branches.
+    out vec4 color_offset, // Written by the matrix-style and flood branches only.
+    out mat4 color_mat, // Written by the matrix-style branches only.
+    out highp int table_address // Written by the FILTER_COMPONENT_TRANSFER branch only.
+) {
+    float lumR = 0.2126; // Luminance weights for R, G and B (presumably Rec. 709 -- confirm).
+    float lumG = 0.7152;
+    float lumB = 0.0722;
+    float oneMinusLumR = 1.0 - lumR;
+    float oneMinusLumG = 1.0 - lumG;
+    float oneMinusLumB = 1.0 - lumB;
+    float invAmount = 1.0 - amount;
+
+    if (op == FILTER_GRAYSCALE) {
+        color_mat = mat4(
+            vec4(lumR + oneMinusLumR * invAmount, lumR - lumR * invAmount, lumR - lumR * invAmount, 0.0),
+            vec4(lumG - lumG * invAmount, lumG + oneMinusLumG * invAmount, lumG - lumG * invAmount, 0.0),
+            vec4(lumB - lumB * invAmount, lumB - lumB * invAmount, lumB + oneMinusLumB * invAmount, 0.0),
+            vec4(0.0, 0.0, 0.0, 1.0)
+        );
+        color_offset = vec4(0.0);
+    } else if (op ==  FILTER_HUE_ROTATE) {
+        float c = cos(amount);
+        float s = sin(amount);
+        color_mat = mat4(
+            vec4(lumR + oneMinusLumR * c - lumR * s, lumR - lumR * c + 0.143 * s, lumR - lumR * c - oneMinusLumR * s, 0.0),
+            vec4(lumG - lumG * c - lumG * s, lumG + oneMinusLumG * c + 0.140 * s, lumG - lumG * c + lumG * s, 0.0),
+            vec4(lumB - lumB * c + oneMinusLumB * s, lumB - lumB * c - 0.283 * s, lumB + oneMinusLumB * c + lumB * s, 0.0),
+            vec4(0.0, 0.0, 0.0, 1.0)
+        );
+        color_offset = vec4(0.0);
+    } else if (op ==   FILTER_SATURATE) {
+        color_mat = mat4(
+            vec4(invAmount * lumR + amount, invAmount * lumR, invAmount * lumR, 0.0),
+            vec4(invAmount * lumG, invAmount * lumG + amount, invAmount * lumG, 0.0),
+            vec4(invAmount * lumB, invAmount * lumB, invAmount * lumB + amount, 0.0),
+            vec4(0.0, 0.0, 0.0, 1.0)
+        );
+        color_offset = vec4(0.0);
+    } else if (op == FILTER_SEPIA) {
+        color_mat = mat4(
+            vec4(0.393 + 0.607 * invAmount, 0.349 - 0.349 * invAmount, 0.272 - 0.272 * invAmount, 0.0),
+            vec4(0.769 - 0.769 * invAmount, 0.686 + 0.314 * invAmount, 0.534 - 0.534 * invAmount, 0.0),
+            vec4(0.189 - 0.189 * invAmount, 0.168 - 0.168 * invAmount, 0.131 + 0.869 * invAmount, 0.0),
+            vec4(0.0, 0.0, 0.0, 1.0)
+        );
+        color_offset = vec4(0.0);
+    } else if (op == FILTER_COLOR_MATRIX) {
+        vec4 mat_data[4] = fetch_from_gpu_cache_4(gpu_data_address); // Four matrix columns...
+        vec4 offset_data = fetch_from_gpu_cache_1(gpu_data_address + 4); // ...followed by the offset block.
+        color_mat = mat4(mat_data[0], mat_data[1], mat_data[2], mat_data[3]);
+        color_offset = offset_data;
+    } else if (op == FILTER_COMPONENT_TRANSFER) {
+        table_address = gpu_data_address; // The tables themselves are read later, in ComponentTransfer().
+    } else if (op == FILTER_FLOOD) {
+        color_offset = fetch_from_gpu_cache_1(gpu_data_address); // The flood color is carried in color_offset.
+    } // Any other op leaves all three out parameters unassigned.
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+vec3 Contrast(vec3 Cs, float amount) { // Scales Cs around the 0.5 mid-point by `amount`, clamped to [0, 1].
+    return clamp(Cs * amount - 0.5 * amount + 0.5, 0.0, 1.0);
+}
+
+vec3 Invert(vec3 Cs, float amount) { // Blends from Cs (amount 0) towards its inverse 1 - Cs (amount 1).
+    return mix(Cs, vec3(1.0) - Cs, amount);
+}
+
+vec3 Brightness(vec3 Cs, float amount) {
+    // Scale the color by the brightness factor. The product has to be
+    // clamped to the output range because alpha is pre-multiplied in
+    // the shader afterwards.
+    return clamp(Cs * amount, vec3(0.0), vec3(1.0));
+}
+
+// Based on Gecko's implementation in
+// https://hg.mozilla.org/mozilla-central/file/91b4c3687d75/gfx/src/FilterSupport.cpp#l24
+// These could be made faster by sampling a lookup table stored in a float texture
+// with linear interpolation.
+
+vec3 SrgbToLinear(vec3 color) { // Per channel: linear segment up to 0.04045, power curve above it.
+    vec3 linear_segment = color / 12.92;
+    vec3 power_segment = pow(color / 1.055 + vec3(0.055 / 1.055), vec3(2.4));
+    return if_then_else(lessThanEqual(color, vec3(0.04045)), linear_segment, power_segment);
+}
+
+vec3 LinearToSrgb(vec3 color) { // Per channel: linear segment up to 0.0031308, power curve above it.
+    vec3 linear_segment = color * 12.92;
+    vec3 power_segment = vec3(1.055) * pow(color, vec3(1.0 / 2.4)) - vec3(0.055);
+    return if_then_else(lessThanEqual(color, vec3(0.0031308)), linear_segment, power_segment);
+}
+
+// This function has to be factored out due to the following issue:
+// https://github.com/servo/webrender/wiki/Driver-issues#bug-1532245---switch-statement-inside-control-flow-inside-switch-statement-fails-to-compile-on-some-android-phones
+// (and now the words "default: default:" so angle_shader_validation.rs passes)
+vec4 ComponentTransfer(vec4 colora, vec4 vfuncs, highp int table_address) {
+    // Applies a component transfer function to each channel of `colora`;
+    // `vfuncs` holds the COMPONENT_TRANSFER_* type for r, g, b and a (as floats).
+    // The amount of data pushed to the gpu cache depends on the function type:
+    // Identity => 0 blocks
+    // Table/Discrete => 64 blocks (256 values)
+    // Linear => 1 block (2 values)
+    // Gamma => 1 block (3 values)
+    // We loop through the color components and increment the offset (for the
+    // next color component) into the gpu cache based on how many blocks that
+    // function type put into the gpu cache.
+    // Table/Discrete use a 256 entry look up table; Linear/Gamma are a simple calculation.
+
+    // Both offset and k must be marked as highp due to an Adreno 3xx bug likely
+    // to do with converting between precisions (as they would otherwise be
+    // promoted when adding to table_address).
+    highp int offset = 0;
+    highp int k;
+
+    vec4 texel;
+
+    // Dynamically indexing a vector is buggy on some platforms, so use a temporary array
+    int[4] funcs = int[4](int(vfuncs.r), int(vfuncs.g), int(vfuncs.b), int(vfuncs.a));
+    for (int i = 0; i < 4; i++) {
+        switch (funcs[i]) {
+            case COMPONENT_TRANSFER_IDENTITY:
+                break;
+            case COMPONENT_TRANSFER_TABLE:
+            case COMPONENT_TRANSFER_DISCRETE: {
+                // fetch value from lookup table
+                k = int(floor(colora[i]*255.0 + 0.5)); // round the channel to a table index in 0..255
+                texel = fetch_from_gpu_cache_1(table_address + offset + k/4); // four table entries per block
+                colora[i] = clamp(texel[k % 4], 0.0, 1.0);
+                // offset plus 256/4 blocks
+                offset = offset + 64;
+                break;
+            }
+            case COMPONENT_TRANSFER_LINEAR: {
+                // fetch the two values for use in the linear equation
+                texel = fetch_from_gpu_cache_1(table_address + offset);
+                colora[i] = clamp(texel[0] * colora[i] + texel[1], 0.0, 1.0);
+                // offset plus 1 block
+                offset = offset + 1;
+                break;
+            }
+            case COMPONENT_TRANSFER_GAMMA: {
+                // fetch the three values for use in the gamma equation
+                texel = fetch_from_gpu_cache_1(table_address + offset);
+                colora[i] = clamp(texel[0] * pow(colora[i], texel[1]) + texel[2], 0.0, 1.0);
+                // offset plus 1 block
+                offset = offset + 1;
+                break;
+            }
+            default:
+                // shouldn't happen
+                break;
+        }
+    }
+    return colora; // each channel has been replaced in place and clamped to [0, 1]
+}
+
+void CalculateFilter( // Applies filter `op` to Cs; results come back through `color` and `alpha`.
+    vec4 Cs, // Input color; it is un-premultiplied below.
+    int op, // One of the FILTER_* constants.
+    float amount, // Strength passed to Contrast/Invert/Brightness.
+    highp int table_address, // GPU cache address handed to ComponentTransfer().
+    vec4 color_offset, // Flood color, or the offset added after the color matrix.
+    mat4 color_mat, // Matrix used by the default (color matrix type) case.
+    vec4 v_funcs, // Per-channel COMPONENT_TRANSFER_* types, stored as floats.
+    out vec3 color, // Resulting color; not multiplied by alpha here.
+    out float alpha // Resulting alpha.
+) {
+    // Un-premultiply the input.
+    alpha = Cs.a;
+    color = alpha != 0.0 ? Cs.rgb / alpha : Cs.rgb; // avoid dividing by zero for fully transparent input
+
+    switch (op) {
+        case FILTER_CONTRAST:
+            color = Contrast(color, amount);
+            break;
+        case FILTER_INVERT:
+            color = Invert(color, amount);
+            break;
+        case FILTER_BRIGHTNESS:
+            color = Brightness(color, amount);
+            break;
+        case FILTER_SRGB_TO_LINEAR:
+            color = SrgbToLinear(color);
+            break;
+        case FILTER_LINEAR_TO_SRGB:
+            color = LinearToSrgb(color);
+            break;
+        case FILTER_COMPONENT_TRANSFER: {
+            // Get the unpremultiplied color with alpha.
+            vec4 colora = vec4(color, alpha);
+            colora = ComponentTransfer(colora, v_funcs, table_address);
+            color = colora.rgb;
+            alpha = colora.a;
+            break;
+        }
+        case FILTER_FLOOD:
+            color = color_offset.rgb; // the input color is ignored entirely
+            alpha = color_offset.a;
+            break;
+        default:
+            // Color matrix type filters (sepia, hue-rotate, etc...)
+            vec4 result = color_mat * vec4(color, alpha) + color_offset;
+            result = clamp(result, vec4(0.0), vec4(1.0));
+            color = result.rgb;
+            alpha = result.a;
+    }
+}
+#endif
diff --git a/gfx/wr/webrender/res/brush.glsl b/gfx/wr/webrender/res/brush.glsl
new file mode 100644
index 0000000000..48b2286012
--- /dev/null
+++ b/gfx/wr/webrender/res/brush.glsl
@@ -0,0 +1,256 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/// # Brush vertex shaders memory layout
+///
+/// The overall memory layout is the same for all brush shaders.
+///
+/// The vertex shader receives a minimal amount of data from vertex attributes (packed into a single
+/// ivec4 per instance) and the rest is fetched from various uniform samplers using offsets decoded
+/// from the vertex attributes.
+///
+/// The diagram below shows the various pieces of data fetched in the vertex shader:
+///
+///```ascii
+///                                                                         (sPrimitiveHeadersI)
+///                          (VBO)                                     +-----------------------+
+/// +----------------------------+      +----------------------------> | Int header            |
+/// | Instance vertex attributes |      |        (sPrimitiveHeadersF)  |                       |
+/// |                            |      |     +---------------------+  |   z                   |
+/// | x: prim_header_address    +-------+---> | Float header        |  |   specific_address  +-----+
+/// | y: picture_task_address   +---------+   |                     |  |   transform_address +---+ |
+/// |    clip_address           +-----+   |   |    local_rect       |  |   user_data           | | |
+/// | z: flags                   |    |   |   |    local_clip_rect  |  +-----------------------+ | |
+/// |    segment_index           |    |   |   +---------------------+                            | |
+/// | w: resource_address       +--+  |   |                                                      | |
+/// +----------------------------+ |  |   |                                 (sGpuCache)          | |
+///                                |  |   |         (sGpuCache)          +------------+          | |
+///                                |  |   |   +---------------+          | Transform  | <--------+ |
+///                (sGpuCache)     |  |   +-> | Picture task  |          +------------+            |
+///            +-------------+     |  |       |               |                                    |
+///            |  Resource   | <---+  |       |         ...   |                                    |
+///            |             |        |       +---------------+   +--------------------------------+
+///            |             |        |                           |
+///            +-------------+        |             (sGpuCache)   v                        (sGpuCache)
+///                                   |       +---------------+  +--------------+---------------+-+-+
+///                                   +-----> | Clip area     |  | Brush data   |  Segment data | | |
+///                                           |               |  |              |               | | |
+///                                           |         ...   |  |         ...  |          ...  | | | ...
+///                                           +---------------+  +--------------+---------------+-+-+
+///```
+///
+/// - Segment data address is obtained by combining the address stored in the int header and the
+///   segment index decoded from the vertex attributes.
+/// - Resource data is optional, some brush types (such as images) store some extra data there while
+///   other brush types don't use it.
+///
+
+#if (defined(WR_FEATURE_ALPHA_PASS) || defined(WR_FEATURE_ANTIALIASING)) && !defined(SWGL_ANTIALIAS)
+varying vec2 v_local_pos; // vi.local_pos from the vertex shader; passed to init_transform_fs() in antialias_brush().
+#endif
+
+#ifdef WR_VERTEX_SHADER
+
+void brush_vs( // Brush-specific vertex code; declared here, defined by the shader that includes this file.
+    VertexInfo vi, // Result of write_vertex() for this vertex.
+    int prim_address, // ph.specific_prim_address.
+    RectWithEndpoint local_rect, // ph.local_rect.
+    RectWithEndpoint segment_rect, // Segment rect before any anti-aliasing adjustment.
+    ivec4 prim_user_data, // ph.user_data.
+    int specific_resource_address, // instance.resource_address.
+    mat4 transform, // transform.m.
+    PictureTask pic_task,
+    int brush_flags, // Low 12 bits of instance.flags (BRUSH_FLAG_*).
+    vec4 segment_data // Second vec4 of the segment, or vec4(0.0) when there is no segment.
+);
+
+// Forward-declare the text vertex shader entry point, which is currently
+// different from the one used by other brushes (brush_shader_main_vs).
+void text_shader_main( // Takes the same parameter list as brush_shader_main_vs.
+    Instance instance,
+    PrimitiveHeader ph,
+    Transform transform,
+    PictureTask task,
+    ClipArea clip_area
+);
+
+#define VECS_PER_SEGMENT                    2
+// Bits of brush_flags, i.e. the low 12 bits of instance.flags.
+#define BRUSH_FLAG_PERSPECTIVE_INTERPOLATION    1
+#define BRUSH_FLAG_SEGMENT_RELATIVE             2
+#define BRUSH_FLAG_SEGMENT_REPEAT_X             4
+#define BRUSH_FLAG_SEGMENT_REPEAT_Y             8
+#define BRUSH_FLAG_SEGMENT_REPEAT_X_ROUND      16
+#define BRUSH_FLAG_SEGMENT_REPEAT_Y_ROUND      32
+#define BRUSH_FLAG_SEGMENT_NINEPATCH_MIDDLE    64
+#define BRUSH_FLAG_TEXEL_RECT                 128
+#define BRUSH_FLAG_FORCE_AA                   256
+// Segment index meaning "no segment": the whole primitive rect is drawn instead.
+#define INVALID_SEGMENT_INDEX                   0xffff
+
+void brush_shader_main_vs( // Default vertex entry point for brush shaders (see WR_VERTEX_SHADER_MAIN_FUNCTION).
+    Instance instance,
+    PrimitiveHeader ph,
+    Transform transform,
+    PictureTask pic_task,
+    ClipArea clip_area
+) {
+    int edge_flags = (instance.flags >> 12) & 0xf; // Bits 12..15 of the flags.
+    int brush_flags = instance.flags & 0xfff; // Bits 0..11: BRUSH_FLAG_* values.
+
+    // Fetch the segment of this brush primitive we are drawing.
+    vec4 segment_data;
+    RectWithEndpoint segment_rect;
+    if (instance.segment_index == INVALID_SEGMENT_INDEX) {
+        segment_rect = ph.local_rect;
+        segment_data = vec4(0.0);
+    } else {
+        int segment_address = ph.specific_prim_address +
+                              VECS_PER_SPECIFIC_BRUSH +
+                              instance.segment_index * VECS_PER_SEGMENT;
+
+        vec4[2] segment_info = fetch_from_gpu_cache_2(segment_address);
+        segment_rect = RectWithEndpoint(segment_info[0].xy, segment_info[0].zw);
+        segment_rect.p0 += ph.local_rect.p0; // Both corners are offset by the primitive's origin.
+        segment_rect.p1 += ph.local_rect.p0;
+        segment_data = segment_info[1];
+    }
+
+    // Most of the time this is the segment rect, but when doing the edge AA
+    // it is inflated.
+    RectWithEndpoint adjusted_segment_rect = segment_rect;
+
+    bool antialiased = !transform.is_axis_aligned || ((brush_flags & BRUSH_FLAG_FORCE_AA) != 0);
+
+    // Write the normal vertex information out.
+    if (antialiased) {
+        adjusted_segment_rect = clip_and_init_antialiasing(
+            segment_rect,
+            ph.local_rect,
+            ph.local_clip_rect,
+            edge_flags,
+            ph.z,
+            transform,
+            pic_task
+        );
+
+        // The clip was taken into account in clip_and_init_antialiasing, remove
+        // it so that it doesn't interfere with the aa.
+        ph.local_clip_rect.p0 = vec2(-1.0e16);
+        ph.local_clip_rect.p1 = vec2(1.0e16);
+    } else {
+        // The common case for most CSS content.
+
+        // TODO(gw): transform bounds may be referenced by
+        //           the fragment shader when running in
+        //           the alpha pass, even on non-transformed
+        //           items. For now, just ensure it has no
+        //           effect. We can tidy this up as we move
+        //           more items to be brush shaders.
+#if defined(WR_FEATURE_ALPHA_PASS) && !defined(SWGL_ANTIALIAS)
+        init_transform_vs(vec4(vec2(-1.0e16), vec2(1.0e16)));
+#endif
+    }
+
+    // Select the corner of the local rect that we are processing.
+    vec2 local_pos = mix(adjusted_segment_rect.p0, adjusted_segment_rect.p1, aPosition.xy);
+
+    VertexInfo vi = write_vertex(
+        local_pos,
+        ph.local_clip_rect,
+        ph.z,
+        transform,
+        pic_task
+    );
+
+    // For brush instances in the alpha pass, always write
+    // out clip information.
+    // TODO(gw): It's possible that we might want alpha
+    //           shaders that don't clip in the future,
+    //           but it's reasonable to assume that one
+    //           implies the other, for now.
+    // SW-WR may decay some requests for alpha-pass shaders to
+    // the opaque version if only the clip-mask is required. In
+    // that case the opaque vertex shader must still write out
+    // the clip information, which is cheap to do for SWGL.
+#if defined(WR_FEATURE_ALPHA_PASS) || defined(SWGL_CLIP_MASK)
+    write_clip(
+        vi.world_pos,
+        clip_area,
+        pic_task
+    );
+#endif
+
+    // Run the specific brush VS code to write interpolators.
+    // Note that it receives the original segment_rect, not the adjusted one.
+    brush_vs(
+        vi,
+        ph.specific_prim_address,
+        ph.local_rect,
+        segment_rect,
+        ph.user_data,
+        instance.resource_address,
+        transform.m,
+        pic_task,
+        brush_flags,
+        segment_data
+    );
+#if (defined(WR_FEATURE_ALPHA_PASS) || defined(WR_FEATURE_ANTIALIASING)) && !defined(SWGL_ANTIALIAS)
+    v_local_pos = vi.local_pos;
+#endif
+}
+
+#ifndef WR_VERTEX_SHADER_MAIN_FUNCTION
+// If the entry-point was not overridden before including the brush shader, use
+// the default one. main() calls it with (instance, ph, transform, task, clip_area).
+#define WR_VERTEX_SHADER_MAIN_FUNCTION brush_shader_main_vs
+#endif
+
+void main(void) {
+    // Decode the instance attributes, then fetch the data they address.
+    Instance inst = decode_instance_attributes();
+    PrimitiveHeader header = fetch_prim_header(inst.prim_header_address);
+    Transform prim_transform = fetch_transform(header.transform_id);
+    PictureTask pic_task = fetch_picture_task(inst.picture_task_address);
+    ClipArea clip = fetch_clip_area(inst.clip_address);
+
+    WR_VERTEX_SHADER_MAIN_FUNCTION(inst, header, prim_transform, pic_task, clip);
+}
+
+#endif // WR_VERTEX_SHADER
+
+#ifdef WR_FRAGMENT_SHADER
+
+float antialias_brush() { // init_transform_fs(v_local_pos) when v_local_pos is compiled in, 1.0 otherwise.
+#if !(defined(WR_FEATURE_ALPHA_PASS) || defined(WR_FEATURE_ANTIALIASING)) || defined(SWGL_ANTIALIAS)
+    return 1.0;
+#else
+    return init_transform_fs(v_local_pos);
+#endif
+}
+
+Fragment brush_fs(); // Brush-specific fragment code; defined by the shader that includes this file.
+
+void main(void) {
+#ifdef WR_FEATURE_DEBUG_OVERDRAW
+    // Overdraw debugging: write the debug color and do not run the brush code.
+    oFragColor = WR_DEBUG_OVERDRAW_COLOR;
+#else
+    // Run the brush-specific fragment code.
+    Fragment result = brush_fs();
+
+#ifdef WR_FEATURE_ALPHA_PASS
+    // Scale the outputs by the clip mask value.
+    float clip_factor = do_clip();
+
+    #ifdef WR_FEATURE_DUAL_SOURCE_BLENDING
+        oFragBlend = result.blend * clip_factor;
+    #endif
+    result.color = result.color * clip_factor;
+#endif
+
+    write_output(result.color);
+#endif // WR_FEATURE_DEBUG_OVERDRAW
+}
+#endif
diff --git a/gfx/wr/webrender/res/brush_blend.glsl b/gfx/wr/webrender/res/brush_blend.glsl
new file mode 100644
index 0000000000..529267f0e7
--- /dev/null
+++ b/gfx/wr/webrender/res/brush_blend.glsl
@@ -0,0 +1,119 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#define VECS_PER_SPECIFIC_BRUSH 3
+#define WR_FEATURE_TEXTURE_2D
+
+#include shared,prim_shared,brush,blend
+
+// Interpolated UV coordinates to sample.
+varying vec2 v_uv;
+
+// Normalized bounds of the source image in the texture, adjusted to avoid
+// sampling artifacts.
+flat varying vec4 v_uv_sample_bounds;
+
+// x: Flag to allow perspective interpolation of UV.
+// y: Filter-dependent "amount" parameter.
+// Packed into a vector to work around bug 1630356.
+flat varying vec2 v_perspective_amount;
+#define v_perspective v_perspective_amount.x
+#define v_amount v_perspective_amount.y
+
+// x: Blend op, y: Lookup table GPU cache address.
+// Packed into a vector to work around bug 1630356.
+// Must be explicitly marked as highp, as the default integer precision in
+// fragment shaders is mediump, which may only be 16 bits in ESSL 3, and a GPU
+// cache address can exceed the maximum value representable at that precision.
+flat varying highp ivec2 v_op_table_address_vec;
+#define v_op v_op_table_address_vec.x
+#define v_table_address v_op_table_address_vec.y
+
+flat varying mat4 v_color_mat;
+// The function to use for each component of a component transfer filter. Using an int[4]
+// or ivec4 (with each element or component containing the function for each component) has
+// run into bugs 1695912 and 1731758, so instead use a vec4 and cast the values to/from floats.
+flat varying vec4 v_funcs;
+flat varying vec4 v_color_offset;
+
+#ifdef WR_VERTEX_SHADER
+void brush_vs(
+    VertexInfo vi,
+    int prim_address,
+    RectWithEndpoint local_rect,
+    RectWithEndpoint segment_rect,
+    ivec4 prim_user_data,
+    int specific_resource_address,
+    mat4 transform,
+    PictureTask pic_task,
+    int brush_flags,
+    vec4 unused
+) {
+    // UV rect of the source image, looked up through prim_user_data.x.
+    ImageSource source = fetch_image_source(prim_user_data.x);
+    vec2 uv_min = source.uv_rect.p0;
+    vec2 uv_max = source.uv_rect.p1;
+    vec2 inv_texture_size = vec2(1.0) / vec2(TEX_SIZE(sColor0).xy);
+
+    // Fraction of the way across the local rect, remapped by get_image_quad_uv().
+    vec2 frac = (vi.local_pos - local_rect.p0) / rect_size(local_rect);
+    frac = get_image_quad_uv(prim_user_data.x, frac);
+    vec2 uv = mix(uv_min, uv_max, frac);
+
+    // 1.0 when the UVs may be perspective-interpolated, 0.0 otherwise.
+    float persp = (brush_flags & BRUSH_FLAG_PERSPECTIVE_INTERPOLATION) != 0 ? 1.0 : 0.0;
+    v_perspective = persp;
+    v_uv = uv * inv_texture_size * mix(vi.world_pos.w, 1.0, persp);
+    v_uv_sample_bounds = vec4(uv_min + vec2(0.5), uv_max - vec2(0.5)) * inv_texture_size.xyxy;
+
+    // prim_user_data.y packs the blend op (low 16 bits) and, above it, one
+    // 4-bit component transfer function id per channel.
+    v_op = prim_user_data.y & 0xffff;
+    v_funcs.r = float((prim_user_data.y >> 28) & 0xf);
+    v_funcs.g = float((prim_user_data.y >> 24) & 0xf);
+    v_funcs.b = float((prim_user_data.y >> 20) & 0xf);
+    v_funcs.a = float((prim_user_data.y >> 16) & 0xf);
+
+    // prim_user_data.z is used both as the amount (scaled by 65536) and as the
+    // GPU cache address handed to SetupFilterParams().
+    float amount = float(prim_user_data.z) / 65536.0;
+    v_amount = amount;
+
+    SetupFilterParams(v_op, amount, prim_user_data.z,
+                      v_color_offset, v_color_mat, v_table_address);
+
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+Fragment brush_fs() {
+    // Scale the interpolated UV by gl_FragCoord.w unless perspective
+    // interpolation is enabled, then clamp it to avoid sampling artifacts.
+    vec2 sample_uv = clamp(
+        v_uv * mix(gl_FragCoord.w, 1.0, v_perspective),
+        v_uv_sample_bounds.xy,
+        v_uv_sample_bounds.zw
+    );
+
+    vec4 texel = texture(sColor0, sample_uv);
+
+    // CalculateFilter() returns the filtered color and alpha through its
+    // last two (out) parameters.
+    vec3 filtered_rgb;
+    float filtered_a;
+    CalculateFilter(
+        texel, v_op, v_amount, v_table_address,
+        v_color_offset, v_color_mat, v_funcs,
+        filtered_rgb, filtered_a
+    );
+
+    // In the alpha pass, fold the brush anti-aliasing factor into alpha.
+    #ifdef WR_FEATURE_ALPHA_PASS
+        filtered_a *= antialias_brush();
+    #endif
+
+    // Pre-multiply the alpha into the output value.
+    return Fragment(filtered_a * vec4(filtered_rgb, 1.0));
+}
+#endif
diff --git a/gfx/wr/webrender/res/brush_image.glsl b/gfx/wr/webrender/res/brush_image.glsl
new file mode 100644
index 0000000000..f40be949b9
--- /dev/null
+++ b/gfx/wr/webrender/res/brush_image.glsl
@@ -0,0 +1,393 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#define VECS_PER_SPECIFIC_BRUSH 3
+
+#include shared,prim_shared,brush
+
+// Interpolated UV coordinates to sample.
+varying vec2 v_uv;
+
+#ifdef WR_FEATURE_ALPHA_PASS
+flat varying vec4 v_color;
+flat varying vec2 v_mask_swizzle;
+flat varying vec2 v_tile_repeat;
+#endif
+
+// Normalized bounds of the source image in the texture.
+flat varying vec4 v_uv_bounds;
+// Normalized bounds of the source image in the texture, adjusted to avoid
+// sampling artifacts.
+flat varying vec4 v_uv_sample_bounds;
+
+// Flag to allow perspective interpolation of UV.
+// Packed in to vector to work around bug 1630356.
+flat varying vec2 v_perspective;
+
+#ifdef WR_VERTEX_SHADER
+
+// Must match the AlphaType enum.
+#define BLEND_MODE_ALPHA            0
+#define BLEND_MODE_PREMUL_ALPHA     1
+
+struct ImageBrushData {
+    vec4 color;             // Tint; scaled by the primitive opacity in brush_vs.
+    vec4 background_color;  // Only read for COLOR_MODE_SUBPX_BG_PASS1.
+    vec2 stretch_size;      // A negative x selects the primitive rect size instead.
+};
+
+ImageBrushData fetch_image_data(int address) {
+    // Unpack the three vec4 blocks fetched from the GPU cache at `address`.
+    vec4[3] blocks = fetch_from_gpu_cache_3(address);
+    vec4 tint = blocks[0];
+    vec4 background = blocks[1];
+    vec2 stretch = blocks[2].xy;
+
+    return ImageBrushData(tint, background, stretch);
+}
+
+void brush_vs(
+    VertexInfo vi,
+    int prim_address,
+    RectWithEndpoint prim_rect,
+    RectWithEndpoint segment_rect,
+    ivec4 prim_user_data,
+    int specific_resource_address,
+    mat4 transform,
+    PictureTask pic_task,
+    int brush_flags,
+    vec4 segment_data
+) {
+    ImageBrushData image_data = fetch_image_data(prim_address);
+
+    // If this is in WR_FEATURE_TEXTURE_RECT mode, the rect and size use
+    // non-normalized texture coordinates.
+#ifdef WR_FEATURE_TEXTURE_RECT
+    vec2 texture_size = vec2(1, 1);
+#else
+    vec2 texture_size = vec2(TEX_SIZE(sColor0));
+#endif
+
+    ImageSource res = fetch_image_source(specific_resource_address);
+    vec2 uv0 = res.uv_rect.p0;
+    vec2 uv1 = res.uv_rect.p1;
+
+    RectWithEndpoint local_rect = prim_rect;
+    vec2 stretch_size = image_data.stretch_size;
+    if (stretch_size.x < 0.0) {  // negative x: stretch over the whole primitive rect
+        stretch_size = rect_size(local_rect);
+    }
+
+    // If this segment should interpolate relative to the
+    // segment, modify the parameters for that.
+    if ((brush_flags & BRUSH_FLAG_SEGMENT_RELATIVE) != 0) {
+        local_rect = segment_rect;
+        stretch_size = rect_size(local_rect);
+
+        if ((brush_flags & BRUSH_FLAG_TEXEL_RECT) != 0) {
+            // If the extra data is a texel rect, modify the UVs.
+            vec2 uv_size = res.uv_rect.p1 - res.uv_rect.p0;
+            uv0 = res.uv_rect.p0 + segment_data.xy * uv_size;
+            uv1 = res.uv_rect.p0 + segment_data.zw * uv_size;
+        }
+
+        #ifdef WR_FEATURE_REPETITION
+            // TODO(bug 1609893): Move this logic to the CPU as well as other sources of
+            // branchiness in this shader.
+            if ((brush_flags & BRUSH_FLAG_TEXEL_RECT) != 0) {
+                // Value of the stretch size with repetition. We have to compute it for
+                // both axes even if we only repeat on one axis because the value for
+                // each axis depends on what the repeated value would have been for the
+                // other axis.
+                vec2 repeated_stretch_size = stretch_size;
+                // Size of the uv rect of the segment we are considering when computing
+                // the repetitions. For the fill area it is a tad more complicated as we
+                // have to use the uv size of the top-middle segment to drive horizontal
+                // repetitions, and the size of the left-middle segment to drive vertical
+                // repetitions. So we track the reference sizes for both axes separately
+                // even though in the common case (the border segments) they are the same.
+                vec2 horizontal_uv_size = uv1 - uv0;
+                vec2 vertical_uv_size = uv1 - uv0;
+                // We use top and left sizes by default and fall back to bottom and right
+                // when a size is empty.
+                if ((brush_flags & BRUSH_FLAG_SEGMENT_NINEPATCH_MIDDLE) != 0) {
+                    repeated_stretch_size = segment_rect.p0 - prim_rect.p0;
+
+                    float epsilon = 0.001;
+
+                    // Adjust the reference uv size to compute vertical repetitions for
+                    // the fill area.
+                    vertical_uv_size.x = uv0.x - res.uv_rect.p0.x;
+                    if (vertical_uv_size.x < epsilon || repeated_stretch_size.x < epsilon) {
+                        vertical_uv_size.x = res.uv_rect.p1.x - uv1.x;
+                        repeated_stretch_size.x = prim_rect.p1.x - segment_rect.p1.x;
+                    }
+
+                    // Adjust the reference uv size to compute horizontal repetitions
+                    // for the fill area.
+                    horizontal_uv_size.y = uv0.y - res.uv_rect.p0.y;
+                    if (horizontal_uv_size.y < epsilon || repeated_stretch_size.y < epsilon) {
+                        horizontal_uv_size.y = res.uv_rect.p1.y - uv1.y;
+                        repeated_stretch_size.y = prim_rect.p1.y - segment_rect.p1.y;
+                    }
+                }
+
+                if ((brush_flags & BRUSH_FLAG_SEGMENT_REPEAT_X) != 0) {
+                    float uv_ratio = horizontal_uv_size.x / horizontal_uv_size.y;
+                    stretch_size.x = repeated_stretch_size.y * uv_ratio;
+                }
+                if ((brush_flags & BRUSH_FLAG_SEGMENT_REPEAT_Y) != 0) {
+                    float uv_ratio = vertical_uv_size.y / vertical_uv_size.x;
+                    stretch_size.y = repeated_stretch_size.x * uv_ratio;
+                }
+
+            } else {
+                if ((brush_flags & BRUSH_FLAG_SEGMENT_REPEAT_X) != 0) {
+                    stretch_size.x = segment_data.z - segment_data.x;
+                }
+                if ((brush_flags & BRUSH_FLAG_SEGMENT_REPEAT_Y) != 0) {
+                    stretch_size.y = segment_data.w - segment_data.y;
+                }
+            }
+            if ((brush_flags & BRUSH_FLAG_SEGMENT_REPEAT_X_ROUND) != 0) {
+                float segment_rect_width = segment_rect.p1.x - segment_rect.p0.x;
+                float nx = max(1.0, round(segment_rect_width / stretch_size.x));
+                stretch_size.x = segment_rect_width / nx;
+            }
+            if ((brush_flags & BRUSH_FLAG_SEGMENT_REPEAT_Y_ROUND) != 0) {
+                float segment_rect_height = segment_rect.p1.y - segment_rect.p0.y;
+                float ny = max(1.0, round(segment_rect_height / stretch_size.y));
+                stretch_size.y = segment_rect_height / ny;
+            }
+        #endif
+    }
+
+    float perspective_interpolate = (brush_flags & BRUSH_FLAG_PERSPECTIVE_INTERPOLATION) != 0 ? 1.0 : 0.0;
+    v_perspective.x = perspective_interpolate;
+
+    // Handle case where the UV coords are inverted (e.g. from an
+    // external image).
+    vec2 min_uv = min(uv0, uv1);
+    vec2 max_uv = max(uv0, uv1);
+
+    v_uv_sample_bounds = vec4(
+        min_uv + vec2(0.5),
+        max_uv - vec2(0.5)
+    ) / texture_size.xyxy;
+
+    vec2 f = (vi.local_pos - local_rect.p0) / rect_size(local_rect);
+
+#ifdef WR_FEATURE_ALPHA_PASS
+    int color_mode = prim_user_data.x & 0xffff;  // low 16 bits: color mode
+    int blend_mode = prim_user_data.x >> 16;     // high 16 bits: blend mode
+
+    if (color_mode == COLOR_MODE_FROM_PASS) {
+        color_mode = uMode;
+    }
+
+#endif
+
+    // Derive the texture coordinates for this image, based on
+    // whether the source image is a local-space or screen-space
+    // image.
+    int raster_space = prim_user_data.y;
+    if (raster_space == RASTER_SCREEN) {
+        // Since the screen space UVs specify an arbitrary quad, do
+        // a bilinear interpolation to get the correct UV for this
+        // local position.
+        f = get_image_quad_uv(specific_resource_address, f);
+    }
+
+    // Offset and scale v_uv here to avoid doing it in the fragment shader.
+    vec2 repeat = rect_size(local_rect) / stretch_size;
+    v_uv = mix(uv0, uv1, f) - min_uv;
+    v_uv /= texture_size;
+    v_uv *= repeat.xy;
+    if (perspective_interpolate == 0.0) {
+        v_uv *= vi.world_pos.w;  // the fragment shader multiplies by gl_FragCoord.w again
+    }
+
+#ifdef WR_FEATURE_TEXTURE_RECT
+    v_uv_bounds = vec4(0.0, 0.0, vec2(textureSize(sColor0)));
+#else
+    v_uv_bounds = vec4(min_uv, max_uv) / texture_size.xyxy;
+#endif
+
+#ifdef WR_FEATURE_REPETITION
+    // Normalize UV to 0..1 scale only if using repetition. Otherwise, leave
+    // UVs unnormalized since we won't compute a modulus without repetition
+    // enabled.
+    v_uv /= (v_uv_bounds.zw - v_uv_bounds.xy);
+#endif
+
+#ifdef WR_FEATURE_ALPHA_PASS
+    v_tile_repeat = repeat.xy;
+
+    float opacity = float(prim_user_data.z) / 65535.0;
+    switch (blend_mode) {
+        case BLEND_MODE_ALPHA:
+            image_data.color.a *= opacity;
+            break;
+        case BLEND_MODE_PREMUL_ALPHA:
+        default:
+            image_data.color *= opacity;
+            break;
+    }
+
+    switch (color_mode) {
+        case COLOR_MODE_ALPHA:
+        case COLOR_MODE_BITMAP_SHADOW:
+            #ifdef SWGL_BLEND
+                swgl_blendDropShadow(image_data.color);
+                v_mask_swizzle = vec2(1.0, 0.0);
+                v_color = vec4(1.0);
+            #else
+                v_mask_swizzle = vec2(0.0, 1.0);
+                v_color = image_data.color;
+            #endif
+            break;
+        case COLOR_MODE_SUBPX_BG_PASS2:
+        case COLOR_MODE_IMAGE:
+            v_mask_swizzle = vec2(1.0, 0.0);
+            v_color = image_data.color;
+            break;
+        case COLOR_MODE_SUBPX_BG_PASS0:
+        case COLOR_MODE_COLOR_BITMAP:
+            v_mask_swizzle = vec2(1.0, 0.0);
+            v_color = vec4(image_data.color.a);
+            break;
+        case COLOR_MODE_SUBPX_BG_PASS1:
+            v_mask_swizzle = vec2(-1.0, 1.0);
+            v_color = vec4(image_data.color.a) * image_data.background_color;
+            break;
+        case COLOR_MODE_SUBPX_DUAL_SOURCE:
+            v_mask_swizzle = vec2(image_data.color.a, 0.0);
+            v_color = image_data.color;
+            break;
+        case COLOR_MODE_MULTIPLY_DUAL_SOURCE:
+            v_mask_swizzle = vec2(-image_data.color.a, image_data.color.a);
+            v_color = image_data.color;
+            break;
+        default:
+            v_mask_swizzle = vec2(0.0);
+            v_color = vec4(1.0);
+    }
+#endif
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+
+vec2 compute_repeated_uvs(float perspective_divisor) {
+#ifdef WR_FEATURE_REPETITION
+    vec2 uv_size = v_uv_bounds.zw - v_uv_bounds.xy;
+
+    #ifdef WR_FEATURE_ALPHA_PASS
+    // This prevents the uv on the top and left parts of the primitive that was inflated
+    // for anti-aliasing purposes from going beyond the range covered by the regular
+    // (non-inflated) primitive.
+    vec2 local_uv = max(v_uv * perspective_divisor, vec2(0.0));
+
+    // Handle horizontal and vertical repetitions.
+    vec2 repeated_uv = fract(local_uv) * uv_size + v_uv_bounds.xy;
+
+    // This takes care of the bottom and right inflated parts.
+    // We do it after the modulo because the latter wraps around the values exactly on
+    // the right and bottom edges, which we do not want.
+    if (local_uv.x >= v_tile_repeat.x) {
+        repeated_uv.x = v_uv_bounds.z;
+    }
+    if (local_uv.y >= v_tile_repeat.y) {
+        repeated_uv.y = v_uv_bounds.w;
+    }
+    #else
+    vec2 repeated_uv = fract(v_uv * perspective_divisor) * uv_size + v_uv_bounds.xy;
+    #endif
+
+    return repeated_uv;
+#else
+    return v_uv * perspective_divisor + v_uv_bounds.xy;  // no tiling: only re-apply the offset
+#endif
+}
+
+Fragment brush_fs() {
+    // v_perspective.x is 1.0 when the UVs need no correction by gl_FragCoord.w.
+    float w_scale = mix(gl_FragCoord.w, 1.0, v_perspective.x);
+
+    // Resolve the tiling first, then clamp the uvs to avoid sampling artifacts.
+    vec2 sample_uv = clamp(
+        compute_repeated_uvs(w_scale), v_uv_sample_bounds.xy, v_uv_sample_bounds.zw);
+    vec4 sampled = TEX_SAMPLE(sColor0, sample_uv);
+
+    Fragment frag_out;
+
+#ifdef WR_FEATURE_ALPHA_PASS
+    #ifdef WR_FEATURE_ANTIALIASING
+        float aa_alpha = antialias_brush();
+    #else
+        float aa_alpha = 1.0;
+    #endif
+    #ifndef WR_FEATURE_DUAL_SOURCE_BLENDING
+        sampled.rgb = sampled.rgb * v_mask_swizzle.x + sampled.aaa * v_mask_swizzle.y;
+    #endif
+
+    vec4 masked_texel = sampled * aa_alpha;
+    frag_out.color = v_color * masked_texel;
+
+    #ifdef WR_FEATURE_DUAL_SOURCE_BLENDING
+        frag_out.blend = masked_texel * v_mask_swizzle.x + masked_texel.aaaa * v_mask_swizzle.y;
+    #endif
+#else
+    frag_out.color = sampled;
+#endif
+
+    return frag_out;
+}
+
+#if defined(SWGL_DRAW_SPAN) && (!defined(WR_FEATURE_ALPHA_PASS) || !defined(WR_FEATURE_DUAL_SOURCE_BLENDING))
+void swgl_drawSpanRGBA8() {
+    if (!swgl_isTextureRGBA8(sColor0)) {
+        return;  // this span path only commits from RGBA8 textures
+    }
+
+    #ifdef WR_FEATURE_ALPHA_PASS
+        if (v_mask_swizzle != vec2(1.0, 0.0)) {
+            return;  // only the pass-through (1, 0) swizzle is handled here
+        }
+    #endif
+
+    float perspective_divisor = mix(swgl_forceScalar(gl_FragCoord.w), 1.0, v_perspective.x);
+
+    #ifdef WR_FEATURE_REPETITION
+        // Get the UVs before any repetition, scaling, or offsetting has occurred...
+        vec2 uv = v_uv * perspective_divisor;
+    #else
+        vec2 uv = compute_repeated_uvs(perspective_divisor);
+    #endif
+
+    #ifdef WR_FEATURE_ALPHA_PASS
+    if (v_color != vec4(1.0)) {
+        #ifdef WR_FEATURE_REPETITION
+            swgl_commitTextureRepeatColorRGBA8(sColor0, uv, v_tile_repeat, v_uv_bounds, v_uv_sample_bounds, v_color);
+        #else
+            swgl_commitTextureColorRGBA8(sColor0, uv, v_uv_sample_bounds, v_color);
+        #endif
+        return;
+    }
+    // No color scaling required, so just fall through to a normal textured span...
+    #endif
+
+    #ifdef WR_FEATURE_REPETITION
+        #ifdef WR_FEATURE_ALPHA_PASS
+            swgl_commitTextureRepeatRGBA8(sColor0, uv, v_tile_repeat, v_uv_bounds, v_uv_sample_bounds);
+        #else
+            // v_tile_repeat only exists in the alpha pass, so a zero vector is passed instead.
+            swgl_commitTextureRepeatRGBA8(sColor0, uv, vec2(0.0), v_uv_bounds, v_uv_sample_bounds);
+        #endif
+    #else
+        swgl_commitTextureRGBA8(sColor0, uv, v_uv_sample_bounds);
+    #endif
+}
+#endif
+
+#endif
diff --git a/gfx/wr/webrender/res/brush_linear_gradient.glsl b/gfx/wr/webrender/res/brush_linear_gradient.glsl
new file mode 100644
index 0000000000..2c2e9a3a24
--- /dev/null
+++ b/gfx/wr/webrender/res/brush_linear_gradient.glsl
@@ -0,0 +1,95 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#define VECS_PER_SPECIFIC_BRUSH 2
+
+#include shared,prim_shared,brush,gpu_buffer,gradient_shared
+
+// Start offset. Packed in to vector to work around bug 1630356.
+flat varying vec2 v_start_offset;
+
+flat varying vec2 v_scale_dir;
+
+#ifdef WR_VERTEX_SHADER
+
+struct Gradient {
+    vec4 start_end_point;  // xy = start point, zw = end point
+    int extend_mode;       // forwarded to write_gradient_vertex
+    vec2 stretch_size;     // forwarded to write_gradient_vertex
+};
+
+Gradient fetch_gradient(int address) {
+    // Block 0 holds the start/end points; block 1 packs the extend mode in x
+    // and the stretch size in yz.
+    vec4 blocks[2] = fetch_from_gpu_cache_2(address);
+    vec4 endpoints = blocks[0];
+    vec4 params = blocks[1];
+    return Gradient(endpoints, int(params.x), params.yz);
+}
+
+void brush_vs(
+    VertexInfo vi,
+    int prim_address,
+    RectWithEndpoint local_rect,
+    RectWithEndpoint segment_rect,
+    ivec4 prim_user_data,
+    int specific_resource_address,
+    mat4 transform,
+    PictureTask pic_task,
+    int brush_flags,
+    vec4 texel_rect
+) {
+    Gradient grad = fetch_gradient(prim_address);
+
+    write_gradient_vertex(
+        vi,
+        local_rect,
+        segment_rect,
+        prim_user_data,
+        brush_flags,
+        texel_rect,
+        grad.extend_mode,
+        grad.stretch_size
+    );
+
+    // Gradient axis running from the start point (xy) to the end point (zw).
+    vec2 origin = grad.start_end_point.xy;
+    vec2 axis = grad.start_end_point.zw - origin;
+
+    // Divide the axis by its squared length so projecting onto it yields 0..1.
+    vec2 unit_scale = axis / dot(axis, axis);
+    v_start_offset.x = dot(origin, unit_scale);
+    v_scale_dir = unit_scale * v_repeated_size;
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+float get_gradient_offset(vec2 pos) {
+    float along_axis = dot(pos, v_scale_dir);  // projection onto the scaled gradient axis
+    return along_axis - v_start_offset.x;      // measured from the start point
+}
+
+Fragment brush_fs() {
+    // Repeated position -> offset along the gradient -> gradient color.
+    float t = get_gradient_offset(compute_repeated_pos());
+    vec4 shaded = sample_gradient(t);
+#ifdef WR_FEATURE_ALPHA_PASS
+    shaded *= antialias_brush();
+#endif
+    return Fragment(shaded);
+}
+
+#ifdef SWGL_DRAW_SPAN
+void swgl_drawSpanRGBA8() {
+    int address = swgl_validateGradient(sGpuBuffer, get_gpu_buffer_uv(v_gradient_address.x), int(GRADIENT_ENTRIES + 2.0));
+    if (address < 0) {
+        return;  // NOTE(review): presumably leaves the span to brush_fs -- confirm in SWGL
+    }
+
+    swgl_commitLinearGradientRGBA8(sGpuBuffer, address, GRADIENT_ENTRIES, true, v_gradient_repeat.x != 0.0,
+                                   v_pos, v_scale_dir, v_start_offset.x);
+}
+#endif
+
+#endif
diff --git a/gfx/wr/webrender/res/brush_mix_blend.glsl b/gfx/wr/webrender/res/brush_mix_blend.glsl
new file mode 100644
index 0000000000..363f383667
--- /dev/null
+++ b/gfx/wr/webrender/res/brush_mix_blend.glsl
@@ -0,0 +1,332 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#define VECS_PER_SPECIFIC_BRUSH 3
+#define WR_FEATURE_TEXTURE_2D
+
+#include shared,prim_shared,brush
+
+// UV and bounds for the source image
+varying vec2 v_src_uv;
+flat varying vec4 v_src_uv_sample_bounds;
+
+// UV and bounds for the backdrop image
+varying vec2 v_backdrop_uv;
+flat varying vec4 v_backdrop_uv_sample_bounds;
+
+// Flag to allow perspective interpolation of UV.
+// Packed in to vector to work around bug 1630356.
+flat varying vec2 v_perspective;
+// mix-blend op. Packed in to vector to work around bug 1630356.
+flat varying ivec2 v_op;
+
+#ifdef WR_VERTEX_SHADER
+
+void get_uv(
+    int res_address,
+    vec2 f,
+    ivec2 texture_size,
+    float perspective_f,
+    out vec2 out_uv,
+    out vec4 out_uv_sample_bounds
+) {
+    ImageSource source = fetch_image_source(res_address);
+    vec2 rect_p0 = source.uv_rect.p0;
+    vec2 rect_p1 = source.uv_rect.p1;
+    vec2 texel_to_uv = vec2(1.0) / vec2(texture_size);
+
+    // Remap f through the image's quad, then interpolate across the uv rect.
+    vec2 texel = mix(rect_p0, rect_p1, get_image_quad_uv(res_address, f));
+    out_uv = texel * texel_to_uv * perspective_f;
+    // Sample bounds are the uv rect pulled in by 0.5 on each side, then normalized.
+    out_uv_sample_bounds = vec4(rect_p0 + vec2(0.5), rect_p1 - vec2(0.5)) * texel_to_uv.xyxy;
+}
+
+void brush_vs(
+    VertexInfo vi,
+    int prim_address,
+    RectWithEndpoint local_rect,
+    RectWithEndpoint segment_rect,
+    ivec4 prim_user_data,
+    int specific_resource_address,
+    mat4 transform,
+    PictureTask pic_task,
+    int brush_flags,
+    vec4 unused
+) {
+    vec2 rect_fraction = (vi.local_pos - local_rect.p0) / rect_size(local_rect);
+    float use_persp = (brush_flags & BRUSH_FLAG_PERSPECTIVE_INTERPOLATION) != 0 ? 1.0 : 0.0;
+    float src_w_scale = mix(vi.world_pos.w, 1.0, use_persp);
+    v_perspective.x = use_persp;
+    v_op.x = prim_user_data.x;
+    // The backdrop UVs get no w scaling (1.0); only the source UVs do.
+    get_uv(
+        prim_user_data.y,
+        rect_fraction,
+        TEX_SIZE(sColor0).xy,
+        1.0,
+        v_backdrop_uv,
+        v_backdrop_uv_sample_bounds
+    );
+
+    get_uv(
+        prim_user_data.z,
+        rect_fraction,
+        TEX_SIZE(sColor1).xy,
+        src_w_scale,
+        v_src_uv,
+        v_src_uv_sample_bounds
+    );
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+vec3 Multiply(vec3 Cb, vec3 Cs) {
+    return Cb * Cs;  // component-wise product of backdrop and source
+}
+
+vec3 Screen(vec3 Cb, vec3 Cs) {
+    return Cb + Cs - (Cb * Cs);  // component-wise sum minus product
+}
+
+vec3 HardLight(vec3 Cb, vec3 Cs) {
+    vec3 doubled = 2.0 * Cs;
+    vec3 darkened = Multiply(Cb, doubled);       // selected where Cs < 0.5
+    vec3 lightened = Screen(Cb, doubled - 1.0);  // selected where Cs >= 0.5
+    return mix(darkened, lightened, step(vec3(0.5), Cs));
+}
+
+// TODO: Worth doing with mix/step? Check GLSL output.
+float ColorDodge(float Cb, float Cs) {
+    // A zero backdrop yields zero, even when the source is 1.0.
+    if (Cb == 0.0) return 0.0;
+    // Keeps the division below away from a zero denominator.
+    if (Cs == 1.0) return 1.0;
+
+    return min(1.0, Cb / (1.0 - Cs));
+}
+
+// TODO: Worth doing with mix/step? Check GLSL output.
+float ColorBurn(float Cb, float Cs) {
+    // A backdrop of 1.0 yields 1.0, even when the source is zero.
+    if (Cb == 1.0) return 1.0;
+    // Keeps the division below away from a zero denominator.
+    if (Cs == 0.0) return 0.0;
+
+    return 1.0 - min(1.0, (1.0 - Cb) / Cs);
+}
+
+float SoftLight(float Cb, float Cs) {
+    // Source at or below 0.5: darken the backdrop.
+    if (Cs <= 0.5) {
+        return Cb - (1.0 - 2.0 * Cs) * Cb * (1.0 - Cb);
+    }
+
+    // Source above 0.5: move the backdrop towards D, which is a cubic in Cb
+    // for backdrops up to 0.25 and sqrt(Cb) above that.
+    float D = (Cb <= 0.25)
+        ? ((16.0 * Cb - 12.0) * Cb + 4.0) * Cb
+        : sqrt(Cb);
+
+    return Cb + (2.0 * Cs - 1.0) * (D - Cb);
+}
+
+vec3 Difference(vec3 Cb, vec3 Cs) {
+    return abs(Cb - Cs);  // component-wise absolute difference
+}
+
+// These functions below are taken from the spec.
+// There's probably a much quicker way to implement
+// them in GLSL...
+float Sat(vec3 c) {
+    return max(max(c.g, c.b), c.r) - min(min(c.g, c.b), c.r);  // largest minus smallest channel
+}
+
+float Lum(vec3 c) {
+    vec3 luma_weights = vec3(0.3, 0.59, 0.11);  // r, g, b contributions
+    return dot(luma_weights, c);
+}
+
+vec3 ClipColor(vec3 C) {
+    float lum = Lum(C);
+    float lowest = min(C.r, min(C.g, C.b));
+    float highest = max(C.r, max(C.g, C.b));
+
+    // Rescale the channels around the luminance when the smallest is negative...
+    if (lowest < 0.0) { C = lum + (((C - lum) * lum) / (lum - lowest)); }
+
+    // ...or the largest exceeds 1.0. Both tests use the extremes of the incoming color.
+    if (highest > 1.0) { C = lum + (((C - lum) * (1.0 - lum)) / (highest - lum)); }
+
+    return C;
+}
+
+vec3 SetLum(vec3 C, float l) {
+    float lum_shift = l - Lum(C);    // offset needed to reach luminosity l
+    return ClipColor(C + lum_shift); // applied to every channel, then clipped
+}
+
+void SetSatInner(inout float Cmin, inout float Cmid, inout float Cmax, float s) {
+    if (Cmax > Cmin) {
+        Cmid = (((Cmid - Cmin) * s) / (Cmax - Cmin));  // rescale Cmid's position to the new range 0..s
+        Cmax = s;
+    } else {
+        Cmid = 0.0;  // Cmax <= Cmin: nothing to rescale, everything collapses to zero
+        Cmax = 0.0;
+    }
+    Cmin = 0.0;
+}
+
+vec3 SetSat(vec3 C, float s) {
+    if (C.r <= C.g) {
+        if (C.g <= C.b) {
+            SetSatInner(C.r, C.g, C.b, s);  // r <= g <= b
+        } else {
+            if (C.r <= C.b) {
+                SetSatInner(C.r, C.b, C.g, s);  // r <= b < g
+            } else {
+                SetSatInner(C.b, C.r, C.g, s);  // b < r <= g
+            }
+        }
+    } else {
+        if (C.r <= C.b) {
+            SetSatInner(C.g, C.r, C.b, s);  // g < r <= b
+        } else {
+            if (C.g <= C.b) {
+                SetSatInner(C.g, C.b, C.r, s);  // g <= b < r
+            } else {
+                SetSatInner(C.b, C.g, C.r, s);  // b < g < r
+            }
+        }
+    }
+    return C;  // channels were rewritten in place through the inout arguments
+}
+
+vec3 Hue(vec3 Cb, vec3 Cs) {
+    return SetLum(SetSat(Cs, Sat(Cb)), Lum(Cb));  // Cs given Cb's saturation and luminosity
+}
+
+vec3 Saturation(vec3 Cb, vec3 Cs) {
+    return SetLum(SetSat(Cb, Sat(Cs)), Lum(Cb));  // Cb given Cs's saturation, Cb's luminosity kept
+}
+
+vec3 Color(vec3 Cb, vec3 Cs) {
+    return SetLum(Cs, Lum(Cb));  // Cs given Cb's luminosity
+}
+
+vec3 Luminosity(vec3 Cb, vec3 Cs) {
+    return SetLum(Cb, Lum(Cs));  // Cb given Cs's luminosity
+}
+
+const int MixBlendMode_Multiply    = 1;
+const int MixBlendMode_Screen      = 2;   // done with glBlendFuncSeparate, not in brush_fs
+const int MixBlendMode_Overlay     = 3;
+const int MixBlendMode_Darken      = 4;
+const int MixBlendMode_Lighten     = 5;
+const int MixBlendMode_ColorDodge  = 6;
+const int MixBlendMode_ColorBurn   = 7;
+const int MixBlendMode_HardLight   = 8;
+const int MixBlendMode_SoftLight   = 9;
+const int MixBlendMode_Difference  = 10;
+const int MixBlendMode_Exclusion   = 11;  // done with glBlendFuncSeparate, not in brush_fs
+const int MixBlendMode_Hue         = 12;
+const int MixBlendMode_Saturation  = 13;
+const int MixBlendMode_Color       = 14;
+const int MixBlendMode_Luminosity  = 15;
+const int MixBlendMode_PlusLighter = 16;  // done with glBlendFuncSeparate, not in brush_fs
+
+Fragment brush_fs() {
+    float perspective_divisor = mix(gl_FragCoord.w, 1.0, v_perspective.x);
+
+    vec2 src_uv = v_src_uv * perspective_divisor;
+    src_uv = clamp(src_uv, v_src_uv_sample_bounds.xy, v_src_uv_sample_bounds.zw);
+
+    vec2 backdrop_uv = clamp(v_backdrop_uv, v_backdrop_uv_sample_bounds.xy, v_backdrop_uv_sample_bounds.zw);
+
+    vec4 Cb = texture(sColor0, backdrop_uv);
+    vec4 Cs = texture(sColor1, src_uv);
+
+    // The mix-blend-mode functions assume no premultiplied alpha
+    if (Cb.a != 0.0) {
+        Cb.rgb /= Cb.a;
+    }
+
+    if (Cs.a != 0.0) {
+        Cs.rgb /= Cs.a;
+    }
+
+    // Return yellow if none of the branches match (shouldn't happen).
+    vec4 result = vec4(1.0, 1.0, 0.0, 1.0);
+
+    // On Android v_op has been packed in to a vector to avoid a driver bug
+    // on Adreno 3xx. However, this runs in to another Adreno 3xx driver bug
+    // where the switch doesn't match any cases. Unpacking the value from the
+    // vec in to a local variable prior to the switch works around this, but
+    // gets optimized away by glslopt. Adding a bitwise AND prevents that.
+    // See bug 1726755.
+    // The empty `default:` case below is there to appease angle_shader_validation.
+    switch (v_op.x & 0xFF) {
+        case MixBlendMode_Multiply:
+            result.rgb = Multiply(Cb.rgb, Cs.rgb);
+            break;
+        case MixBlendMode_Overlay:
+            // Overlay is inverse of Hardlight
+            result.rgb = HardLight(Cs.rgb, Cb.rgb);
+            break;
+        case MixBlendMode_Darken:
+            result.rgb = min(Cs.rgb, Cb.rgb);
+            break;
+        case MixBlendMode_Lighten:
+            result.rgb = max(Cs.rgb, Cb.rgb);
+            break;
+        case MixBlendMode_ColorDodge:
+            result.r = ColorDodge(Cb.r, Cs.r);
+            result.g = ColorDodge(Cb.g, Cs.g);
+            result.b = ColorDodge(Cb.b, Cs.b);
+            break;
+        case MixBlendMode_ColorBurn:
+            result.r = ColorBurn(Cb.r, Cs.r);
+            result.g = ColorBurn(Cb.g, Cs.g);
+            result.b = ColorBurn(Cb.b, Cs.b);
+            break;
+        case MixBlendMode_HardLight:
+            result.rgb = HardLight(Cb.rgb, Cs.rgb);
+            break;
+        case MixBlendMode_SoftLight:
+            result.r = SoftLight(Cb.r, Cs.r);
+            result.g = SoftLight(Cb.g, Cs.g);
+            result.b = SoftLight(Cb.b, Cs.b);
+            break;
+        case MixBlendMode_Difference:
+            result.rgb = Difference(Cb.rgb, Cs.rgb);
+            break;
+        case MixBlendMode_Hue:
+            result.rgb = Hue(Cb.rgb, Cs.rgb);
+            break;
+        case MixBlendMode_Saturation:
+            result.rgb = Saturation(Cb.rgb, Cs.rgb);
+            break;
+        case MixBlendMode_Color:
+            result.rgb = Color(Cb.rgb, Cs.rgb);
+            break;
+        case MixBlendMode_Luminosity:
+            result.rgb = Luminosity(Cb.rgb, Cs.rgb);
+            break;
+        case MixBlendMode_Screen:
+        case MixBlendMode_Exclusion:
+        case MixBlendMode_PlusLighter:
+            // This should be unreachable, since we implement
+            // MixBlendMode::Screen, MixBlendMode::Exclusion and
+            // MixBlendMode::PlusLighter using glBlendFuncSeparate.
+            break;
+        default: break;
+    }
+
+    result.rgb = (1.0 - Cb.a) * Cs.rgb + Cb.a * result.rgb;  // blended color weighted by backdrop alpha
+    result.a = Cs.a;
+    result.rgb *= result.a;  // back to premultiplied alpha
+
+    return Fragment(result);
+}
+#endif
diff --git a/gfx/wr/webrender/res/brush_opacity.glsl b/gfx/wr/webrender/res/brush_opacity.glsl
new file mode 100644
index 0000000000..e6b30af69b
--- /dev/null
+++ b/gfx/wr/webrender/res/brush_opacity.glsl
@@ -0,0 +1,83 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#define VECS_PER_SPECIFIC_BRUSH 3
+#define WR_FEATURE_TEXTURE_2D
+
+#include shared,prim_shared,brush
+
+// Interpolated UV coordinates to sample.
+varying vec2 v_uv;
+
+// Normalized bounds of the source image in the texture, adjusted to avoid
+// sampling artifacts.
+flat varying vec4 v_uv_sample_bounds;
+
+flat varying vec2 v_opacity_perspective_vec;
+#define v_opacity v_opacity_perspective_vec.x
+// Flag to allow perspective interpolation of UV.
+#define v_perspective v_opacity_perspective_vec.y
+
+#ifdef WR_VERTEX_SHADER
+void brush_vs(
+    VertexInfo vi,
+    int prim_address,
+    RectWithEndpoint local_rect,
+    RectWithEndpoint segment_rect,
+    ivec4 prim_user_data,
+    int specific_resource_address,
+    mat4 transform,
+    PictureTask pic_task,
+    int brush_flags,
+    vec4 unused
+) {
+    int source_address = prim_user_data.x;
+    ImageSource source = fetch_image_source(source_address);
+    vec2 rect_p0 = source.uv_rect.p0;
+    vec2 rect_p1 = source.uv_rect.p1;
+    vec2 tex_dims = vec2(TEX_SIZE(sColor0).xy);
+
+    // Vertex position as a fraction of the primitive rect, remapped by the image quad.
+    vec2 rect_fraction = (vi.local_pos - local_rect.p0) / rect_size(local_rect);
+    vec2 texel = mix(rect_p0, rect_p1, get_image_quad_uv(source_address, rect_fraction));
+    float use_persp = (brush_flags & BRUSH_FLAG_PERSPECTIVE_INTERPOLATION) != 0 ? 1.0 : 0.0;
+    v_perspective = use_persp;
+    v_uv = texel / tex_dims * mix(vi.world_pos.w, 1.0, use_persp);
+    v_uv_sample_bounds = vec4(rect_p0 + vec2(0.5), rect_p1 - vec2(0.5)) / tex_dims.xyxy;
+
+    // prim_user_data.y carries the opacity scaled by 65536; clamp the result to 0..1.
+    v_opacity = clamp(float(prim_user_data.y) / 65536.0, 0.0, 1.0);
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+Fragment brush_fs() {
+    float w_scale = mix(gl_FragCoord.w, 1.0, v_perspective);
+    // Keep the sample position inside the bounds to avoid sampling artifacts.
+    vec2 sample_uv = clamp(v_uv * w_scale, v_uv_sample_bounds.xy, v_uv_sample_bounds.zw);
+
+    // The texel is used as sampled (no un-premultiply): every channel simply
+    // gets scaled by the same factor below.
+    vec4 sampled = texture(sColor0, sample_uv);
+
+    float factor = v_opacity;
+
+    #ifdef WR_FEATURE_ALPHA_PASS
+        factor *= antialias_brush();
+    #endif
+
+    // Scale all four channels by the opacity factor.
+    return Fragment(factor * sampled);
+}
+
+#if defined(SWGL_DRAW_SPAN) && !defined(WR_FEATURE_DUAL_SOURCE_BLENDING)
+void swgl_drawSpanRGBA8() {
+    float perspective_divisor = mix(swgl_forceScalar(gl_FragCoord.w), 1.0, v_perspective);
+    vec2 uv = v_uv * perspective_divisor;  // same UV reconstruction as brush_fs, before clamping
+
+    swgl_commitTextureLinearColorRGBA8(sColor0, uv, v_uv_sample_bounds, v_opacity);
+}
+#endif
+
+#endif
diff --git a/gfx/wr/webrender/res/brush_solid.glsl b/gfx/wr/webrender/res/brush_solid.glsl
new file mode 100644
index 0000000000..588ed3ac45
--- /dev/null
+++ b/gfx/wr/webrender/res/brush_solid.glsl
@@ -0,0 +1,60 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#define VECS_PER_SPECIFIC_BRUSH 1
+
+#include shared,prim_shared,brush
+
+flat varying vec4 v_color;
+
+#ifdef WR_VERTEX_SHADER
+
+struct SolidBrush {
+    vec4 color;  // multiplied by the primitive opacity in brush_vs
+};
+
+SolidBrush fetch_solid_primitive(int address) {
+    // The brush is a single GPU cache block holding its color.
+    return SolidBrush(fetch_from_gpu_cache_1(address));
+}
+
+void brush_vs(
+    VertexInfo vi,
+    int prim_address,
+    RectWithEndpoint local_rect,
+    RectWithEndpoint segment_rect,
+    ivec4 prim_user_data,
+    int specific_resource_address,
+    mat4 transform,
+    PictureTask pic_task,
+    int brush_flags,
+    vec4 unused
+) {
+    // prim_user_data.x carries the opacity scaled by 65535.
+    float alpha_scale = float(prim_user_data.x) / 65535.0;
+    vec4 base_color = fetch_solid_primitive(prim_address).color;
+    v_color = base_color * alpha_scale;
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+Fragment brush_fs() {
+#ifdef WR_FEATURE_ALPHA_PASS
+    return Fragment(v_color * antialias_brush());  // flat color scaled by antialias_brush()
+#else
+    return Fragment(v_color);
+#endif
+}
+
+#if defined(SWGL_DRAW_SPAN) && (!defined(WR_FEATURE_ALPHA_PASS) || !defined(WR_FEATURE_DUAL_SOURCE_BLENDING))
+void swgl_drawSpanRGBA8() {
+    swgl_commitSolidRGBA8(v_color);  // whole span gets the flat brush color
+}
+
+void swgl_drawSpanR8() {
+    swgl_commitSolidR8(v_color.x);  // single-channel target: only the first component is used
+}
+#endif
+
+#endif
diff --git a/gfx/wr/webrender/res/brush_yuv_image.glsl b/gfx/wr/webrender/res/brush_yuv_image.glsl
new file mode 100644
index 0000000000..28abcab66f
--- /dev/null
+++ b/gfx/wr/webrender/res/brush_yuv_image.glsl
@@ -0,0 +1,140 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#define VECS_PER_SPECIFIC_BRUSH 1
+
+#include shared,prim_shared,brush,yuv
+
+varying vec2 vUv_Y;
+flat varying vec4 vUvBounds_Y;
+
+varying vec2 vUv_U;
+flat varying vec4 vUvBounds_U;
+
+varying vec2 vUv_V;
+flat varying vec4 vUvBounds_V;
+
+YUV_PRECISION flat varying vec3 vYcbcrBias;
+YUV_PRECISION flat varying mat3 vRgbFromDebiasedYcbcr;
+
+// YUV format. Packed into a vector to work around bug 1630356.
+flat varying ivec2 vFormat;
+
+#ifdef SWGL_DRAW_SPAN
+flat varying int vRescaleFactor;
+#endif
+
+#ifdef WR_VERTEX_SHADER
+
+// Decode the YUV parameters stored in the GPU cache block at `address`.
+YuvPrimitive fetch_yuv_primitive(int address) {
+    // Layout (from YuvImageData.write_prim_gpu_blocks):
+    //   x = channel bit depth, y = color space, z = YUV format.
+    // The w component is not read here.
+    vec4 block = fetch_from_gpu_cache_1(address);
+    return YuvPrimitive(int(block.x), int(block.y), int(block.z));
+}
+
+void brush_vs( // YUV image brush: sets up the color conversion and per-plane UVs.
+    VertexInfo vi,
+    int prim_address,
+    RectWithEndpoint local_rect,
+    RectWithEndpoint segment_rect,
+    ivec4 prim_user_data,
+    int specific_resource_address,
+    mat4 transform,
+    PictureTask pic_task,
+    int brush_flags,
+    vec4 unused
+) {
+    vec2 f = (vi.local_pos - local_rect.p0) / rect_size(local_rect);
+    // f: the vertex position as a fraction of local_rect, passed to write_uv_rect.
+    YuvPrimitive prim = fetch_yuv_primitive(prim_address);
+
+#ifdef SWGL_DRAW_SPAN
+    // swgl_commitTextureLinearYUV needs to know the color space specifier and
+    // also needs to know how many bits of scaling are required to normalize
+    // HDR textures. Note that MSB HDR formats don't need renormalization.
+    vRescaleFactor = 0;
+    if (prim.channel_bit_depth > 8 && prim.yuv_format != YUV_FORMAT_P010) {
+        vRescaleFactor = 16 - prim.channel_bit_depth;
+    }
+#endif
+
+    YuvColorMatrixInfo mat_info = get_rgb_from_ycbcr_info(prim);
+    vYcbcrBias = mat_info.ycbcr_bias;
+    vRgbFromDebiasedYcbcr = mat_info.rgb_from_debiased_ycbrc;
+    // Only the x lane of vFormat is written here and read by the fragment stage.
+    vFormat.x = prim.yuv_format;
+    // Write UVs and bounds for each texture the format reads: three, two or one.
+    // The additional test for 99 works around a gen6 shader compiler bug: 1708937
+    if (vFormat.x == YUV_FORMAT_PLANAR || vFormat.x == 99) {
+        ImageSource res_y = fetch_image_source(prim_user_data.x);
+        ImageSource res_u = fetch_image_source(prim_user_data.y);
+        ImageSource res_v = fetch_image_source(prim_user_data.z);
+        write_uv_rect(res_y.uv_rect.p0, res_y.uv_rect.p1, f, TEX_SIZE_YUV(sColor0), vUv_Y, vUvBounds_Y);
+        write_uv_rect(res_u.uv_rect.p0, res_u.uv_rect.p1, f, TEX_SIZE_YUV(sColor1), vUv_U, vUvBounds_U);
+        write_uv_rect(res_v.uv_rect.p0, res_v.uv_rect.p1, f, TEX_SIZE_YUV(sColor2), vUv_V, vUvBounds_V);
+    } else if (vFormat.x == YUV_FORMAT_NV12 || vFormat.x == YUV_FORMAT_P010) {
+        ImageSource res_y = fetch_image_source(prim_user_data.x);
+        ImageSource res_u = fetch_image_source(prim_user_data.y);
+        write_uv_rect(res_y.uv_rect.p0, res_y.uv_rect.p1, f, TEX_SIZE_YUV(sColor0), vUv_Y, vUvBounds_Y);
+        write_uv_rect(res_u.uv_rect.p0, res_u.uv_rect.p1, f, TEX_SIZE_YUV(sColor1), vUv_U, vUvBounds_U);
+    } else if (vFormat.x == YUV_FORMAT_INTERLEAVED) {
+        ImageSource res_y = fetch_image_source(prim_user_data.x);
+        write_uv_rect(res_y.uv_rect.p0, res_y.uv_rect.p1, f, TEX_SIZE_YUV(sColor0), vUv_Y, vUvBounds_Y);
+    }
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+
+// Sample the YUV textures through sample_yuv, using the bias and matrix set
+// up by the vertex shader.
+Fragment brush_fs() {
+    vec4 rgba = sample_yuv(
+        vFormat.x,
+        vYcbcrBias,
+        vRgbFromDebiasedYcbcr,
+        vUv_Y, vUv_U, vUv_V,
+        vUvBounds_Y, vUvBounds_U, vUvBounds_V
+    );
+
+#ifdef WR_FEATURE_ALPHA_PASS
+    // Alpha pass only: apply the brush anti-aliasing factor.
+    rgba *= antialias_brush();
+#endif
+
+    // Debugging aid: uncomment to visualize the format and bias values.
+    //rgba.r = float(100+vFormat.x) / 255.0;
+    //rgba.g = vYcbcrBias.x;
+    //rgba.b = vYcbcrBias.y;
+    return Fragment(rgba);
+}
+
+#ifdef SWGL_DRAW_SPAN
+void swgl_drawSpanRGBA8() { // SWGL span path: one commit call per YUV format family.
+    if (vFormat.x == YUV_FORMAT_PLANAR) { // three textures
+        swgl_commitTextureLinearYUV(sColor0, vUv_Y, vUvBounds_Y,
+                                    sColor1, vUv_U, vUvBounds_U,
+                                    sColor2, vUv_V, vUvBounds_V,
+                                    vYcbcrBias,
+                                    vRgbFromDebiasedYcbcr,
+                                    vRescaleFactor);
+    } else if (vFormat.x == YUV_FORMAT_NV12 || vFormat.x == YUV_FORMAT_P010) { // two textures
+        swgl_commitTextureLinearYUV(sColor0, vUv_Y, vUvBounds_Y,
+                                    sColor1, vUv_U, vUvBounds_U,
+                                    vYcbcrBias,
+                                    vRgbFromDebiasedYcbcr,
+                                    vRescaleFactor);
+    } else if (vFormat.x == YUV_FORMAT_INTERLEAVED) { // one texture
+        swgl_commitTextureLinearYUV(sColor0, vUv_Y, vUvBounds_Y,
+                                    vYcbcrBias,
+                                    vRgbFromDebiasedYcbcr,
+                                    vRescaleFactor);
+    }
+}
+#endif
+
+#endif
diff --git a/gfx/wr/webrender/res/clip_shared.glsl b/gfx/wr/webrender/res/clip_shared.glsl
new file mode 100644
index 0000000000..ef28bfde22
--- /dev/null
+++ b/gfx/wr/webrender/res/clip_shared.glsl
@@ -0,0 +1,80 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include rect,render_task,gpu_cache,transform
+
+#ifdef WR_VERTEX_SHADER
+
+PER_INSTANCE in vec4 aClipDeviceArea;
+PER_INSTANCE in vec4 aClipOrigins;
+PER_INSTANCE in float aDevicePixelScale;
+PER_INSTANCE in ivec2 aTransformIds;
+
+struct ClipMaskInstanceCommon {
+    RectWithEndpoint sub_rect;  // Built from aClipDeviceArea.
+    vec2 task_origin;           // From aClipOrigins.xy.
+    vec2 screen_origin;         // From aClipOrigins.zw.
+    float device_pixel_scale;   // From aDevicePixelScale.
+    int clip_transform_id;      // From aTransformIds.x.
+    int prim_transform_id;      // From aTransformIds.y.
+};
+
+// Pack the per-instance clip attributes into a ClipMaskInstanceCommon.
+// Constructor arguments follow the struct's field order.
+ClipMaskInstanceCommon fetch_clip_item_common() {
+    return ClipMaskInstanceCommon(
+        RectWithEndpoint(aClipDeviceArea.xy, aClipDeviceArea.zw),  // sub_rect
+        aClipOrigins.xy,                                           // task_origin
+        aClipOrigins.zw,                                           // screen_origin
+        aDevicePixelScale,                                         // device_pixel_scale
+        aTransformIds.x,                                           // clip_transform_id
+        aTransformIds.y                                            // prim_transform_id
+    );
+}
+
+struct ClipVertexInfo {
+    vec4 local_pos;                      // Homogeneous position computed by write_clip_tile_vertex.
+    RectWithEndpoint clipped_local_rect; // The local clip rect passed to write_clip_tile_vertex.
+};
+
+// The transformed vertex function that always covers the whole clip area,
+// which is the intersection of all clip instances of a given primitive
+ClipVertexInfo write_clip_tile_vertex(RectWithEndpoint local_clip_rect,
+                                      Transform prim_transform,
+                                      Transform clip_transform,
+                                      RectWithEndpoint sub_rect,
+                                      vec2 task_origin,
+                                      vec2 screen_origin,
+                                      float device_pixel_scale) {
+    vec2 device_pos = screen_origin + mix(sub_rect.p0, sub_rect.p1, aPosition.xy);
+    vec2 world_pos = device_pos / device_pixel_scale;
+    // Apply prim_transform.m, then divide xyz by w; w itself is kept and reused below.
+    vec4 pos = prim_transform.m * vec4(world_pos, 0.0, 1.0);
+    pos.xyz /= pos.w;
+    // The get_node_pos result is re-scaled by the w saved from the divide above.
+    vec4 p = get_node_pos(pos.xy, clip_transform);
+    vec4 local_pos = p * pos.w;
+
+    //TODO: Interpolate in clip space, where "local_pos.w" contains
+    // the W of the homogeneous transform *from* clip space into the world.
+    //    float interpolate_w = 1.0 / local_pos.w;
+    // This is problematic today, because the W<=0 hemisphere is going to be
+    // clipped, while we currently want this shader to fill out the whole rect.
+    // We can therefore simplify this when the clip construction is rewritten
+    // to only affect the areas touched by a clip.
+    vec4 vertex_pos = vec4(
+        task_origin + mix(sub_rect.p0, sub_rect.p1, aPosition.xy),
+        0.0,
+        1.0
+    );
+    // Unlike device_pos above, the output vertex is offset by task_origin.
+    gl_Position = uTransform * vertex_pos;
+
+    init_transform_vs(vec4(local_clip_rect.p0, local_clip_rect.p1));
+
+    ClipVertexInfo vi = ClipVertexInfo(local_pos, local_clip_rect);
+    return vi;
+}
+
+#endif //WR_VERTEX_SHADER
diff --git a/gfx/wr/webrender/res/composite.glsl b/gfx/wr/webrender/res/composite.glsl
new file mode 100644
index 0000000000..dbd799f129
--- /dev/null
+++ b/gfx/wr/webrender/res/composite.glsl
@@ -0,0 +1,242 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// Composite a picture cache tile into the framebuffer.
+
+// This shader must remain compatible with ESSL 1, at least for the
+// WR_FEATURE_TEXTURE_EXTERNAL_ESSL1 feature, so that it can be used to render
+// video on GLES devices without GL_OES_EGL_image_external_essl3 support.
+// This means we cannot use textureSize(), int inputs/outputs, etc.
+
+#include shared
+
+#ifdef WR_FEATURE_YUV
+#include yuv
+#endif
+
+#ifdef WR_FEATURE_YUV
+YUV_PRECISION flat varying vec3 vYcbcrBias;
+YUV_PRECISION flat varying mat3 vRgbFromDebiasedYcbcr;
+// YUV format. Packed into a vector to avoid bug 1630356.
+flat varying ivec2 vYuvFormat;
+
+#ifdef SWGL_DRAW_SPAN
+flat varying int vRescaleFactor;
+#endif
+varying vec2 vUV_y;
+varying vec2 vUV_u;
+varying vec2 vUV_v;
+flat varying vec4 vUVBounds_y;
+flat varying vec4 vUVBounds_u;
+flat varying vec4 vUVBounds_v;
+#else
+varying vec2 vUv;
+#ifndef WR_FEATURE_FAST_PATH
+flat varying vec4 vColor;
+flat varying vec4 vUVBounds;
+#endif
+#ifdef WR_FEATURE_TEXTURE_EXTERNAL_ESSL1
+uniform vec2 uTextureSize;
+#endif
+#endif
+
+#ifdef WR_VERTEX_SHADER
+// CPU side data is in CompositeInstance (gpu_types.rs) and is
+// converted to GPU data using desc::COMPOSITE (renderer.rs) by
+// filling vaos.composite_vao with VertexArrayKind::Composite.
+PER_INSTANCE attribute vec4 aLocalRect;
+PER_INSTANCE attribute vec4 aDeviceClipRect;
+PER_INSTANCE attribute vec4 aColor;
+PER_INSTANCE attribute vec4 aParams;
+PER_INSTANCE attribute vec4 aTransform;
+
+#ifdef WR_FEATURE_YUV
+// YUV treats these as a UV clip rect (clamp)
+PER_INSTANCE attribute vec4 aUvRect0;
+PER_INSTANCE attribute vec4 aUvRect1;
+PER_INSTANCE attribute vec4 aUvRect2;
+#else
+PER_INSTANCE attribute vec4 aUvRect0;
+#endif
+
+vec2 apply_transform(vec2 p, vec4 transform) {
+    return p * transform.xy + transform.zw; // xy = per-axis scale, zw = offset.
+}
+
+#ifdef WR_FEATURE_YUV
+// Unpack the YUV description carried in the aParams instance attribute.
+YuvPrimitive fetch_yuv_primitive() {
+    // Packing (from ExternalSurfaceDependency::Yuv):
+    //   y = color space, z = YUV format, w = channel bit depth.
+    // YuvPrimitive takes (channel_bit_depth, color_space, yuv_format).
+    return YuvPrimitive(int(aParams.w), int(aParams.y), int(aParams.z));
+}
+#endif
+
+void main(void) { // Vertex stage: clip the tile quad and derive its UVs.
+    // Get world position
+    vec2 world_p0 = apply_transform(aLocalRect.xy, aTransform);
+    vec2 world_p1 = apply_transform(aLocalRect.zw, aTransform);
+    vec2 world_pos = mix(world_p0, world_p1, aPosition.xy);
+
+    // Clip the position to the world space clip rect
+    vec2 clipped_world_pos = clamp(world_pos, aDeviceClipRect.xy, aDeviceClipRect.zw);
+
+    // Derive the normalized UV from the clipped vertex position
+    vec2 uv = (clipped_world_pos - world_p0) / (world_p1 - world_p0);
+
+#ifdef WR_FEATURE_YUV
+    YuvPrimitive prim = fetch_yuv_primitive();
+
+#ifdef SWGL_DRAW_SPAN
+    // swgl_commitTextureLinearYUV needs to know the color space specifier and
+    // also needs to know how many bits of scaling are required to normalize
+    // HDR textures. Note that MSB HDR formats don't need renormalization.
+    vRescaleFactor = 0;
+    if (prim.channel_bit_depth > 8 && prim.yuv_format != YUV_FORMAT_P010) {
+        vRescaleFactor = 16 - prim.channel_bit_depth;
+    }
+#endif
+
+    YuvColorMatrixInfo mat_info = get_rgb_from_ycbcr_info(prim);
+    vYcbcrBias = mat_info.ycbcr_bias;
+    vRgbFromDebiasedYcbcr = mat_info.rgb_from_debiased_ycbrc;
+
+    vYuvFormat.x = prim.yuv_format;
+
+    write_uv_rect(
+        aUvRect0.xy,
+        aUvRect0.zw,
+        uv,
+        TEX_SIZE_YUV(sColor0),
+        vUV_y,
+        vUVBounds_y
+    );
+    write_uv_rect(
+        aUvRect1.xy,
+        aUvRect1.zw,
+        uv,
+        TEX_SIZE_YUV(sColor1),
+        vUV_u,
+        vUVBounds_u
+    );
+    write_uv_rect(
+        aUvRect2.xy,
+        aUvRect2.zw,
+        uv,
+        TEX_SIZE_YUV(sColor2),
+        vUV_v,
+        vUVBounds_v
+    );
+#else
+    uv = mix(aUvRect0.xy, aUvRect0.zw, uv);
+    // The uvs may be inverted, so use the min and max for the bounds
+    vec4 uvBounds = vec4(min(aUvRect0.xy, aUvRect0.zw), max(aUvRect0.xy, aUvRect0.zw));
+    int rescale_uv = int(aParams.y);
+    if (rescale_uv == 1)
+    {
+        // using an atlas, so UVs are in pixels, and need to be
+        // normalized and clamped.
+#if defined(WR_FEATURE_TEXTURE_RECT)
+        vec2 texture_size = vec2(1.0, 1.0); // Not used: rect textures skip the division below.
+#elif defined(WR_FEATURE_TEXTURE_EXTERNAL_ESSL1)
+        vec2 texture_size = uTextureSize;
+#else
+        vec2 texture_size = vec2(TEX_SIZE(sColor0));
+#endif
+        uvBounds += vec4(0.5, 0.5, -0.5, -0.5); // Inset the bounds by half a pixel on every side.
+    #ifndef WR_FEATURE_TEXTURE_RECT
+        uv /= texture_size;
+        uvBounds /= texture_size.xyxy;
+    #endif
+    }
+
+    vUv = uv;
+#ifndef WR_FEATURE_FAST_PATH
+    vUVBounds = uvBounds;
+    // Pass through color
+    vColor = aColor;
+#endif
+#endif
+
+    gl_Position = uTransform * vec4(clipped_world_pos, aParams.x /* z_id */, 1.0);
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+void main(void) {
+#ifdef WR_FEATURE_YUV
+    // YUV surface: sample_yuv takes the per-plane UVs and bounds together
+    // with the conversion bias and matrix written by the vertex shader.
+    vec4 color = sample_yuv(
+        vYuvFormat.x,
+        vYcbcrBias,
+        vRgbFromDebiasedYcbcr,
+        vUV_y,
+        vUV_u,
+        vUV_v,
+        vUVBounds_y,
+        vUVBounds_u,
+        vUVBounds_v
+    );
+#elif defined(WR_FEATURE_FAST_PATH)
+    // Fast path: output the texel directly, with no UV clamp and no color
+    // modulation.
+    vec2 uv = vUv;
+    vec4 color = TEX_SAMPLE(sColor0, uv);
+#else
+    // General path: clamp the UV to its bounds, then modulate the texel by
+    // the supplied color.
+    vec2 uv = clamp(vUv, vUVBounds.xy, vUVBounds.zw);
+    vec4 texel = TEX_SAMPLE(sColor0, uv);
+    vec4 color = vColor * texel;
+#endif
+
+    write_output(color);
+}
+
+#ifdef SWGL_DRAW_SPAN
+void swgl_drawSpanRGBA8() { // SWGL span path mirroring the fragment main() above.
+#ifdef WR_FEATURE_YUV
+    if (vYuvFormat.x == YUV_FORMAT_PLANAR) { // three textures
+        swgl_commitTextureLinearYUV(sColor0, vUV_y, vUVBounds_y,
+                                    sColor1, vUV_u, vUVBounds_u,
+                                    sColor2, vUV_v, vUVBounds_v,
+                                    vYcbcrBias,
+                                    vRgbFromDebiasedYcbcr,
+                                    vRescaleFactor);
+    } else if (vYuvFormat.x == YUV_FORMAT_NV12 || vYuvFormat.x == YUV_FORMAT_P010) { // two textures
+        swgl_commitTextureLinearYUV(sColor0, vUV_y, vUVBounds_y,
+                                    sColor1, vUV_u, vUVBounds_u,
+                                    vYcbcrBias,
+                                    vRgbFromDebiasedYcbcr,
+                                    vRescaleFactor);
+    } else if (vYuvFormat.x == YUV_FORMAT_INTERLEAVED) { // one texture
+        swgl_commitTextureLinearYUV(sColor0, vUV_y, vUVBounds_y,
+                                    vYcbcrBias,
+                                    vRgbFromDebiasedYcbcr,
+                                    vRescaleFactor);
+    }
+#else
+#ifdef WR_FEATURE_FAST_PATH
+    vec4 color = vec4(1.0); // vColor is not declared in the fast path; 1.0 selects the plain commit below.
+#ifdef WR_FEATURE_TEXTURE_RECT
+    vec4 uvBounds = vec4(vec2(0.0), vec2(textureSize(sColor0)));
+#else
+    vec4 uvBounds = vec4(0.0, 0.0, 1.0, 1.0);
+#endif
+#else
+    vec4 color = vColor;
+    vec4 uvBounds = vUVBounds;
+#endif
+    if (color != vec4(1.0)) { // Only use the color-modulating commit when the color is not all ones.
+        swgl_commitTextureColorRGBA8(sColor0, vUv, uvBounds, color);
+    } else {
+        swgl_commitTextureRGBA8(sColor0, vUv, uvBounds);
+    }
+#endif
+}
+#endif
+
+#endif
diff --git a/gfx/wr/webrender/res/cs_blur.glsl b/gfx/wr/webrender/res/cs_blur.glsl
new file mode 100644
index 0000000000..f29e3c4bf4
--- /dev/null
+++ b/gfx/wr/webrender/res/cs_blur.glsl
@@ -0,0 +1,196 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#define WR_FEATURE_TEXTURE_2D
+
+#include shared,prim_shared
+
+varying vec2 vUv;
+flat varying vec4 vUvRect;
+flat varying vec2 vOffsetScale;
+// The number of pixels on each end that we apply the blur filter over.
+// Packed into a vector to work around bug 1630356.
+flat varying ivec2 vSupport;
+flat varying vec2 vGaussCoefficients;
+
+#ifdef WR_VERTEX_SHADER
+// Applies a separable gaussian blur in one direction, as specified
+// by the dir field in the blur command.
+
+#define DIR_HORIZONTAL  0
+#define DIR_VERTICAL    1
+
+PER_INSTANCE in int aBlurRenderTaskAddress;
+PER_INSTANCE in int aBlurSourceTaskAddress;
+PER_INSTANCE in int aBlurDirection;
+
+struct BlurTask {
+    RectWithEndpoint task_rect; // Copied from the render task's task_rect.
+    float blur_radius;          // From the render task's user_data.x.
+    vec2 blur_region;           // From the render task's user_data.yz.
+};
+
+// Load the render task at `address` and repackage it as a BlurTask.
+BlurTask fetch_blur_task(int address) {
+    RenderTaskData data = fetch_render_task_data(address);
+    // user_data.x becomes blur_radius and user_data.yz becomes blur_region;
+    // the task rect is passed through as-is.
+    return BlurTask(
+        data.task_rect,
+        data.user_data.x,
+        data.user_data.yz
+    );
+}
+
+void calculate_gauss_coefficients(float sigma) { // Writes vGaussCoefficients; reads vSupport.x.
+    // Incremental Gaussian Coefficient Calculation (See GPU Gems 3 pp. 877 - 889)
+    vGaussCoefficients = vec2(1.0 / (sqrt(2.0 * 3.14159265) * sigma),
+                              exp(-0.5 / (sigma * sigma)));
+
+    // Pre-calculate the coefficient total in the vertex shader so that
+    // we can avoid having to do it per-fragment and also avoid division
+    // by zero in the degenerate case.
+    vec3 gauss_coefficient = vec3(vGaussCoefficients,
+                                  vGaussCoefficients.y * vGaussCoefficients.y);
+    float gauss_coefficient_total = gauss_coefficient.x;
+    for (int i = 1; i <= vSupport.x; i += 2) { // Two coefficient steps per iteration, as in the fragment loop.
+        gauss_coefficient.xy *= gauss_coefficient.yz;
+        float gauss_coefficient_subtotal = gauss_coefficient.x;
+        gauss_coefficient.xy *= gauss_coefficient.yz;
+        gauss_coefficient_subtotal += gauss_coefficient.x;
+        gauss_coefficient_total += 2.0 * gauss_coefficient_subtotal; // x2: the fragment shader samples both sides.
+    }
+
+    // Scale initial coefficient by total to avoid passing the total separately
+    // to the fragment shader.
+    vGaussCoefficients.x /= gauss_coefficient_total;
+}
+
+void main(void) { // Vertex stage of one blur pass; aBlurDirection selects the axis.
+    BlurTask blur_task = fetch_blur_task(aBlurRenderTaskAddress);
+    RectWithEndpoint src_rect = fetch_render_task_rect(aBlurSourceTaskAddress);
+
+    RectWithEndpoint target_rect = blur_task.task_rect;
+
+    vec2 texture_size = vec2(TEX_SIZE(sColor0).xy);
+
+    // Ensure that the support is an even number of pixels to simplify the
+    // fragment shader logic.
+    //
+    // TODO(pcwalton): Actually make use of this fact and use the texture
+    // hardware for linear filtering.
+    vSupport.x = int(ceil(1.5 * blur_task.blur_radius)) * 2;
+
+    if (vSupport.x > 0) {
+        calculate_gauss_coefficients(blur_task.blur_radius);
+    } else {
+        // The gauss function gets NaNs when blur radius is zero.
+        vGaussCoefficients = vec2(1.0, 1.0);
+    }
+
+    switch (aBlurDirection) {
+        case DIR_HORIZONTAL:
+            vOffsetScale = vec2(1.0 / texture_size.x, 0.0);
+            break;
+        case DIR_VERTICAL:
+            vOffsetScale = vec2(0.0, 1.0 / texture_size.y);
+            break;
+        default:
+            vOffsetScale = vec2(0.0); // Unknown direction: no sample offset.
+    }
+    // Sample bounds: the blur region inset by half a texel, then normalized.
+    vUvRect = vec4(src_rect.p0 + vec2(0.5),
+                   src_rect.p0 + blur_task.blur_region - vec2(0.5));
+    vUvRect /= texture_size.xyxy;
+
+    vec2 pos = mix(target_rect.p0, target_rect.p1, aPosition.xy);
+
+    vec2 uv0 = src_rect.p0 / texture_size;
+    vec2 uv1 = src_rect.p1 / texture_size;
+    vUv = mix(uv0, uv1, aPosition.xy);
+
+    gl_Position = uTransform * vec4(pos, 0.0, 1.0);
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+
+#if defined WR_FEATURE_COLOR_TARGET
+#define SAMPLE_TYPE vec4
+#define SAMPLE_TEXTURE(uv)  texture(sColor0, uv)
+#else
+#define SAMPLE_TYPE float
+#define SAMPLE_TEXTURE(uv)  texture(sColor0, uv).r
+#endif
+
+// TODO(gw): Write a fast path blur that handles smaller blur radii
+//           with an offset / weight uniform table and a constant
+//           loop iteration count!
+
+void main(void) { // Fragment stage: weighted sum of the center texel and paired side samples.
+    SAMPLE_TYPE original_color = SAMPLE_TEXTURE(vUv);
+
+    // Incremental Gaussian Coefficient Calculation (See GPU Gems 3 pp. 877 - 889)
+    vec3 gauss_coefficient = vec3(vGaussCoefficients,
+                                  vGaussCoefficients.y * vGaussCoefficients.y);
+
+    SAMPLE_TYPE avg_color = original_color * gauss_coefficient.x;
+
+    // Evaluate two adjacent texels at a time. We can do this because, if c0
+    // and c1 are colors of adjacent texels and k0 and k1 are arbitrary
+    // factors, this formula:
+    //
+    //     k0 * c0 + k1 * c1          (Equation 1)
+    //
+    // is equivalent to:
+    //
+    //                                 k1
+    //     (k0 + k1) * lerp(c0, c1, -------)
+    //                              k0 + k1
+    //
+    // A texture lookup of adjacent texels evaluates this formula:
+    //
+    //     lerp(c0, c1, t)
+    //
+    // for some t. So we can let `t = k1/(k0 + k1)` and effectively evaluate
+    // Equation 1 with a single texture lookup.
+    //
+    // Clamp loop condition variable to a statically known value to work around
+    // driver bug on Adreno 3xx. vSupport should not exceed 300 anyway, due to
+    // the max blur radius being 100. See bug 1720841 for details.
+    int support = min(vSupport.x, 300);
+    for (int i = 1; i <= support; i += 2) {
+        gauss_coefficient.xy *= gauss_coefficient.yz;
+
+        float gauss_coefficient_subtotal = gauss_coefficient.x;
+        gauss_coefficient.xy *= gauss_coefficient.yz;
+        gauss_coefficient_subtotal += gauss_coefficient.x;
+        float gauss_ratio = gauss_coefficient.x / gauss_coefficient_subtotal;
+
+        vec2 offset = vOffsetScale * (float(i) + gauss_ratio);
+        // Clamp both sample positions so they stay inside vUvRect.
+        vec2 st0 = max(vUv - offset, vUvRect.xy);
+        vec2 st1 = min(vUv + offset, vUvRect.zw);
+        avg_color += (SAMPLE_TEXTURE(st0) + SAMPLE_TEXTURE(st1)) *
+                     gauss_coefficient_subtotal;
+    }
+
+    oFragColor = vec4(avg_color); // When SAMPLE_TYPE is float, vec4() replicates it to all lanes.
+}
+
+#ifdef SWGL_DRAW_SPAN
+    #ifdef WR_FEATURE_COLOR_TARGET
+void swgl_drawSpanRGBA8() { // vOffsetScale.x is set nonzero only in the DIR_HORIZONTAL case.
+    swgl_commitGaussianBlurRGBA8(sColor0, vUv, vUvRect, vOffsetScale.x != 0.0,
+                                 vSupport.x, vGaussCoefficients);
+}
+    #else
+void swgl_drawSpanR8() { // vOffsetScale.x is set nonzero only in the DIR_HORIZONTAL case.
+    swgl_commitGaussianBlurR8(sColor0, vUv, vUvRect, vOffsetScale.x != 0.0,
+                              vSupport.x, vGaussCoefficients);
+}
+    #endif
+#endif
+
+#endif
diff --git a/gfx/wr/webrender/res/cs_border_segment.glsl b/gfx/wr/webrender/res/cs_border_segment.glsl
new file mode 100644
index 0000000000..66c7d2d798
--- /dev/null
+++ b/gfx/wr/webrender/res/cs_border_segment.glsl
@@ -0,0 +1,450 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include shared,rect,ellipse
+
+// For edges, the colors are the same. For corners, these
+// are the colors of each edge making up the corner.
+flat varying vec4 vColor00;
+flat varying vec4 vColor01;
+flat varying vec4 vColor10;
+flat varying vec4 vColor11;
+
+// A point + tangent defining the line where the edge
+// transition occurs. Used for corners only.
+flat varying vec4 vColorLine;
+
+// x: segment, y: clip mode
+// We cast these to/from floats rather than using an ivec due to a driver bug
+// on Adreno 3xx. See bug 1730458.
+flat varying mediump vec2 vSegmentClipMode;
+// x, y: styles, z, w: edge axes
+// We cast these to/from floats rather than using an ivec (and bitshifting)
+// due to a driver bug on Adreno 3xx. See bug 1730458.
+flat varying mediump vec4 vStyleEdgeAxis;
+
+// xy = Local space position of the clip center.
+// zw = Scale the rect origin by this to get the outer
+// corner from the segment rectangle.
+flat varying vec4 vClipCenter_Sign;
+
+// The outer and inner elliptical radii for border
+// corner clipping.
+flat varying vec4 vClipRadii;
+
+// Reference point for determining the edge clip lines.
+flat varying vec4 vEdgeReference;
+
+// Stores widths/2 and widths/3 to save doing this in FS.
+flat varying vec4 vPartialWidths;
+
+// Clipping parameters for dot or dash.
+flat varying vec4 vClipParams1;
+flat varying vec4 vClipParams2;
+
+// Local space position
+varying vec2 vPos;
+
+#define SEGMENT_TOP_LEFT        0
+#define SEGMENT_TOP_RIGHT       1
+#define SEGMENT_BOTTOM_RIGHT    2
+#define SEGMENT_BOTTOM_LEFT     3
+#define SEGMENT_LEFT            4
+#define SEGMENT_TOP             5
+#define SEGMENT_RIGHT           6
+#define SEGMENT_BOTTOM          7
+
+// Border styles as defined in webrender_api/types.rs
+#define BORDER_STYLE_NONE         0
+#define BORDER_STYLE_SOLID        1
+#define BORDER_STYLE_DOUBLE       2
+#define BORDER_STYLE_DOTTED       3
+#define BORDER_STYLE_DASHED       4
+#define BORDER_STYLE_HIDDEN       5
+#define BORDER_STYLE_GROOVE       6
+#define BORDER_STYLE_RIDGE        7
+#define BORDER_STYLE_INSET        8
+#define BORDER_STYLE_OUTSET       9
+
+#define CLIP_NONE        0
+#define CLIP_DASH_CORNER 1
+#define CLIP_DASH_EDGE   2
+#define CLIP_DOT         3
+
+#ifdef WR_VERTEX_SHADER
+
+PER_INSTANCE in vec2 aTaskOrigin;
+PER_INSTANCE in vec4 aRect;
+PER_INSTANCE in vec4 aColor0;
+PER_INSTANCE in vec4 aColor1;
+PER_INSTANCE in int aFlags;
+PER_INSTANCE in vec2 aWidths;
+PER_INSTANCE in vec2 aRadii;
+PER_INSTANCE in vec4 aClipParams1;
+PER_INSTANCE in vec4 aClipParams2;
+
+// Selects the outer corner of a corner segment's rect as a 0.0/1.0 factor
+// per axis; multiplying it by the rect size gives the corner's offset from
+// the rect origin.
+vec2 get_outer_corner_scale(int segment) {
+    // x selects left (0.0) / right (1.0); y selects top (0.0) / bottom (1.0).
+    vec2 scale;
+
+    if (segment == SEGMENT_TOP_LEFT) {
+        scale = vec2(0.0, 0.0);
+    } else if (segment == SEGMENT_TOP_RIGHT) {
+        scale = vec2(1.0, 0.0);
+    } else if (segment == SEGMENT_BOTTOM_RIGHT) {
+        scale = vec2(1.0, 1.0);
+    } else if (segment == SEGMENT_BOTTOM_LEFT) {
+        scale = vec2(0.0, 1.0);
+    } else {
+        // Not a corner: the result is only used for the corner segments
+        // handled above, so this just gives the variable a defined value.
+        scale = vec2(0.0);
+    }
+
+    return scale;
+}
+
+
+// NOTE(emilio): If you change this algorithm, do the same change
+// in border.rs
+vec4 mod_color(vec4 color, bool is_black, bool lighter) {
+    // Fixed gray levels substituted for black, since scaling black's RGB
+    // would leave it unchanged.
+    const float light_black = 0.7;
+    const float dark_black = 0.3;
+    // RGB multipliers applied to every other color.
+    const float light_scale = 1.0;
+    const float dark_scale = 0.66666666;
+
+    vec3 rgb;
+    if (is_black) {
+        rgb = vec3(lighter ? light_black : dark_black);
+    } else {
+        rgb = color.rgb * (lighter ? light_scale : dark_scale);
+    }
+
+    // Alpha is passed through untouched in every case.
+    return vec4(rgb, color.a);
+}
+
+// Returns the pair of colors used to draw one border side in `style`.
+// Groove and ridge get the `lighter` and non-`lighter` variants produced by
+// mod_color, in opposite orders; every other style gets `color` twice.
+vec4[2] get_colors_for_side(vec4 color, int style) {
+    // Exact comparison: only pure black takes mod_color's black path.
+    bool is_black = color.rgb == vec3(0.0, 0.0, 0.0);
+    bool is_groove = style == BORDER_STYLE_GROOVE;
+    bool is_ridge = style == BORDER_STYLE_RIDGE;
+
+    vec4 shades[2];
+    if (is_groove || is_ridge) {
+        // Groove: [0] lighter, [1] darker. Ridge: the reverse.
+        shades[0] = mod_color(color, is_black, is_groove);
+        shades[1] = mod_color(color, is_black, is_ridge);
+    } else {
+        // All remaining styles use the unmodified color for both entries.
+        shades[0] = color;
+        shades[1] = color;
+    }
+
+    return shades;
+}
+
+void main(void) { // Vertex stage: unpack aFlags and set up the varyings for one border segment.
+    int segment = aFlags & 0xff;            // bits 0-7
+    int style0 = (aFlags >> 8) & 0xff;      // bits 8-15
+    int style1 = (aFlags >> 16) & 0xff;     // bits 16-23
+    int clip_mode = (aFlags >> 24) & 0x0f;  // bits 24-27
+
+    vec2 size = aRect.zw - aRect.xy;
+    vec2 outer_scale = get_outer_corner_scale(segment);
+    vec2 outer = outer_scale * size;
+    vec2 clip_sign = 1.0 - 2.0 * outer_scale; // +1.0 where outer_scale is 0.0, -1.0 where it is 1.0.
+
+    // Set some flags used by the FS to determine the
+    // orientation of the two edges in this corner.
+    ivec2 edge_axis = ivec2(0, 0);
+    // Derive the positions for the edge clips, which must be handled
+    // differently between corners and edges.
+    vec2 edge_reference = vec2(0.0);
+    switch (segment) {
+        case SEGMENT_TOP_LEFT:
+            edge_axis = ivec2(0, 1);
+            edge_reference = outer;
+            break;
+        case SEGMENT_TOP_RIGHT:
+            edge_axis = ivec2(1, 0);
+            edge_reference = vec2(outer.x - aWidths.x, outer.y);
+            break;
+        case SEGMENT_BOTTOM_RIGHT:
+            edge_axis = ivec2(0, 1);
+            edge_reference = outer - aWidths;
+            break;
+        case SEGMENT_BOTTOM_LEFT:
+            edge_axis = ivec2(1, 0);
+            edge_reference = vec2(outer.x, outer.y - aWidths.y);
+            break;
+        case SEGMENT_TOP:
+        case SEGMENT_BOTTOM:
+            edge_axis = ivec2(1, 1);
+            break;
+        case SEGMENT_LEFT:
+        case SEGMENT_RIGHT:
+        default:
+            break;
+    }
+
+    vSegmentClipMode = vec2(float(segment), float(clip_mode));
+    vStyleEdgeAxis = vec4(float(style0), float(style1), float(edge_axis.x), float(edge_axis.y));
+
+    vPartialWidths = vec4(aWidths / 3.0, aWidths / 2.0); // xy = widths / 3, zw = widths / 2.
+    vPos = size * aPosition.xy;
+
+    vec4[2] color0 = get_colors_for_side(aColor0, style0);
+    vColor00 = color0[0];
+    vColor01 = color0[1];
+    vec4[2] color1 = get_colors_for_side(aColor1, style1);
+    vColor10 = color1[0];
+    vColor11 = color1[1];
+    vClipCenter_Sign = vec4(outer + clip_sign * aRadii, clip_sign);
+    vClipRadii = vec4(aRadii, max(aRadii - aWidths, 0.0));
+    vColorLine = vec4(outer, aWidths.y * -clip_sign.y, aWidths.x * clip_sign.x);
+    vEdgeReference = vec4(edge_reference, edge_reference + aWidths);
+    vClipParams1 = aClipParams1;
+    vClipParams2 = aClipParams2;
+
+    // For the case of dot and dash clips, optimize the number of pixels that
+    // are hit to just include the dot itself.
+    if (clip_mode == CLIP_DOT) {
+        float radius = aClipParams1.z;
+
+        // Expand by a small amount to allow room for AA around
+        // the dot if it's big enough.
+        if (radius > 0.5)
+            radius += 2.0;
+
+        vPos = vClipParams1.xy + radius * (2.0 * aPosition.xy - 1.0);
+        vPos = clamp(vPos, vec2(0.0), size);
+    } else if (clip_mode == CLIP_DASH_CORNER) {
+        vec2 center = (aClipParams1.xy + aClipParams2.xy) * 0.5;
+        // This is a gross approximation which works out because dashes don't have
+        // a strong curvature and we will overshoot by inflating the geometry by
+        // this amount on each side (sqrt(2) * length(dash) would be enough and we
+        // compute 2 * approx_length(dash)).
+        float dash_length = length(aClipParams1.xy - aClipParams2.xy);
+        float width = max(aWidths.x, aWidths.y);
+        // expand by a small amount for AA just like we do for dots.
+        vec2 r = vec2(max(dash_length, width)) + 2.0;
+        vPos = clamp(vPos, center - r, center + r);
+    }
+
+    gl_Position = uTransform * vec4(aTaskOrigin + aRect.xy + vPos, 0.0, 1.0);
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+vec4 evaluate_color_for_style_in_corner( // Returns color0 adjusted for `style` at a corner.
+    vec2 clip_relative_pos,
+    int style,
+    vec4 color0,
+    vec4 color1,
+    vec4 clip_radii,
+    float mix_factor,
+    int segment,
+    float aa_range
+) {
+    switch (style) {
+        case BORDER_STYLE_DOUBLE: {
+            // Get the distances from 0.33 of the radii, and
+            // also 0.67 of the radii. Use these to form a
+            // SDF subtraction which will clip out the inside
+            // third of the rounded edge.
+            float d_radii_a = distance_to_ellipse(
+                clip_relative_pos,
+                clip_radii.xy - vPartialWidths.xy
+            );
+            float d_radii_b = distance_to_ellipse(
+                clip_relative_pos,
+                clip_radii.xy - 2.0 * vPartialWidths.xy
+            );
+            float d = min(-d_radii_a, d_radii_b);
+            color0 *= distance_aa(aa_range, d);
+            break;
+        }
+        case BORDER_STYLE_GROOVE:
+        case BORDER_STYLE_RIDGE: {
+            float d = distance_to_ellipse(
+                clip_relative_pos,
+                clip_radii.xy - vPartialWidths.zw // zw holds half of each width.
+            );
+            float alpha = distance_aa(aa_range, d);
+            float swizzled_factor;
+            switch (segment) {
+                case SEGMENT_TOP_LEFT: swizzled_factor = 0.0; break;
+                case SEGMENT_TOP_RIGHT: swizzled_factor = mix_factor; break;
+                case SEGMENT_BOTTOM_RIGHT: swizzled_factor = 1.0; break;
+                case SEGMENT_BOTTOM_LEFT: swizzled_factor = 1.0 - mix_factor; break;
+                default: swizzled_factor = 0.0; break;
+            };
+            vec4 c0 = mix(color1, color0, swizzled_factor); // c0 and c1 are the same blend with the
+            vec4 c1 = mix(color0, color1, swizzled_factor); // two input colors swapped.
+            color0 = mix(c0, c1, alpha);
+            break;
+        }
+        default:
+            break; // Any other style returns color0 unchanged.
+    }
+
+    return color0;
+}
+
+// Edge variant of the per-style color evaluation. Distances are measured
+// along a single axis chosen by edge_axis_id (non-zero selects y, zero
+// selects x). Styles other than DOUBLE, GROOVE and RIDGE return color0 as is.
+vec4 evaluate_color_for_style_in_edge(
+    vec2 pos_vec,
+    int style,
+    vec4 color0,
+    vec4 color1,
+    float aa_range,
+    int edge_axis_id
+) {
+    // Unit vector of the selected axis; dot() with it picks that component.
+    vec2 axis = edge_axis_id != 0 ? vec2(0.0, 1.0) : vec2(1.0, 0.0);
+    float coord = dot(pos_vec, axis);
+
+    if (style == BORDER_STYLE_DOUBLE) {
+        float stroke = dot(vPartialWidths.xy, axis);
+        // The distance keeps its -1.0 default unless the partial width
+        // reaches at least 1.0.
+        float dist = -1.0;
+        if (stroke >= 1.0) {
+            float low = dot(vEdgeReference.xy, axis) + stroke;
+            float high = dot(vEdgeReference.zw, axis) - stroke;
+            dist = min(coord - low, high - coord);
+        }
+        return color0 * distance_aa(aa_range, dist);
+    }
+
+    if (style == BORDER_STYLE_GROOVE || style == BORDER_STYLE_RIDGE) {
+        // Blend the two colors around the line that sits vPartialWidths.zw
+        // away from the first edge reference.
+        float split = dot(vEdgeReference.xy + vPartialWidths.zw, axis);
+        float blend = distance_aa(aa_range, coord - split);
+        return mix(color0, color1, blend);
+    }
+
+    return color0;
+}
+
+void main(void) { // Fragment stage: styles, clips and blends the two edge colors of a border segment.
+    float aa_range = compute_aa_range(vPos);
+    vec4 color0, color1;
+    // Convert the packed varyings into the integer ids used below.
+    int segment = int(vSegmentClipMode.x);
+    int clip_mode = int(vSegmentClipMode.y);
+    ivec2 style = ivec2(int(vStyleEdgeAxis.x), int(vStyleEdgeAxis.y));
+    ivec2 edge_axis = ivec2(int(vStyleEdgeAxis.z), int(vStyleEdgeAxis.w));
+    // Blend factor between color0 and color1; stays 0.0 when both edges share an axis.
+    float mix_factor = 0.0;
+    if (edge_axis.x != edge_axis.y) {
+        float d_line = distance_to_line(vColorLine.xy, vColorLine.zw, vPos);
+        mix_factor = distance_aa(aa_range, -d_line);
+    }
+
+    // Check if inside corner clip-region
+    vec2 clip_relative_pos = vPos - vClipCenter_Sign.xy;
+    bool in_clip_region = all(lessThan(vClipCenter_Sign.zw * clip_relative_pos, vec2(0.0)));
+    float d = -1.0; // Clip distance; keeps this default when no clip below changes it.
+
+    switch (clip_mode) {
+        case CLIP_DOT: {
+            // Set clip distance based on dot position and radius.
+            d = distance(vClipParams1.xy, vPos) - vClipParams1.z;
+            break;
+        }
+        case CLIP_DASH_EDGE: {
+            bool is_vertical = vClipParams1.x == 0.;
+            float half_dash = is_vertical ? vClipParams1.y : vClipParams1.x;
+            // We want to draw something like:
+            // +---+---+---+---+
+            // |xxx|   |   |xxx|
+            // +---+---+---+---+
+            float pos = is_vertical ? vPos.y : vPos.x;
+            bool in_dash = pos < half_dash || pos > 3.0 * half_dash;
+            if (!in_dash) {
+                d = 1.;
+            }
+            break;
+        }
+        case CLIP_DASH_CORNER: {
+            // Get SDF for the two line/tangent clip lines,
+            // do SDF subtract to get clip distance.
+            float d0 = distance_to_line(vClipParams1.xy,
+                                        vClipParams1.zw,
+                                        vPos);
+            float d1 = distance_to_line(vClipParams2.xy,
+                                        vClipParams2.zw,
+                                        vPos);
+            d = max(d0, -d1);
+            break;
+        }
+        case CLIP_NONE:
+        default:
+            break;
+    }
+
+    if (in_clip_region) {
+        // Corner region: fold the two ellipse distances into the clip distance
+        // and evaluate both edge styles with the corner variant.
+        float d_radii_a = distance_to_ellipse(clip_relative_pos, vClipRadii.xy);
+        float d_radii_b = distance_to_ellipse(clip_relative_pos, vClipRadii.zw);
+        float d_radii = max(d_radii_a, -d_radii_b);
+        d = max(d, d_radii);
+        color0 = evaluate_color_for_style_in_corner(
+            clip_relative_pos,
+            style.x,
+            vColor00,
+            vColor01,
+            vClipRadii,
+            mix_factor,
+            segment,
+            aa_range
+        );
+        color1 = evaluate_color_for_style_in_corner(
+            clip_relative_pos,
+            style.y,
+            vColor10,
+            vColor11,
+            vClipRadii,
+            mix_factor,
+            segment,
+            aa_range
+        );
+    } else {
+        color0 = evaluate_color_for_style_in_edge(
+            vPos,
+            style.x,
+            vColor00,
+            vColor01,
+            aa_range,
+            edge_axis.x
+        );
+        color1 = evaluate_color_for_style_in_edge(
+            vPos,
+            style.y,
+            vColor10,
+            vColor11,
+            aa_range,
+            edge_axis.y
+        );
+    }
+    // Blend the two styled colors and scale the result by the clip coverage.
+    float alpha = distance_aa(aa_range, d);
+    vec4 color = mix(color0, color1, mix_factor);
+    oFragColor = color * alpha;
+}
+#endif
diff --git a/gfx/wr/webrender/res/cs_border_solid.glsl b/gfx/wr/webrender/res/cs_border_solid.glsl
new file mode 100644
index 0000000000..ff7ac2f780
--- /dev/null
+++ b/gfx/wr/webrender/res/cs_border_solid.glsl
@@ -0,0 +1,178 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include shared,rect,ellipse
+
+#define DONT_MIX 0
+#define MIX_AA 1
+#define MIX_NO_AA 2
+
+// For edges, the colors are the same. For corners, these
+// are the colors of each edge making up the corner.
+flat varying vec4 vColor0;
+flat varying vec4 vColor1;
+
+// A point + tangent defining the line where the edge
+// transition occurs. Used for corners only.
+flat varying vec4 vColorLine;
+
+// Color mixing mode in .x: one of DONT_MIX, MIX_AA or MIX_NO_AA.
+// Packed in to a vector to work around bug 1630356.
+flat varying ivec2 vMixColors;
+
+// xy = Local space position of the clip center.
+// zw = Per-axis sign (+1 / -1) pointing from the outer corner
+// of the segment rectangle towards the clip center.
+flat varying vec4 vClipCenter_Sign;
+
+// xy = outer and zw = inner elliptical radii for border
+// corner clipping.
+flat varying vec4 vClipRadii;
+
+// Position, scale, and radii of horizontally and vertically adjacent corner clips.
+flat varying vec4 vHorizontalClipCenter_Sign;
+flat varying vec2 vHorizontalClipRadii;
+flat varying vec4 vVerticalClipCenter_Sign;
+flat varying vec2 vVerticalClipRadii;
+
+// Local space position, relative to the origin of the segment rect.
+varying vec2 vPos;
+
+#define SEGMENT_TOP_LEFT        0
+#define SEGMENT_TOP_RIGHT       1
+#define SEGMENT_BOTTOM_RIGHT    2
+#define SEGMENT_BOTTOM_LEFT     3
+
+#ifdef WR_VERTEX_SHADER
+
+PER_INSTANCE in vec2 aTaskOrigin; // Added to aRect.xy and the local position before uTransform is applied.
+PER_INSTANCE in vec4 aRect; // xy = segment rect origin, zw = opposite corner (size = zw - xy).
+PER_INSTANCE in vec4 aColor0;
+PER_INSTANCE in vec4 aColor1;
+PER_INSTANCE in int aFlags; // Bits 0-7 = segment id; any of bits 28-31 set enables anti-aliasing.
+PER_INSTANCE in vec2 aWidths;
+PER_INSTANCE in vec2 aRadii; // Outer corner radii; the inner radii are max(aRadii - aWidths, 0).
+PER_INSTANCE in vec4 aClipParams1; // Horizontally adjacent corner clip: xy = position, zw = radii.
+PER_INSTANCE in vec4 aClipParams2; // Vertically adjacent corner clip: xy = position, zw = radii.
+
+// Maps a corner segment id to the 0/1 scale that selects the rect corner
+// lying on the outside of the border:
+//   top-left     -> (0, 0)
+//   top-right    -> (1, 0)
+//   bottom-right -> (1, 1)
+//   bottom-left  -> (0, 1)
+// Multiply the result by the rect size to get the corner position.
+vec2 get_outer_corner_scale(int segment) {
+    if (segment == SEGMENT_TOP_RIGHT) {
+        return vec2(1.0, 0.0);
+    }
+
+    if (segment == SEGMENT_BOTTOM_RIGHT) {
+        return vec2(1.0, 1.0);
+    }
+
+    if (segment == SEGMENT_BOTTOM_LEFT) {
+        return vec2(0.0, 1.0);
+    }
+
+    // SEGMENT_TOP_LEFT, plus every non-corner id. For the latter the value
+    // is a placeholder: the result is only used for the corner segments.
+    return vec2(0.0, 0.0);
+}
+
+// Vertex stage: decodes aFlags, positions the vertex within the segment rect
+// and forwards color and corner-clip parameters to the fragment stage.
+void main(void) {
+    // Low byte = segment id; the top four bits of aFlags enable AA.
+    int segment_id = aFlags & 0xff;
+    bool wants_aa = ((aFlags >> 24) & 0xf0) != 0;
+
+    vec2 extent = aRect.zw - aRect.xy;
+    vec2 corner_scale = get_outer_corner_scale(segment_id);
+    vec2 outer_corner = corner_scale * extent;
+    // +1 / -1 per axis, pointing from the outer corner into the rect.
+    vec2 inward = 1.0 - 2.0 * corner_scale;
+
+    // Only the four corner segments blend two edge colors.
+    bool is_corner = segment_id == SEGMENT_TOP_LEFT
+        || segment_id == SEGMENT_TOP_RIGHT
+        || segment_id == SEGMENT_BOTTOM_RIGHT
+        || segment_id == SEGMENT_BOTTOM_LEFT;
+    if (is_corner) {
+        vMixColors.x = wants_aa ? MIX_AA : MIX_NO_AA;
+    } else {
+        vMixColors.x = DONT_MIX;
+    }
+
+    vPos = extent * aPosition.xy;
+    vColor0 = aColor0;
+    vColor1 = aColor1;
+
+    vColorLine = vec4(outer_corner, aWidths.y * -inward.y, aWidths.x * inward.x);
+    vClipRadii = vec4(aRadii, max(aRadii - aWidths, 0.0));
+    vClipCenter_Sign = vec4(outer_corner + inward * aRadii, inward);
+
+    // Adjacent corners mirror the sign on one axis: x for the horizontal
+    // neighbour, y for the vertical one.
+    vec2 h_sign = vec2(-inward.x, inward.y);
+    vec2 h_center = aClipParams1.xy + h_sign * aClipParams1.zw;
+    vHorizontalClipCenter_Sign = vec4(h_center, h_sign);
+    vHorizontalClipRadii = aClipParams1.zw;
+
+    vec2 v_sign = vec2(inward.x, -inward.y);
+    vec2 v_center = aClipParams2.xy + v_sign * aClipParams2.zw;
+    vVerticalClipCenter_Sign = vec4(v_center, v_sign);
+    vVerticalClipRadii = aClipParams2.zw;
+
+    gl_Position = uTransform * vec4(aTaskOrigin + aRect.xy + vPos, 0.0, 1.0);
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+// Fragment stage: blends the two edge colors and clips the result against
+// this segment's corner ellipses and the two adjacent corner clips.
+void main(void) {
+    float aa_range = compute_aa_range(vPos);
+    bool smooth_edges = vMixColors.x != MIX_NO_AA; // true for DONT_MIX as well
+
+    // Weight of vColor1, derived from the distance to the color line.
+    float color_weight = 0.0;
+    if (vMixColors.x != DONT_MIX) {
+        float line_dist = distance_to_line(vColorLine.xy, vColorLine.zw, vPos);
+        if (smooth_edges) {
+            color_weight = distance_aa(aa_range, -line_dist);
+        } else if (line_dist + EPSILON >= 0.) {
+            color_weight = 1.0;
+        }
+    }
+
+    float clip_dist = -1.0; // kept when no clip region contains the fragment
+
+    // Main corner: combine the outer (xy) and inner (zw) ellipse distances.
+    vec2 corner_pos = vPos - vClipCenter_Sign.xy;
+    if (all(lessThan(vClipCenter_Sign.zw * corner_pos, vec2(0.0)))) {
+        float outer_dist = distance_to_ellipse(corner_pos, vClipRadii.xy);
+        float inner_dist = distance_to_ellipse(corner_pos, vClipRadii.zw);
+        clip_dist = max(outer_dist, -inner_dist);
+    }
+
+    // Horizontally adjacent corner.
+    vec2 horizontal_pos = vPos - vHorizontalClipCenter_Sign.xy;
+    if (all(lessThan(vHorizontalClipCenter_Sign.zw * horizontal_pos, vec2(0.0)))) {
+        clip_dist = max(distance_to_ellipse(horizontal_pos, vHorizontalClipRadii.xy), clip_dist);
+    }
+
+    // Vertically adjacent corner.
+    vec2 vertical_pos = vPos - vVerticalClipCenter_Sign.xy;
+    if (all(lessThan(vVerticalClipCenter_Sign.zw * vertical_pos, vec2(0.0)))) {
+        clip_dist = max(distance_to_ellipse(vertical_pos, vVerticalClipRadii.xy), clip_dist);
+    }
+
+    float coverage = 1.0;
+    if (smooth_edges) {
+        coverage = distance_aa(aa_range, clip_dist);
+    }
+    oFragColor = mix(vColor0, vColor1, color_weight) * coverage;
+}
+#endif
diff --git a/gfx/wr/webrender/res/cs_clip_box_shadow.glsl b/gfx/wr/webrender/res/cs_clip_box_shadow.glsl
new file mode 100644
index 0000000000..8087098f4c
--- /dev/null
+++ b/gfx/wr/webrender/res/cs_clip_box_shadow.glsl
@@ -0,0 +1,327 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include shared,clip_shared
+
+varying vec4 vLocalPos; // Homogeneous local position; the fragment stage divides by .w.
+varying vec2 vUv; // Nine-patch UV, pre-multiplied by the local position's w.
+flat varying vec4 vUvBounds; // Source UV rect inset by half a texel, divided by the texture size.
+flat varying vec4 vEdge; // xy = upper clamp applied to the UV, zw = offset past which the UV advances again.
+flat varying vec4 vUvBounds_NoClamp; // Source UV rect without the half-texel inset, divided by the texture size.
+// Clip mode in .x, stored as a float. Packed in to a vector to avoid bug 1630356.
+flat varying vec2 vClipMode;
+
+#define MODE_STRETCH        0
+#define MODE_SIMPLE         1
+
+#ifdef WR_VERTEX_SHADER
+
+PER_INSTANCE in ivec2 aClipDataResourceAddress; // Passed to fetch_image_source_direct() to look up the shadow image.
+PER_INSTANCE in vec2 aClipSrcRectSize;
+PER_INSTANCE in int aClipMode; // Forwarded to the fragment stage as a float in vClipMode.x.
+PER_INSTANCE in ivec2 aStretchMode; // x / y = MODE_STRETCH or MODE_SIMPLE for each axis.
+PER_INSTANCE in vec4 aClipDestRect; // xy / zw = the two endpoints of the destination rect.
+
+struct ClipMaskInstanceBoxShadow {
+    ClipMaskInstanceCommon base; // Result of fetch_clip_item_common().
+    ivec2 resource_address; // Copy of aClipDataResourceAddress.
+};
+
+// Pairs the shared clip-instance fields with this instance's resource address.
+ClipMaskInstanceBoxShadow fetch_clip_item() {
+    // Constructor arguments follow the struct's field order:
+    // base, then resource_address.
+    return ClipMaskInstanceBoxShadow(
+        fetch_clip_item_common(),
+        aClipDataResourceAddress);
+}
+
+struct BoxShadowData {
+    vec2 src_rect_size; // From aClipSrcRectSize; divisor for the UV in MODE_STRETCH.
+    int clip_mode; // From aClipMode.
+    int stretch_mode_x; // MODE_STRETCH or MODE_SIMPLE, from aStretchMode.x.
+    int stretch_mode_y; // MODE_STRETCH or MODE_SIMPLE, from aStretchMode.y.
+    RectWithEndpoint dest_rect; // Built from aClipDestRect (xy, zw).
+};
+
+// Packs the box-shadow vertex attributes into a BoxShadowData value.
+BoxShadowData fetch_data() {
+    BoxShadowData data;
+    data.src_rect_size = aClipSrcRectSize;
+    data.clip_mode = aClipMode;
+    data.stretch_mode_x = aStretchMode.x;
+    data.stretch_mode_y = aStretchMode.y;
+    data.dest_rect = RectWithEndpoint(aClipDestRect.xy, aClipDestRect.zw);
+    return data;
+}
+
+void main(void) { // Vertex stage of the box-shadow clip mask: sets up the nine-patch UV mapping.
+    ClipMaskInstanceBoxShadow cmi = fetch_clip_item();
+    Transform clip_transform = fetch_transform(cmi.base.clip_transform_id);
+    Transform prim_transform = fetch_transform(cmi.base.prim_transform_id);
+    BoxShadowData bs_data = fetch_data();
+    ImageSource res = fetch_image_source_direct(cmi.resource_address);
+
+    RectWithEndpoint dest_rect = bs_data.dest_rect;
+
+    ClipVertexInfo vi = write_clip_tile_vertex(
+        dest_rect,
+        prim_transform,
+        clip_transform,
+        cmi.base.sub_rect,
+        cmi.base.task_origin,
+        cmi.base.screen_origin,
+        cmi.base.device_pixel_scale
+    );
+    vClipMode.x = float(bs_data.clip_mode);
+
+    vec2 texture_size = vec2(TEX_SIZE(sColor0));
+    vec2 local_pos = vi.local_pos.xy / vi.local_pos.w; // Divide out w to get the 2D local position.
+    vLocalPos = vi.local_pos;
+    vec2 dest_rect_size = rect_size(dest_rect);
+    // Horizontal axis: choose the UV scale and the vEdge.x / vEdge.z values for the stretch mode.
+    switch (bs_data.stretch_mode_x) {
+        case MODE_STRETCH: {
+            // UV counts source-rect widths: the fragment stage clamps it at
+            // vEdge.x and lets it advance again past vEdge.z.
+            vEdge.x = 0.5;
+            vEdge.z = (dest_rect_size.x / bs_data.src_rect_size.x) - 0.5;
+            vUv.x = (local_pos.x - dest_rect.p0.x) / bs_data.src_rect_size.x;
+            break;
+        }
+        case MODE_SIMPLE:
+        default: {
+            vEdge.xz = vec2(1.0);
+            vUv.x = (local_pos.x - dest_rect.p0.x) / dest_rect_size.x; // Fraction of the destination rect width.
+            break;
+        }
+    }
+    // Vertical axis: same scheme as above, writing vEdge.y / vEdge.w and vUv.y.
+    switch (bs_data.stretch_mode_y) {
+        case MODE_STRETCH: {
+            vEdge.y = 0.5;
+            vEdge.w = (dest_rect_size.y / bs_data.src_rect_size.y) - 0.5;
+            vUv.y = (local_pos.y - dest_rect.p0.y) / bs_data.src_rect_size.y;
+            break;
+        }
+        case MODE_SIMPLE:
+        default: {
+            vEdge.yw = vec2(1.0);
+            vUv.y = (local_pos.y - dest_rect.p0.y) / dest_rect_size.y; // Fraction of the destination rect height.
+            break;
+        }
+    }
+    vUv *= vi.local_pos.w; // Pre-multiply by w; the fragment stage divides by the interpolated w again.
+    vec2 uv0 = res.uv_rect.p0;
+    vec2 uv1 = res.uv_rect.p1;
+    vUvBounds = vec4(uv0 + vec2(0.5), uv1 - vec2(0.5)) / texture_size.xyxy; // Inset by half a texel on every side.
+    vUvBounds_NoClamp = vec4(uv0, uv1) / texture_size.xyxy;
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+// Samples the box-shadow mask through the nine-patch mapping and writes the
+// clip value to every channel.
+void main(void) {
+    // Undo the multiplication by w done in the vertex stage.
+    vec2 linear_uv = vUv / vLocalPos.w;
+    // Clamp to vEdge.xy, then add whatever lies beyond vEdge.zw.
+    vec2 patch_uv = clamp(linear_uv, vec2(0.0), vEdge.xy);
+    patch_uv += max(vec2(0.0), linear_uv - vEdge.zw);
+    vec2 sample_uv = mix(vUvBounds_NoClamp.xy, vUvBounds_NoClamp.zw, patch_uv);
+    sample_uv = clamp(sample_uv, vUvBounds.xy, vUvBounds.zw);
+    float inside = init_transform_rough_fs(vLocalPos.xy / vLocalPos.w);
+    float shadow = TEX_SAMPLE(sColor0, sample_uv).r;
+    // vClipMode.x = 1.0 inverts the mask and makes 1.0 the outside value.
+    float masked = mix(shadow, 1.0 - shadow, vClipMode.x);
+    oFragColor = vec4(vLocalPos.w > 0.0 ? mix(vClipMode.x, masked, inside) : 0.0);
+}
+
+#ifdef SWGL_DRAW_SPAN
+// As with cs_clip_rectangle, this shader spends a lot of time doing clipping and
+// combining for every fragment, even if outside of the primitive to initialize
+// the clip tile, or inside the inner bounds of the primitive, where the shadow
+// is unnecessary. To alleviate this, the span shader attempts to first intersect
+// the local clip bounds, outside of which we can just use a solid fill
+// to initialize those clip tile fragments. Once inside the primitive bounds,
+// we further intersect with the inner region where no shadow is necessary either
+// so that we can commit entire spans of texture within this nine-patch region
+// instead of having to do the work of mapping per fragment.
+void swgl_drawSpanR8() {
+    // Perspective is not supported.
+    if (swgl_interpStep(vLocalPos).w != 0.0) {
+        return;
+    }
+
+    // If the span is completely outside the Z-range and clipped out, just
+    // output clear so we don't need to consider invalid W in the rest of the
+    // shader.
+    float w = swgl_forceScalar(vLocalPos.w);
+    if (w <= 0.0) {
+        swgl_commitSolidR8(0.0);
+        return;
+    }
+
+    // To start, we evaluate the box shadow in both UV and local space relative
+    // to the local-space position. This will be interpolated across the span to
+    // track whether we intersect the nine-patch.
+    w = 1.0 / w; // From here on w holds the reciprocal.
+    vec2 uv_linear = vUv * w;
+    vec2 uv_linear0 = swgl_forceScalar(uv_linear);
+    vec2 uv_linear_step = swgl_interpStep(vUv).xy * w;
+    vec2 local_pos = vLocalPos.xy * w;
+    vec2 local_pos0 = swgl_forceScalar(local_pos);
+    vec2 local_step = swgl_interpStep(vLocalPos).xy * w;
+
+    // We need to compute the local-space distance to the bounding box and then
+    // figure out how many processing steps that maps to. If we are stepping in
+    // a negative direction on an axis, we need to swap the sides of the box
+    // which we consider as the start or end. If there is no local-space step
+    // on an axis (i.e. constant Y), we need to take care to force the steps to
+    // either the start or end of the span depending on if we are inside or
+    // outside of the bounding box.
+    vec4 clip_dist =
+        mix(vTransformBounds, vTransformBounds.zwxy, lessThan(local_step, vec2(0.0)).xyxy)
+            - local_pos0.xyxy;
+    clip_dist =
+        mix(1.0e6 * step(0.0, clip_dist),
+            clip_dist * recip(local_step).xyxy,
+            notEqual(local_step, vec2(0.0)).xyxy);
+
+    // Find the start and end of the shadowed region on this span.
+    float shadow_start = max(clip_dist.x, clip_dist.y);
+    float shadow_end = min(clip_dist.z, clip_dist.w);
+
+    // Flip the offsets from the start of the span so we can compare against the
+    // remaining span length which automatically deducts as we commit fragments.
+    ivec2 shadow_steps = ivec2(clamp(
+        swgl_SpanLength - swgl_StepSize * vec2(floor(shadow_start), ceil(shadow_end)),
+        0.0, swgl_SpanLength));
+    int shadow_start_len = shadow_steps.x;
+    int shadow_end_len = shadow_steps.y;
+
+    // Likewise, once inside the primitive bounds, we also need to track which
+    // sector of the nine-patch we are in which requires intersecting against
+    // the inner box instead of the outer box.
+    vec4 opaque_dist =
+        mix(vEdge, vEdge.zwxy, lessThan(uv_linear_step, vec2(0.0)).xyxy)
+            - uv_linear0.xyxy;
+    opaque_dist =
+        mix(1.0e6 * step(0.0, opaque_dist),
+            opaque_dist * recip(uv_linear_step).xyxy,
+            notEqual(uv_linear_step, vec2(0.0)).xyxy);
+
+    // Unlike for the shadow clipping bounds, here we need to rather find the floor of all
+    // the offsets so that we don't accidentally process any chunks in the transitional areas
+    // between sectors of the nine-patch.
+    ivec4 opaque_steps = ivec4(clamp(
+        swgl_SpanLength -
+            swgl_StepSize *
+                vec4(floor(opaque_dist.x), floor(opaque_dist.y), floor(opaque_dist.z), floor(opaque_dist.w)),
+        shadow_end_len, swgl_SpanLength));
+
+    // Fill any initial sections of the span that are clipped out based on clip mode.
+    if (swgl_SpanLength > shadow_start_len) {
+        int num_before = swgl_SpanLength - shadow_start_len;
+        swgl_commitPartialSolidR8(num_before, vClipMode.x);
+        float steps_before = float(num_before / swgl_StepSize);
+        uv_linear += steps_before * uv_linear_step;
+        local_pos += steps_before * local_step;
+    }
+
+    // This loop tries to repeatedly process entire spans of the nine-patch that map
+    // to contiguous spans of texture in the source box shadow. First, we process
+    // a chunk with per-fragment clipping and mapping in case we're starting on a
+    // transitional region between sectors of the nine-patch which may need to map
+    // to different spans of texture per-fragment. After, we find the largest span
+    // within the current sector before we hit the next transitional region, and
+    // attempt to commit an entire span of texture therein.
+    while (swgl_SpanLength > 0) {
+        // Here we might be in a transitional chunk, so do everything per-fragment.
+        {
+            vec2 uv = clamp(uv_linear, vec2(0.0), vEdge.xy);
+            uv += max(vec2(0.0), uv_linear - vEdge.zw);
+            uv = mix(vUvBounds_NoClamp.xy, vUvBounds_NoClamp.zw, uv);
+            uv = clamp(uv, vUvBounds.xy, vUvBounds.zw);
+
+            float in_shadow_rect = init_transform_rough_fs(local_pos);
+
+            float texel = TEX_SAMPLE(sColor0, uv).r;
+
+            float alpha = mix(texel, 1.0 - texel, vClipMode.x);
+            float result = mix(vClipMode.x, alpha, in_shadow_rect);
+            swgl_commitColorR8(result);
+
+            uv_linear += uv_linear_step;
+            local_pos += local_step;
+        }
+        // If we now hit the end of the clip bounds, just bail out since there is
+        // no more shadow to map.
+        if (swgl_SpanLength <= shadow_end_len) {
+            break;
+        }
+        // By here we've determined to be still inside the nine-patch. We need to
+        // compare against the inner rectangle thresholds to see which sector of
+        // the nine-patch to use and thus how to map the box shadow texture. Stop
+        // at least one step before the end of the shadow region to properly clip
+        // on the boundary.
+        int num_inside = swgl_SpanLength - swgl_StepSize - shadow_end_len;
+        vec4 uv_bounds = vUvBounds;
+        if (swgl_SpanLength >= opaque_steps.y) {
+            // We're in the top Y band of the nine-patch.
+            num_inside = min(num_inside, swgl_SpanLength - opaque_steps.y);
+        } else if (swgl_SpanLength >= opaque_steps.w) {
+            // We're in the middle Y band of the nine-patch. Set the UV clamp bounds
+            // to the vertical center texel of the box shadow.
+            num_inside = min(num_inside, swgl_SpanLength - opaque_steps.w);
+            uv_bounds.yw = vec2(clamp(mix(vUvBounds_NoClamp.y, vUvBounds_NoClamp.w, vEdge.y),
+                                      vUvBounds.y, vUvBounds.w));
+        }
+        if (swgl_SpanLength >= opaque_steps.x) {
+            // We're in the left X column of the nine-patch.
+            num_inside = min(num_inside, swgl_SpanLength - opaque_steps.x);
+        } else if (swgl_SpanLength >= opaque_steps.z) {
+            // We're in the middle X band of the nine-patch. Set the UV clamp bounds
+            // to the horizontal center texel of the box shadow.
+            num_inside = min(num_inside, swgl_SpanLength - opaque_steps.z);
+            uv_bounds.xz = vec2(clamp(mix(vUvBounds_NoClamp.x, vUvBounds_NoClamp.z, vEdge.x),
+                                      vUvBounds.x, vUvBounds.z));
+        }
+        if (num_inside > 0) {
+            // We have a non-zero span of fragments within the sector. Map to the UV
+            // start offset of the sector and the UV offset within the sector.
+            vec2 uv = clamp(uv_linear, vec2(0.0), vEdge.xy);
+            uv += max(vec2(0.0), uv_linear - vEdge.zw);
+            uv = mix(vUvBounds_NoClamp.xy, vUvBounds_NoClamp.zw, uv);
+            // If we're in the center sector of the nine-patch, then we only need to
+            // sample from a single texel of the box shadow. Just sample that single
+            // texel once and output it for the entire span. Otherwise, we just need
+            // to commit an actual span of texture from the box shadow. Depending on
+            // if we are in clip-out mode, we may need to invert the source texture.
+            if (uv_bounds.xy == uv_bounds.zw) {
+                uv = clamp(uv, uv_bounds.xy, uv_bounds.zw);
+                float texel = TEX_SAMPLE(sColor0, uv).r;
+                float alpha = mix(texel, 1.0 - texel, vClipMode.x);
+                swgl_commitPartialSolidR8(num_inside, alpha);
+            } else if (vClipMode.x != 0.0) {
+                swgl_commitPartialTextureLinearInvertR8(num_inside, sColor0, uv, uv_bounds);
+            } else {
+                swgl_commitPartialTextureLinearR8(num_inside, sColor0, uv, uv_bounds);
+            }
+            float steps_inside = float(num_inside / swgl_StepSize);
+            uv_linear += steps_inside * uv_linear_step;
+            local_pos += steps_inside * local_step;
+        }
+        // By here we're probably in a transitional chunk of the nine-patch that
+        // requires per-fragment processing, so loop around again to the handler
+        // for that case.
+    }
+
+    // Fill any remaining sections of the span that are clipped out.
+    if (swgl_SpanLength > 0) {
+        swgl_commitPartialSolidR8(swgl_SpanLength, vClipMode.x);
+    }
+}
+#endif
+
+#endif
diff --git a/gfx/wr/webrender/res/cs_clip_image.glsl b/gfx/wr/webrender/res/cs_clip_image.glsl
new file mode 100644
index 0000000000..94d5d87cbe
--- /dev/null
+++ b/gfx/wr/webrender/res/cs_clip_image.glsl
@@ -0,0 +1,117 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include shared,clip_shared
+
+varying vec2 vLocalPos; // Local-space position, clamped to the local clip rect.
+varying vec2 vClipMaskImageUv; // Mask UV, divided by the texture size.
+
+flat varying vec4 vClipMaskUvInnerRect; // Mask UV rect inset by half a texel; sampling UVs are clamped to it.
+
+#ifdef WR_VERTEX_SHADER
+
+PER_INSTANCE in vec4 aClipTileRect; // xy / zw = the two endpoints of the tile rect.
+PER_INSTANCE in ivec2 aClipDataResourceAddress; // Passed to fetch_image_source_direct() to look up the mask image.
+PER_INSTANCE in vec4 aClipLocalRect; // xy / zw = the two endpoints of the local clip rect.
+
+struct ClipMaskInstanceImage {
+    ClipMaskInstanceCommon base; // Result of fetch_clip_item_common().
+    RectWithEndpoint tile_rect; // From aClipTileRect.
+    ivec2 resource_address; // From aClipDataResourceAddress.
+    RectWithEndpoint local_rect; // From aClipLocalRect.
+};
+
+// Collects the per-instance inputs of an image clip mask into one struct.
+ClipMaskInstanceImage fetch_clip_item() {
+    ClipMaskInstanceCommon base_item = fetch_clip_item_common();
+    RectWithEndpoint tile = RectWithEndpoint(aClipTileRect.xy, aClipTileRect.zw);
+    RectWithEndpoint local = RectWithEndpoint(aClipLocalRect.xy, aClipLocalRect.zw);
+
+    // Constructor arguments follow the struct's field order:
+    // base, tile_rect, resource_address, local_rect.
+    return ClipMaskInstanceImage(
+        base_item, tile, aClipDataResourceAddress, local);
+}
+
+struct ClipImageVertexInfo {
+    vec2 local_pos; // Vertex position in local space after clamping to the local clip rect.
+    vec4 world_pos; // local_pos multiplied by the primitive transform matrix, w kept undivided.
+};
+
+// Unlike write_clip_tile_vertex, this forward-transforms the primitive's
+// local-space tile rect into the target space; scissoring keeps the primitive
+// from drawing outside the target bounds. Also sets gl_Position.
+ClipImageVertexInfo write_clip_image_vertex(RectWithEndpoint tile_rect,
+                                            RectWithEndpoint local_clip_rect,
+                                            Transform prim_transform,
+                                            Transform clip_transform,
+                                            RectWithEndpoint sub_rect,
+                                            vec2 task_origin,
+                                            vec2 screen_origin,
+                                            float device_pixel_scale) {
+    // Tile corner for this vertex, kept inside the local clip rect.
+    vec2 tile_corner = mix(tile_rect.p0, tile_rect.p1, aPosition.xy);
+    vec2 clamped = rect_clamp(local_clip_rect, tile_corner);
+    vec4 world = prim_transform.m * vec4(clamped, 0.0, 1.0);
+
+    // Scale to device pixels and shift into the task, keeping w for the divide.
+    vec2 task_offset = (task_origin - screen_origin) * world.w;
+    gl_Position = uTransform * vec4(world.xy * device_pixel_scale + task_offset, 0.0, world.w);
+    // Axis-aligned clips get effectively unbounded transform bounds.
+    vec4 bounds = vec4(local_clip_rect.p0, local_clip_rect.p1);
+    if (clip_transform.is_axis_aligned) {
+        bounds = vec4(vec2(-1.0e16), vec2(1.0e16));
+    }
+    init_transform_vs(bounds);
+
+    return ClipImageVertexInfo(clamped, world);
+}
+
+// Vertex stage: positions the mask tile and derives the UVs used to sample it.
+void main(void) {
+    ClipMaskInstanceImage item = fetch_clip_item();
+    Transform clip_transform = fetch_transform(item.base.clip_transform_id);
+    Transform prim_transform = fetch_transform(item.base.prim_transform_id);
+    ImageSource img_src = fetch_image_source_direct(item.resource_address);
+
+    ClipImageVertexInfo info = write_clip_image_vertex(
+        item.tile_rect,
+        item.local_rect,
+        prim_transform,
+        clip_transform,
+        item.base.sub_rect,
+        item.base.task_origin,
+        item.base.screen_origin,
+        item.base.device_pixel_scale
+    );
+    vLocalPos = info.local_pos;
+    // Position within the tile as a fraction, mapped into the source UV rect.
+    vec2 tile_fraction = (info.local_pos - item.tile_rect.p0) / rect_size(item.tile_rect);
+    vec2 tex_size = vec2(TEX_SIZE(sColor0));
+    vClipMaskImageUv = mix(img_src.uv_rect.p0, img_src.uv_rect.p1, tile_fraction) / tex_size;
+
+    // Shrink the UV rect by half a texel per side so linear samples stay inside it.
+    vClipMaskUvInnerRect = vec4(img_src.uv_rect.p0 + 0.5, img_src.uv_rect.p1 - 0.5) / tex_size.xyxy;
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+// Writes the mask to the red channel: coverage 0.0 gives 1.0, coverage 1.0 gives the sampled value.
+void main(void) {
+    float coverage = init_transform_rough_fs(vLocalPos);
+    vec2 uv = clamp(vClipMaskImageUv, vClipMaskUvInnerRect.xy, vClipMaskUvInnerRect.zw);
+    oFragColor = vec4(mix(1.0, texture(sColor0, uv).r, coverage), 0.0, 0.0, 1.0); // careful: texture has type A8
+}
+
+#ifdef SWGL_DRAW_SPAN
+void swgl_drawSpanR8() {
+    if (has_valid_transform_bounds()) { // NOTE(review): presumably defers to main() in this case; confirm against SWGL.
+        return;
+    }
+    // Otherwise commit the span from the mask texture, passing the inner rect as the UV bounds.
+    swgl_commitTextureLinearR8(sColor0, vClipMaskImageUv, vClipMaskUvInnerRect);
+}
+#endif
+
+#endif
diff --git a/gfx/wr/webrender/res/cs_clip_rectangle.glsl b/gfx/wr/webrender/res/cs_clip_rectangle.glsl
new file mode 100644
index 0000000000..1835770bf6
--- /dev/null
+++ b/gfx/wr/webrender/res/cs_clip_rectangle.glsl
@@ -0,0 +1,495 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include shared,clip_shared,ellipse
+
+varying vec4 vLocalPos;
+#ifdef WR_FEATURE_FAST_PATH
+flat varying vec3 vClipParams;      // xy = box size, z = radius
+#else
+flat varying vec4 vClipCenter_Radius_TL;
+flat varying vec4 vClipCenter_Radius_TR;
+flat varying vec4 vClipCenter_Radius_BL;
+flat varying vec4 vClipCenter_Radius_BR;
+    #ifdef SWGL_DRAW_SPAN
+        flat varying vec4 vClipCorner_TL;
+        flat varying vec4 vClipCorner_TR;
+        flat varying vec4 vClipCorner_BL;
+        flat varying vec4 vClipCorner_BR;
+    #endif
+#endif
+// Clip mode. Packed in to a vector to work around bug 1630356.
+flat varying vec2 vClipMode;
+
+#ifdef WR_VERTEX_SHADER
+
+PER_INSTANCE in vec2 aClipLocalPos;
+PER_INSTANCE in vec4 aClipLocalRect;
+PER_INSTANCE in float aClipMode;
+PER_INSTANCE in vec4 aClipRect_TL;
+PER_INSTANCE in vec4 aClipRadii_TL;
+PER_INSTANCE in vec4 aClipRect_TR;
+PER_INSTANCE in vec4 aClipRadii_TR;
+PER_INSTANCE in vec4 aClipRect_BL;
+PER_INSTANCE in vec4 aClipRadii_BL;
+PER_INSTANCE in vec4 aClipRect_BR;
+PER_INSTANCE in vec4 aClipRadii_BR;
+
+struct ClipMaskInstanceRect { // Per-instance inputs of the rectangle clip shader.
+    ClipMaskInstanceCommon base; // filled by fetch_clip_item_common()
+    vec2 local_pos;              // copied from the aClipLocalPos attribute
+};
+
+ClipMaskInstanceRect fetch_clip_item() { // Gathers the per-instance data for the current vertex.
+    ClipMaskInstanceRect cmi;
+
+    cmi.base = fetch_clip_item_common(); // defined outside this file
+    cmi.local_pos = aClipLocalPos;
+
+    return cmi;
+}
+
+struct ClipRect { // The clip's overall rect and its mode.
+    RectWithEndpoint rect; // built from aClipLocalRect (xy, zw)
+    float mode;            // aClipMode; forwarded to the fragment stage as vClipMode.x
+};
+
+struct ClipCorner { // One corner of the rounded rect.
+    RectWithEndpoint rect;   // built from aClipRect_* (xy, zw); not read anywhere in this file
+    vec4 outer_inner_radius; // aClipRadii_*; main() reads only xy (just x in the fast path)
+};
+
+struct ClipData { // Everything fetch_clip() unpacks from the aClip* instance attributes.
+    ClipRect rect;             // aClipLocalRect / aClipMode
+    ClipCorner top_left;       // aClipRect_TL / aClipRadii_TL
+    ClipCorner top_right;      // aClipRect_TR / aClipRadii_TR
+    ClipCorner bottom_left;    // aClipRect_BL / aClipRadii_BL
+    ClipCorner bottom_right;   // aClipRect_BR / aClipRadii_BR
+};
+
+ClipData fetch_clip() { // Unpacks the aClip* instance attributes into a ClipData.
+    ClipData clip;
+    // Each vec4 rect attribute is split into its xy and zw halves for the RectWithEndpoint constructor.
+    clip.rect = ClipRect(RectWithEndpoint(aClipLocalRect.xy, aClipLocalRect.zw), aClipMode);
+    clip.top_left = ClipCorner(RectWithEndpoint(aClipRect_TL.xy, aClipRect_TL.zw), aClipRadii_TL);
+    clip.top_right = ClipCorner(RectWithEndpoint(aClipRect_TR.xy, aClipRect_TR.zw), aClipRadii_TR);
+    clip.bottom_left = ClipCorner(RectWithEndpoint(aClipRect_BL.xy, aClipRect_BL.zw), aClipRadii_BL);
+    clip.bottom_right = ClipCorner(RectWithEndpoint(aClipRect_BR.xy, aClipRect_BR.zw), aClipRadii_BR);
+
+    return clip;
+}
+
+void main(void) { // Vertex stage: computes the tile's local position and the rounded-rect parameters.
+    ClipMaskInstanceRect cmi = fetch_clip_item();
+    Transform clip_transform = fetch_transform(cmi.base.clip_transform_id);
+    Transform prim_transform = fetch_transform(cmi.base.prim_transform_id);
+    ClipData clip = fetch_clip();
+    // Translate the clip rect so that p0 sits at cmi.local_pos; its size is unchanged.
+    RectWithEndpoint local_rect = clip.rect.rect;
+    vec2 diff = cmi.local_pos - local_rect.p0;
+    local_rect.p0 = cmi.local_pos;
+    local_rect.p1 += diff;
+
+    ClipVertexInfo vi = write_clip_tile_vertex(
+        local_rect,
+        prim_transform,
+        clip_transform,
+        cmi.base.sub_rect,
+        cmi.base.task_origin,
+        cmi.base.screen_origin,
+        cmi.base.device_pixel_scale
+    );
+
+    vClipMode.x = clip.rect.mode;
+    vLocalPos = vi.local_pos;
+
+#ifdef WR_FEATURE_FAST_PATH
+    // If the radii are all uniform, we can use a much simpler 2d signed distance function to get a
+    // rounded rect clip. vLocalPos is recentered on the rect center (pre-multiplied by w) for it.
+    vec2 half_size = 0.5 * rect_size(local_rect);
+    float radius = clip.top_left.outer_inner_radius.x;
+    vLocalPos.xy -= (half_size + cmi.local_pos) * vi.local_pos.w;
+    vClipParams = vec3(half_size - vec2(radius), radius);
+#else
+    RectWithEndpoint clip_rect = local_rect;
+    // Radii of each corner (the xy half of outer_inner_radius).
+    vec2 r_tl = clip.top_left.outer_inner_radius.xy;
+    vec2 r_tr = clip.top_right.outer_inner_radius.xy;
+    vec2 r_br = clip.bottom_right.outer_inner_radius.xy;
+    vec2 r_bl = clip.bottom_left.outer_inner_radius.xy;
+    // Per corner: xy = ellipse center (rect corner inset by its radii), zw = inverse_radii_squared().
+    vClipCenter_Radius_TL = vec4(clip_rect.p0 + r_tl,
+                                 inverse_radii_squared(r_tl));
+
+    vClipCenter_Radius_TR = vec4(clip_rect.p1.x - r_tr.x,
+                                 clip_rect.p0.y + r_tr.y,
+                                 inverse_radii_squared(r_tr));
+
+    vClipCenter_Radius_BR = vec4(clip_rect.p1 - r_br,
+                                 inverse_radii_squared(r_br));
+
+    vClipCenter_Radius_BL = vec4(clip_rect.p0.x + r_bl.x,
+                                 clip_rect.p1.y - r_bl.y,
+                                 inverse_radii_squared(r_bl));
+
+    #ifdef SWGL_DRAW_SPAN
+        // For the half-space span shader, we need to know the half-spaces of
+        // the corners separate from the center and radius. We compute a point
+        // that falls on the diagonal (which is just an inner vertex pushed out
+        // along one axis, but not on both). We also compute the direction of
+        // the half-space, which is a perpendicular vector (-y,x) of the vector
+        // of the diagonal. We leave the scales of the vectors unchanged.
+        vClipCorner_TL = vec4(clip_rect.p0.x,
+                              clip_rect.p0.y + r_tl.y,
+                              -r_tl.yx);
+        vClipCorner_TR = vec4(clip_rect.p1.x - r_tr.x,
+                              clip_rect.p0.y,
+                              vec2(r_tr.y, -r_tr.x));
+        vClipCorner_BR = vec4(clip_rect.p1.x,
+                              clip_rect.p1.y - r_br.y,
+                              r_br.yx);
+        vClipCorner_BL = vec4(clip_rect.p0.x + r_bl.x,
+                              clip_rect.p1.y,
+                              vec2(-r_bl.y, r_bl.x));
+    #endif
+#endif
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+
+#ifdef WR_FEATURE_FAST_PATH
+// See http://www.iquilezles.org/www/articles/distfunctions2d/distfunctions2d.htm
+float sd_box(in vec2 pos, in vec2 box_size) { // Signed distance from pos to an origin-centered box with half-extents box_size.
+    vec2 d = abs(pos) - box_size; // per-axis distance past the box edge (negative while inside on that axis)
+    return length(max(d, vec2(0.0))) + min(max(d.x,d.y), 0.0); // first term: distance outside; second: negative depth inside
+}
+
+float sd_rounded_box(in vec2 pos, in vec2 box_size, in float radius) { // sd_box() grown outwards by radius, which rounds the corners.
+    return sd_box(pos, box_size) - radius;
+}
+#endif
+
+void main(void) { // Fragment stage: anti-aliased rounded-rect coverage, written to the red channel.
+    vec2 local_pos = vLocalPos.xy / vLocalPos.w;
+    float aa_range = compute_aa_range(local_pos);
+
+#ifdef WR_FEATURE_FAST_PATH
+    float dist = sd_rounded_box(local_pos, vClipParams.xy, vClipParams.z);
+#else
+    float dist = distance_to_rounded_rect(
+        local_pos,
+        vClipCenter_Radius_TL,
+        vClipCenter_Radius_TR,
+        vClipCenter_Radius_BR,
+        vClipCenter_Radius_BL,
+        vTransformBounds
+    );
+#endif
+
+    // Compute AA for the given dist and range.
+    float alpha = distance_aa(aa_range, dist);
+
+    // Select alpha or inverse alpha depending on clip in/out (vClipMode.x of 0.0 keeps alpha, 1.0 inverts it).
+    float final_alpha = mix(alpha, 1.0 - alpha, vClipMode.x);
+    // Force the output to zero where w is not positive.
+    float final_final_alpha = vLocalPos.w > 0.0 ? final_alpha : 0.0;
+    oFragColor = vec4(final_final_alpha, 0.0, 0.0, 1.0);
+}
+
+#ifdef SWGL_DRAW_SPAN
+// Currently the cs_clip_rectangle shader is slow because it always evaluates
+// the corner ellipse segments and the rectangle AA for every fragment the
+// shader is run on. To alleviate this for now with SWGL, this essentially
+// implements a rounded-rectangle span rasterizer inside the span shader. The
+// motivation is that we can separate out the parts of the span which are fully
+// opaque and fully transparent, outputting runs of fixed color in those areas,
+// while only evaluating the ellipse segments and AA in the smaller outlying
+// parts of the span that actually need it.
+// The shader conceptually represents a rounded rectangle as an inner octagon
+// (8 half-spaces) bounding the opaque region and an outer octagon bounding the
+// curve and AA parts. Everything outside is transparent. The line of the span
+// is intersected with half-spaces, looking for interior spans that minimally
+// intersect the half-spaces (start max, end min). In the ideal case we hit a
+// start corner ellipse segment and an end corner ellipse segment, rendering
+// the two curves on the ends with an opaque run in between, outputting clear
+// for any transparent runs before and after the start and end curves.
+// This is slightly complicated by the fact that the results here must agree
+// with the main results of the fragment shader, in case SWGL has to fall back
+// to the main fragment shader for any reason. So, we make an effort to handle
+// both ways of operating - the uniform radius fast-path and the varying radius
+// slow-path.
+void swgl_drawSpanR8() {
+    // Perspective is not supported.
+    if (swgl_interpStep(vLocalPos).w != 0.0) {
+        return;
+    }
+
+    // If the span is completely outside the Z-range and clipped out, just
+    // output clear so we don't need to consider invalid W in the rest of the
+    // shader.
+    float w = swgl_forceScalar(vLocalPos.w);
+    if (w <= 0.0) {
+        swgl_commitSolidR8(0.0);
+        return;
+    }
+
+    // To start, we evaluate the rounded-rectangle in local space relative to
+    // the local-space position. This will be interpolated across the span to
+    // track whether we intersect any half-spaces.
+    w = 1.0 / w;
+    vec2 local_pos = vLocalPos.xy * w;
+    vec2 local_pos0 = swgl_forceScalar(local_pos);
+    vec2 local_step = swgl_interpStep(vLocalPos).xy * w;
+    float step_scale = max(dot(local_step, local_step), 1.0e-6);
+
+    // Get the local-space AA range. This range represents 1/fwidth(local_pos),
+    // essentially the scale of how much local-space maps to an AA pixel. We
+    // need to know the inverse, how much local-space we traverse per AA pixel
+    // step. We then scale this to represent the amount of span steps traversed
+    // per AA pixel step.
+    float aa_range = compute_aa_range(local_pos);
+    float aa_margin = inversesqrt(aa_range * aa_range * step_scale);
+
+    // We need to know the bounds of the aligned rectangle portion of the rrect
+    // in local-space. If we're using the fast-path, this is specified as the
+    // inner bounding-box half-width of the rrect and the uniform outer radius
+    // of the corners in vClipParams, which we map to the outer bounding-box.
+    // For the general case, we have already stored the outer bounding box in
+    // vTransformBounds.
+    #ifdef WR_FEATURE_FAST_PATH
+        vec4 clip_rect = vec4(-vClipParams.xy - vClipParams.z, vClipParams.xy + vClipParams.z);
+    #else
+        vec4 clip_rect = vTransformBounds;
+    #endif
+
+    // We need to compute the local-space distance to the bounding box and then
+    // figure out how many processing steps that maps to. If we are stepping in
+    // a negative direction on an axis, we need to swap the sides of the box
+    // which we consider as the start or end. If there is no local-space step
+    // on an axis (i.e. constant Y), we need to take care to force the steps to
+    // either the start or end of the span depending on if we are inside or
+    // outside of the bounding box.
+    vec4 clip_dist =
+        mix(clip_rect, clip_rect.zwxy, lessThan(local_step, vec2(0.0)).xyxy)
+            - local_pos0.xyxy;
+    clip_dist =
+        mix(1.0e6 * step(0.0, clip_dist),
+            clip_dist * recip(local_step).xyxy,
+            notEqual(local_step, vec2(0.0)).xyxy);
+
+    // Initially, the opaque region is bounded by the further start intersect
+    // with the bounding box and the nearest end intersect with the bounding
+    // box.
+    float opaque_start = max(clip_dist.x, clip_dist.y);
+    float opaque_end = min(clip_dist.z, clip_dist.w);
+    float aa_start = opaque_start;
+    float aa_end = opaque_end;
+
+    // Here we actually intersect with the half-space of the corner. We get the
+    // plane distance of the local-space position from the diagonal bounding
+    // ellipse segment from the opaque region. The half-space is defined by the
+    // direction vector of the plane and an offset point that falls on the
+    // dividing line (which is a vertex on the corner box, which is actually on
+    // the outer radius of the bounding box, but not a corner vertex). This
+    // distance is positive if on the curve side and negative if on the inner
+    // opaque region. If we are on the curve side, we need to verify we are
+    // traveling in direction towards the opaque region so that we will
+    // eventually intersect the diagonal so we can calculate when the start
+    // corner segment will end, otherwise we are going away from the rrect.
+    // If we are inside the opaque interior, we need to verify we are traveling
+    // in direction towards the curve, so that we can calculate when the end
+    // corner segment will start. Further, if we intersect, we calculate the
+    // offset of the outer octagon where AA starts from the inner octagon of
+    // where the opaque region starts using the apex vector (which is transpose
+    // of the half-space's direction).
+    //
+    // We need to intersect the corner ellipse segments. Significantly, we need
+    // to know where the apex of the ellipse segment is and how far to push the
+    // outer diagonal of the octagon from the inner diagonal. The position of
+    // the inner diagonal simply runs diagonal across the corner box and has a
+    // constant offset from vertex on the inner bounding box. The apex also has
+    // a constant offset along the opposite diagonal relative to the diagonal
+    // intersect which is 1/sqrt(2) - 0.5 assuming unit length for the diagonal.
+    // We then need to project the vector to the apex onto the local-space step
+    // scale, but we do this with reference to the normal vector of the diagonal
+    // using dot(normal, apex) / dot(normal, local_step), where the apex vector
+    // is (0.7071 - 0.5) * abs(normal).yx * sign(normal).
+    vec4 start_plane = vec4(1.0e6);
+    vec4 end_plane = vec4(1.0e6);
+
+    #define CLIP_CORNER(offset, normal, info) do {                            \
+        float dist = dot(local_pos0 - (offset), (normal));                    \
+        float scale = -dot(local_step, (normal));                             \
+        if (scale >= 0.0) {                                                   \
+            if (dist > opaque_start * scale) {                                \
+                SET_CORNER(start_corner, info);                               \
+                start_plane = vec4(offset, normal);                           \
+                float inv_scale = recip(max(scale, 1.0e-6));                  \
+                opaque_start = dist * inv_scale;                              \
+                float apex = (0.7071 - 0.5) * 2.0 * abs(normal.x * normal.y); \
+                aa_start = opaque_start - apex * inv_scale;                   \
+            }                                                                 \
+        } else if (dist > opaque_end * scale) {                               \
+            SET_CORNER(end_corner, info);                                     \
+            end_plane = vec4(offset, normal);                                 \
+            float inv_scale = recip(min(scale, -1.0e-6));                     \
+            opaque_end = dist * inv_scale;                                    \
+            float apex = (0.7071 - 0.5) * 2.0 * abs(normal.x * normal.y);     \
+            aa_end = opaque_end - apex * inv_scale;                           \
+        }                                                                     \
+    } while (false)
+
+    #ifdef WR_FEATURE_FAST_PATH
+        // For the fast-path, we only have the half-width of the inner bounding
+        // box. We need to map this to points that fall on the diagonal of the
+        // half-space for each corner. To do this we just need to push out the
+        // vertex in the right direction on a single axis, leaving the other
+        // unchanged.
+        vec2 corner_tl = -vClipParams.xy - vec2(vClipParams.z, 0.0);
+        vec2 corner_tr = vec2(vClipParams.x, -vClipParams.y - vClipParams.z);
+        vec2 corner_br = vClipParams.xy + vec2(vClipParams.z, 0.0);
+        vec2 corner_bl = vec2(-vClipParams.x, vClipParams.y + vClipParams.z);
+        // The direction vector of the corner half-space has constant length,
+        // but just needs an appropriate direction set.
+        vec2 n_tl = -vClipParams.zz;
+        vec2 n_tr = vec2(vClipParams.z, -vClipParams.z);
+        vec2 n_br = vClipParams.zz;
+        vec2 n_bl = vec2(-vClipParams.z, vClipParams.z);
+
+        #define SET_CORNER(corner, info)
+
+        // Clip against the corner half-spaces.
+        CLIP_CORNER(corner_tl, n_tl, );
+        CLIP_CORNER(corner_tr, n_tr, );
+        CLIP_CORNER(corner_br, n_br, );
+        CLIP_CORNER(corner_bl, n_bl, );
+
+        // Later we need to calculate distance AA for both corners and the
+        // outer bounding rect. For the fast-path, this is all done inside
+        // sd_rounded_box.
+        #define AA_RECT(local_pos) \
+            sd_rounded_box(local_pos, vClipParams.xy, vClipParams.z)
+    #else
+        // For the general case, we need to remember which of the actual start
+        // and end corners we intersect, so that we can evaluate the curve AA
+        // against only those corners rather than having to try against all 4
+        // corners for both sides of the span. Initialize these values so that
+        // if no corner is intersected, they will just zero the AA.
+        vec4 start_corner = vec4(vec2(1.0e6), vec2(1.0));
+        vec4 end_corner = vec4(vec2(1.0e6), vec2(1.0));
+
+        #define SET_CORNER(corner, info) corner = info
+
+        // Clip against the corner half-spaces. We have already computed the
+        // corner half-spaces in the vertex shader.
+        CLIP_CORNER(vClipCorner_TL.xy, vClipCorner_TL.zw, vClipCenter_Radius_TL);
+        CLIP_CORNER(vClipCorner_TR.xy, vClipCorner_TR.zw, vClipCenter_Radius_TR);
+        CLIP_CORNER(vClipCorner_BR.xy, vClipCorner_BR.zw, vClipCenter_Radius_BR);
+        CLIP_CORNER(vClipCorner_BL.xy, vClipCorner_BL.zw, vClipCenter_Radius_BL);
+
+        // Later we need to calculate distance AA for both corners and the
+        // outer bounding rect. For the general case, we need to explicitly
+        // evaluate either the ellipse segment distance or the rect distance.
+        #define AA_RECT(local_pos) \
+            signed_distance_rect(local_pos, vTransformBounds.xy, vTransformBounds.zw)
+        #define AA_CORNER(local_pos, corner) \
+            distance_to_ellipse_approx(local_pos - corner.xy, corner.zw, 1.0)
+    #endif
+
+    // Pad the AA region by a margin, as the intersections take place assuming
+    // pixel centers, but AA actually starts half a pixel away from the center.
+    // If the AA region narrows to nothing, be careful not to inflate so much
+    // that we start processing AA for fragments that don't need it.
+    aa_margin = max(aa_margin - max(aa_start - aa_end, 0.0), 0.0);
+    aa_start -= aa_margin;
+    aa_end += aa_margin;
+
+    // Compute the thresholds at which we need to transition between various
+    // segments of the span, from fully transparent outside to the start of
+    // the outer octagon where AA starts, from there to where the inner opaque
+    // octagon starts, from there to where the opaque inner octagon ends and
+    // AA starts again, to finally where the outer octagon/AA ends and we're
+    // back to fully transparent. These thresholds are just flipped offsets
+    // from the start of the span so we can compare against the remaining
+    // span length which automatically deducts as we commit fragments.
+    ivec4 steps = ivec4(clamp(
+        swgl_SpanLength -
+            swgl_StepSize *
+                vec4(floor(aa_start), ceil(opaque_start), floor(opaque_end), ceil(aa_end)),
+        0.0, swgl_SpanLength));
+    int aa_start_len = steps.x;
+    int opaque_start_len = steps.y;
+    int opaque_end_len = steps.z;
+    int aa_end_len = steps.w;
+
+    // Output fully clear (or its inverse when vClipMode.x is 1.0) while we're outside the AA region.
+    if (swgl_SpanLength > aa_start_len) {
+        int num_aa = swgl_SpanLength - aa_start_len;
+        swgl_commitPartialSolidR8(num_aa, vClipMode.x);
+        local_pos += float(num_aa / swgl_StepSize) * local_step;
+    }
+    #ifdef AA_CORNER
+    if (start_plane.x < 1.0e5) {
+        // We're now in the outer octagon which requires AA. Evaluate the corner
+        // distance of the start corner here and output AA for it. Before we hit
+        // the actual opaque inner octagon, we have a transitional step where the
+        // diagonal might intersect mid-way through the step. We have to consider
+        // either the corner or rect distance depending on which side we're on.
+        while (swgl_SpanLength > opaque_start_len) {
+            float alpha = distance_aa(aa_range,
+                dot(local_pos - start_plane.xy, start_plane.zw) > 0.0
+                    ? AA_CORNER(local_pos, start_corner)
+                    : AA_RECT(local_pos));
+            swgl_commitColorR8(mix(alpha, 1.0 - alpha, vClipMode.x));
+            local_pos += local_step;
+        }
+    }
+    #endif
+    // If there's no start corner, just do rect AA until opaque.
+    while (swgl_SpanLength > opaque_start_len) {
+        float alpha = distance_aa(aa_range, AA_RECT(local_pos));
+        swgl_commitColorR8(mix(alpha, 1.0 - alpha, vClipMode.x));
+        local_pos += local_step;
+    }
+    // Now we're finally in the opaque inner octagon part of the span. Just
+    // output a solid run.
+    if (swgl_SpanLength > opaque_end_len) {
+        int num_opaque = swgl_SpanLength - opaque_end_len;
+        swgl_commitPartialSolidR8(num_opaque, 1.0 - vClipMode.x);
+        local_pos += float(num_opaque / swgl_StepSize) * local_step;
+    }
+    #ifdef AA_CORNER
+    if (end_plane.x < 1.0e5) {
+        // Finally we're in the AA region on the other side, inside the outer
+        // octagon again. Just evaluate the distance to the end corner and
+        // compute AA for it. We're leaving the opaque inner octagon, but like
+        // before, we have to be careful we're not dealing with a step partially
+        // intersected by the end corner's diagonal. Check which side we are on
+        // and use either the corner or rect distance as appropriate.
+        while (swgl_SpanLength > aa_end_len) {
+            float alpha = distance_aa(aa_range,
+                dot(local_pos - end_plane.xy, end_plane.zw) > 0.0
+                    ? AA_CORNER(local_pos, end_corner)
+                    : AA_RECT(local_pos));
+            swgl_commitColorR8(mix(alpha, 1.0 - alpha, vClipMode.x));
+            local_pos += local_step;
+        }
+    }
+    #endif
+    // If there's no end corner, just do rect AA until clear.
+    while (swgl_SpanLength > aa_end_len) {
+        float alpha = distance_aa(aa_range, AA_RECT(local_pos));
+        swgl_commitColorR8(mix(alpha, 1.0 - alpha, vClipMode.x));
+        local_pos += local_step;
+    }
+    // We're now outside the outer AA octagon on the other side. Just output
+    // fully clear.
+    if (swgl_SpanLength > 0) {
+        swgl_commitPartialSolidR8(swgl_SpanLength, vClipMode.x);
+    }
+}
+#endif
+
+#endif
diff --git a/gfx/wr/webrender/res/cs_conic_gradient.glsl b/gfx/wr/webrender/res/cs_conic_gradient.glsl
new file mode 100644
index 0000000000..e788ca260c
--- /dev/null
+++ b/gfx/wr/webrender/res/cs_conic_gradient.glsl
@@ -0,0 +1,67 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include shared,rect,render_task,gpu_cache,gpu_buffer,gradient
+
+#define PI                  3.141592653589793
+
+varying vec2 v_pos;
+
+flat varying vec2 v_center;
+
+// x: start offset, y: offset scale, z: angle
+// Packed in to a vector to work around bug 1630356.
+flat varying vec3 v_start_offset_offset_scale_angle_vec;
+#define v_start_offset v_start_offset_offset_scale_angle_vec.x
+#define v_offset_scale v_start_offset_offset_scale_angle_vec.y
+#define v_angle v_start_offset_offset_scale_angle_vec.z
+
+#ifdef WR_VERTEX_SHADER
+
+#define EXTEND_MODE_REPEAT 1
+
+PER_INSTANCE in vec4 aTaskRect;
+PER_INSTANCE in vec2 aCenter;
+PER_INSTANCE in vec2 aScale;
+PER_INSTANCE in float aStartOffset;
+PER_INSTANCE in float aEndOffset;
+PER_INSTANCE in float aAngle;
+PER_INSTANCE in int aExtendMode;
+PER_INSTANCE in int aGradientStopsAddress;
+
+void main(void) { // Vertex stage: precomputes the per-instance conic gradient parameters.
+    // Store 1/d where d = end_offset - start_offset
+    // If d = 0, we can't get its reciprocal. Instead, just use a zero scale.
+    float d = aEndOffset - aStartOffset;
+    v_offset_scale = d != 0.0 ? 1.0 / d : 0.0;
+
+    vec2 pos = mix(aTaskRect.xy, aTaskRect.zw, aPosition.xy);
+    gl_Position = uTransform * vec4(pos, 0.0, 1.0);
+    // The fragment stage adds v_angle to atan() and subtracts v_start_offset from the scaled offset.
+    v_angle = PI / 2.0 - aAngle;
+    v_start_offset = aStartOffset * v_offset_scale;
+
+    // v_pos and v_center are in a coordinate space relative to the task rect
+    // (so they are independent of the task origin).
+    v_center = aCenter * v_offset_scale;
+    v_pos = (aTaskRect.zw - aTaskRect.xy) * aPosition.xy * v_offset_scale * aScale;
+    // NOTE(review): these two varyings are not declared in this file; presumably they come from the gradient include.
+    v_gradient_repeat.x = float(aExtendMode == EXTEND_MODE_REPEAT);
+    v_gradient_address.x = aGradientStopsAddress;
+}
+#endif
+
+
+#ifdef WR_FRAGMENT_SHADER
+
+void main(void) { // Fragment stage: converts the angle around v_center into a gradient offset.
+    // Use inverse trig to find the angle offset from the relative position.
+    vec2 current_dir = v_pos - v_center;
+    float current_angle = atan(current_dir.y, current_dir.x) + v_angle;
+    float offset = fract(current_angle / (2.0 * PI)) * v_offset_scale - v_start_offset;
+    // offset is the [0, 1) fraction of a full turn, rescaled by v_offset_scale and shifted by v_start_offset.
+    oFragColor = sample_gradient(offset);
+}
+
+#endif
diff --git a/gfx/wr/webrender/res/cs_fast_linear_gradient.glsl b/gfx/wr/webrender/res/cs_fast_linear_gradient.glsl
new file mode 100644
index 0000000000..23b52e22c4
--- /dev/null
+++ b/gfx/wr/webrender/res/cs_fast_linear_gradient.glsl
@@ -0,0 +1,32 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include shared
+
+varying float vPos;
+flat varying vec4 vColor0;
+flat varying vec4 vColor1;
+
+#ifdef WR_VERTEX_SHADER
+
+PER_INSTANCE in vec4 aTaskRect;
+PER_INSTANCE in vec4 aColor0;
+PER_INSTANCE in vec4 aColor1;
+PER_INSTANCE in float aAxisSelect;
+
+void main(void) { // Vertex stage: forwards the two colors and the interpolation coordinate.
+    vPos = mix(0.0, 1.0, mix(aPosition.x, aPosition.y, aAxisSelect)); // aAxisSelect 0.0 -> x, 1.0 -> y
+
+    vColor0 = aColor0;
+    vColor1 = aColor1;
+
+    gl_Position = uTransform * vec4(mix(aTaskRect.xy, aTaskRect.zw, aPosition.xy), 0.0, 1.0);
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+void main(void) { // Fragment stage: two-stop linear blend.
+    oFragColor = mix(vColor0, vColor1, vPos); // vColor0 at vPos = 0.0, vColor1 at vPos = 1.0
+}
+#endif
diff --git a/gfx/wr/webrender/res/cs_line_decoration.glsl b/gfx/wr/webrender/res/cs_line_decoration.glsl
new file mode 100644
index 0000000000..81de807a14
--- /dev/null
+++ b/gfx/wr/webrender/res/cs_line_decoration.glsl
@@ -0,0 +1,165 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include shared
+
+#define LINE_STYLE_SOLID        0
+#define LINE_STYLE_DOTTED       1
+#define LINE_STYLE_DASHED       2
+#define LINE_STYLE_WAVY         3
+
+// Fragment position in the coordinate system used for positioning decorations.
+// To keep the code independent of whether the line is horizontal or vertical,
+// vLocalPos.x is always parallel, and .y always perpendicular, to the line
+// being decorated.
+varying vec2 vLocalPos;
+
+// Line style. Packed in to a vector to work around bug 1630356.
+flat varying ivec2 vStyle;
+
+flat varying vec4 vParams;
+
+#ifdef WR_VERTEX_SHADER
+
+// The size of the mask tile we're rendering, in pixels.
+PER_INSTANCE in vec4 aTaskRect;
+
+// The size of the mask tile. aLocalSize.x is always horizontal and .y vertical,
+// regardless of the line's orientation. The size is chosen by
+// prim_store::line_dec::get_line_decoration_sizes.
+PER_INSTANCE in vec2 aLocalSize;
+
+// A LINE_STYLE_* value, indicating what sort of line to draw.
+PER_INSTANCE in int aStyle;
+
+// 0.0 for a horizontal line, 1.0 for a vertical line.
+PER_INSTANCE in float aAxisSelect;
+
+// The thickness of the wavy line itself, not the amplitude of the waves (i.e.,
+// the thickness of the final decorated line).
+PER_INSTANCE in float aWavyLineThickness;
+
+void main(void) { // Vertex stage: packs the per-style parameters into vParams for the fragment stage.
+    vec2 size = mix(aLocalSize, aLocalSize.yx, aAxisSelect); // axes swapped when aAxisSelect is 1.0
+    vStyle.x = aStyle;
+    // The meaning of each vParams component depends on the style; see the individual cases.
+    switch (vStyle.x) {
+        case LINE_STYLE_SOLID: {
+            break; // the fragment stage reads no parameters for solid lines
+        }
+        case LINE_STYLE_DASHED: {
+            vParams = vec4(size.x,          // period
+                           0.5 * size.x,    // dash length
+                           0.0,
+                           0.0);
+            break;
+        }
+        case LINE_STYLE_DOTTED: {
+            float diameter = size.y;
+            float period = diameter * 2.0;
+            float center_line = 0.5 * size.y;
+            vParams = vec4(period,
+                           diameter / 2.0, // radius
+                           center_line,
+                           0.0);
+            break;
+        }
+        case LINE_STYLE_WAVY: {
+            // This logic copied from gecko to get the same results
+            float line_thickness = max(aWavyLineThickness, 1.0);
+            // Difference in height between peaks and troughs
+            // (and since slopes are 45 degrees, the length of each slope)
+            float slope_length = size.y - line_thickness;
+            // Length of flat runs
+            float flat_length = max((line_thickness - 1.0) * 2.0, 1.0);
+
+            vParams = vec4(line_thickness / 2.0, // half line thickness
+                           slope_length,
+                           flat_length,
+                           size.y);              // read as vertical_bounds by the fragment stage
+            break;
+        }
+        default:
+            vParams = vec4(0.0);
+    }
+    // Apply the same axis swap to the position as was applied to size above.
+    vLocalPos = mix(aPosition.xy, aPosition.yx, aAxisSelect) * size;
+
+    gl_Position = uTransform * vec4(mix(aTaskRect.xy, aTaskRect.zw, aPosition.xy), 0.0, 1.0);
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+
+#define MAGIC_WAVY_LINE_AA_SNAP         0.5
+
+void main(void) { // Fragment stage: computes the decoration's coverage for the selected line style.
+    // Find the appropriate distance to apply the step over.
+    vec2 pos = vLocalPos;
+    float aa_range = compute_aa_range(pos);
+    float alpha = 1.0;
+
+    switch (vStyle.x) {
+        case LINE_STYLE_SOLID: {
+            break;
+        }
+        case LINE_STYLE_DASHED: {
+            // Calculate dash alpha (on/off) based on dash length
+            alpha = step(floor(pos.x + 0.5), vParams.y);
+            break;
+        }
+        case LINE_STYLE_DOTTED: {
+            // Get the dot alpha
+            vec2 dot_relative_pos = pos - vParams.yz; // vParams.yz = (radius, center_line), used as the dot's center
+            float dot_distance = length(dot_relative_pos) - vParams.y;
+            alpha = distance_aa(aa_range, dot_distance);
+            break;
+        }
+        case LINE_STYLE_WAVY: {
+            float half_line_thickness = vParams.x;
+            float slope_length = vParams.y;
+            float flat_length = vParams.z;
+            float vertical_bounds = vParams.w;
+            // Our pattern is just two slopes and two flats
+            float half_period = slope_length + flat_length;
+
+            float mid_height = vertical_bounds / 2.0;
+            float peak_offset = mid_height - half_line_thickness;
+            // Flip the wave every half period
+            float flip = -2.0 * (step(mod(pos.x, 2.0 * half_period), half_period) - 0.5);
+            // flip is -1.0 in the first half of each full period and +1.0 in the second.
+            peak_offset *= flip;
+            float peak_height = mid_height + peak_offset;
+
+            // Convert pos to a local position within one half period
+            pos.x = mod(pos.x, half_period);
+
+            // Compute signed distance to the 3 lines that make up an arc
+            float dist1 = distance_to_line(vec2(0.0, peak_height),
+                                           vec2(1.0, -flip),
+                                           pos);
+            float dist2 = distance_to_line(vec2(0.0, peak_height),
+                                           vec2(0, -flip),
+                                           pos);
+            float dist3 = distance_to_line(vec2(flat_length, peak_height),
+                                           vec2(-1.0, -flip),
+                                           pos);
+            float dist = abs(max(max(dist1, dist2), dist3));
+
+            // Apply AA based on the thickness of the wave
+            alpha = distance_aa(aa_range, dist - half_line_thickness);
+
+            // Disable AA for thin lines
+            if (half_line_thickness <= 1.0) {
+                alpha = 1.0 - step(alpha, MAGIC_WAVY_LINE_AA_SNAP); // 1.0 when alpha exceeds the snap threshold, else 0.0
+            }
+
+            break;
+        }
+        default: break;
+    }
+
+    oFragColor = vec4(alpha);
+}
+#endif
diff --git a/gfx/wr/webrender/res/cs_linear_gradient.glsl b/gfx/wr/webrender/res/cs_linear_gradient.glsl
new file mode 100644
index 0000000000..4ab95cf6f6
--- /dev/null
+++ b/gfx/wr/webrender/res/cs_linear_gradient.glsl
@@ -0,0 +1,68 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include shared,rect,render_task,gpu_cache,gpu_buffer,gradient
+
+varying vec2 v_pos;
+
+flat varying vec2 v_scale_dir;
+
+// Start offset. Packed in to a vector to work around bug 1630356.
+flat varying vec2 v_start_offset;
+
+#ifdef WR_VERTEX_SHADER
+
+#define EXTEND_MODE_REPEAT 1
+
+PER_INSTANCE in vec4 aTaskRect;
+PER_INSTANCE in vec2 aStartPoint;
+PER_INSTANCE in vec2 aEndPoint;
+PER_INSTANCE in vec2 aScale;
+PER_INSTANCE in int aExtendMode;
+PER_INSTANCE in int aGradientStopsAddress;
+
+void main(void) {
+    // Place this vertex within the render task rect.
+    vec2 task_pos = mix(aTaskRect.xy, aTaskRect.zw, aPosition.xy);
+    gl_Position = uTransform * vec4(task_pos, 0.0, 1.0);
+    v_pos = aPosition.xy * aScale;
+
+    // Dividing the gradient axis by its squared length means a dot product
+    // with v_scale_dir yields an offset normalized to the 0..1 scale.
+    vec2 axis = aEndPoint - aStartPoint;
+    v_scale_dir = axis / dot(axis, axis);
+    v_start_offset.x = dot(aStartPoint, v_scale_dir);
+
+    v_scale_dir *= (aTaskRect.zw - aTaskRect.xy);
+
+    v_gradient_repeat.x = float(aExtendMode == EXTEND_MODE_REPEAT);
+    v_gradient_address.x = aGradientStopsAddress;
+}
+#endif
+
+
+#ifdef WR_FRAGMENT_SHADER
+
+void main(void) {
+    // The gradient offset is the projection of the position onto the scaled
+    // direction vector, taken relative to the start offset.
+    float gradient_t = dot(v_pos, v_scale_dir) - v_start_offset.x;
+    oFragColor = sample_gradient(gradient_t);
+}
+
+
+#ifdef SWGL_DRAW_SPAN
+void swgl_drawSpanRGBA8() {
+    int address = swgl_validateGradient(sGpuBuffer, get_gpu_buffer_uv(v_gradient_address.x), int(GRADIENT_ENTRIES + 2.0));
+    if (address < 0) {
+        return;
+    }
+    // address >= 0 here; the commit call receives the same varyings that main() reads.
+    swgl_commitLinearGradientRGBA8(sGpuBuffer, address, GRADIENT_ENTRIES, false, v_gradient_repeat.x != 0.0,
+                                   v_pos, v_scale_dir, v_start_offset.x);
+}
+#endif
+
+
+#endif
diff --git a/gfx/wr/webrender/res/cs_radial_gradient.glsl b/gfx/wr/webrender/res/cs_radial_gradient.glsl
new file mode 100644
index 0000000000..cdc6bfc517
--- /dev/null
+++ b/gfx/wr/webrender/res/cs_radial_gradient.glsl
@@ -0,0 +1,71 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include shared,rect,render_task,gpu_cache,gpu_buffer,gradient
+
+varying vec2 v_pos;
+
+// Start radius. Packed in to a vector to work around bug 1630356.
+flat varying vec2 v_start_radius;
+
+#ifdef WR_VERTEX_SHADER
+
+#define EXTEND_MODE_REPEAT 1
+
+PER_INSTANCE in vec4 aTaskRect;
+PER_INSTANCE in vec2 aCenter;
+PER_INSTANCE in vec2 aScale;
+PER_INSTANCE in float aStartRadius;
+PER_INSTANCE in float aEndRadius;
+PER_INSTANCE in float aXYRatio;
+PER_INSTANCE in int aExtendMode;
+PER_INSTANCE in int aGradientStopsAddress;
+
+void main(void) {
+    // The fragment shader works in units of (end_radius - start_radius), so
+    // precompute the reciprocal of that span. A zero span has no reciprocal;
+    // use a zero scale in that case.
+    float radius_span = aEndRadius - aStartRadius;
+    float inv_radius_span = radius_span != 0.0 ? 1.0 / radius_span : 0.0;
+    v_start_radius.x = aStartRadius * inv_radius_span;
+
+    vec2 task_pos = mix(aTaskRect.xy, aTaskRect.zw, aPosition.xy);
+    gl_Position = uTransform * vec4(task_pos, 0.0, 1.0);
+
+    // v_pos is expressed relative to the task rect (so it does not depend on
+    // the task origin) and measured from the gradient center. Its y component
+    // is then scaled by aXYRatio so the fragment shader can work with
+    // circles.
+    vec2 task_size = aTaskRect.zw - aTaskRect.xy;
+    v_pos = (task_size * aPosition.xy * aScale - aCenter) * inv_radius_span;
+    v_pos.y *= aXYRatio;
+
+    v_gradient_repeat.x = float(aExtendMode == EXTEND_MODE_REPEAT);
+    v_gradient_address.x = aGradientStopsAddress;
+}
+#endif
+
+
+#ifdef WR_FRAGMENT_SHADER
+
+void main(void) {
+    // v_pos and v_start_radius were already scaled by 1/rd in the vertex
+    // shader, so t in length(pd) = start_radius + t * rd is a subtraction.
+    float gradient_t = length(v_pos) - v_start_radius.x;
+    oFragColor = sample_gradient(gradient_t);
+}
+
+#ifdef SWGL_DRAW_SPAN
+void swgl_drawSpanRGBA8() { // Only compiled when SWGL_DRAW_SPAN is defined.
+    int address = swgl_validateGradient(sGpuBuffer, get_gpu_buffer_uv(v_gradient_address.x),
+                                        int(GRADIENT_ENTRIES + 2.0));
+    if (address < 0) {
+        return; // Negative address: nothing is committed by this function.
+    }
+    swgl_commitRadialGradientRGBA8(sGpuBuffer, address, GRADIENT_ENTRIES, v_gradient_repeat.x != 0.0,
+                                   v_pos, v_start_radius.x); // same varyings main() reads
+}
+#endif
+
+#endif
diff --git a/gfx/wr/webrender/res/cs_scale.glsl b/gfx/wr/webrender/res/cs_scale.glsl
new file mode 100644
index 0000000000..6bbfd16429
--- /dev/null
+++ b/gfx/wr/webrender/res/cs_scale.glsl
@@ -0,0 +1,62 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// This shader must remain compatible with ESSL 1, at least for the
+// WR_FEATURE_TEXTURE_EXTERNAL_ESSL1 feature, so that it can be used to render
+// video on GLES devices without GL_OES_EGL_image_external_essl3 support.
+// This means we cannot use textureSize(), int inputs/outputs, etc.
+
+#include shared
+
+varying vec2 vUv;
+flat varying vec4 vUvRect;
+#ifdef WR_FEATURE_TEXTURE_EXTERNAL_ESSL1
+uniform vec2 uTextureSize;
+#endif
+
+#ifdef WR_VERTEX_SHADER
+
+PER_INSTANCE attribute vec4 aScaleTargetRect;
+PER_INSTANCE attribute vec4 aScaleSourceRect;
+
+void main(void) {
+    // Divisor applied to source coordinates. In WR_FEATURE_TEXTURE_RECT mode the
+    // rect and size use non-normalized texture coordinates, so it is 1.
+#ifdef WR_FEATURE_TEXTURE_RECT
+    vec2 texture_size = vec2(1, 1);
+#elif defined(WR_FEATURE_TEXTURE_EXTERNAL_ESSL1)
+    vec2 texture_size = uTextureSize;
+#else
+    vec2 texture_size = vec2(TEX_SIZE(sColor0));
+#endif
+
+    // The uvs may be inverted, so order the corners with min/max before
+    // insetting each bound by 0.5.
+    vec2 src_min = min(aScaleSourceRect.xy, aScaleSourceRect.zw);
+    vec2 src_max = max(aScaleSourceRect.xy, aScaleSourceRect.zw);
+    vUvRect = vec4(src_min + vec2(0.5), src_max - vec2(0.5)) / texture_size.xyxy;
+
+    vec2 src_size = aScaleSourceRect.zw - aScaleSourceRect.xy;
+    vUv = (aScaleSourceRect.xy + src_size * aPosition.xy) / texture_size;
+
+    vec2 target_pos = mix(aScaleTargetRect.xy, aScaleTargetRect.zw, aPosition.xy);
+    gl_Position = uTransform * vec4(target_pos, 0.0, 1.0);
+}
+
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+
+void main(void) {
+    vec2 clamped_uv = clamp(vUv, vUvRect.xy, vUvRect.zw); // stay inside the inset source bounds
+    oFragColor = TEX_SAMPLE(sColor0, clamped_uv);
+}
+
+#ifdef SWGL_DRAW_SPAN
+void swgl_drawSpanRGBA8() { // Only compiled when SWGL_DRAW_SPAN is defined.
+    swgl_commitTextureLinearRGBA8(sColor0, vUv, vUvRect); // same texture, uv and bounds as main()
+}
+#endif
+
+#endif
diff --git a/gfx/wr/webrender/res/cs_svg_filter.glsl b/gfx/wr/webrender/res/cs_svg_filter.glsl
new file mode 100644
index 0000000000..77c5332fbe
--- /dev/null
+++ b/gfx/wr/webrender/res/cs_svg_filter.glsl
@@ -0,0 +1,594 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#define WR_FEATURE_TEXTURE_2D
+
+#include shared,prim_shared
+
+varying vec2 vInput1Uv;
+varying vec2 vInput2Uv;
+flat varying vec4 vInput1UvRect;
+flat varying vec4 vInput2UvRect;
+flat varying ivec4 vData;
+flat varying vec4 vFilterData0;
+flat varying vec4 vFilterData1;
+
+// x: Filter input count, y: Filter kind.
+// Packed in to a vector to work around bug 1630356.
+flat varying ivec2 vFilterInputCountFilterKindVec;
+#define vFilterInputCount vFilterInputCountFilterKindVec.x
+#define vFilterKind vFilterInputCountFilterKindVec.y
+// Packed in to a vector to work around bug 1630356.
+flat varying vec2 vFloat0;
+
+flat varying mat4 vColorMat;
+flat varying ivec4 vFuncs;
+
+#define FILTER_BLEND                0
+#define FILTER_FLOOD                1
+#define FILTER_LINEAR_TO_SRGB       2
+#define FILTER_SRGB_TO_LINEAR       3
+#define FILTER_OPACITY              4
+#define FILTER_COLOR_MATRIX         5
+#define FILTER_DROP_SHADOW          6
+#define FILTER_OFFSET               7
+#define FILTER_COMPONENT_TRANSFER   8
+#define FILTER_IDENTITY             9
+#define FILTER_COMPOSITE            10
+
+#define COMPOSITE_OVER       0
+#define COMPOSITE_IN         1
+#define COMPOSITE_OUT        2
+#define COMPOSITE_ATOP       3
+#define COMPOSITE_XOR        4
+#define COMPOSITE_LIGHTER    5
+#define COMPOSITE_ARITHMETIC 6
+
+#ifdef WR_VERTEX_SHADER
+
+PER_INSTANCE in int aFilterRenderTaskAddress;
+PER_INSTANCE in int aFilterInput1TaskAddress;
+PER_INSTANCE in int aFilterInput2TaskAddress;
+PER_INSTANCE in int aFilterKind;
+PER_INSTANCE in int aFilterInputCount;
+PER_INSTANCE in int aFilterGenericInt;
+PER_INSTANCE in ivec2 aFilterExtraDataAddress;
+
+struct FilterTask { // Subset of RenderTaskData built by fetch_filter_task().
+    RectWithEndpoint task_rect; // task_rect of the fetched render task data
+    vec3 user_data;             // xyz components of the fetched user_data
+};
+
+// Fetches the render task data stored at `address` and keeps the parts this
+// shader reads: the task rect and the xyz components of the user data.
+FilterTask fetch_filter_task(int address) {
+    RenderTaskData fetched = fetch_render_task_data(address);
+
+    FilterTask filter_task;
+    filter_task.task_rect = fetched.task_rect;
+    filter_task.user_data = fetched.user_data.xyz;
+    return filter_task;
+}
+
+// Returns the task rect inset by 0.5 on every side, divided by the texture
+// size. The fragment shader clamps its sample coordinates to this rect.
+vec4 compute_uv_rect(RectWithEndpoint task_rect, vec2 texture_size) {
+    vec4 inset_rect = vec4(task_rect.p0 + vec2(0.5), task_rect.p1 - vec2(0.5));
+    return inset_rect / texture_size.xyxy;
+}
+
+// Interpolates this vertex's uv between the task rect corners (p1 is floored).
+vec2 compute_uv(RectWithEndpoint task_rect, vec2 texture_size) {
+    vec2 uv_max = floor(task_rect.p1) / texture_size;
+    return mix(task_rect.p0 / texture_size, uv_max, aPosition.xy);
+}
+
+void main(void) {
+    FilterTask filter_task = fetch_filter_task(aFilterRenderTaskAddress);
+    RectWithEndpoint target_rect = filter_task.task_rect;
+    // Position of this vertex within the filter's own task rect.
+    vec2 pos = mix(target_rect.p0, target_rect.p1, aPosition.xy);
+    // First input (sColor0): uv and clamp rect, set only when an input exists.
+    RectWithEndpoint input_1_task;
+    if (aFilterInputCount > 0) {
+        vec2 texture_size = vec2(TEX_SIZE(sColor0).xy);
+        input_1_task = fetch_render_task_rect(aFilterInput1TaskAddress);
+        vInput1UvRect = compute_uv_rect(input_1_task, texture_size);
+        vInput1Uv = compute_uv(input_1_task, texture_size);
+    }
+    // Second input (sColor1), set only when the filter has two inputs.
+    RectWithEndpoint input_2_task;
+    if (aFilterInputCount > 1) {
+        vec2 texture_size = vec2(TEX_SIZE(sColor1).xy);
+        input_2_task = fetch_render_task_rect(aFilterInput2TaskAddress);
+        vInput2UvRect = compute_uv_rect(input_2_task, texture_size);
+        vInput2Uv = compute_uv(input_2_task, texture_size);
+    }
+
+    vFilterInputCount = aFilterInputCount;
+    vFilterKind = aFilterKind;
+
+    // This assignment is only used for component transfer filters but this
+    // assignment has to be done here and not in the component transfer case
+    // below because it doesn't get executed on Windows because of a suspected
+    // miscompile of this shader on Windows. See
+    // https://github.com/servo/webrender/wiki/Driver-issues#bug-1505871---assignment-to-varying-flat-arrays-inside-switch-statement-of-vertex-shader-suspected-miscompile-on-windows
+    // default: just to satisfy angle_shader_validation.rs which needs one
+    // default: for every switch, even in comments.
+    vFuncs.r = (aFilterGenericInt >> 12) & 0xf; // R
+    vFuncs.g = (aFilterGenericInt >> 8)  & 0xf; // G
+    vFuncs.b = (aFilterGenericInt >> 4)  & 0xf; // B
+    vFuncs.a = (aFilterGenericInt)       & 0xf; // A
+    // Per-kind parameters forwarded to the fragment shader through flat varyings.
+    switch (aFilterKind) {
+        case FILTER_BLEND:
+            vData = ivec4(aFilterGenericInt, 0, 0, 0); // read as the blend mode in the fragment shader
+            break;
+        case FILTER_FLOOD:
+            vFilterData0 = fetch_from_gpu_cache_1_direct(aFilterExtraDataAddress); // written out as the result color
+            break;
+        case FILTER_OPACITY:
+            vFloat0.x = filter_task.user_data.x; // multiplied into the alpha channel
+            break;
+        case FILTER_COLOR_MATRIX:
+            vec4 mat_data[4] = fetch_from_gpu_cache_4_direct(aFilterExtraDataAddress);
+            vColorMat = mat4(mat_data[0], mat_data[1], mat_data[2], mat_data[3]);
+            vFilterData0 = fetch_from_gpu_cache_1_direct(aFilterExtraDataAddress + ivec2(4, 0)); // added after the matrix multiply
+            break;
+        case FILTER_DROP_SHADOW:
+            vFilterData0 = fetch_from_gpu_cache_1_direct(aFilterExtraDataAddress); // shadow color
+            break;
+        case FILTER_OFFSET:
+            vec2 texture_size = vec2(TEX_SIZE(sColor0).xy);
+            vFilterData0 = vec4(-filter_task.user_data.xy / texture_size, vec2(0.0));
+            // NOTE(review): input_1_task is only assigned when aFilterInputCount > 0 — confirm offset filters always have an input.
+            RectWithEndpoint task_rect = input_1_task;
+            vec4 clipRect = vec4(task_rect.p0, task_rect.p1);
+            clipRect /= texture_size.xyxy;
+            vFilterData1 = clipRect;
+            break;
+        case FILTER_COMPONENT_TRANSFER:
+            vData = ivec4(aFilterExtraDataAddress, 0, 0); // base gpu cache address of the transfer data
+            break;
+        case FILTER_COMPOSITE:
+            vData = ivec4(aFilterGenericInt, 0, 0, 0); // composite operator
+            if (aFilterGenericInt == COMPOSITE_ARITHMETIC) {
+              vFilterData0 = fetch_from_gpu_cache_1_direct(aFilterExtraDataAddress);
+            }
+            break;
+        default:
+            break;
+    }
+
+    gl_Position = uTransform * vec4(pos, 0.0, 1.0);
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+
+#define COMPONENT_TRANSFER_IDENTITY 0
+#define COMPONENT_TRANSFER_TABLE 1
+#define COMPONENT_TRANSFER_DISCRETE 2
+#define COMPONENT_TRANSFER_LINEAR 3
+#define COMPONENT_TRANSFER_GAMMA 4
+
+vec3 Multiply(vec3 Cb, vec3 Cs) { // Component-wise product of the two colors.
+    return Cb * Cs;
+}
+
+vec3 Screen(vec3 Cb, vec3 Cs) { // Sum of the two colors minus their product.
+    return Cb + Cs - (Cb * Cs);
+}
+
+// Per channel: Multiply with 2*Cs where Cs < 0.5, Screen with 2*Cs - 1 otherwise.
+vec3 HardLight(vec3 Cb, vec3 Cs) {
+    vec3 multiplied = Multiply(Cb, 2.0 * Cs);
+    vec3 screened = Screen(Cb, 2.0 * Cs - 1.0);
+    return mix(multiplied, screened, step(vec3(0.5), Cs));
+}
+
+// TODO: Worth doing with mix/step? Check GLSL output.
+float ColorDodge(float Cb, float Cs) {
+    // A zero backdrop yields zero; otherwise a source of one yields one,
+    // which also keeps the division below away from a zero denominator.
+    if (Cb == 0.0) {
+        return 0.0;
+    }
+    return (Cs == 1.0) ? 1.0 : min(1.0, Cb / (1.0 - Cs));
+}
+
+// TODO: Worth doing with mix/step? Check GLSL output.
+float ColorBurn(float Cb, float Cs) {
+    // A backdrop of one yields one; otherwise a source of zero yields zero,
+    // which also keeps the division below away from a zero denominator.
+    if (Cb == 1.0) {
+        return 1.0;
+    }
+    return (Cs == 0.0) ? 0.0 : 1.0 - min(1.0, (1.0 - Cb) / Cs);
+}
+
+// Soft-light blend of one channel; the formula changes at Cs = 0.5.
+float SoftLight(float Cb, float Cs) {
+    // Source at or below one half.
+    if (Cs <= 0.5) {
+        return Cb - (1.0 - 2.0 * Cs) * Cb * (1.0 - Cb);
+    }
+
+    // Source above one half: D is a cubic polynomial in Cb for backdrops up
+    // to 0.25 and the square root of the backdrop otherwise.
+    float D = (Cb <= 0.25)
+        ? ((16.0 * Cb - 12.0) * Cb + 4.0) * Cb
+        : sqrt(Cb);
+    return Cb + (2.0 * Cs - 1.0) * (D - Cb);
+}
+
+vec3 Difference(vec3 Cb, vec3 Cs) { // Absolute per-channel difference.
+    return abs(Cb - Cs);
+}
+
+vec3 Exclusion(vec3 Cb, vec3 Cs) { // Sum of the colors minus twice their product.
+    return Cb + Cs - 2.0 * Cb * Cs;
+}
+
+// These functions below are taken from the spec.
+// There's probably a much quicker way to implement
+// them in GLSL...
+float Sat(vec3 c) { // Spread between the largest and smallest channel.
+    return max(c.r, max(c.g, c.b)) - min(c.r, min(c.g, c.b));
+}
+
+// Luminosity: weighted sum of the channels (0.3 R + 0.59 G + 0.11 B).
+float Lum(vec3 c) {
+    return dot(c, vec3(0.3, 0.59, 0.11));
+}
+
+// Clips C using the luminosity, minimum and maximum of the incoming color.
+vec3 ClipColor(vec3 C) {
+    float L = Lum(C);
+    float lowest = min(C.r, min(C.g, C.b));
+    float highest = max(C.r, max(C.g, C.b));
+    if (lowest < 0.0) {
+        C = L + (((C - L) * L) / (L - lowest));
+    }
+    if (highest > 1.0) {
+        C = L + (((C - L) * (1.0 - L)) / (highest - L));
+    }
+    return C;
+}
+
+// Shifts every channel of C by (l - Lum(C)), then clips the result.
+vec3 SetLum(vec3 C, float l) {
+    return ClipColor(C + (l - Lum(C)));
+}
+
+// Rewrites a (min, mid, max) channel triple in place so that the maximum
+// becomes s and the minimum 0; all three become 0 when max does not exceed min.
+void SetSatInner(inout float Cmin, inout float Cmid, inout float Cmax, float s) {
+    bool has_range = Cmax > Cmin;
+
+    // Cmid is updated first because it reads the incoming Cmin and Cmax.
+    Cmid = has_range ? (((Cmid - Cmin) * s) / (Cmax - Cmin)) : 0.0;
+    Cmax = has_range ? s : 0.0;
+    Cmin = 0.0;
+}
+
+// Sets the saturation of C to s. The comparisons sort the channels so that
+// SetSatInner receives them as (min, mid, max); it rewrites them in place
+// through its inout parameters.
+vec3 SetSat(vec3 C, float s) {
+    if (C.r <= C.g) {
+        if (C.g <= C.b) {
+            SetSatInner(C.r, C.g, C.b, s);  // r <= g <= b
+        } else if (C.r <= C.b) {
+            SetSatInner(C.r, C.b, C.g, s);  // r <= b < g
+        } else {
+            SetSatInner(C.b, C.r, C.g, s);  // b < r <= g
+        }
+    } else {
+        if (C.r <= C.b) {
+            SetSatInner(C.g, C.r, C.b, s);  // g < r <= b
+        } else if (C.g <= C.b) {
+            SetSatInner(C.g, C.b, C.r, s);  // g <= b < r
+        } else {
+            SetSatInner(C.b, C.g, C.r, s);  // b < g < r
+        }
+    }
+
+    return C;
+}
+
+vec3 Hue(vec3 Cb, vec3 Cs) { // Cs with saturation Sat(Cb), then luminosity Lum(Cb).
+    return SetLum(SetSat(Cs, Sat(Cb)), Lum(Cb));
+}
+
+vec3 Saturation(vec3 Cb, vec3 Cs) { // Cb with saturation Sat(Cs), then luminosity Lum(Cb).
+    return SetLum(SetSat(Cb, Sat(Cs)), Lum(Cb));
+}
+
+vec3 Color(vec3 Cb, vec3 Cs) { // Cs with its luminosity set to Lum(Cb).
+    return SetLum(Cs, Lum(Cb));
+}
+
+vec3 Luminosity(vec3 Cb, vec3 Cs) { // Cb with its luminosity set to Lum(Cs).
+    return SetLum(Cb, Lum(Cs));
+}
+
+const int BlendMode_Normal      = 0;
+const int BlendMode_Multiply    = 1;
+const int BlendMode_Screen      = 2;
+const int BlendMode_Overlay     = 3;
+const int BlendMode_Darken      = 4;
+const int BlendMode_Lighten     = 5;
+const int BlendMode_ColorDodge  = 6;
+const int BlendMode_ColorBurn   = 7;
+const int BlendMode_HardLight   = 8;
+const int BlendMode_SoftLight   = 9;
+const int BlendMode_Difference  = 10;
+const int BlendMode_Exclusion   = 11;
+const int BlendMode_Hue         = 12;
+const int BlendMode_Saturation  = 13;
+const int BlendMode_Color       = 14;
+const int BlendMode_Luminosity  = 15;
+
+vec4 blend(vec4 Cs, vec4 Cb, int mode) {
+    vec4 result = vec4(1.0, 0.0, 0.0, 1.0); // rgb stays red if no case below matches
+    // Each case writes only result.rgb, using the function for that blend mode.
+    switch (mode) {
+        case BlendMode_Normal:
+            result.rgb = Cs.rgb;
+            break;
+        case BlendMode_Multiply:
+            result.rgb = Multiply(Cb.rgb, Cs.rgb);
+            break;
+        case BlendMode_Screen:
+            result.rgb = Screen(Cb.rgb, Cs.rgb);
+            break;
+        case BlendMode_Overlay:
+            // Overlay is HardLight with the source and backdrop swapped
+            result.rgb = HardLight(Cs.rgb, Cb.rgb);
+            break;
+        case BlendMode_Darken:
+            result.rgb = min(Cs.rgb, Cb.rgb);
+            break;
+        case BlendMode_Lighten:
+            result.rgb = max(Cs.rgb, Cb.rgb);
+            break;
+        case BlendMode_ColorDodge:
+            result.r = ColorDodge(Cb.r, Cs.r);
+            result.g = ColorDodge(Cb.g, Cs.g);
+            result.b = ColorDodge(Cb.b, Cs.b);
+            break;
+        case BlendMode_ColorBurn:
+            result.r = ColorBurn(Cb.r, Cs.r);
+            result.g = ColorBurn(Cb.g, Cs.g);
+            result.b = ColorBurn(Cb.b, Cs.b);
+            break;
+        case BlendMode_HardLight:
+            result.rgb = HardLight(Cb.rgb, Cs.rgb);
+            break;
+        case BlendMode_SoftLight:
+            result.r = SoftLight(Cb.r, Cs.r);
+            result.g = SoftLight(Cb.g, Cs.g);
+            result.b = SoftLight(Cb.b, Cs.b);
+            break;
+        case BlendMode_Difference:
+            result.rgb = Difference(Cb.rgb, Cs.rgb);
+            break;
+        case BlendMode_Exclusion:
+            result.rgb = Exclusion(Cb.rgb, Cs.rgb);
+            break;
+        case BlendMode_Hue:
+            result.rgb = Hue(Cb.rgb, Cs.rgb);
+            break;
+        case BlendMode_Saturation:
+            result.rgb = Saturation(Cb.rgb, Cs.rgb);
+            break;
+        case BlendMode_Color:
+            result.rgb = Color(Cb.rgb, Cs.rgb);
+            break;
+        case BlendMode_Luminosity:
+            result.rgb = Luminosity(Cb.rgb, Cs.rgb);
+            break;
+        default: break;
+    }
+    vec3 rgb = (1.0 - Cb.a) * Cs.rgb + Cb.a * result.rgb; // blended color weighted by backdrop alpha
+    result = mix(vec4(Cb.rgb * Cb.a, Cb.a), vec4(rgb, 1.0), Cs.a); // Cs.a == 0 yields the premultiplied backdrop
+    return result;
+}
+
+// Based on Gecko's implementation in
+// https://hg.mozilla.org/mozilla-central/file/91b4c3687d75/gfx/src/FilterSupport.cpp#l24
+// These could be made faster by sampling a lookup table stored in a float texture
+// with linear interpolation.
+
+vec3 SrgbToLinear(vec3 color) {
+    vec3 linear_part = color / 12.92;  // candidate for channels <= 0.04045
+    vec3 gamma_part = pow(color / 1.055 + vec3(0.055 / 1.055), vec3(2.4));
+    return if_then_else(lessThanEqual(color, vec3(0.04045)), linear_part, gamma_part);
+}
+
+vec3 LinearToSrgb(vec3 color) {
+    vec3 linear_part = color * 12.92;  // candidate for channels <= 0.0031308
+    vec3 gamma_part = vec3(1.055) * pow(color, vec3(1.0 / 2.4)) - vec3(0.055);
+    return if_then_else(lessThanEqual(color, vec3(0.0031308)), linear_part, gamma_part);
+}
+
+// This function has to be factored out due to the following issue:
+// https://github.com/servo/webrender/wiki/Driver-issues#bug-1532245---switch-statement-inside-control-flow-inside-switch-statement-fails-to-compile-on-some-android-phones
+// (and now the words "default: default:" so angle_shader_validation.rs passes)
+vec4 ComponentTransfer(vec4 colora) {
+    // Applies the per-channel transfer function selected by vFuncs to colora.
+    // The amount of gpu cache data pushed depends on the function type:
+    // Identity => 0 blocks
+    // Table/Discrete => 64 blocks (256 values)
+    // Linear => 1 block (2 values)
+    // Gamma => 1 block (3 values)
+    // We loop through the color components and advance the offset (for the
+    // next color component) into the gpu cache by the number of blocks that
+    // component's function type occupies.
+    // Table/Discrete use a 256 entry look up table.
+    // Linear/Gamma are a simple calculation.
+    int offset = 0;
+    vec4 texel;
+    int k;
+
+    // Dynamically indexing a vector is buggy on some devices, so use a temporary array.
+    int[4] funcs = int[4](vFuncs.r, vFuncs.g, vFuncs.b, vFuncs.a);
+    for (int i = 0; i < 4; i++) {
+        switch (funcs[i]) {
+            case COMPONENT_TRANSFER_IDENTITY:
+                break;
+            case COMPONENT_TRANSFER_TABLE:
+            case COMPONENT_TRANSFER_DISCRETE:
+                // fetch value from lookup table: k is the rounded 0..255 index, four values per block
+                k = int(floor(colora[i]*255.0 + 0.5));
+                texel = fetch_from_gpu_cache_1_direct(vData.xy + ivec2(offset + k/4, 0));
+                colora[i] = clamp(texel[k % 4], 0.0, 1.0);
+                // offset plus 256/4 blocks
+                offset = offset + 64;
+                break;
+            case COMPONENT_TRANSFER_LINEAR:
+                // fetch the two values for use in the linear equation
+                texel = fetch_from_gpu_cache_1_direct(vData.xy + ivec2(offset, 0));
+                colora[i] = clamp(texel[0] * colora[i] + texel[1], 0.0, 1.0);
+                // offset plus 1 block
+                offset = offset + 1;
+                break;
+            case COMPONENT_TRANSFER_GAMMA:
+                // fetch the three values for use in the gamma equation
+                texel = fetch_from_gpu_cache_1_direct(vData.xy + ivec2(offset, 0));
+                colora[i] = clamp(texel[0] * pow(colora[i], texel[1]) + texel[2], 0.0, 1.0);
+                // offset plus 1 block
+                offset = offset + 1;
+                break;
+            default:
+                // shouldn't happen; the channel is left unchanged
+                break;
+        }
+    }
+    return colora;
+}
+
+// Composite Filter
+
+vec4 composite(vec4 Cs, vec4 Cb, int mode) {
+    vec4 Cr = vec4(0.0, 1.0, 0.0, 1.0); // returned unchanged (green) if no case below matches
+    switch (mode) {
+        case COMPOSITE_OVER:
+            Cr.rgb = Cs.a * Cs.rgb + Cb.a * Cb.rgb * (1.0 - Cs.a);
+            Cr.a = Cs.a + Cb.a * (1.0 - Cs.a);
+            break;
+        case COMPOSITE_IN:
+            Cr.rgb = Cs.a * Cs.rgb * Cb.a;
+            Cr.a = Cs.a * Cb.a;
+            break;
+        case COMPOSITE_OUT:
+            Cr.rgb = Cs.a * Cs.rgb * (1.0 - Cb.a);
+            Cr.a = Cs.a * (1.0 - Cb.a);
+            break;
+        case COMPOSITE_ATOP:
+            Cr.rgb = Cs.a * Cs.rgb * Cb.a + Cb.a * Cb.rgb * (1.0 - Cs.a);
+            Cr.a = Cs.a * Cb.a + Cb.a * (1.0 - Cs.a);
+            break;
+        case COMPOSITE_XOR:
+            Cr.rgb = Cs.a * Cs.rgb * (1.0 - Cb.a) + Cb.a * Cb.rgb * (1.0 - Cs.a);
+            Cr.a = Cs.a * (1.0 - Cb.a) + Cb.a * (1.0 - Cs.a);
+            break;
+        case COMPOSITE_LIGHTER:
+            Cr.rgb = Cs.a * Cs.rgb + Cb.a * Cb.rgb;
+            Cr.a = Cs.a + Cb.a;
+            Cr = clamp(Cr, vec4(0.0), vec4(1.0)); // the sums can exceed 1
+            break;
+        case COMPOSITE_ARITHMETIC: // x*Cs*Cb + y*Cs + z*Cb + w, with xyzw taken from vFilterData0
+            Cr = vec4(vFilterData0.x) * Cs * Cb + vec4(vFilterData0.y) * Cs + vec4(vFilterData0.z) * Cb + vec4(vFilterData0.w);
+            Cr = clamp(Cr, vec4(0.0), vec4(1.0));
+            break;
+        default:
+            break;
+    }
+    return Cr;
+}
+
+// Samples `sampler` at `uv` after clamping it to the [uvRect.xy, uvRect.zw] box.
+vec4 sampleInUvRect(sampler2D sampler, vec2 uv, vec4 uvRect) {
+    return texture(sampler, clamp(uv, uvRect.xy, uvRect.zw));
+}
+
+void main(void) {
+    // Sample whichever inputs exist; rgb is divided by alpha unless alpha is zero.
+    vec4 Ca = vec4(0.0, 0.0, 0.0, 0.0);
+    vec4 Cb = vec4(0.0, 0.0, 0.0, 0.0);
+    if (vFilterInputCount > 0) {
+        Ca = sampleInUvRect(sColor0, vInput1Uv, vInput1UvRect);
+        if (Ca.a != 0.0) {
+            Ca.rgb /= Ca.a;
+        }
+    }
+    if (vFilterInputCount > 1) {
+        Cb = sampleInUvRect(sColor1, vInput2Uv, vInput2UvRect);
+        if (Cb.a != 0.0) {
+            Cb.rgb /= Cb.a;
+        }
+    }
+
+    vec4 result = vec4(1.0, 0.0, 0.0, 1.0);
+    // When set, result.rgb is multiplied by result.a before being written out.
+    bool needsPremul = true;
+    switch (vFilterKind) {
+        case FILTER_BLEND:
+            result = blend(Ca, Cb, vData.x);
+            needsPremul = false;
+            break;
+        case FILTER_FLOOD:
+            result = vFilterData0;
+            needsPremul = false;
+            break;
+        case FILTER_LINEAR_TO_SRGB:
+            result.rgb = LinearToSrgb(Ca.rgb);
+            result.a = Ca.a;
+            break;
+        case FILTER_SRGB_TO_LINEAR:
+            result.rgb = SrgbToLinear(Ca.rgb);
+            result.a = Ca.a;
+            break;
+        case FILTER_OPACITY:
+            result.rgb = Ca.rgb;
+            result.a = Ca.a * vFloat0.x;
+            break;
+        case FILTER_COLOR_MATRIX:
+            result = vColorMat * Ca + vFilterData0;
+            result = clamp(result, vec4(0.0), vec4(1.0));
+            break;
+        case FILTER_DROP_SHADOW:
+            vec4 shadow = vec4(vFilterData0.rgb, Cb.a * vFilterData0.a);
+            // Normal blend + source-over composite
+            result = blend(Ca, shadow, BlendMode_Normal);
+            needsPremul = false;
+            break;
+        case FILTER_OFFSET:
+            vec2 offsetUv = vInput1Uv + vFilterData0.xy;
+            result = sampleInUvRect(sColor0, offsetUv, vInput1UvRect);
+            result *= point_inside_rect(offsetUv, vFilterData1.xy, vFilterData1.zw);
+            needsPremul = false;
+            break;
+        case FILTER_COMPONENT_TRANSFER:
+            result = ComponentTransfer(Ca);
+            break;
+        case FILTER_IDENTITY:
+            result = Ca;
+            break;
+        case FILTER_COMPOSITE:
+            result = composite(Ca, Cb, vData.x);
+            needsPremul = false;
+            break;
+        default:
+            break;
+    }
+
+    if (needsPremul) {
+        result.rgb *= result.a;
+    }
+    oFragColor = result;
+}
+#endif
diff --git a/gfx/wr/webrender/res/debug_color.glsl b/gfx/wr/webrender/res/debug_color.glsl
new file mode 100644
index 0000000000..b5a636e535
--- /dev/null
+++ b/gfx/wr/webrender/res/debug_color.glsl
@@ -0,0 +1,24 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include shared,shared_other
+
+varying vec4 vColor;
+
+#ifdef WR_VERTEX_SHADER
+in vec4 aColor;
+
+void main(void) {
+    // Premultiply the color; round the position with floor(x + 0.5).
+    vColor = vec4(aColor.rgb * aColor.a, aColor.a);
+    vec2 snapped = floor(aPosition + 0.5);
+    gl_Position = uTransform * vec4(snapped, 0.0, 1.0);
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+void main(void) { // Writes the interpolated color, premultiplied in the vertex shader.
+    oFragColor = vColor;
+}
+#endif
diff --git a/gfx/wr/webrender/res/debug_font.glsl b/gfx/wr/webrender/res/debug_font.glsl
new file mode 100644
index 0000000000..475a97dfce
--- /dev/null
+++ b/gfx/wr/webrender/res/debug_font.glsl
@@ -0,0 +1,30 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#define WR_FEATURE_TEXTURE_2D
+
+#include shared,shared_other
+
+varying vec2 vColorTexCoord;
+varying vec4 vColor;
+
+#ifdef WR_VERTEX_SHADER
+in vec4 aColor;
+in vec2 aColorTexCoord;
+
+void main(void) {
+    // Color and texture coordinate pass through unchanged; the position is
+    // rounded with floor(x + 0.5) before being transformed.
+    vColor = aColor;
+    vColorTexCoord = aColorTexCoord;
+    gl_Position = uTransform * vec4(floor(aPosition + 0.5), 0.0, 1.0);
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+void main(void) {
+    // The red channel of sColor0 scales every component of the vertex color.
+    oFragColor = vColor * texture(sColor0, vColorTexCoord).r;
+}
+#endif
diff --git a/gfx/wr/webrender/res/ellipse.glsl b/gfx/wr/webrender/res/ellipse.glsl
new file mode 100644
index 0000000000..36d20b8a5d
--- /dev/null
+++ b/gfx/wr/webrender/res/ellipse.glsl
@@ -0,0 +1,85 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// Returns 1 / radii^2 per axis for the ellipse distance approximation. Prefer
+// calling this in the vertex shader to avoid per-fragment division. Squared
+// radii are floored at 1.0e-6 so a point (zero radii) needs no branch.
+vec2 inverse_radii_squared(vec2 radii) {
+    vec2 radii_sq = max(radii * radii, 1.0e-6);
+    return 1.0 / radii_sq;
+}
+
+#ifdef WR_FRAGMENT_SHADER
+
+// One iteration of Newton's method on the 2D equation of an ellipse:
+//
+//     E(x, y) = x^2/a^2 + y^2/b^2 - 1
+//
+// The Jacobian of this equation is:
+//
+//     J(E(x, y)) = [ 2*x/a^2 2*y/b^2 ]
+//
+// We approximate the distance with:
+//
+//     E(x, y) / ||J(E(x, y))||
+//
+// See G. Taubin, "Distance Approximations for Rasterizing Implicit
+// Curves", section 3.
+//
+// A scale relative to the unit scale of the ellipse may be passed in to cause
+// the math to degenerate to length(p) when scale is 0, or otherwise give the
+// normal distance approximation if scale is 1.
+float distance_to_ellipse_approx(vec2 p, vec2 inv_radii_sq, float scale) {
+    vec2 p_over_r2 = p * inv_radii_sq;                 // (x/a^2, y/b^2)
+    float implicit_value = dot(p, p_over_r2) - scale;  // E(x, y) when scale is 1
+    vec2 gradient = (1.0 + scale) * p_over_r2;         // J(E(x, y)) when scale is 1
+    return implicit_value * inversesqrt(dot(gradient, gradient));
+}
+
+// Slower but more accurate version that uses the exact distance when dealing
+// with a 0-radius point distance and otherwise uses the faster approximation
+// when dealing with non-zero radii.
+float distance_to_ellipse(vec2 p, vec2 radii) {
+    float scale = float(all(greaterThan(radii, vec2(0.0))));  // 1 if both radii > 0, else 0
+    return distance_to_ellipse_approx(p, inverse_radii_squared(radii), scale);
+}
+
+float distance_to_rounded_rect(
+    vec2 pos,               // point being measured
+    vec4 center_radius_tl,  // xy: corner center; zw: forwarded as inv_radii_sq
+    vec4 center_radius_tr,  // same layout, top-right corner
+    vec4 center_radius_br,  // same layout, bottom-right corner
+    vec4 center_radius_bl,  // same layout, bottom-left corner
+    vec4 rect_bounds        // xy and zw are forwarded to signed_distance_rect
+) {
+    // Clip against each ellipse. If the fragment is in a corner, one of the
+    // branches below will select it as the corner to calculate the distance
+    // to. We want to choose the smallest distance inside either of the axis
+    // bounds as the overall distance we use to compare which corner is closer
+    // than another. If outside any ellipse, default to a small offset so a
+    // negative distance is returned for it.
+    vec4 corner = vec4(vec2(1.0e-6), vec2(1.0));
+    center_radius_tl.xy = center_radius_tl.xy - pos;                      // .xy becomes an offset from the
+    center_radius_tr.xy = (center_radius_tr.xy - pos) * vec2(-1.0, 1.0);  // corner center, with per-corner
+    center_radius_br.xy = pos - center_radius_br.xy;                      // sign flips so the four corners
+    center_radius_bl.xy = (center_radius_bl.xy - pos) * vec2(1.0, -1.0);  // can be compared the same way
+    if (min(center_radius_tl.x, center_radius_tl.y) > min(corner.x, corner.y)) {
+        corner = center_radius_tl;
+    }
+    if (min(center_radius_tr.x, center_radius_tr.y) > min(corner.x, corner.y)) {
+        corner = center_radius_tr;
+    }
+    if (min(center_radius_br.x, center_radius_br.y) > min(corner.x, corner.y)) {
+        corner = center_radius_br;
+    }
+    if (min(center_radius_bl.x, center_radius_bl.y) > min(corner.x, corner.y)) {
+        corner = center_radius_bl;
+    }
+
+    // Calculate the distance of the selected corner and the rectangle bounds,
+    // whichever is greater.
+    return max(distance_to_ellipse_approx(corner.xy, corner.zw, 1.0),
+               signed_distance_rect(pos, rect_bounds.xy, rect_bounds.zw));
+}
+#endif
diff --git a/gfx/wr/webrender/res/gpu_buffer.glsl b/gfx/wr/webrender/res/gpu_buffer.glsl
new file mode 100644
index 0000000000..fa9ba6f318
--- /dev/null
+++ b/gfx/wr/webrender/res/gpu_buffer.glsl
@@ -0,0 +1,23 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+uniform HIGHP_SAMPLER_FLOAT sampler2D sGpuBuffer;
+
+ivec2 get_gpu_buffer_uv(HIGHP_FS_ADDRESS int address) { // converts a linear address into a 2D texel coordinate
+    return ivec2(uint(address) % WR_MAX_VERTEX_TEXTURE_WIDTH, // x: remainder selects the column
+                 uint(address) / WR_MAX_VERTEX_TEXTURE_WIDTH); // y: quotient selects the row
+}
+
+vec4 fetch_from_gpu_buffer_1(HIGHP_FS_ADDRESS int address) { // reads the single texel at `address`
+    ivec2 coord = get_gpu_buffer_uv(address);
+    return texelFetch(sGpuBuffer, coord, 0);
+}
+
+vec4[2] fetch_from_gpu_buffer_2(HIGHP_FS_ADDRESS int address) { // reads two texels starting at `address`
+    ivec2 coord = get_gpu_buffer_uv(address);
+    return vec4[2](
+        TEXEL_FETCH(sGpuBuffer, coord, 0, ivec2(0, 0)), // offset (0, 0)
+        TEXEL_FETCH(sGpuBuffer, coord, 0, ivec2(1, 0)) // offset (1, 0)
+    );
+}
diff --git a/gfx/wr/webrender/res/gpu_cache.glsl b/gfx/wr/webrender/res/gpu_cache.glsl
new file mode 100644
index 0000000000..cd5e41fec4
--- /dev/null
+++ b/gfx/wr/webrender/res/gpu_cache.glsl
@@ -0,0 +1,137 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+uniform HIGHP_SAMPLER_FLOAT sampler2D sGpuCache;
+
+#define VECS_PER_IMAGE_RESOURCE     2
+
+// TODO(gw): This is here temporarily while we have
+//           both GPU store and cache. When the GPU
+//           store code is removed, we can change the
+//           PrimitiveInstance instance structure to
+//           use 2x unsigned shorts as vertex attributes
+//           instead of an int, and encode the UV directly
+//           in the vertices.
+ivec2 get_gpu_cache_uv(HIGHP_FS_ADDRESS int address) { // converts a linear address into a 2D texel coordinate
+    return ivec2(uint(address) % WR_MAX_VERTEX_TEXTURE_WIDTH, // x: remainder selects the column
+                 uint(address) / WR_MAX_VERTEX_TEXTURE_WIDTH); // y: quotient selects the row
+}
+
+vec4[2] fetch_from_gpu_cache_2_direct(ivec2 address) { // `address` is used directly as the 2D texel coordinate
+    return vec4[2](
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(0, 0)), // offset (0, 0)
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(1, 0)) // offset (1, 0)
+    );
+}
+
+vec4[2] fetch_from_gpu_cache_2(HIGHP_FS_ADDRESS int address) { // reads two texels starting at `address`
+    ivec2 coord = get_gpu_cache_uv(address);
+    return vec4[2](
+        TEXEL_FETCH(sGpuCache, coord, 0, ivec2(0, 0)), // offset (0, 0)
+        TEXEL_FETCH(sGpuCache, coord, 0, ivec2(1, 0)) // offset (1, 0)
+    );
+}
+
+vec4 fetch_from_gpu_cache_1_direct(ivec2 address) { // `address` is used directly as the 2D texel coordinate
+    return texelFetch(sGpuCache, address, 0); // LOD 0
+}
+
+vec4 fetch_from_gpu_cache_1(HIGHP_FS_ADDRESS int address) { // reads the single texel at `address`
+    ivec2 coord = get_gpu_cache_uv(address);
+    return texelFetch(sGpuCache, coord, 0);
+}
+
+#ifdef WR_VERTEX_SHADER
+
+vec4[8] fetch_from_gpu_cache_8(int address) { // reads eight texels with x offsets 0..7
+    ivec2 base = get_gpu_cache_uv(address);
+    return vec4[8](
+        TEXEL_FETCH(sGpuCache, base, 0, ivec2(0, 0)),
+        TEXEL_FETCH(sGpuCache, base, 0, ivec2(1, 0)),
+        TEXEL_FETCH(sGpuCache, base, 0, ivec2(2, 0)),
+        TEXEL_FETCH(sGpuCache, base, 0, ivec2(3, 0)),
+        TEXEL_FETCH(sGpuCache, base, 0, ivec2(4, 0)),
+        TEXEL_FETCH(sGpuCache, base, 0, ivec2(5, 0)),
+        TEXEL_FETCH(sGpuCache, base, 0, ivec2(6, 0)),
+        TEXEL_FETCH(sGpuCache, base, 0, ivec2(7, 0))
+    );
+}
+
+vec4[3] fetch_from_gpu_cache_3(int address) { // reads three texels with x offsets 0..2
+    ivec2 base = get_gpu_cache_uv(address);
+    return vec4[3](
+        TEXEL_FETCH(sGpuCache, base, 0, ivec2(0, 0)),
+        TEXEL_FETCH(sGpuCache, base, 0, ivec2(1, 0)),
+        TEXEL_FETCH(sGpuCache, base, 0, ivec2(2, 0))
+    );
+}
+
+vec4[3] fetch_from_gpu_cache_3_direct(ivec2 address) { // `address` is used directly as the 2D texel coordinate
+    return vec4[3](
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(0, 0)), // offset (0, 0)
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(1, 0)), // offset (1, 0)
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(2, 0)) // offset (2, 0)
+    );
+}
+
+vec4[4] fetch_from_gpu_cache_4_direct(ivec2 address) { // `address` is used directly as the 2D texel coordinate
+    return vec4[4](
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(0, 0)), // offset (0, 0)
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(1, 0)), // offset (1, 0)
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(2, 0)), // offset (2, 0)
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(3, 0)) // offset (3, 0)
+    );
+}
+
+vec4[4] fetch_from_gpu_cache_4(int address) { // reads four texels with x offsets 0..3
+    ivec2 base = get_gpu_cache_uv(address);
+    return vec4[4](
+        TEXEL_FETCH(sGpuCache, base, 0, ivec2(0, 0)),
+        TEXEL_FETCH(sGpuCache, base, 0, ivec2(1, 0)),
+        TEXEL_FETCH(sGpuCache, base, 0, ivec2(2, 0)),
+        TEXEL_FETCH(sGpuCache, base, 0, ivec2(3, 0))
+    );
+}
+
+//TODO: image resource is too specific for this module
+
+struct ImageSource {
+    RectWithEndpoint uv_rect;
+    vec4 user_data;
+};
+
+ImageSource fetch_image_source(int address) {
+    // Note: the number of blocks read has to match `renderer::BLOCKS_PER_UV_RECT`.
+    vec4 blocks[2] = fetch_from_gpu_cache_2(address);
+    // Block 0 supplies the two rect endpoints (xy, zw); block 1 is the user data.
+    return ImageSource(RectWithEndpoint(blocks[0].xy, blocks[0].zw), blocks[1]);
+}
+
+ImageSource fetch_image_source_direct(ivec2 address) {
+    vec4 blocks[2] = fetch_from_gpu_cache_2_direct(address);
+    // Block 0 supplies the two rect endpoints (xy, zw); block 1 is the user data.
+    return ImageSource(RectWithEndpoint(blocks[0].xy, blocks[0].zw), blocks[1]);
+}
+
+// Fetch optional extra data for a texture cache resource. This can contain
+// a polygon defining a UV rect within the texture cache resource.
+// Note: the polygon coordinates are in homogeneous space.
+struct ImageSourceExtra {
+    vec4 st_tl;
+    vec4 st_tr;
+    vec4 st_bl;
+    vec4 st_br;
+};
+
+ImageSourceExtra fetch_image_source_extra(int address) {
+    // The extra data starts VECS_PER_IMAGE_RESOURCE blocks after `address`.
+    int extra_address = address + VECS_PER_IMAGE_RESOURCE;
+    vec4 corners[4] = fetch_from_gpu_cache_4(extra_address);
+    // Constructor order: st_tl, st_tr, st_bl, st_br.
+    return ImageSourceExtra(
+        corners[0], corners[1], corners[2], corners[3]
+    );
+}
+
+#endif //WR_VERTEX_SHADER
diff --git a/gfx/wr/webrender/res/gpu_cache_update.glsl b/gfx/wr/webrender/res/gpu_cache_update.glsl
new file mode 100644
index 0000000000..90a8534246
--- /dev/null
+++ b/gfx/wr/webrender/res/gpu_cache_update.glsl
@@ -0,0 +1,27 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include base
+
+varying vec4 vData;
+
+#ifdef WR_VERTEX_SHADER
+in vec4 aValue;
+in vec2 aPosition;
+
+void main() {
+    gl_PointSize = 1.0; // point size of one
+    gl_Position = vec4(aPosition * 2.0 - 1.0, 0.0, 1.0); // aPosition scaled by 2 and shifted by -1
+    vData = aValue; // handed to the fragment stage through the varying
+}
+
+#endif //WR_VERTEX_SHADER
+
+#ifdef WR_FRAGMENT_SHADER
+out vec4 oValue;
+
+void main() { // fragment stage of the GPU cache update shader
+    oValue = vData; // writes the value received from the vertex stage unchanged
+}
+#endif //WR_FRAGMENT_SHADER
diff --git a/gfx/wr/webrender/res/gradient.glsl b/gfx/wr/webrender/res/gradient.glsl
new file mode 100644
index 0000000000..103059fdf9
--- /dev/null
+++ b/gfx/wr/webrender/res/gradient.glsl
@@ -0,0 +1,63 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// Gradient GPU cache address.
+// Packed in to a vector to work around bug 1630356.
+flat varying HIGHP_FS_ADDRESS ivec2 v_gradient_address;
+// Repetition along the gradient stops.
+// Packed in to a vector to work around bug 1630356.
+flat varying vec2 v_gradient_repeat;
+
+#ifdef WR_FRAGMENT_SHADER
+
+#ifdef WR_FEATURE_DITHERING
+vec4 dither(vec4 color) {
+    const int tile_mask = 7; // keeps the low three bits of each pixel coordinate
+
+    ivec2 cell = ivec2(gl_FragCoord.xy) & ivec2(tile_mask);
+    float level = (texelFetch(sDither, cell, 0).r * 255.0 + 0.5) / 64.0;
+    float delta = (level - 0.5) / 256.0; // scale down to the unit length
+
+    return color + vec4(delta, delta, delta, 0); // alpha is left untouched
+}
+#else
+vec4 dither(vec4 color) { // variant compiled when WR_FEATURE_DITHERING is not defined
+    return color; // pass-through: no noise is added
+}
+#endif //WR_FEATURE_DITHERING
+
+#define GRADIENT_ENTRIES 128.0
+
+float clamp_gradient_entry(float offset) {
+    // Calculate the color entry index to use for this offset:
+    //     offsets < 0 use the first color entry, 0
+    //     offsets from [0, 1) use the color entries in the range of [1, N-1)
+    //     offsets >= 1 use the last color entry, N-1
+    //     so transform the range [0, 1) -> [1, N-1)
+
+    // TODO(gw): In the future we might consider making the size of the
+    // LUT vary based on number / distribution of stops in the gradient.
+    // Ensure we don't fetch outside the valid range of the LUT.
+    return clamp(1.0 + offset * GRADIENT_ENTRIES, 0.0, 1.0 + GRADIENT_ENTRIES); // result lies in [0, GRADIENT_ENTRIES + 1]
+}
+
+vec4 sample_gradient(float offset) {
+    // When v_gradient_repeat.x is 1.0 this keeps only the fractional part.
+    offset -= floor(offset) * v_gradient_repeat.x;
+
+    // Map the offset to a position in the gradient color entries:
+    //     the integer part selects the entry,
+    //     the fractional part is the blend factor within that entry.
+    float lut_pos = clamp_gradient_entry(offset);
+    float entry = floor(lut_pos);
+    float blend = lut_pos - entry;
+
+    // Every entry occupies two blocks, hence the stride of 2 in the address.
+    vec4 pair[2] = fetch_from_gpu_buffer_2(v_gradient_address.x + 2 * int(entry));
+
+    // Combine as first + second * blend, then apply dithering.
+    return dither(pair[0] + pair[1] * blend);
+}
+
+#endif //WR_FRAGMENT_SHADER
diff --git a/gfx/wr/webrender/res/gradient_shared.glsl b/gfx/wr/webrender/res/gradient_shared.glsl
new file mode 100644
index 0000000000..f32502a9d1
--- /dev/null
+++ b/gfx/wr/webrender/res/gradient_shared.glsl
@@ -0,0 +1,78 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include gradient
+
+// Size of the gradient pattern's rectangle, used to compute horizontal and vertical
+// repetitions. Not to be confused with another kind of repetition of the pattern
+// which happens along the gradient stops.
+flat varying vec2 v_repeated_size;
+
+varying vec2 v_pos;
+
+#ifdef WR_FEATURE_ALPHA_PASS
+flat varying vec2 v_tile_repeat;
+#endif
+
+#ifdef WR_VERTEX_SHADER
+void write_gradient_vertex( // fills the gradient varyings (v_pos, v_repeated_size, v_gradient_address, v_gradient_repeat)
+    VertexInfo vi, // only vi.local_pos is read
+    RectWithEndpoint local_rect,
+    RectWithEndpoint segment_rect, // only read when BRUSH_FLAG_SEGMENT_RELATIVE is set
+    ivec4 prim_user_data, // only .x is read, as the gradient address
+    int brush_flags,
+    vec4 texel_rect, // only read when BRUSH_FLAG_SEGMENT_RELATIVE is set
+    int extend_mode,
+    vec2 stretch_size
+) {
+    if ((brush_flags & BRUSH_FLAG_SEGMENT_RELATIVE) != 0) {
+        v_pos = (vi.local_pos - segment_rect.p0) / rect_size(segment_rect); // position as a fraction of the segment
+        v_pos = v_pos * (texel_rect.zw - texel_rect.xy) + texel_rect.xy; // remapped into texel_rect
+        v_pos = v_pos * rect_size(local_rect); // scaled by the size of local_rect
+    } else {
+        v_pos = vi.local_pos - local_rect.p0; // position relative to the rect origin
+    }
+
+    vec2 tile_repeat = rect_size(local_rect) / stretch_size; // only consumed under WR_FEATURE_ALPHA_PASS below
+    v_repeated_size = stretch_size;
+
+    // Normalize UV to 0..1 scale.
+    v_pos /= v_repeated_size;
+
+    v_gradient_address.x = prim_user_data.x;
+
+    // Whether to repeat the gradient along the line instead of clamping.
+    v_gradient_repeat.x = float(extend_mode == EXTEND_MODE_REPEAT); // 1.0 when repeating, 0.0 otherwise
+
+#ifdef WR_FEATURE_ALPHA_PASS
+    v_tile_repeat = tile_repeat;
+#endif
+}
+#endif //WR_VERTEX_SHADER
+
+#ifdef WR_FRAGMENT_SHADER
+vec2 compute_repeated_pos() {
+#if defined(WR_FEATURE_ALPHA_PASS) && !defined(SWGL_ANTIALIAS)
+    // Top and left inflated edges: clamp negative positions to zero (see brush_image).
+    vec2 clamped_pos = max(v_pos, vec2(0.0));
+
+    // Wrap into a single tile for horizontal and vertical repetition.
+    vec2 tiled = fract(clamped_pos);
+
+    // Bottom and right inflated edges: pin to 1.0 past the last tile (see brush_image).
+    if (clamped_pos.x >= v_tile_repeat.x) {
+        tiled.x = 1.0;
+    }
+    if (clamped_pos.y >= v_tile_repeat.y) {
+        tiled.y = 1.0;
+    }
+    return tiled;
+#else
+    // Wrap into a single tile for horizontal and vertical repetition.
+    return fract(v_pos);
+#endif
+}
+
+#endif //WR_FRAGMENT_SHADER
+
diff --git a/gfx/wr/webrender/res/prim_shared.glsl b/gfx/wr/webrender/res/prim_shared.glsl
new file mode 100644
index 0000000000..f8c42892e1
--- /dev/null
+++ b/gfx/wr/webrender/res/prim_shared.glsl
@@ -0,0 +1,250 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include rect,render_task,gpu_cache,transform
+
+#define EXTEND_MODE_CLAMP  0
+#define EXTEND_MODE_REPEAT 1
+
+#define SUBPX_DIR_NONE        0
+#define SUBPX_DIR_HORIZONTAL  1
+#define SUBPX_DIR_VERTICAL    2
+#define SUBPX_DIR_MIXED       3
+
+#define RASTER_LOCAL            0
+#define RASTER_SCREEN           1
+
+uniform sampler2D sClipMask;
+
+#ifndef SWGL_CLIP_MASK
+// TODO: convert back to RectWithEndpoint if driver issues are resolved, if ever.
+flat varying vec4 vClipMaskUvBounds;
+varying vec2 vClipMaskUv;
+#endif
+
+#ifdef WR_VERTEX_SHADER
+
+#define COLOR_MODE_FROM_PASS            0
+#define COLOR_MODE_ALPHA                1
+#define COLOR_MODE_SUBPX_BG_PASS0       2
+#define COLOR_MODE_SUBPX_BG_PASS1       3
+#define COLOR_MODE_SUBPX_BG_PASS2       4
+#define COLOR_MODE_SUBPX_DUAL_SOURCE    5
+#define COLOR_MODE_BITMAP_SHADOW        6
+#define COLOR_MODE_COLOR_BITMAP         7
+#define COLOR_MODE_IMAGE                8
+#define COLOR_MODE_MULTIPLY_DUAL_SOURCE 9
+
+uniform HIGHP_SAMPLER_FLOAT sampler2D sPrimitiveHeadersF;
+uniform HIGHP_SAMPLER_FLOAT isampler2D sPrimitiveHeadersI;
+
+// Instanced attributes
+PER_INSTANCE in ivec4 aData;
+
+#define VECS_PER_PRIM_HEADER_F 2U
+#define VECS_PER_PRIM_HEADER_I 2U
+
+struct Instance
+{
+    int prim_header_address;
+    int picture_task_address;
+    int clip_address;
+    int segment_index;
+    int flags;
+    int resource_address;
+    int brush_kind;
+};
+
+Instance decode_instance_attributes() {
+    // Unpack the per-instance ivec4 `aData`; the arguments are listed in the
+    // field declaration order of `Instance`.
+    return Instance(
+        aData.x,            // prim_header_address
+        aData.y >> 16,      // picture_task_address: y shifted right by 16
+        aData.y & 0xffff,   // clip_address: low 16 bits of y
+        aData.z & 0xffff,   // segment_index: low 16 bits of z
+        aData.z >> 16,      // flags: z shifted right by 16
+        aData.w & 0xffffff, // resource_address: low 24 bits of w
+        aData.w >> 24       // brush_kind: w shifted right by 24
+    );
+}
+
+struct PrimitiveHeader {
+    RectWithEndpoint local_rect;
+    RectWithEndpoint local_clip_rect;
+    float z;
+    int specific_prim_address;
+    int transform_id;
+    ivec4 user_data;
+};
+
+PrimitiveHeader fetch_prim_header(int index) {
+    // Float header: two texels holding the local rect and the local clip rect.
+    ivec2 float_uv = get_fetch_uv(index, VECS_PER_PRIM_HEADER_F);
+    vec4 rect_texel = TEXEL_FETCH(sPrimitiveHeadersF, float_uv, 0, ivec2(0, 0));
+    vec4 clip_texel = TEXEL_FETCH(sPrimitiveHeadersF, float_uv, 0, ivec2(1, 0));
+
+    // Integer header: the first texel carries z, the specific prim address and
+    // the transform id (its w is not read); the second texel is the user data.
+    ivec2 int_uv = get_fetch_uv(index, VECS_PER_PRIM_HEADER_I);
+    ivec4 ints0 = TEXEL_FETCH(sPrimitiveHeadersI, int_uv, 0, ivec2(0, 0));
+    ivec4 ints1 = TEXEL_FETCH(sPrimitiveHeadersI, int_uv, 0, ivec2(1, 0));
+
+    return PrimitiveHeader(
+        RectWithEndpoint(rect_texel.xy, rect_texel.zw),
+        RectWithEndpoint(clip_texel.xy, clip_texel.zw),
+        float(ints0.x), ints0.y, ints0.z,
+        ints1
+    );
+}
+
+struct VertexInfo {
+    vec2 local_pos;
+    vec4 world_pos;
+};
+
+VertexInfo write_vertex(vec2 local_pos,
+                        RectWithEndpoint local_clip_rect,
+                        float z,
+                        Transform transform,
+                        PictureTask task) {
+    // Restrict the vertex to the local clip rect.
+    vec2 clipped_pos = rect_clamp(local_clip_rect, local_pos);
+
+    // Local space -> world space (homogeneous).
+    vec4 world = transform.m * vec4(clipped_pos, 0.0, 1.0);
+
+    // World space -> device pixel space.
+    vec2 device = world.xy * task.device_pixel_scale;
+
+    // Offset that moves content coordinates into the render task's rect.
+    vec2 task_offset = -task.content_origin + task.task_rect.p0;
+
+    // The offset and z are multiplied by w before being written out.
+    gl_Position = uTransform * vec4(
+        device + task_offset * world.w,
+        z * world.w,
+        world.w
+    );
+
+    return VertexInfo(clipped_pos, world);
+}
+
+RectWithEndpoint clip_and_init_antialiasing(RectWithEndpoint segment_rect, // clamped (and possibly extruded) copy is returned
+                                            RectWithEndpoint prim_rect,
+                                            RectWithEndpoint clip_rect,
+                                            int edge_flags, // bits 1, 2, 4, 8 map to p0.x, p0.y, p1.x, p1.y
+                                            float z, // not read in this body
+                                            Transform transform, // not read in this body
+                                            PictureTask task) { // `task` is not read in this body
+#ifdef SWGL_ANTIALIAS
+    // Check if the bounds are smaller than the unmodified segment rect. If so,
+    // it is safe to enable AA on those edges.
+    bvec4 clipped = bvec4(greaterThan(clip_rect.p0, segment_rect.p0),
+                          lessThan(clip_rect.p1, segment_rect.p1));
+    swgl_antiAlias(edge_flags | (clipped.x ? 1 : 0) | (clipped.y ? 2 : 0) |
+                   (clipped.z ? 4 : 0) | (clipped.w ? 8 : 0));
+#endif
+
+    segment_rect.p0 = clamp(segment_rect.p0, clip_rect.p0, clip_rect.p1); // keep both endpoints inside clip_rect
+    segment_rect.p1 = clamp(segment_rect.p1, clip_rect.p0, clip_rect.p1);
+
+#ifndef SWGL_ANTIALIAS
+    prim_rect.p0 = clamp(prim_rect.p0, clip_rect.p0, clip_rect.p1);
+    prim_rect.p1 = clamp(prim_rect.p1, clip_rect.p0, clip_rect.p1); // NOTE(review): prim_rect is not read after this point in this function - confirm that is intended
+
+    // Select between the segment and prim edges based on edge mask.
+    // We must perform the bitwise-and for each component individually, as a
+    // vector bitwise-and followed by conversion to bvec4 causes shader
+    // compilation crashes on some Adreno devices. See bug 1715746.
+    bvec4 clip_edge_mask = bvec4(bool(edge_flags & 1), bool(edge_flags & 2), bool(edge_flags & 4), bool(edge_flags & 8));
+    init_transform_vs(mix(
+        vec4(vec2(-1e16), vec2(1e16)), // used for edges whose flag bit is clear
+        vec4(segment_rect.p0, segment_rect.p1), // used for edges whose flag bit is set
+        clip_edge_mask
+    ));
+
+    // As this is a transform shader, extrude by 2 (local space) pixels
+    // in each direction. This gives enough space around the edge to
+    // apply distance anti-aliasing. Technically, it:
+    // (a) slightly over-estimates the number of required pixels in the simple case.
+    // (b) might not provide enough edge in edge case perspective projections.
+    // However, it's fast and simple. If / when we ever run into issues, we
+    // can do some math on the projection matrix to work out a variable
+    // amount to extrude.
+
+    // Only extrude along edges where we are going to apply AA.
+    float extrude_amount = 2.0;
+    vec4 extrude_distance = mix(vec4(0.0), vec4(extrude_amount), clip_edge_mask); // 2.0 on flagged edges, 0.0 elsewhere
+    segment_rect.p0 -= extrude_distance.xy; // grow outwards on the p0 side
+    segment_rect.p1 += extrude_distance.zw; // grow outwards on the p1 side
+#endif
+
+    return segment_rect;
+}
+
+void write_clip(vec4 world_pos, ClipArea area, PictureTask task) { // `world_pos` is only read in the #else path, `task` only in the SWGL path
+#ifdef SWGL_CLIP_MASK
+    swgl_clipMask(
+        sClipMask,
+        (task.task_rect.p0 - task.content_origin) - (area.task_rect.p0 - area.screen_origin), // difference between the picture task offset and the clip area offset
+        area.task_rect.p0,
+        rect_size(area.task_rect)
+    );
+#else
+    vec2 uv = world_pos.xy * area.device_pixel_scale +
+        world_pos.w * (area.task_rect.p0 - area.screen_origin); // offset is multiplied by w; do_clip() multiplies by gl_FragCoord.w
+    vClipMaskUvBounds = vec4(
+        area.task_rect.p0, // xy: lower bound tested in do_clip()
+        area.task_rect.p1 // zw: upper bound tested in do_clip()
+    );
+    vClipMaskUv = uv;
+#endif
+}
+
+// Read the extra image data containing the homogeneous screen space coordinates
+// of the corners, interpolate between them, and return real screen space UV.
+vec2 get_image_quad_uv(int address, vec2 f) {
+    ImageSourceExtra corners = fetch_image_source_extra(address);
+    vec4 top = mix(corners.st_tl, corners.st_tr, f.x);
+    vec4 bottom = mix(corners.st_bl, corners.st_br, f.x);
+    vec4 st = mix(top, bottom, f.y);
+    return st.xy / st.w;
+}
+#endif //WR_VERTEX_SHADER
+
+#ifdef WR_FRAGMENT_SHADER
+
+struct Fragment {
+    vec4 color;
+#ifdef WR_FEATURE_DUAL_SOURCE_BLENDING
+    vec4 blend;
+#endif
+};
+
+float do_clip() {
+#ifdef SWGL_CLIP_MASK
+    // SWGL applies the clip mask through its builtin support, which is more
+    // efficient, so nothing has to be done here.
+    return 1.0;
+#else
+    // Opaque objects are given dummy bounds (both endpoints equal): no clipping.
+    if (vClipMaskUvBounds.xy == vClipMaskUvBounds.zw) {
+        return 1.0;
+    }
+    // Everything outside of the mask bounds counts as fully transparent.
+    //Note: we assume gl_FragCoord.w == interpolated(1 / vClipMaskUv.w)
+    vec2 uv = vClipMaskUv * gl_FragCoord.w;
+    bvec2 above_min = lessThanEqual(vClipMaskUvBounds.xy, uv); // inclusive
+    bvec2 below_max = greaterThan(vClipMaskUvBounds.zw, uv); // non-inclusive
+    // Early out when the pixel falls outside the valid bounds.
+    if (!all(bvec4(above_min, below_max))) {
+        return 0.0;
+    }
+    // Slow path: read the mask value from the clip mask texture.
+    return texelFetch(sClipMask, ivec2(uv), 0).r;
+#endif
+}
+
+#endif //WR_FRAGMENT_SHADER
diff --git a/gfx/wr/webrender/res/ps_clear.glsl b/gfx/wr/webrender/res/ps_clear.glsl
new file mode 100644
index 0000000000..0ef691a55e
--- /dev/null
+++ b/gfx/wr/webrender/res/ps_clear.glsl
@@ -0,0 +1,25 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include shared
+
+varying vec4 vColor;
+
+#ifdef WR_VERTEX_SHADER
+PER_INSTANCE in vec4 aRect;
+PER_INSTANCE in vec4 aColor;
+
+void main(void) {
+    vColor = aColor; // per-instance color handed to the fragment stage
+    vec2 corner = mix(aRect.xy, aRect.zw, aPosition.xy); // pick a point between the two rect endpoints
+    gl_Position = uTransform * vec4(corner, 0.0, 1.0);
+    gl_Position.z = gl_Position.w; // force depth clear to 1.0
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+void main(void) { // fragment stage of the clear shader
+    oFragColor = vColor; // outputs the color received from the vertex stage
+}
+#endif
diff --git a/gfx/wr/webrender/res/ps_copy.glsl b/gfx/wr/webrender/res/ps_copy.glsl
new file mode 100644
index 0000000000..7bcb469b5c
--- /dev/null
+++ b/gfx/wr/webrender/res/ps_copy.glsl
@@ -0,0 +1,41 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include base
+
+#ifdef WR_VERTEX_SHADER
+
+attribute vec2 aPosition;
+
+// See CopyInstance struct.
+attribute vec4 a_src_rect;
+attribute vec4 a_dst_rect;
+attribute vec2 a_dst_texture_size;
+
+varying vec2 v_uv;
+
+void main(void) {
+    // Destination position, remapped into framebuffer [-1, 1] space.
+    vec2 dst = mix(a_dst_rect.xy, a_dst_rect.zw, aPosition.xy);
+    gl_Position = vec4(dst / (a_dst_texture_size  * 0.5) - vec2(1.0, 1.0), 0.0, 1.0);
+
+    // The fragment stage uses texel fetch, so v_uv stays in unnormalized device space.
+    v_uv = mix(a_src_rect.xy, a_src_rect.zw, aPosition.xy);
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+
+
+out vec4 oFragColor;
+
+varying vec2 v_uv;
+
+uniform sampler2D sColor0;
+
+void main(void) { // fragment stage of the copy shader
+    oFragColor = texelFetch(sColor0, ivec2(v_uv), 0); // v_uv is converted to integer texel coordinates; LOD 0
+}
+
+#endif
diff --git a/gfx/wr/webrender/res/ps_split_composite.glsl b/gfx/wr/webrender/res/ps_split_composite.glsl
new file mode 100644
index 0000000000..4b134950f5
--- /dev/null
+++ b/gfx/wr/webrender/res/ps_split_composite.glsl
@@ -0,0 +1,134 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#define WR_FEATURE_TEXTURE_2D
+
+#include shared,prim_shared
+
+// interpolated UV coordinates to sample.
+varying vec2 vUv;
+
+// Flag to allow perspective interpolation of UV.
+// Packed in to a vector to work around bug 1630356.
+flat varying vec2 vPerspective;
+
+flat varying vec4 vUvSampleBounds;
+
+#ifdef WR_VERTEX_SHADER
+struct SplitGeometry {
+    vec2 local[4];
+};
+
+SplitGeometry fetch_split_geometry(int address) {
+    ivec2 coord = get_gpu_cache_uv(address);
+
+    vec4 pair01 = TEXEL_FETCH(sGpuCache, coord, 0, ivec2(0, 0)); // points 0 and 1
+    vec4 pair23 = TEXEL_FETCH(sGpuCache, coord, 0, ivec2(1, 0)); // points 2 and 3
+
+    SplitGeometry result;
+    result.local = vec2[4](
+        pair01.xy,
+        pair01.zw,
+        pair23.xy,
+        pair23.zw
+    );
+
+    return result;
+}
+
+vec2 bilerp(vec2 a, vec2 b, vec2 c, vec2 d, float s, float t) {
+    // Blend a->b and c->d by t, then blend those two results by s.
+    // With s = 0: t = 0 gives a, t = 1 gives b; with s = 1: t = 0 gives c, t = 1 gives d.
+    return mix(mix(a, b, t), mix(c, d, t), s);
+}
+
+struct SplitCompositeInstance {
+    int prim_header_index;
+    int polygons_address;
+    float z;
+    int render_task_index;
+};
+
+SplitCompositeInstance fetch_composite_instance() {
+    // Unpack the per-instance `aData`; the arguments are listed in the field
+    // declaration order of `SplitCompositeInstance`.
+    return SplitCompositeInstance(
+        aData.x,        // prim_header_index
+        aData.y,        // polygons_address
+        float(aData.z), // z
+        aData.w         // render_task_index
+    );
+}
+
+void main(void) { // vertex stage of the split composite shader
+    SplitCompositeInstance ci = fetch_composite_instance();
+    SplitGeometry geometry = fetch_split_geometry(ci.polygons_address);
+    PrimitiveHeader ph = fetch_prim_header(ci.prim_header_index);
+    PictureTask dest_task = fetch_picture_task(ci.render_task_index);
+    Transform transform = fetch_transform(ph.transform_id);
+    ImageSource res = fetch_image_source(ph.user_data.x); // user_data.x: image source address
+    ClipArea clip_area = fetch_clip_area(ph.user_data.w); // user_data.w: clip area address
+
+    vec2 dest_origin = dest_task.task_rect.p0 -
+                       dest_task.content_origin;
+
+    vec2 local_pos = bilerp(geometry.local[0], geometry.local[1],
+                            geometry.local[3], geometry.local[2], // points 3 and 2 are passed in swapped order
+                            aPosition.y, aPosition.x);
+    vec4 world_pos = transform.m * vec4(local_pos, 0.0, 1.0);
+
+    vec4 final_pos = vec4(
+        dest_origin * world_pos.w + world_pos.xy * dest_task.device_pixel_scale, // origin is multiplied by w
+        world_pos.w * ci.z, // z is multiplied by w
+        world_pos.w
+    );
+
+    write_clip(
+        world_pos,
+        clip_area,
+        dest_task
+    );
+
+    gl_Position = uTransform * final_pos;
+
+    vec2 texture_size = vec2(TEX_SIZE(sColor0));
+    vec2 uv0 = res.uv_rect.p0;
+    vec2 uv1 = res.uv_rect.p1;
+
+    vec2 min_uv = min(uv0, uv1); // component-wise, so the bounds are ordered even if uv0 > uv1
+    vec2 max_uv = max(uv0, uv1);
+
+    vUvSampleBounds = vec4(
+        min_uv + vec2(0.5), // inset by half a texel on each side
+        max_uv - vec2(0.5)
+    ) / texture_size.xyxy; // divided by the texture size
+
+    vec2 f = (local_pos - ph.local_rect.p0) / rect_size(ph.local_rect); // position as a fraction of the local rect
+    f = get_image_quad_uv(ph.user_data.x, f);
+    vec2 uv = mix(uv0, uv1, f);
+    float perspective_interpolate = float(ph.user_data.y); // user_data.y: perspective flag, used as a mix factor
+
+    vUv = uv / texture_size * mix(gl_Position.w, 1.0, perspective_interpolate); // scaled by w when the flag is 0.0
+    vPerspective.x = perspective_interpolate;
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+void main(void) {
+    float clip_alpha = do_clip();
+    float w_scale = mix(gl_FragCoord.w, 1.0, vPerspective.x); // gl_FragCoord.w when the flag is 0.0, else 1.0
+    vec2 st = clamp(vUv * w_scale, vUvSampleBounds.xy, vUvSampleBounds.zw); // stay within the sample bounds
+    write_output(clip_alpha * texture(sColor0, st));
+}
+
+#ifdef SWGL_DRAW_SPAN
+void swgl_drawSpanRGBA8() { // only compiled when SWGL_DRAW_SPAN is defined
+    float perspective_divisor = mix(swgl_forceScalar(gl_FragCoord.w), 1.0, vPerspective.x); // same mix as the fragment main(), on a scalar w
+    vec2 uv = vUv * perspective_divisor;
+
+    swgl_commitTextureRGBA8(sColor0, uv, vUvSampleBounds); // NOTE(review): unlike main(), do_clip() is not called here - presumably SWGL handles clipping itself; confirm
+}
+#endif
+
+#endif
diff --git a/gfx/wr/webrender/res/ps_text_run.glsl b/gfx/wr/webrender/res/ps_text_run.glsl
new file mode 100644
index 0000000000..e7faa1247b
--- /dev/null
+++ b/gfx/wr/webrender/res/ps_text_run.glsl
@@ -0,0 +1,354 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include shared,prim_shared
+
+flat varying vec4 v_color;
+flat varying vec3 v_mask_swizzle;
+// Normalized bounds of the source image in the texture.
+flat varying vec4 v_uv_bounds;
+
+// Interpolated UV coordinates to sample.
+varying vec2 v_uv;
+
+
+#if defined(WR_FEATURE_GLYPH_TRANSFORM) && !defined(SWGL_CLIP_DIST)
+varying vec4 v_uv_clip;
+#endif
+
+#ifdef WR_VERTEX_SHADER
+
+#define VECS_PER_TEXT_RUN           2
+#define GLYPHS_PER_GPU_BLOCK        2U
+
+#ifdef WR_FEATURE_GLYPH_TRANSFORM
+RectWithEndpoint transform_rect(RectWithEndpoint rect, mat2 transform) {
+    vec2 half_size = rect_size(rect) * 0.5;
+    vec2 mid = transform * (rect.p0 + half_size); // transformed center of the rect
+    vec2 extent = mat2(abs(transform[0]), abs(transform[1])) * half_size; // |transform| * half-size
+    return RectWithEndpoint(mid - extent, mid + extent); // axis-aligned bounds around the new center
+}
+
+bool rect_inside_rect(RectWithEndpoint little, RectWithEndpoint big) {
+    return all(greaterThanEqual(vec4(little.p0, big.p1), vec4(big.p0, little.p1))); // inclusive on every edge
+}
+#endif //WR_FEATURE_GLYPH_TRANSFORM
+
+struct Glyph {
+    vec2 offset; // per-glyph offset as unpacked from the GPU cache by fetch_glyph()
+};
+
+Glyph fetch_glyph(int specific_prim_address,
+                  int glyph_index) {
+    // Glyph offsets are stored after the VECS_PER_TEXT_RUN header texels of
+    // the text run, packed GLYPHS_PER_GPU_BLOCK to a texel in the GPU cache.
+    uint index = uint(glyph_index);
+    int block = int(index / GLYPHS_PER_GPU_BLOCK);
+    vec4 texel = fetch_from_gpu_cache_1(specific_prim_address + VECS_PER_TEXT_RUN + block);
+
+    // The first glyph of a texel is in XY, the second in ZW.
+    bool use_zw = index % GLYPHS_PER_GPU_BLOCK == 1U;
+
+    return Glyph(mix(texel.xy, texel.zw, bvec2(use_zw)));
+}
+
+struct GlyphResource {
+    vec4 uv_rect; // xy/zw are the rect's corners in texels (main() divides them by the texture size)
+    vec2 offset; // added to the snapped glyph offset when main() computes the glyph origin
+    float scale; // numerator of the glyph-space-to-local-space scale in main()'s non-GLYPH_TRANSFORM path
+};
+
+GlyphResource fetch_glyph_resource(int address) {
+    vec4 texels[2] = fetch_from_gpu_cache_2(address); // [0] = uv rect, [1] = offset (xy) and scale (z)
+    return GlyphResource(texels[0], texels[1].xy, texels[1].z);
+}
+
+struct TextRun {
+    vec4 color; // text color; main() derives v_color from it
+    vec4 bg_color; // only read by main() for COLOR_MODE_SUBPX_BG_PASS1
+};
+
+TextRun fetch_text_run(int address) {
+    vec4 texels[2] = fetch_from_gpu_cache_2(address); // [0] = color, [1] = bg_color
+    return TextRun(texels[0], texels[1]);
+}
+
+vec2 get_snap_bias(int subpx_dir) {
+    // In subpixel mode, the subpixel offset has already been
+    // accounted for while rasterizing the glyph. However, we
+    // must still round with a subpixel bias rather than rounding
+    // to the nearest whole pixel, depending on subpixel direction.
+    //
+    // Glyphs positioned [-0.125, 0.125] get a subpx position of zero,
+    // so an axis with subpixel positioning is biased by 0.125 to ensure
+    // we round to the correct whole position; other axes use 0.5.
+    if (subpx_dir == SUBPX_DIR_HORIZONTAL) {
+        return vec2(0.125, 0.5);
+    }
+    if (subpx_dir == SUBPX_DIR_VERTICAL) {
+        return vec2(0.5, 0.125);
+    }
+    if (subpx_dir == SUBPX_DIR_MIXED) {
+        return vec2(0.125);
+    }
+    // SUBPX_DIR_NONE and any unrecognized value.
+    return vec2(0.5);
+}
+
+void main() { // Vertex stage: places one corner of a glyph quad and sets up its UVs, color and clip.
+    Instance instance = decode_instance_attributes();
+    PrimitiveHeader ph = fetch_prim_header(instance.prim_header_address);
+    Transform transform = fetch_transform(ph.transform_id);
+    ClipArea clip_area = fetch_clip_area(instance.clip_address);
+    PictureTask task = fetch_picture_task(instance.picture_task_address);
+    // segment_index doubles as the glyph index; flags pack subpx_dir (bits 8-15) and color_mode (bits 0-7).
+    int glyph_index = instance.segment_index;
+    int subpx_dir = (instance.flags >> 8) & 0xff;
+    int color_mode = instance.flags & 0xff;
+
+    // Note that the reference frame relative offset is stored in the prim local
+    // rect size during batching, instead of the actual size of the primitive.
+    TextRun text = fetch_text_run(ph.specific_prim_address);
+    vec2 text_offset = ph.local_rect.p1;
+
+    if (color_mode == COLOR_MODE_FROM_PASS) {
+        color_mode = uMode;
+    }
+
+    // Note that the unsnapped reference frame relative offset has already
+    // been subtracted from the prim local rect origin during batching.
+    // It was done this way to avoid pushing both the snapped and the
+    // unsnapped offsets to the shader.
+    Glyph glyph = fetch_glyph(ph.specific_prim_address, glyph_index);
+    glyph.offset += ph.local_rect.p0;
+
+    GlyphResource res = fetch_glyph_resource(instance.resource_address);
+
+    vec2 snap_bias = get_snap_bias(subpx_dir);
+
+    // Glyph space refers to the pixel space used by glyph rasterization during frame
+    // building. If a non-identity transform was used, WR_FEATURE_GLYPH_TRANSFORM will
+    // be set. Otherwise, regardless of whether the raster space is LOCAL or SCREEN,
+    // we ignored the transform during glyph rasterization, and need to snap just using
+    // the device pixel scale and the raster scale.
+#ifdef WR_FEATURE_GLYPH_TRANSFORM
+    // Transform from local space to glyph space.
+    mat2 glyph_transform = mat2(transform.m) * task.device_pixel_scale;
+    vec2 glyph_translation = transform.m[3].xy * task.device_pixel_scale;
+
+    // Transform from glyph space back to local space.
+    mat2 glyph_transform_inv = inverse(glyph_transform);
+
+    // Glyph raster pixels include the impact of the transform. This path can only be
+    // entered for 3d transforms that can be coerced into a 2d transform; they have no
+    // perspective, and have a 2d inverse. This is a looser condition than axis aligned
+    // transforms because it also allows 2d rotations.
+    vec2 raster_glyph_offset = floor(glyph_transform * glyph.offset + snap_bias);
+
+    // We want to eliminate any subpixel translation in device space to ensure glyph
+    // snapping is stable for equivalent glyph subpixel positions. Note that we must take
+    // into account the translation from the transform for snapping purposes.
+    vec2 raster_text_offset = floor(glyph_transform * text_offset + glyph_translation + 0.5) - glyph_translation;
+
+    vec2 glyph_origin = res.offset + raster_glyph_offset + raster_text_offset;
+    // Compute the glyph rect in glyph space.
+    RectWithEndpoint glyph_rect = RectWithEndpoint(
+        glyph_origin,
+        glyph_origin + res.uv_rect.zw - res.uv_rect.xy
+    );
+
+    // The glyph rect is in glyph space, so transform it back to local space.
+    RectWithEndpoint local_rect = transform_rect(glyph_rect, glyph_transform_inv);
+
+    // Select the corner of the glyph's local space rect that we are processing.
+    vec2 local_pos = mix(local_rect.p0, local_rect.p1, aPosition.xy);
+
+    // If the glyph's local rect would fit inside the local clip rect, then select a corner from
+    // the device space glyph rect to reduce overdraw of clipped pixels in the fragment shader.
+    // Otherwise, fall back to clamping the glyph's local rect to the local clip rect.
+    if (rect_inside_rect(local_rect, ph.local_clip_rect)) {
+        local_pos = glyph_transform_inv * mix(glyph_rect.p0, glyph_rect.p1, aPosition.xy);
+    }
+#else
+    float raster_scale = float(ph.user_data.x) / 65535.0;
+
+    // Scale in which the glyph is snapped when rasterized.
+    float glyph_raster_scale = raster_scale * task.device_pixel_scale;
+
+    // Scale from glyph space to local space.
+    float glyph_scale_inv = res.scale / glyph_raster_scale;
+
+    // Glyph raster pixels do not include the impact of the transform. Instead it was
+    // replaced with an identity transform during glyph rasterization. As such only the
+    // impact of the raster scale (if in local space) and the device pixel scale (for both
+    // local and screen space) are included.
+    //
+    // This implies one or more of the following conditions:
+    // - The transform is an identity. In that case, setting WR_FEATURE_GLYPH_TRANSFORM
+    //   should have the same output result as not. We just distinguish which path to use
+    //   based on the transform used during glyph rasterization. (Screen space).
+    // - The transform contains an animation. We will imply local raster space in such
+    //   cases to avoid constantly rerasterizing the glyphs.
+    // - The transform has perspective or does not have a 2d inverse (Screen or local space).
+    // - The transform's scale will result in very large rasterized glyphs and
+    //   we clamped the size. This will imply local raster space.
+    vec2 raster_glyph_offset = floor(glyph.offset * glyph_raster_scale + snap_bias) / res.scale;
+
+    // Compute the glyph rect in local space.
+    //
+    // The transform may be animated, so we don't want to do any snapping here for the
+    // text offset to avoid glyphs wiggling. The text offset should have been snapped
+    // already for axis aligned transforms excluding any animations during frame building.
+    vec2 glyph_origin = glyph_scale_inv * (res.offset + raster_glyph_offset) + text_offset;
+    RectWithEndpoint glyph_rect = RectWithEndpoint(
+        glyph_origin,
+        glyph_origin + glyph_scale_inv * (res.uv_rect.zw - res.uv_rect.xy)
+    );
+
+    // Select the corner of the glyph rect that we are processing.
+    vec2 local_pos = mix(glyph_rect.p0, glyph_rect.p1, aPosition.xy);
+#endif
+
+    VertexInfo vi = write_vertex(
+        local_pos,
+        ph.local_clip_rect,
+        ph.z,
+        transform,
+        task
+    );
+
+#ifdef WR_FEATURE_GLYPH_TRANSFORM
+    vec2 f = (glyph_transform * vi.local_pos - glyph_rect.p0) / rect_size(glyph_rect);
+    #ifdef SWGL_CLIP_DIST
+        gl_ClipDistance[0] = f.x;
+        gl_ClipDistance[1] = f.y;
+        gl_ClipDistance[2] = 1.0 - f.x;
+        gl_ClipDistance[3] = 1.0 - f.y;
+    #else
+        v_uv_clip = vec4(f, 1.0 - f);
+    #endif
+#else
+    vec2 f = (vi.local_pos - glyph_rect.p0) / rect_size(glyph_rect);
+#endif
+
+    write_clip(vi.world_pos, clip_area, task);
+    // Choose the mask swizzle and color for this color mode; unknown modes use the default case.
+    switch (color_mode) {
+        case COLOR_MODE_ALPHA:
+            v_mask_swizzle = vec3(0.0, 1.0, 1.0);
+            v_color = text.color;
+            break;
+        case COLOR_MODE_BITMAP_SHADOW:
+            #ifdef SWGL_BLEND
+                swgl_blendDropShadow(text.color);
+                v_mask_swizzle = vec3(1.0, 0.0, 0.0);
+                v_color = vec4(1.0);
+            #else
+                v_mask_swizzle = vec3(0.0, 1.0, 0.0);
+                v_color = text.color;
+            #endif
+            break;
+        case COLOR_MODE_SUBPX_BG_PASS2:
+            v_mask_swizzle = vec3(1.0, 0.0, 0.0);
+            v_color = text.color;
+            break;
+        case COLOR_MODE_SUBPX_BG_PASS0:
+        case COLOR_MODE_COLOR_BITMAP:
+            v_mask_swizzle = vec3(1.0, 0.0, 0.0);
+            v_color = vec4(text.color.a);
+            break;
+        case COLOR_MODE_SUBPX_BG_PASS1:
+            v_mask_swizzle = vec3(-1.0, 1.0, 0.0);
+            v_color = vec4(text.color.a) * text.bg_color;
+            break;
+        case COLOR_MODE_SUBPX_DUAL_SOURCE:
+            #ifdef SWGL_BLEND
+                swgl_blendSubpixelText(text.color);
+                v_mask_swizzle = vec3(1.0, 0.0, 0.0);
+                v_color = vec4(1.0);
+            #else
+                v_mask_swizzle = vec3(text.color.a, 0.0, 0.0);
+                v_color = text.color;
+            #endif
+            break;
+        default:
+            v_mask_swizzle = vec3(0.0, 0.0, 0.0);
+            v_color = vec4(1.0);
+    }
+    // Convert the glyph's texel rect to normalized UVs; the bounds are inset by half a texel per side.
+    vec2 texture_size = vec2(TEX_SIZE(sColor0));
+    vec2 st0 = res.uv_rect.xy / texture_size;
+    vec2 st1 = res.uv_rect.zw / texture_size;
+
+    v_uv = mix(st0, st1, f);
+    v_uv_bounds = (res.uv_rect + vec4(0.5, 0.5, -0.5, -0.5)) / texture_size.xyxy;
+}
+
+#endif // WR_VERTEX_SHADER
+
+#ifdef WR_FRAGMENT_SHADER
+
+Fragment text_fs(void) {
+    Fragment result;
+    // Sample the glyph, keeping the lookup inside v_uv_bounds.
+    vec4 coverage = texture(sColor0, clamp(v_uv, v_uv_bounds.xy, v_uv_bounds.zw));
+
+    // A non-zero v_mask_swizzle.z marks an R8 texture used as alpha, so
+    // the r channel is broadcast to all channels.
+    coverage = mix(coverage, coverage.rrrr, bvec4(v_mask_swizzle.z != 0.0));
+    #ifndef WR_FEATURE_DUAL_SOURCE_BLENDING
+        coverage.rgb = coverage.rgb * v_mask_swizzle.x + coverage.aaa * v_mask_swizzle.y;
+    #endif
+
+    #if defined(WR_FEATURE_GLYPH_TRANSFORM) && !defined(SWGL_CLIP_DIST)
+        // Zero the coverage when any component of v_uv_clip is negative.
+        coverage *= float(all(greaterThanEqual(v_uv_clip, vec4(0.0))));
+    #endif
+
+    result.color = v_color * coverage;
+    #if defined(WR_FEATURE_DUAL_SOURCE_BLENDING) && !defined(SWGL_BLEND)
+        result.blend = coverage * v_mask_swizzle.x + coverage.aaaa * v_mask_swizzle.y;
+    #endif
+
+    return result;
+}
+
+
+void main() {
+    Fragment text = text_fs();
+    // Attenuate the text color by the clip mask.
+    float clip_alpha = do_clip();
+    text.color *= clip_alpha;
+
+    #if defined(WR_FEATURE_DEBUG_OVERDRAW)
+        oFragColor = WR_DEBUG_OVERDRAW_COLOR;
+    #elif defined(WR_FEATURE_DUAL_SOURCE_BLENDING) && !defined(SWGL_BLEND)
+        oFragBlend = text.blend * clip_alpha;
+        oFragColor = text.color;
+    #else
+        write_output(text.color);
+    #endif
+}
+
+#if defined(SWGL_DRAW_SPAN) && defined(SWGL_BLEND) && defined(SWGL_CLIP_DIST)
+void swgl_drawSpanRGBA8() { // SWGL span path; compiled only with SWGL_DRAW_SPAN, SWGL_BLEND and SWGL_CLIP_DIST
+    // Only support simple swizzles for now. More complex swizzles must either
+    // be handled by blend overrides or the slow path.
+    if (v_mask_swizzle.x != 0.0 && v_mask_swizzle.x != 1.0) {
+        return; // nothing is committed for swizzle.x values other than 0.0 or 1.0
+    }
+
+    #ifdef WR_FEATURE_DUAL_SOURCE_BLENDING
+        swgl_commitTextureLinearRGBA8(sColor0, v_uv, v_uv_bounds);
+    #else
+        if (swgl_isTextureR8(sColor0)) { // R8 glyph textures take the R8-to-RGBA8 commit
+            swgl_commitTextureLinearColorR8ToRGBA8(sColor0, v_uv, v_uv_bounds, v_color);
+        } else {
+            swgl_commitTextureLinearColorRGBA8(sColor0, v_uv, v_uv_bounds, v_color);
+        }
+    #endif
+}
+#endif
+
+#endif // WR_FRAGMENT_SHADER
diff --git a/gfx/wr/webrender/res/rect.glsl b/gfx/wr/webrender/res/rect.glsl
new file mode 100644
index 0000000000..2a080ee393
--- /dev/null
+++ b/gfx/wr/webrender/res/rect.glsl
@@ -0,0 +1,40 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+struct RectWithSize {
+    vec2 p0; // presumably the rect's origin corner - confirm against users of this struct
+    vec2 size; // presumably the extent measured from p0 - confirm against users of this struct
+};
+
+struct RectWithEndpoint {
+    vec2 p0; // first corner; rect_size() subtracts it from p1 and rect_clamp() uses it as the lower limit
+    vec2 p1; // opposite corner; rect_clamp() uses it as the upper limit
+};
+
+float point_inside_rect(vec2 p, vec2 p0, vec2 p1) {
+    vec2 within = step(p0, p) - step(p1, p); // per axis, 1.0 when p0 <= p < p1 (given p0 <= p1)
+    return within.x * within.y; // 1.0 only when both axes are within range
+}
+
+vec2 signed_distance_rect_xy(vec2 pos, vec2 p0, vec2 p1) {
+    // Per-axis approximation rather than a true signed distance: avoids
+    // length() at the cost of small differences at corner pixels.
+    vec2 below_min = p0 - pos;
+    vec2 above_max = pos - p1;
+    return max(below_min, above_max);
+}
+
+float signed_distance_rect(vec2 pos, vec2 p0, vec2 p1) {
+    // Take the larger of the two per-axis distances as the single approximate value.
+    vec2 axis_dist = signed_distance_rect_xy(pos, p0, p1);
+    return max(axis_dist.x, axis_dist.y);
+}
+
+vec2 rect_clamp(RectWithEndpoint rect, vec2 pt) {
+    return clamp(pt, rect.p0, rect.p1); // component-wise clamp of pt to [p0, p1]
+}
+
+vec2 rect_size(RectWithEndpoint rect) {
+    return rect.p1 - rect.p0; // size as the difference of the two corners
+}
diff --git a/gfx/wr/webrender/res/render_task.glsl b/gfx/wr/webrender/res/render_task.glsl
new file mode 100644
index 0000000000..cd9aea402c
--- /dev/null
+++ b/gfx/wr/webrender/res/render_task.glsl
@@ -0,0 +1,102 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+
+#ifdef WR_VERTEX_SHADER
+#define VECS_PER_RENDER_TASK        2U
+
+uniform HIGHP_SAMPLER_FLOAT sampler2D sRenderTasks;
+
+struct RenderTaskData {
+    RectWithEndpoint task_rect; // built from the xy/zw of the task's first texel
+    vec4 user_data; // the task's second texel, interpreted by the caller
+};
+
+// See RenderTaskData in render_task.rs
+RenderTaskData fetch_render_task_data(int index) {
+    // A render task spans VECS_PER_RENDER_TASK adjacent texels of
+    // sRenderTasks; base_uv addresses the first of them.
+    ivec2 base_uv = get_fetch_uv(index, VECS_PER_RENDER_TASK);
+
+    // First texel: the task rect, packed as (p0, p1).
+    vec4 rect_texel = TEXEL_FETCH(sRenderTasks, base_uv, 0, ivec2(0, 0));
+
+    // Second texel: user data whose meaning is decided by the caller.
+    vec4 user_texel = TEXEL_FETCH(sRenderTasks, base_uv, 0, ivec2(1, 0));
+
+    RectWithEndpoint rect = RectWithEndpoint(rect_texel.xy, rect_texel.zw);
+
+    return RenderTaskData(
+        rect,
+        user_texel
+    );
+}
+
+RectWithEndpoint fetch_render_task_rect(int index) {
+    // Fetches only the rect of a render task. The rect lives entirely in the
+    // task's first texel, so the second (user data) texel is not read here.
+    ivec2 uv = get_fetch_uv(index, VECS_PER_RENDER_TASK);
+    vec4 texel0 = TEXEL_FETCH(sRenderTasks, uv, 0, ivec2(0, 0));
+
+    RectWithEndpoint task_rect = RectWithEndpoint(
+        texel0.xy,
+        texel0.zw
+    );
+
+    return task_rect;
+}
+
+#define PIC_TYPE_IMAGE          1
+#define PIC_TYPE_TEXT_SHADOW    2
+
+/*
+ The dynamic picture that this brush exists on. Right now, it
+ contains minimal information. In the future, it will describe
+ the transform mode of primitives on this picture, among other things.
+ */
+struct PictureTask {
+    RectWithEndpoint task_rect; // task rect taken from the fetched render task data
+    float device_pixel_scale; // filled from user_data.x by fetch_picture_task()
+    vec2 content_origin; // filled from user_data.yz by fetch_picture_task()
+};
+
+PictureTask fetch_picture_task(int address) {
+    // A picture task is the generic render task data with user_data
+    // unpacked as (device_pixel_scale, content_origin.x, content_origin.y).
+    RenderTaskData raw = fetch_render_task_data(address);
+
+    return PictureTask(
+        raw.task_rect,
+        raw.user_data.x,
+        raw.user_data.yz
+    );
+}
+
+#define CLIP_TASK_EMPTY 0x7FFF
+
+struct ClipArea {
+    RectWithEndpoint task_rect; // clip task rect, or a zero rect when no clip task is fetched
+    float device_pixel_scale; // user_data.x of the clip task, or 0.0
+    vec2 screen_origin; // user_data.yz of the clip task, or vec2(0.0)
+};
+
+ClipArea fetch_clip_area(int index) {
+    // Indices at or above CLIP_TASK_EMPTY carry no clip task: return an
+    // all-zero area without reading the render task data.
+    if (index >= CLIP_TASK_EMPTY) {
+        return ClipArea(
+            RectWithEndpoint(vec2(0.0), vec2(0.0)),
+            0.0,
+            vec2(0.0)
+        );
+    }
+    RenderTaskData task_data = fetch_render_task_data(index);
+    return ClipArea(
+        task_data.task_rect,
+        task_data.user_data.x,
+        task_data.user_data.yz
+    );
+}
+
+#endif //WR_VERTEX_SHADER
diff --git a/gfx/wr/webrender/res/shared.glsl b/gfx/wr/webrender/res/shared.glsl
new file mode 100644
index 0000000000..4f21bd205d
--- /dev/null
+++ b/gfx/wr/webrender/res/shared.glsl
@@ -0,0 +1,230 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef WR_FEATURE_TEXTURE_EXTERNAL
+// Please check https://www.khronos.org/registry/OpenGL/extensions/OES/OES_EGL_image_external_essl3.txt
+// for this extension.
+#extension GL_OES_EGL_image_external_essl3 : require
+#endif
+
+#ifdef WR_FEATURE_TEXTURE_EXTERNAL_ESSL1
+// Some GLES 3 devices do not support GL_OES_EGL_image_external_essl3, so we
+// must use GL_OES_EGL_image_external instead and make the shader ESSL1
+// compatible.
+#extension GL_OES_EGL_image_external : require
+#endif
+
+#ifdef WR_FEATURE_ADVANCED_BLEND
+#extension GL_KHR_blend_equation_advanced : require
+#endif
+
+#ifdef WR_FEATURE_DUAL_SOURCE_BLENDING
+#ifdef GL_ES
+#extension GL_EXT_blend_func_extended : require
+#else
+#extension GL_ARB_explicit_attrib_location : require
+#endif
+#endif
+
+#include base
+
+#if defined(WR_FEATURE_TEXTURE_EXTERNAL_ESSL1)
+#define TEX_SAMPLE(sampler, tex_coord) texture2D(sampler, tex_coord.xy)
+#else
+#define TEX_SAMPLE(sampler, tex_coord) texture(sampler, tex_coord.xy)
+#endif
+
+#if defined(WR_FEATURE_TEXTURE_EXTERNAL) && defined(PLATFORM_ANDROID)
+// On some Mali GPUs we have encountered crashes in glDrawElements when using
+// textureSize(samplerExternalOES) in a vertex shader without potentially
+// sampling from the texture. This tricks the driver into thinking the texture
+// may be sampled from, avoiding the crash. See bug 1692848.
+uniform bool u_mali_workaround_dummy;
+#define TEX_SIZE(sampler) (u_mali_workaround_dummy ? ivec2(texture(sampler, vec2(0.0, 0.0)).rr) : textureSize(sampler, 0))
+#else
+#define TEX_SIZE(sampler) textureSize(sampler, 0)
+#endif
+
+//======================================================================================
+// Vertex shader attributes and uniforms
+//======================================================================================
+#ifdef WR_VERTEX_SHADER
+    // A generic uniform that shaders can optionally use to configure
+    // an operation mode for this batch.
+    uniform int uMode;
+
+    // Uniform inputs
+    uniform mat4 uTransform;       // Orthographic projection
+
+    // Attribute inputs
+    attribute vec2 aPosition;
+
+    // get_fetch_uv is a macro to work around a macOS Intel driver parsing bug.
+    // TODO: convert back to a function once the driver issues are resolved, if ever.
+    // https://github.com/servo/webrender/pull/623
+    // https://github.com/servo/servo/issues/13953
+    // Do the division with unsigned ints because that's more efficient with D3D
+    #define get_fetch_uv(i, vpi)  ivec2(int(vpi * (uint(i) % (WR_MAX_VERTEX_TEXTURE_WIDTH/vpi))), int(uint(i) / (WR_MAX_VERTEX_TEXTURE_WIDTH/vpi)))
+#endif
+
+//======================================================================================
+// Fragment shader attributes and uniforms
+//======================================================================================
+#ifdef WR_FRAGMENT_SHADER
+    // Uniform inputs
+
+    // Fragment shader outputs
+    #ifdef WR_FEATURE_ADVANCED_BLEND
+        layout(blend_support_all_equations) out;
+    #endif
+
+    #if __VERSION__ == 100
+        #define oFragColor gl_FragColor
+    #elif defined(WR_FEATURE_DUAL_SOURCE_BLENDING)
+        layout(location = 0, index = 0) out vec4 oFragColor;
+        layout(location = 0, index = 1) out vec4 oFragBlend;
+    #else
+        out vec4 oFragColor;
+    #endif
+
+    // Write an output color in normal shaders.
+    void write_output(vec4 color) {
+        oFragColor = color; // oFragColor is gl_FragColor or a declared output, depending on __VERSION__
+    }
+
+    #define EPSILON                     0.0001
+
+    // "Show Overdraw" color. Premultiplied.
+    #define WR_DEBUG_OVERDRAW_COLOR     vec4(0.110, 0.077, 0.027, 0.125)
+
+    float distance_to_line(vec2 p0, vec2 perp_dir, vec2 p) {
+        // Projection of (p0 - p) onto the normalized perpendicular direction; the sign tells the side.
+        return dot(normalize(perp_dir), p0 - p);
+    }
+
+// fwidth is not defined in ESSL 1, but that's okay because we don't need
+// it for any ESSL 1 shader variants.
+#if __VERSION__ != 100
+    /// Find the appropriate half range to apply the AA approximation over.
+    /// This range represents a coefficient to go from one CSS pixel to half a device pixel.
+    vec2 compute_aa_range_xy(vec2 position) {
+        return fwidth(position); // raw per-axis filter width (not a reciprocal, unlike compute_aa_range)
+    }
+
+    float compute_aa_range(vec2 position) {
+        // The constant factor is chosen to compensate for the fact that length(fwidth(position))
+        // is equal to sqrt(2) times the device pixel ratio in the typical case.
+        //
+        // This coefficient is chosen to ensure that any sample 0.5 pixels or more inside of
+        // the shape has no anti-aliasing applied to it (since pixels are sampled at their center,
+        // such a pixel (axis aligned) is fully inside the border). We need this so that antialiased
+        // curves properly connect with non-antialiased vertical or horizontal lines, among other things.
+        //
+        // Lines over a half-pixel away from the pixel center *can* intersect with the pixel square;
+        // indeed, unless they are horizontal or vertical, they are guaranteed to. However, choosing
+        // a nonzero area for such pixels causes noticeable artifacts at the junction between an anti-
+        // aliased corner and a straight edge.
+        //
+        // We may want to adjust this constant in specific scenarios (for example keep the principled
+        // value for straight edges where we want pixel-perfect equivalence with non antialiased lines
+        // when axis aligned, while selecting a larger and smoother aa range on curves).
+        //
+        // As a further optimization, we compute the reciprocal of this range, such that we
+        // can then use the cheaper inversesqrt() instead of length(). This also elides a
+        // division that would otherwise be necessary inside distance_aa.
+        #ifdef SWGL
+            // SWGL uses an approximation for fwidth() such that it returns equal x and y.
+            // Thus, sqrt(2)/length(w) = sqrt(2)/sqrt(x*x + x*x) = recip(x).
+            return recip(fwidth(position).x);
+        #else
+            // sqrt(2)/length(w) = inversesqrt(0.5 * dot(w, w))
+            vec2 w = fwidth(position);
+            return inversesqrt(0.5 * dot(w, w));
+        #endif
+    }
+#endif
+
+    /// Return the blending coefficient for distance antialiasing.
+    ///
+    /// 1.0 means fully inside the shape, 0.0 means fully outside.
+    ///
+    /// This makes the simplifying assumption that the area of a 1x1 pixel square
+    /// under a line is reasonably similar to just the signed Euclidean distance
+    /// from the center of the square to that line. This diverges slightly from
+    /// better approximations of the exact area, but the difference between the
+    /// methods is not perceptibly noticeable, while this approximation is much
+    /// faster to compute.
+    ///
+    /// See the comments in `compute_aa_range()` for more information on the
+    /// cutoff values of -0.5 and 0.5.
+    float distance_aa_xy(vec2 aa_range, vec2 signed_distance) {
+        // aa_range holds raw per-axis filter widths, so dividing the local signed
+        // distance by it approximates the distance in screen space.
+        #ifdef SWGL
+            // The SWGL fwidth() approximation returns uniform X and Y ranges.
+            vec2 screen_dist = signed_distance * recip(aa_range.x);
+        #else
+            vec2 screen_dist = signed_distance / aa_range;
+        #endif
+        // Use the axis that is further outside the rectangle for AA.
+        float worst = max(screen_dist.x, screen_dist.y);
+        return clamp(0.5 - worst, 0.0, 1.0);
+    }
+
+    float distance_aa(float aa_range, float signed_distance) {
+        // aa_range arrives as a reciprocal with uniform scale, so scaling
+        // the signed distance is a multiply rather than a divide.
+        float scaled = signed_distance * aa_range;
+        return clamp(0.5 - scaled, 0.0, 1.0);
+    }
+
+    /// Component-wise selection.
+    ///
+    /// The idea of using this is to ensure both potential branches are executed before
+    /// selecting the result, to avoid observable timing differences based on the condition.
+    ///
+    /// Example usage: color = if_then_else(lessThanEqual(color, vec3(0.5)), vec3(0.0), vec3(1.0));
+    ///
+    /// The above example sets each component to 0.0 or 1.0 independently depending on whether
+    /// their values are below or above 0.5.
+    ///
+    /// This is written as a macro in order to work with vectors of any dimension.
+    ///
+    /// Note: Some older android devices don't support mix with bvec. If we ever run into them
+    /// the only option we have is to polyfill it with a branch per component.
+    #define if_then_else(cond, then_branch, else_branch) mix(else_branch, then_branch, cond)
+#endif
+
+//======================================================================================
+// Shared shader uniforms
+//======================================================================================
+#ifdef WR_FEATURE_TEXTURE_2D
+uniform sampler2D sColor0;
+uniform sampler2D sColor1;
+uniform sampler2D sColor2;
+#elif defined WR_FEATURE_TEXTURE_RECT
+uniform sampler2DRect sColor0;
+uniform sampler2DRect sColor1;
+uniform sampler2DRect sColor2;
+#elif defined(WR_FEATURE_TEXTURE_EXTERNAL) || defined(WR_FEATURE_TEXTURE_EXTERNAL_ESSL1)
+uniform samplerExternalOES sColor0;
+uniform samplerExternalOES sColor1;
+uniform samplerExternalOES sColor2;
+#endif
+
+#ifdef WR_FEATURE_DITHERING
+uniform sampler2D sDither;
+#endif
+
+//======================================================================================
+// Interpolator definitions
+//======================================================================================
+
+//======================================================================================
+// VS only types and UBOs
+//======================================================================================
+
+//======================================================================================
+// VS only functions
+//======================================================================================
diff --git a/gfx/wr/webrender/res/shared_other.glsl b/gfx/wr/webrender/res/shared_other.glsl
new file mode 100644
index 0000000000..03cad173cd
--- /dev/null
+++ b/gfx/wr/webrender/res/shared_other.glsl
@@ -0,0 +1,33 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//======================================================================================
+// Vertex shader attributes and uniforms
+//======================================================================================
+#ifdef WR_VERTEX_SHADER
+#endif
+
+//======================================================================================
+// Fragment shader attributes and uniforms
+//======================================================================================
+#ifdef WR_FRAGMENT_SHADER
+#endif
+
+//======================================================================================
+// Interpolator definitions
+//======================================================================================
+
+//======================================================================================
+// VS only types and UBOs
+//======================================================================================
+
+//======================================================================================
+// VS only functions
+//======================================================================================
+
+//======================================================================================
+// FS only functions
+//======================================================================================
+#ifdef WR_FRAGMENT_SHADER
+#endif
diff --git a/gfx/wr/webrender/res/transform.glsl b/gfx/wr/webrender/res/transform.glsl
new file mode 100644
index 0000000000..a3329918b6
--- /dev/null
+++ b/gfx/wr/webrender/res/transform.glsl
@@ -0,0 +1,140 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+flat varying vec4 vTransformBounds;
+
+#ifdef WR_VERTEX_SHADER
+
+#define VECS_PER_TRANSFORM   8U
+uniform HIGHP_SAMPLER_FLOAT sampler2D sTransformPalette;
+
+void init_transform_vs(vec4 local_bounds) { // Vertex-stage setup for the transform bounds.
+    vTransformBounds = local_bounds; // flat varying; read by the fragment-stage helpers in this file
+}
+
+struct Transform {
+    mat4 m; // columns are read from texels 0..3 of the palette entry (see fetch_transform)
+    mat4 inv_m; // columns are read from texels 4..7 of the palette entry
+    bool is_axis_aligned; // true when all bits above the low 24 of the transform id are zero
+};
+
+Transform fetch_transform(int id) { // Decode a packed transform id and load both matrices from sTransformPalette.
+    Transform transform;
+
+    transform.is_axis_aligned = (id >> 24) == 0; // the flag is carried in the bits above the low 24
+    int index = id & 0x00ffffff; // the low 24 bits select the palette entry
+
+    // Create a UV base coord for each 8 texels (VECS_PER_TRANSFORM).
+    // This is required because trying to use an offset
+    // of more than 8 texels doesn't work on some versions
+    // of macOS.
+    ivec2 uv = get_fetch_uv(index, VECS_PER_TRANSFORM);
+    ivec2 uv0 = ivec2(uv.x + 0, uv.y);
+
+    transform.m[0] = TEXEL_FETCH(sTransformPalette, uv0, 0, ivec2(0, 0)); // offsets 0..3: columns of m
+    transform.m[1] = TEXEL_FETCH(sTransformPalette, uv0, 0, ivec2(1, 0));
+    transform.m[2] = TEXEL_FETCH(sTransformPalette, uv0, 0, ivec2(2, 0));
+    transform.m[3] = TEXEL_FETCH(sTransformPalette, uv0, 0, ivec2(3, 0));
+
+    transform.inv_m[0] = TEXEL_FETCH(sTransformPalette, uv0, 0, ivec2(4, 0)); // offsets 4..7: columns of inv_m
+    transform.inv_m[1] = TEXEL_FETCH(sTransformPalette, uv0, 0, ivec2(5, 0));
+    transform.inv_m[2] = TEXEL_FETCH(sTransformPalette, uv0, 0, ivec2(6, 0));
+    transform.inv_m[3] = TEXEL_FETCH(sTransformPalette, uv0, 0, ivec2(7, 0));
+
+    return transform;
+}
+
+// Intersect the plane (set up by "normal" and "pt") with the ray (set up by
+// "ray_origin" and "ray_dir"), writing the resulting scalar into "t". Returns
+// true only for a hit with t >= 0; on a (near-)parallel miss "t" is set to 0.
+bool ray_plane(vec3 normal, vec3 pt, vec3 ray_origin, vec3 ray_dir, out float t)
+{
+    float denom = dot(normal, ray_dir);
+    if (abs(denom) > 1e-6) {
+        vec3 d = pt - ray_origin;
+        t = dot(d, normal) / denom;
+        return t >= 0.0;
+    }
+    t = 0.0; // an "out" value is undefined unless written, and callers read t regardless of the result
+    return false;
+}
+
+// Map the CSS-space reference point "ref" back through "inv_transform".
+// The point is first placed on the Transform plane (described by the base
+// point "a" and the normal "n") by casting a ray along +Z through it, and
+// the resulting position is then multiplied by the inverse matrix.
+vec4 untransform(vec2 ref, vec3 n, vec3 a, mat4 inv_transform) {
+    // Ray through the reference point, starting at z = -10000 and heading along +Z.
+    vec3 ray_origin = vec3(ref, -10000.0);
+    vec3 ray_dir = vec3(0.0, 0.0, 1.0);
+
+    // Ray parameter at which the ray meets the Transform plane.
+    float hit_t = 0.0;
+    ray_plane(n, a, ray_origin, ray_dir, hit_t);
+
+    // Z of the visible point on the Transform.
+    float plane_z = ray_origin.z + ray_dir.z * hit_t;
+    return inv_transform * vec4(ref, plane_z, 1.0);
+}
+
+// Take a position in CSS space and bring it back into the Transform's own space.
+vec4 get_node_pos(vec2 pos, Transform transform) {
+    // The transformed local origin gives one point on the scroll node plane.
+    vec4 origin_h = transform.m * vec4(0.0, 0.0, 0.0, 1.0);
+    vec3 plane_point = origin_h.xyz / origin_h.w;
+
+    // The normal of that plane: local Z axis times the transposed inverse matrix.
+    vec3 plane_normal = transpose(mat3(transform.inv_m)) * vec3(0.0, 0.0, 1.0);
+    return untransform(pos, plane_normal, plane_point, transform.inv_m);
+}
+
+#endif //WR_VERTEX_SHADER
+
+#ifdef WR_FRAGMENT_SHADER
+
+// Invalid transform bounds are signalled by a very large scale, so anything below that counts as valid.
+bool has_valid_transform_bounds() {
+    return 1.0e15 > vTransformBounds.w;
+}
+
+float init_transform_fs(vec2 local_pos) {
+    // Distances are measured in local space rather than screen space.
+    // Screen-space tracking would be preferable, because a signed distance
+    // only remembers the minimum distance to the exit and not the direction
+    // towards it, but perspective transforms, clipping and other concerns
+    // mean it cannot always be done sanely.
+    //
+    // Local-space distances have their own problem: scaling them later by a
+    // single uniform AA range is wrong when the transform scales the two
+    // axes differently.
+    //
+    // The workaround is to keep the local X and Y distances separate and to
+    // scale each by its own AA range (derived from fwidth derivatives). This
+    // can break down at certain angles (45 degrees or close to it), but it is
+    // still a better approximation of screen-space distance under non-uniform
+    // scaling for other rotations.
+
+    // Per-axis range over which the AA smoothstep is applied.
+    vec2 aa_range_xy = compute_aa_range_xy(local_pos);
+
+    // Per-axis signed distance from the local rect bounds.
+    vec2 dist_xy = signed_distance_rect_xy(
+        local_pos,
+        vTransformBounds.xy,
+        vTransformBounds.zw
+    );
+
+    // Only apply AA to fragments outside the signed distance field.
+    return distance_aa_xy(aa_range_xy, dist_xy);
+}
+
+float init_transform_rough_fs(vec2 local_pos) {
+    // Coarse counterpart of init_transform_fs: no AA range is computed here,
+    // the result is whatever point_inside_rect reports for the stored bounds.
+    vec2 bounds_p0 = vTransformBounds.xy;
+    vec2 bounds_p1 = vTransformBounds.zw;
+    return point_inside_rect(local_pos, bounds_p0, bounds_p1);
+}
+
+#endif //WR_FRAGMENT_SHADER
diff --git a/gfx/wr/webrender/res/yuv.glsl b/gfx/wr/webrender/res/yuv.glsl
new file mode 100644
index 0000000000..064ba3b8af
--- /dev/null
+++ b/gfx/wr/webrender/res/yuv.glsl
@@ -0,0 +1,237 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include shared
+
+#define YUV_FORMAT_NV12             0
+#define YUV_FORMAT_P010             1
+#define YUV_FORMAT_PLANAR           2
+#define YUV_FORMAT_INTERLEAVED      3
+
+//#define YUV_PRECISION mediump
+#define YUV_PRECISION
+
+#ifdef WR_VERTEX_SHADER
+
+#ifdef WR_FEATURE_TEXTURE_RECT
+    #define TEX_SIZE_YUV(sampler) vec2(1.0)
+#else
+    #define TEX_SIZE_YUV(sampler) vec2(TEX_SIZE(sampler).xy)
+#endif
+
+// `YuvRangedColorSpace`
+#define YUV_COLOR_SPACE_REC601_NARROW  0
+#define YUV_COLOR_SPACE_REC601_FULL    1
+#define YUV_COLOR_SPACE_REC709_NARROW  2
+#define YUV_COLOR_SPACE_REC709_FULL    3
+#define YUV_COLOR_SPACE_REC2020_NARROW 4
+#define YUV_COLOR_SPACE_REC2020_FULL   5
+#define YUV_COLOR_SPACE_GBR_IDENTITY   6
+
+// The constants added to the Y, U and V components are applied in the fragment shader.
+
+// `rgbFromYuv` from https://jdashg.github.io/misc/colors/from-coeffs.html
+// The matrix is stored in column-major.
+const mat3 RgbFromYuv_Rec601 = mat3(
+  1.00000, 1.00000, 1.00000,
+  0.00000,-0.17207, 0.88600,
+  0.70100,-0.35707, 0.00000
+);
+const mat3 RgbFromYuv_Rec709 = mat3(
+  1.00000, 1.00000, 1.00000,
+  0.00000,-0.09366, 0.92780,
+  0.78740,-0.23406, 0.00000
+);
+const mat3 RgbFromYuv_Rec2020 = mat3(
+  1.00000, 1.00000, 1.00000,
+  0.00000,-0.08228, 0.94070,
+  0.73730,-0.28568, 0.00000
+);
+
+// The matrix is stored in column-major.
+// Identity is stored as GBR
+const mat3 RgbFromYuv_GbrIdentity = mat3(
+    0.0              ,  1.0,                0.0,
+    0.0              ,  0.0,                1.0,
+    1.0              ,  0.0,                0.0
+);
+
+// -
+
+struct YuvPrimitive {
+    int channel_bit_depth; // bits per channel; values above 8 change channel_max in get_yuv_color_info
+    int color_space; // compared against the YUV_COLOR_SPACE_* constants
+    int yuv_format; // compared against the YUV_FORMAT_* constants
+};
+
+struct YuvColorSamplingInfo {
+    mat3 rgb_from_yuv; // one of the RgbFromYuv_* matrices, chosen in get_yuv_color_info
+    vec4 packed_zero_one_vals; // xy = "zero" levels, zw = "one" levels (unpacked in get_rgb_from_ycbcr_info)
+};
+
+struct YuvColorMatrixInfo {
+    vec3 ycbcr_bias; // filled with zero.xyy by get_rgb_from_ycbcr_info
+    mat3 rgb_from_debiased_ycbrc; // rgb_from_yuv multiplied by the diagonal range-scale matrix
+};
+
+// -
+
+vec4 yuv_channel_zero_one_identity(int bit_depth, float channel_max) {
+    float one = float((1 << bit_depth) - 1) / channel_max; // all-ones code value relative to channel_max
+    return vec4(vec2(0.0), vec2(one));
+}
+
+vec4 yuv_channel_zero_one_narrow_range(int bit_depth, float channel_max) {
+    // The 8-bit code points are shifted up to bit_depth. Note: 512/1023 != 128/255
+    int extra_bits = bit_depth - 8;
+    return vec4(ivec4(16, 128, 235, 240) << extra_bits) / channel_max;
+}
+
+vec4 yuv_channel_zero_one_full_range(int bit_depth, float channel_max) {
+    // Start from the identity levels and take only the second "zero" level from the narrow range.
+    vec4 zero_one = yuv_channel_zero_one_identity(bit_depth, channel_max);
+    zero_one.y = yuv_channel_zero_one_narrow_range(bit_depth, channel_max).y;
+    return zero_one;
+}
+
+YuvColorSamplingInfo get_yuv_color_info(YuvPrimitive prim) {
+    int depth = prim.channel_bit_depth;
+    int space = prim.color_space;
+
+    float channel_max = 255.0;
+    if (depth > 8) {
+        // P010 is an MSB format. For other >8bpc formats we get the low bits,
+        // not the high bits: 10bpc(1.0): 0b0000_0011_1111_1111
+        channel_max = (prim.yuv_format == YUV_FORMAT_P010)
+            ? float((1 << depth) - 1) : 65535.0;
+    }
+
+    // The narrow and full variants of a standard share one conversion matrix.
+    mat3 rgb_from_yuv;
+    if (space == YUV_COLOR_SPACE_REC601_NARROW || space == YUV_COLOR_SPACE_REC601_FULL) {
+        rgb_from_yuv = RgbFromYuv_Rec601;
+    } else if (space == YUV_COLOR_SPACE_REC709_NARROW || space == YUV_COLOR_SPACE_REC709_FULL) {
+        rgb_from_yuv = RgbFromYuv_Rec709;
+    } else if (space == YUV_COLOR_SPACE_REC2020_NARROW || space == YUV_COLOR_SPACE_REC2020_FULL) {
+        rgb_from_yuv = RgbFromYuv_Rec2020;
+    } else {
+        // Identity
+        rgb_from_yuv = RgbFromYuv_GbrIdentity;
+    }
+
+    // Zero/one levels: narrow range, full range, or identity for anything else.
+    vec4 zero_one_vals;
+    if (space == YUV_COLOR_SPACE_REC601_NARROW
+            || space == YUV_COLOR_SPACE_REC709_NARROW
+            || space == YUV_COLOR_SPACE_REC2020_NARROW) {
+        zero_one_vals = yuv_channel_zero_one_narrow_range(depth, channel_max);
+    } else if (space == YUV_COLOR_SPACE_REC601_FULL
+            || space == YUV_COLOR_SPACE_REC709_FULL
+            || space == YUV_COLOR_SPACE_REC2020_FULL) {
+        zero_one_vals = yuv_channel_zero_one_full_range(depth, channel_max);
+    } else {
+        zero_one_vals = yuv_channel_zero_one_identity(depth, channel_max);
+    }
+    return YuvColorSamplingInfo(rgb_from_yuv, zero_one_vals);
+}
+
+YuvColorMatrixInfo get_rgb_from_ycbcr_info(YuvPrimitive prim) {
+    YuvColorSamplingInfo sampling = get_yuv_color_info(prim);
+    vec4 zero_one = sampling.packed_zero_one_vals;
+
+    // Normalization is yuv_value = (ycbcr_sample - zero) / (one - zero),
+    // using .x for the first channel and .y for the other two.
+    vec2 zero = zero_one.xy;
+    vec2 scale = 1.0 / (zero_one.zw - zero);
+
+    // Diagonal matrix that applies the per-channel scale once the bias is removed.
+    mat3 yuv_from_debiased_ycbcr = mat3(vec3(scale.x, 0.0, 0.0),
+                                        vec3(0.0, scale.y, 0.0),
+                                        vec3(0.0, 0.0, scale.y));
+
+    return YuvColorMatrixInfo(zero.xyy, sampling.rgb_from_yuv * yuv_from_debiased_ycbcr);
+}
+
+void write_uv_rect(
+    vec2 uv0,
+    vec2 uv1,
+    vec2 f,
+    vec2 texture_size,
+    out vec2 uv,
+    out vec4 uv_bounds
+) {
+    vec2 pos = mix(uv0, uv1, f); // position interpolated between the two corners by f
+    vec4 bounds = vec4(uv0 + vec2(0.5), uv1 - vec2(0.5)); // both corners pulled inward by 0.5
+    #ifndef WR_FEATURE_TEXTURE_RECT
+        pos = pos / texture_size; // without the TEXTURE_RECT feature, divide by the texture size
+        bounds = bounds / texture_size.xyxy;
+    #endif
+    uv = pos;
+    uv_bounds = bounds;
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+
+vec4 sample_yuv( // Samples the planes required by "format" and converts the result to RGB with alpha 1.0.
+    int format,
+    YUV_PRECISION vec3 ycbcr_bias,
+    YUV_PRECISION mat3 rgb_from_debiased_ycbrc,
+    vec2 in_uv_y,
+    vec2 in_uv_u,
+    vec2 in_uv_v,
+    vec4 uv_bounds_y,
+    vec4 uv_bounds_u,
+    vec4 uv_bounds_v
+) {
+    YUV_PRECISION vec3 ycbcr_sample;
+
+    switch (format) {
+        case YUV_FORMAT_PLANAR:
+            {
+                // The yuv_planar format should have this third texture coordinate.
+                vec2 uv_y = clamp(in_uv_y, uv_bounds_y.xy, uv_bounds_y.zw);
+                vec2 uv_u = clamp(in_uv_u, uv_bounds_u.xy, uv_bounds_u.zw);
+                vec2 uv_v = clamp(in_uv_v, uv_bounds_v.xy, uv_bounds_v.zw);
+                ycbcr_sample.x = TEX_SAMPLE(sColor0, uv_y).r;
+                ycbcr_sample.y = TEX_SAMPLE(sColor1, uv_u).r;
+                ycbcr_sample.z = TEX_SAMPLE(sColor2, uv_v).r;
+            }
+            break;
+
+        case YUV_FORMAT_NV12:
+        case YUV_FORMAT_P010: // both read the first channel from sColor0.r and the other two from sColor1.rg
+            {
+                vec2 uv_y = clamp(in_uv_y, uv_bounds_y.xy, uv_bounds_y.zw);
+                vec2 uv_uv = clamp(in_uv_u, uv_bounds_u.xy, uv_bounds_u.zw);
+                ycbcr_sample.x = TEX_SAMPLE(sColor0, uv_y).r;
+                ycbcr_sample.yz = TEX_SAMPLE(sColor1, uv_uv).rg;
+            }
+            break;
+
+        case YUV_FORMAT_INTERLEAVED:
+            {
+                // "The Y, Cb and Cr color channels within the 422 data are mapped into
+                // the existing green, blue and red color channels."
+                // https://www.khronos.org/registry/OpenGL/extensions/APPLE/APPLE_rgb_422.txt
+                vec2 uv_y = clamp(in_uv_y, uv_bounds_y.xy, uv_bounds_y.zw);
+                ycbcr_sample = TEX_SAMPLE(sColor0, uv_y).gbr;
+            }
+            break;
+
+        default: // any other format value yields a zero sample
+            ycbcr_sample = vec3(0.0);
+            break;
+    }
+    //if (true) return vec4(ycbcr_sample, 1.0);
+
+    // See the YuvColorMatrix definition for an explanation of where the constants come from.
+    YUV_PRECISION vec3 rgb = rgb_from_debiased_ycbrc * (ycbcr_sample - ycbcr_bias);
+
+    #if defined(WR_FEATURE_ALPHA_PASS) && defined(SWGL_CLIP_MASK)
+        // Avoid out-of-range RGB values that can mess with blending. These occur due to invalid
+        // YUV values outside the mappable space that can nevertheless be generated.
+        rgb = clamp(rgb, 0.0, 1.0);
+    #endif
+    return vec4(rgb, 1.0);
+}
+#endif
diff --git a/gfx/wr/webrender/src/api_resources.rs b/gfx/wr/webrender/src/api_resources.rs
new file mode 100644
index 0000000000..bf22fc8a23
--- /dev/null
+++ b/gfx/wr/webrender/src/api_resources.rs
@@ -0,0 +1,282 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use crate::api::{BlobImageKey, ImageDescriptor, DirtyRect, TileSize};
+use crate::api::{BlobImageHandler, AsyncBlobImageRasterizer, BlobImageData, BlobImageParams};
+use crate::api::{BlobImageRequest, BlobImageDescriptor, FontTemplate};
+use crate::api::units::*;
+use glyph_rasterizer::{SharedFontResources, BaseFontInstance};
+use crate::render_api::{ResourceUpdate, TransactionMsg, AddFont};
+use crate::image_tiling::*;
+use crate::profiler;
+
+use std::collections::HashMap;
+use std::mem;
+use std::sync::Arc;
+
+/// Per-blob bookkeeping used to generate the async blob rendering requests.
+struct BlobImageTemplate {
+    descriptor: ImageDescriptor, // its size is set to the visible rect's size when an update carries no descriptor
+    tile_size: TileSize, // set from AddBlobImage; used for every tile range/rect computation
+    dirty_rect: BlobDirtyRect, // unioned across updates; reset to empty once requests are generated
+    /// See ImageResource::visible_rect.
+    visible_rect: DeviceIntRect,
+    /// If the active rect of the blob changes, this represents the
+    /// range of tiles that remain valid. This must be taken into
+    /// account in addition to the dirty rect when submitting blob
+    /// rasterization requests.
+    /// `None` means the bounds have not changed (tiles are still valid).
+    /// `Some(TileRange::zero())` means all of the tiles are invalid.
+    valid_tiles_after_bounds_change: Option<TileRange>,
+}
+
+pub struct ApiResources { // State that `update` consults and mutates for each transaction.
+    blob_image_templates: HashMap<BlobImageKey, BlobImageTemplate>, // added by AddBlobImage, removed by DeleteBlobImage
+    pub blob_image_handler: Option<Box<dyn BlobImageHandler>>, // adding or updating blob data expects this to be set
+    fonts: SharedFontResources, // written by AddFont / AddFontInstance and passed to the blob handler
+}
+
+impl ApiResources {
+    /// Builds the resource state with no blob image templates registered;
+    /// the handler and the shared font resources are stored as given.
+    pub fn new(
+        blob_image_handler: Option<Box<dyn BlobImageHandler>>,
+        fonts: SharedFontResources,
+    ) -> Self {
+        let blob_image_templates = HashMap::new();
+
+        Self { blob_image_templates, blob_image_handler, fonts }
+    }
+
+    pub fn get_fonts(&self) -> SharedFontResources {
+        SharedFontResources::clone(&self.fonts) // hands out a clone of the stored font resources
+    }
+
+    pub fn update(&mut self, transaction: &mut TransactionMsg) { // Applies blob/font updates and fills in the blob requests.
+        let mut blobs_to_rasterize = Vec::new(); // keys touched by add / update / visible-area changes
+        for update in &mut transaction.resource_updates {
+            match *update {
+                ResourceUpdate::AddBlobImage(ref img) => {
+                    self.blob_image_handler
+                        .as_mut()
+                        .expect("no blob image handler")
+                        .add(img.key, Arc::clone(&img.data), &img.visible_rect, img.tile_size);
+
+                    self.blob_image_templates.insert(
+                        img.key,
+                        BlobImageTemplate {
+                            descriptor: img.descriptor,
+                            tile_size: img.tile_size,
+                            dirty_rect: DirtyRect::All, // a new blob starts fully dirty
+                            valid_tiles_after_bounds_change: None,
+                            visible_rect: img.visible_rect,
+                        },
+                    );
+                    blobs_to_rasterize.push(img.key);
+                }
+                ResourceUpdate::UpdateBlobImage(ref img) => {
+                    debug_assert_eq!(img.visible_rect.size(), img.descriptor.size);
+                    self.update_blob_image(
+                        img.key,
+                        Some(&img.descriptor),
+                        Some(&img.dirty_rect),
+                        Some(Arc::clone(&img.data)),
+                        &img.visible_rect,
+                    );
+                    blobs_to_rasterize.push(img.key);
+                }
+                ResourceUpdate::DeleteBlobImage(key) => {
+                    transaction.use_scene_builder_thread = true;
+                    self.blob_image_templates.remove(&key);
+                    if let Some(ref mut handler) = self.blob_image_handler {
+                        handler.delete(key);
+                    }
+                }
+                ResourceUpdate::SetBlobImageVisibleArea(ref key, ref area) => {
+                    self.update_blob_image(*key, None, None, None, area); // only the visible rect changes
+                    blobs_to_rasterize.push(*key);
+                }
+                ResourceUpdate::AddFont(ref font) => {
+                    let (key, template) = match font {
+                        AddFont::Raw(key, bytes, index) => {
+                            (key, FontTemplate::Raw(Arc::clone(bytes), *index))
+                        }
+                        AddFont::Native(key, native_font_handle) => {
+                            (key, FontTemplate::Native(native_font_handle.clone()))
+                        }
+                    };
+                    if let Some(shared_key) = self.fonts.font_keys.add_key(key, &template) {
+                        self.fonts.templates.add_font(shared_key, template);
+                    }
+                }
+                ResourceUpdate::AddFontInstance(ref mut instance) => {
+                    let shared_font_key = self.fonts.font_keys.map_key(&instance.font_key);
+                    assert!(self.fonts.templates.has_font(&shared_font_key));
+                    // AddFontInstance will only be processed here, not in the resource cache, so it
+                    // is safe to take the options rather than clone them.
+                    let base = BaseFontInstance::new(
+                        instance.key,
+                        shared_font_key,
+                        instance.glyph_size,
+                        mem::take(&mut instance.options),
+                        mem::take(&mut instance.platform_options),
+                        mem::take(&mut instance.variations),
+                    );
+                    if let Some(shared_instance) = self.fonts.instance_keys.add_key(base) {
+                        self.fonts.instances.add_font_instance(shared_instance);
+                    }
+                }
+                ResourceUpdate::DeleteFont(_key) => {
+                    transaction.use_scene_builder_thread = true;
+                }
+                ResourceUpdate::DeleteFontInstance(_key) => {
+                    transaction.use_scene_builder_thread = true;
+                    // We will delete from the shared font instance map in the resource cache
+                    // after scene swap.
+                }
+                ResourceUpdate::DeleteImage(..) => {
+                    transaction.use_scene_builder_thread = true;
+                }
+                _ => {} // every other update kind is left untouched here
+            }
+        }
+
+        let (rasterizer, requests) = self.create_blob_scene_builder_requests(&blobs_to_rasterize);
+        transaction.profile.set(profiler::RASTERIZED_BLOBS, blobs_to_rasterize.len());
+        transaction.profile.set(profiler::RASTERIZED_BLOB_TILES, requests.len());
+        transaction.use_scene_builder_thread |= !requests.is_empty(); // pending blob tiles set the flag
+        transaction.use_scene_builder_thread |= !transaction.scene_ops.is_empty(); // so do pending scene ops
+        transaction.blob_rasterizer = rasterizer;
+        transaction.blob_requests = requests;
+    }
+
+    pub fn enable_multithreading(&mut self, enable: bool) {
+        if let Some(handler) = self.blob_image_handler.as_deref_mut() {
+            handler.enable_multithreading(enable); // forwarded as-is; nothing happens without a handler
+        }
+    }
+
+    /// Applies new data/descriptor/dirty rect/visible rect to an existing blob template; panics if `key` is unknown.
+    fn update_blob_image(
+        &mut self,
+        key: BlobImageKey,
+        descriptor: Option<&ImageDescriptor>,
+        dirty_rect: Option<&BlobDirtyRect>,
+        data: Option<Arc<BlobImageData>>,
+        visible_rect: &DeviceIntRect,
+    ) {
+        if let Some(data) = data {
+            let dirty_rect = dirty_rect.expect("no dirty rect");
+            self.blob_image_handler
+                .as_mut()
+                .expect("no blob image handler")
+                .update(key, data, visible_rect, dirty_rect);
+        }
+
+        let image = self.blob_image_templates
+            .get_mut(&key)
+            .expect("Attempt to update non-existent blob image");
+
+        let valid_tiles_after_bounds_change = compute_valid_tiles_if_bounds_change(
+            &image.visible_rect,
+            visible_rect,
+            image.tile_size,
+        );
+
+        // Merge with a bounds change that is still pending: a tile only stays valid if it
+        // survived both changes. (Binding `ref mut` into the tuple would only edit a copy.)
+        let valid_tiles_after_bounds_change =
+            match (image.valid_tiles_after_bounds_change, valid_tiles_after_bounds_change) {
+                (Some(old), Some(new)) => Some(new.intersection(&old).unwrap_or_else(TileRange::zero)),
+                (Some(old), None) => Some(old),
+                (None, new) => new,
+            };
+
+        let blob_size = visible_rect.size();
+
+        if let Some(descriptor) = descriptor {
+            image.descriptor = *descriptor;
+        } else {
+            // make sure the descriptor size matches the visible rect.
+            // This might not be necessary but let's stay on the safe side.
+            image.descriptor.size = blob_size;
+        }
+
+        if let Some(dirty_rect) = dirty_rect {
+            image.dirty_rect = image.dirty_rect.union(dirty_rect);
+        }
+
+        image.valid_tiles_after_bounds_change = valid_tiles_after_bounds_change;
+        image.visible_rect = *visible_rect;
+    }
+
+    /// Turns the pending invalidation of each blob in `keys` into per-tile requests.
+    /// Returns `(None, empty)` without a handler or keys; panics on a key with no template.
+    pub fn create_blob_scene_builder_requests(
+        &mut self,
+        keys: &[BlobImageKey]
+    ) -> (Option<Box<dyn AsyncBlobImageRasterizer>>, Vec<BlobImageParams>) {
+        if keys.is_empty() || self.blob_image_handler.is_none() {
+            return (None, Vec::new());
+        }
+
+        let mut blob_request_params = Vec::new();
+        for key in keys {
+            let template = self.blob_image_templates
+                .get_mut(key)
+                .expect("no blob image template");
+            let tile_size = template.tile_size;
+            let visible_rect = template.visible_rect;
+            let format = template.descriptor.format;
+            let valid_tiles = template.valid_tiles_after_bounds_change;
+
+            // Blob images can be huge, so only the tiles covering the visible
+            // rect are ever candidates for rasterization.
+            let tiles = compute_tile_range(&visible_rect, tile_size);
+
+            // Tiles touched by the accumulated dirty rect; every candidate tile
+            // if the whole image was invalidated.
+            let dirty_tiles = match template.dirty_rect {
+                DirtyRect::All => tiles,
+                DirtyRect::Partial(dirty_rect) => {
+                    compute_tile_range(&dirty_rect.cast_unit(), tile_size)
+                }
+            };
+
+            for_each_tile_in_range(&tiles, |tile| {
+                // A tile is requested when it is dirty, or when a bounds change
+                // since the last round of requests left it outside the valid range.
+                let still_valid = valid_tiles.map_or(true, |range| range.contains(tile));
+                let needs_raster = !still_valid || dirty_tiles.contains(tile);
+                if !needs_raster {
+                    return;
+                }
+
+                let descriptor = BlobImageDescriptor {
+                    rect: compute_tile_rect(&visible_rect, tile_size, tile).cast_unit(),
+                    format,
+                };
+                assert!(descriptor.rect.width() > 0 && descriptor.rect.height() > 0);
+
+                blob_request_params.push(BlobImageParams {
+                    request: BlobImageRequest { key: *key, tile },
+                    descriptor,
+                    dirty_rect: DirtyRect::All,
+                });
+            });
+
+            // The pending invalidation has been turned into requests; reset it.
+            template.valid_tiles_after_bounds_change = None;
+            template.dirty_rect = DirtyRect::empty();
+        }
+
+        // The handler sees the complete request list before the rasterizer is created.
+        let handler = self.blob_image_handler
+            .as_mut()
+            .expect("no blob image handler");
+        handler.prepare_resources(&self.fonts, &blob_request_params);
+        let rasterizer = handler.create_blob_rasterizer();
+        (Some(rasterizer), blob_request_params)
+    }
+}
diff --git a/gfx/wr/webrender/src/batch.rs b/gfx/wr/webrender/src/batch.rs
new file mode 100644
index 0000000000..9d3f81ff5c
--- /dev/null
+++ b/gfx/wr/webrender/src/batch.rs
@@ -0,0 +1,3783 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{AlphaType, ClipMode, ImageRendering, ImageBufferKind};
+use api::{FontInstanceFlags, YuvColorSpace, YuvFormat, ColorDepth, ColorRange, PremultipliedColorF};
+use api::units::*;
+use crate::clip::{ClipNodeFlags, ClipNodeRange, ClipItemKind, ClipStore};
+use crate::command_buffer::PrimitiveCommand;
+use crate::spatial_tree::{SpatialTree, SpatialNodeIndex, CoordinateSystemId};
+use glyph_rasterizer::{GlyphFormat, SubpixelDirection};
+use crate::gpu_cache::{GpuBlockData, GpuCache, GpuCacheAddress};
+use crate::gpu_types::{BrushFlags, BrushInstance, PrimitiveHeaders, ZBufferId, ZBufferIdGenerator};
+use crate::gpu_types::{SplitCompositeInstance};
+use crate::gpu_types::{PrimitiveInstanceData, RasterizationSpace, GlyphInstance};
+use crate::gpu_types::{PrimitiveHeader, PrimitiveHeaderIndex, TransformPaletteId, TransformPalette};
+use crate::gpu_types::{ImageBrushData, get_shader_opacity, BoxShadowData};
+use crate::gpu_types::{ClipMaskInstanceCommon, ClipMaskInstanceImage, ClipMaskInstanceRect, ClipMaskInstanceBoxShadow};
+use crate::internal_types::{FastHashMap, Swizzle, TextureSource, Filter};
+use crate::picture::{Picture3DContext, PictureCompositeMode, calculate_screen_uv};
+use crate::prim_store::{PrimitiveInstanceKind, ClipData};
+use crate::prim_store::{PrimitiveInstance, PrimitiveOpacity, SegmentInstanceIndex};
+use crate::prim_store::{BrushSegment, ClipMaskKind, ClipTaskIndex};
+use crate::prim_store::{VECS_PER_SEGMENT};
+use crate::render_target::RenderTargetContext;
+use crate::render_task_graph::{RenderTaskId, RenderTaskGraph};
+use crate::render_task::{RenderTaskAddress, RenderTaskKind};
+use crate::renderer::{BlendMode, ShaderColorMode};
+use crate::renderer::{MAX_VERTEX_TEXTURE_WIDTH, GpuBufferBuilder};
+use crate::resource_cache::{GlyphFetchResult, ImageProperties, ImageRequest};
+use crate::space::SpaceMapper;
+use crate::visibility::{PrimitiveVisibilityFlags, VisibilityState};
+use smallvec::SmallVec;
+use std::{f32, i32, usize};
+use crate::util::{project_rect, MaxRect, MatrixHelpers, TransformedRectKind, ScaleOffset};
+use crate::segment::EdgeAaSegmentMask;
+
+/// Special sentinel value recognized by the shader. It is considered to be
+/// a dummy task that doesn't mask out anything.
+const OPAQUE_TASK_ADDRESS: RenderTaskAddress = RenderTaskAddress(0x7fff);
+
+/// Used to signal there are no segments provided with this primitive.
+const INVALID_SEGMENT_INDEX: i32 = 0xffff;
+
+/// Size in device pixels for tiles that clip masks are drawn in.
+const CLIP_RECTANGLE_TILE_SIZE: i32 = 128;
+
+/// The minimum size of a clip mask before trying to draw in tiles.
+/// Equal to the area of four tiles of `CLIP_RECTANGLE_TILE_SIZE` pixels
+/// per side.
+const CLIP_RECTANGLE_AREA_THRESHOLD: f32 = (CLIP_RECTANGLE_TILE_SIZE * CLIP_RECTANGLE_TILE_SIZE * 4) as f32;
+
+/// The kind of brush a batch contains; this is the payload of
+/// `BatchKind::Brush`.
+///
+/// Variants carry the parameters that distinguish one batch from another
+/// (image buffer kind, YUV format, render task ids, ...). Equality of these
+/// values is part of the batch compatibility check performed by
+/// `BatchKey::is_compatible_with`.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum BrushBatchKind {
+    Solid,
+    Image(ImageBufferKind),
+    Blend,
+    // NOTE(review): presumably `task_id` is the source content and
+    // `backdrop_id` the content it is blended against -- confirm against the
+    // code that builds this variant.
+    MixBlend {
+        task_id: RenderTaskId,
+        backdrop_id: RenderTaskId,
+    },
+    YuvImage(ImageBufferKind, YuvFormat, ColorDepth, YuvColorSpace, ColorRange),
+    LinearGradient,
+    Opacity,
+}
+
+/// Top-level classification of a batch. Together with the blend mode and the
+/// bound textures it forms a `BatchKey`; two keys can only be compatible when
+/// their `BatchKind` values compare equal.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum BatchKind {
+    SplitComposite,
+    TextRun(GlyphFormat),
+    Brush(BrushBatchKind),
+}
+
+/// Input textures for a primitive, without consideration of clip mask
+#[derive(Copy, Clone, Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct TextureSet {
+    /// The three color input slots. Slots that are not used hold
+    /// `TextureSource::Invalid`.
+    pub colors: [TextureSource; 3],
+}
+
+impl TextureSet {
+    /// A texture set with no input bound: every slot is
+    /// `TextureSource::Invalid`.
+    const UNTEXTURED: TextureSet = TextureSet {
+        colors: [TextureSource::Invalid; 3],
+    };
+
+    /// A textured primitive: `color` occupies the first slot, the remaining
+    /// slots stay invalid.
+    fn prim_textured(
+        color: TextureSource,
+    ) -> Self {
+        let mut set = Self::UNTEXTURED;
+        set.colors[0] = color;
+        set
+    }
+
+    /// Returns true when every slot of `self` is compatible with the slot at
+    /// the same position in `other`.
+    fn is_compatible_with(&self, other: &TextureSet) -> bool {
+        self.colors
+            .iter()
+            .zip(other.colors.iter())
+            .all(|(ours, theirs)| ours.is_compatible(theirs))
+    }
+}
+
+impl TextureSource {
+    /// Picks the texture to keep when two sources are merged: `other` wins
+    /// unless it is `TextureSource::Invalid`, in which case `self` is kept.
+    fn combine(&self, other: TextureSource) -> TextureSource {
+        if other != TextureSource::Invalid {
+            return other;
+        }
+
+        *self
+    }
+}
+
+/// Optional textures that can be used as a source in the shaders.
+/// Textures that are not used by the batch are equal to
+/// `TextureSource::Invalid`.
+#[derive(Copy, Clone, Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct BatchTextures {
+    /// Color input textures of the primitive(s).
+    pub input: TextureSet,
+    /// Clip mask texture, or `TextureSource::Invalid` when there is none.
+    pub clip_mask: TextureSource,
+}
+
+impl BatchTextures {
+    /// An empty batch textures (no binding slots set)
+    pub fn empty() -> BatchTextures {
+        Self::prim_untextured(TextureSource::Invalid)
+    }
+
+    /// A textured primitive with optional clip mask
+    pub fn prim_textured(
+        color: TextureSource,
+        clip_mask: TextureSource,
+    ) -> BatchTextures {
+        let input = TextureSet::prim_textured(color);
+
+        BatchTextures { input, clip_mask }
+    }
+
+    /// An untextured primitive with optional clip mask
+    pub fn prim_untextured(
+        clip_mask: TextureSource,
+    ) -> BatchTextures {
+        let input = TextureSet::UNTEXTURED;
+
+        BatchTextures { input, clip_mask }
+    }
+
+    /// A composite style effect with single input texture
+    pub fn composite_rgb(
+        texture: TextureSource,
+    ) -> BatchTextures {
+        // Same layout as the YUV case with only the first plane bound.
+        Self::composite_yuv(
+            texture,
+            TextureSource::Invalid,
+            TextureSource::Invalid,
+        )
+    }
+
+    /// A composite style effect with up to 3 input textures
+    pub fn composite_yuv(
+        color0: TextureSource,
+        color1: TextureSource,
+        color2: TextureSource,
+    ) -> BatchTextures {
+        let input = TextureSet {
+            colors: [color0, color1, color2],
+        };
+
+        BatchTextures {
+            input,
+            clip_mask: TextureSource::Invalid,
+        }
+    }
+
+    /// Returns true when both the clip mask and every input slot of `self`
+    /// are compatible with their counterpart in `other`.
+    pub fn is_compatible_with(&self, other: &BatchTextures) -> bool {
+        self.clip_mask.is_compatible(&other.clip_mask) &&
+            self.input.is_compatible_with(&other.input)
+    }
+
+    /// Returns the slot-by-slot combination of `self` and `other`, or `None`
+    /// if the two sets are not compatible. In each slot the texture from
+    /// `other` is used unless it is invalid.
+    pub fn combine_textures(&self, other: BatchTextures) -> Option<BatchTextures> {
+        if self.is_compatible_with(&other) {
+            let mut combined = *self;
+            combined.merge(&other);
+            Some(combined)
+        } else {
+            None
+        }
+    }
+
+    /// In-place version of the combination: every valid texture in `other`
+    /// replaces the texture in the matching slot of `self`.
+    fn merge(&mut self, other: &BatchTextures) {
+        self.clip_mask = self.clip_mask.combine(other.clip_mask);
+
+        for slot in 0 .. self.input.colors.len() {
+            self.input.colors[slot] = self.input.colors[slot].combine(other.input.colors[slot]);
+        }
+    }
+}
+
+/// Describes what a batch draws: its kind, blend mode and bound textures.
+/// Batches are matched against each other with `is_compatible_with` rather
+/// than by strict equality (the type does not implement `PartialEq`).
+#[derive(Copy, Clone, Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct BatchKey {
+    pub kind: BatchKind,
+    pub blend_mode: BlendMode,
+    pub textures: BatchTextures,
+}
+
+impl BatchKey {
+    /// Bundles the given kind, blend mode and textures into a key.
+    pub fn new(kind: BatchKind, blend_mode: BlendMode, textures: BatchTextures) -> Self {
+        Self { kind, blend_mode, textures }
+    }
+
+    /// Two keys are compatible when their kind and blend mode are equal and
+    /// their textures are compatible.
+    pub fn is_compatible_with(&self, other: &BatchKey) -> bool {
+        if self.kind != other.kind {
+            return false;
+        }
+
+        if self.blend_mode != other.blend_mode {
+            return false;
+        }
+
+        self.textures.is_compatible_with(&other.textures)
+    }
+}
+
+/// Tracks the area covered by the items of one batch, so that overlap with a
+/// new primitive can be tested: a cheap union rect first, and per-item rects
+/// only when the union is too coarse an approximation.
+pub struct BatchRects {
+    /// Union of all of the batch's item rects.
+    ///
+    /// Very often we can skip iterating over item rects by testing against
+    /// this one first.
+    batch: PictureRect,
+    /// When the batch rectangle above isn't a good enough approximation, we
+    /// store per item rects.
+    items: Option<Vec<PictureRect>>,
+}
+
+impl BatchRects {
+    /// Creates an empty set: a zero-sized union rect and no per-item rects.
+    fn new() -> Self {
+        BatchRects {
+            batch: PictureRect::zero(),
+            items: None,
+        }
+    }
+
+    /// Records `rect` as covered by the batch.
+    ///
+    /// The union rect is always grown. Per-item rects are only kept once the
+    /// union has been found to over-approximate the covered area, i.e. when
+    /// the union's area exceeds the sum of the previous union's area and the
+    /// new rect's area.
+    #[inline]
+    fn add_rect(&mut self, rect: &PictureRect) {
+        let union = self.batch.union(rect);
+        // If we have already started storing per-item rects, continue doing so.
+        // Otherwise, check whether only storing the batch rect is a good enough
+        // approximation.
+        if let Some(items) = &mut self.items {
+            items.push(*rect);
+        } else if self.batch.area() + rect.area() < union.area() {
+            // Seed the list with the union so far: it stands in for all the
+            // items that were added before per-item tracking started.
+            let mut items = Vec::with_capacity(16);
+            items.push(self.batch);
+            items.push(*rect);
+            self.items = Some(items);
+        }
+
+        self.batch = union;
+    }
+
+    /// Returns true if `rect` may overlap an item of this batch.
+    ///
+    /// This is a read-only query, so it only needs shared access to `self`.
+    #[inline]
+    fn intersects(&self, rect: &PictureRect) -> bool {
+        // Cheap rejection against the union of all items.
+        if !self.batch.intersects(rect) {
+            return false;
+        }
+
+        match &self.items {
+            Some(items) => items.iter().any(|item| item.intersects(rect)),
+            // If we don't have per-item rects it means the batch rect is a good
+            // enough approximation and we didn't bother storing per-rect items.
+            None => true,
+        }
+    }
+}
+
+
+/// List of batches for blended primitives, in the order they were created.
+pub struct AlphaBatchList {
+    pub batches: Vec<PrimitiveBatch>,
+    /// Covered-area tracking for each entry of `batches` (same indices).
+    pub batch_rects: Vec<BatchRects>,
+    /// Index of the batch selected by the last call to
+    /// `set_params_and_get_batch`, or `usize::MAX` if there is none.
+    current_batch_index: usize,
+    /// The z id passed to the last call that selected a batch.
+    current_z_id: ZBufferId,
+    /// When set, `BlendMode::Advanced` primitives never reuse an earlier
+    /// batch.
+    break_advanced_blend_batches: bool,
+}
+
+impl AlphaBatchList {
+    /// Creates an empty list with room for `preallocate` batches.
+    fn new(break_advanced_blend_batches: bool, preallocate: usize) -> Self {
+        Self {
+            batches: Vec::with_capacity(preallocate),
+            batch_rects: Vec::with_capacity(preallocate),
+            current_batch_index: usize::MAX,
+            current_z_id: ZBufferId::invalid(),
+            break_advanced_blend_batches,
+        }
+    }
+
+    /// Clear all current batches in this list. This is typically used
+    /// when a primitive is encountered that occludes all previous
+    /// content in this batch list.
+    fn clear(&mut self) {
+        self.batches.clear();
+        self.batch_rects.clear();
+        self.current_z_id = ZBufferId::invalid();
+        self.current_batch_index = usize::MAX;
+    }
+
+    /// Selects (or creates) the batch that instances described by `key`
+    /// should be appended to, and returns its instance list.
+    ///
+    /// The previously selected batch is reused directly when `z_id` is
+    /// unchanged and its key is compatible. Otherwise earlier batches are
+    /// searched from the most recent one backwards, stopping as soon as a
+    /// batch overlapping `z_bounding_rect` is met, and a new batch is
+    /// appended if none can be reused.
+    pub fn set_params_and_get_batch(
+        &mut self,
+        key: BatchKey,
+        features: BatchFeatures,
+        // The bounding box of everything at this Z plane. We expect potentially
+        // multiple primitive segments coming with the same `z_id`.
+        z_bounding_rect: &PictureRect,
+        z_id: ZBufferId,
+    ) -> &mut Vec<PrimitiveInstanceData> {
+        // The index check must come before the indexing expression, since
+        // `usize::MAX` marks "no current batch".
+        let can_reuse_current = z_id == self.current_z_id &&
+            self.current_batch_index != usize::MAX &&
+            self.batches[self.current_batch_index].key.is_compatible_with(&key);
+
+        if !can_reuse_current {
+            let mut reusable_index = None;
+
+            match key.blend_mode {
+                BlendMode::SubpixelWithBgColor => {
+                    for index in (0 .. self.batches.len()).rev() {
+                        // Some subpixel batches are drawn in two passes. Because of this, we need
+                        // to check for overlaps with every batch (which is a bit different
+                        // than the normal batching below).
+                        if self.batch_rects[index].intersects(z_bounding_rect) {
+                            break;
+                        }
+
+                        if self.batches[index].key.is_compatible_with(&key) {
+                            reusable_index = Some(index);
+                            break;
+                        }
+                    }
+                }
+                BlendMode::Advanced(_) if self.break_advanced_blend_batches => {
+                    // don't try to find a batch
+                }
+                _ => {
+                    for index in (0 .. self.batches.len()).rev() {
+                        // For normal batches, we only need to check for overlaps for batches
+                        // other than the first batch we consider. If the first batch
+                        // is compatible, then we know there isn't any potential overlap
+                        // issues to worry about.
+                        if self.batches[index].key.is_compatible_with(&key) {
+                            reusable_index = Some(index);
+                            break;
+                        }
+
+                        // check for intersections
+                        if self.batch_rects[index].intersects(z_bounding_rect) {
+                            break;
+                        }
+                    }
+                }
+            }
+
+            let batch_index = match reusable_index {
+                Some(index) => index,
+                None => {
+                    // Text runs tend to have a lot of instances per batch, causing a lot of
+                    // reallocation churn as items are added one by one, so we give it a head
+                    // start. Ideally we'd start with a larger number, closer to 1k but in some
+                    // bad cases with lots of batch break we would be wasting a lot of memory.
+                    // Generally it is safe to preallocate small-ish values for other batch kinds
+                    // because the items are small and there are no zero-sized batches so there
+                    // will always be at least one allocation.
+                    let prealloc = match key.kind {
+                        BatchKind::TextRun(..) => 128,
+                        _ => 16,
+                    };
+                    let mut new_batch = PrimitiveBatch::new(key);
+                    new_batch.instances.reserve(prealloc);
+
+                    let index = self.batches.len();
+                    self.batches.push(new_batch);
+                    self.batch_rects.push(BatchRects::new());
+                    index
+                }
+            };
+
+            self.current_batch_index = batch_index;
+            self.batch_rects[batch_index].add_rect(z_bounding_rect);
+            self.current_z_id = z_id;
+        }
+
+        let current = &mut self.batches[self.current_batch_index];
+        current.features |= features;
+        current.key.textures.merge(&key.textures);
+
+        &mut current.instances
+    }
+}
+
+/// List of batches for primitives drawn without blending.
+pub struct OpaqueBatchList {
+    /// Primitives whose bounding rect area exceeds this value are only
+    /// matched against the most recent batch instead of looking further back.
+    pub pixel_area_threshold_for_new_batch: f32,
+    pub batches: Vec<PrimitiveBatch>,
+    /// Index of the batch selected by the last call to
+    /// `set_params_and_get_batch`, or `usize::MAX` if there is none.
+    pub current_batch_index: usize,
+    /// Maximum number of most recent batches examined when looking for a
+    /// compatible batch.
+    lookback_count: usize,
+}
+
+impl OpaqueBatchList {
+    /// Creates an empty list with the given area threshold and look-back
+    /// limit.
+    fn new(pixel_area_threshold_for_new_batch: f32, lookback_count: usize) -> Self {
+        Self {
+            pixel_area_threshold_for_new_batch,
+            batches: Vec::new(),
+            current_batch_index: usize::MAX,
+            lookback_count,
+        }
+    }
+
+    /// Clear all current batches in this list. This is typically used
+    /// when a primitive is encountered that occludes all previous
+    /// content in this batch list.
+    fn clear(&mut self) {
+        self.batches.clear();
+        self.current_batch_index = usize::MAX;
+    }
+
+    /// Selects (or creates) the batch that instances described by `key`
+    /// should be appended to, and returns its instance list.
+    pub fn set_params_and_get_batch(
+        &mut self,
+        key: BatchKey,
+        features: BatchFeatures,
+        // The bounding box of everything at the current Z, whatever it is. We expect potentially
+        // multiple primitive segments produced by a primitive, which we allow to check
+        // `current_batch_index` instead of iterating the batches.
+        z_bounding_rect: &PictureRect,
+    ) -> &mut Vec<PrimitiveInstanceData> {
+        // The index check must come first: `usize::MAX` marks "no current batch".
+        let can_reuse_current = self.current_batch_index != usize::MAX &&
+            self.batches[self.current_batch_index].key.is_compatible_with(&key);
+
+        if !can_reuse_current {
+            let item_area = z_bounding_rect.area();
+
+            let reusable_index = if item_area > self.pixel_area_threshold_for_new_batch {
+                // If the area of this primitive is larger than the given threshold,
+                // then it is large enough to warrant breaking a batch for. In this
+                // case we just see if it can be added to the existing batch or
+                // create a new one.
+                match self.batches.last() {
+                    Some(batch) if batch.key.is_compatible_with(&key) => {
+                        Some(self.batches.len() - 1)
+                    }
+                    _ => None,
+                }
+            } else {
+                // Otherwise, look back through a reasonable number of batches.
+                self.batches
+                    .iter()
+                    .enumerate()
+                    .rev()
+                    .take(self.lookback_count)
+                    .find(|(_, batch)| batch.key.is_compatible_with(&key))
+                    .map(|(index, _)| index)
+            };
+
+            self.current_batch_index = match reusable_index {
+                Some(index) => index,
+                None => {
+                    let index = self.batches.len();
+                    self.batches.push(PrimitiveBatch::new(key));
+                    index
+                }
+            };
+        }
+
+        let current = &mut self.batches[self.current_batch_index];
+        current.features |= features;
+        current.key.textures.merge(&key.textures);
+
+        &mut current.instances
+    }
+
+    /// Reverses the instance order of every batch.
+    fn finalize(&mut self) {
+        // Reverse the instance arrays in the opaque batches
+        // to get maximum z-buffer efficiency by drawing
+        // front-to-back.
+        // TODO(gw): Maybe we can change the batch code to
+        //           build these in reverse and avoid having
+        //           to reverse the instance array here.
+        self.batches
+            .iter_mut()
+            .for_each(|batch| batch.instances.reverse());
+    }
+}
+
+/// A group of primitive instances that share a compatible `BatchKey`.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct PrimitiveBatch {
+    /// Key of the batch. Its textures are updated as instances with
+    /// compatible keys are added.
+    pub key: BatchKey,
+    pub instances: Vec<PrimitiveInstanceData>,
+    /// Union of the features requested by all instances added to the batch.
+    pub features: BatchFeatures,
+}
+
+bitflags! {
+    /// Features of the batch that, if not requested, may allow a fast-path.
+    ///
+    /// Rather than breaking batches when primitives request different features,
+    /// we always request the minimum amount of features to satisfy all items in
+    /// the batch.
+    /// The goal is to let the renderer optionally select more specialized
+    /// versions of a shader if the batch doesn't require certain code paths.
+    /// Not all shaders necessarily implement all of these features.
+    #[cfg_attr(feature = "capture", derive(Serialize))]
+    #[cfg_attr(feature = "replay", derive(Deserialize))]
+    pub struct BatchFeatures: u8 {
+        const ALPHA_PASS = 1 << 0;
+        const ANTIALIASING = 1 << 1;
+        const REPETITION = 1 << 2;
+        /// Indicates a primitive in this batch may use a clip mask.
+        const CLIP_MASK = 1 << 3;
+    }
+}
+
+impl PrimitiveBatch {
+    /// Creates a batch for `key` with no instances and no features requested.
+    fn new(key: BatchKey) -> PrimitiveBatch {
+        let instances = Vec::new();
+        let features = BatchFeatures::empty();
+
+        PrimitiveBatch { key, instances, features }
+    }
+
+    /// Absorbs `other`: its instances are appended after the existing ones,
+    /// its features are added to this batch's features and its textures are
+    /// merged into this batch's key.
+    fn merge(&mut self, other: PrimitiveBatch) {
+        let PrimitiveBatch { key, instances, features } = other;
+
+        self.instances.extend(instances);
+        self.features |= features;
+        self.key.textures.merge(&key.textures);
+    }
+}
+
+/// The batches produced for one scissor rect: either the output of a single
+/// `AlphaBatchBuilder` that has a scissor rect, or the merged output of
+/// builders that have none (see `AlphaBatchBuilder::build`).
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct AlphaBatchContainer {
+    pub opaque_batches: Vec<PrimitiveBatch>,
+    pub alpha_batches: Vec<PrimitiveBatch>,
+    /// The overall scissor rect for this render task, if one
+    /// is required.
+    pub task_scissor_rect: Option<DeviceIntRect>,
+    /// The rectangle of the owning render target that this
+    /// set of batches affects.
+    pub task_rect: DeviceIntRect,
+}
+
+impl AlphaBatchContainer {
+    /// Creates a container with no batches and a zero-sized task rect.
+    pub fn new(
+        task_scissor_rect: Option<DeviceIntRect>,
+    ) -> AlphaBatchContainer {
+        Self {
+            task_scissor_rect,
+            task_rect: DeviceIntRect::zero(),
+            opaque_batches: Vec::new(),
+            alpha_batches: Vec::new(),
+        }
+    }
+
+    /// Returns true when the container holds neither opaque nor alpha batches.
+    pub fn is_empty(&self) -> bool {
+        [&self.opaque_batches, &self.alpha_batches]
+            .iter()
+            .all(|batches| batches.is_empty())
+    }
+
+    /// Folds the batches of `builder` into this container and grows
+    /// `task_rect` to include `task_rect`.
+    fn merge(&mut self, builder: AlphaBatchBuilder, task_rect: &DeviceIntRect) {
+        self.task_rect = self.task_rect.union(task_rect);
+
+        // Opaque batches: merge into the first compatible batch, wherever it is.
+        for incoming in builder.opaque_batch_list.batches {
+            let existing = self.opaque_batches
+                .iter()
+                .position(|candidate| candidate.key.is_compatible_with(&incoming.key));
+
+            if let Some(index) = existing {
+                self.opaque_batches[index].merge(incoming);
+            } else {
+                self.opaque_batches.push(incoming);
+            }
+        }
+
+        // Alpha batches: the search for a compatible batch never starts
+        // before the batch the previous incoming batch was merged into, and
+        // starts past the end once a batch has been appended.
+        let mut search_start = 0;
+
+        for incoming in builder.alpha_batch_list.batches {
+            let existing = self.alpha_batches
+                .iter()
+                .enumerate()
+                .skip(search_start)
+                .find(|(_, candidate)| candidate.key.is_compatible_with(&incoming.key))
+                .map(|(index, _)| index);
+
+            search_start = match existing {
+                Some(index) => {
+                    self.alpha_batches[index].merge(incoming);
+                    index
+                }
+                None => {
+                    self.alpha_batches.push(incoming);
+                    self.alpha_batches.len()
+                }
+            };
+        }
+    }
+}
+
+/// Each segment can optionally specify a per-segment
+/// texture set and one user data field.
+#[derive(Debug, Copy, Clone)]
+struct SegmentInstanceData {
+    /// Input textures used by this segment.
+    textures: TextureSet,
+    /// The per-segment user data field.
+    // NOTE(review): presumably an address of segment-specific GPU data --
+    // confirm against the code that fills this in.
+    specific_resource_address: i32,
+}
+
+/// Encapsulates the logic of building batches for items that are blended.
+///
+/// Instances whose key uses `BlendMode::None` go to `opaque_batch_list`;
+/// every other blend mode goes to `alpha_batch_list`.
+pub struct AlphaBatchBuilder {
+    pub alpha_batch_list: AlphaBatchList,
+    pub opaque_batch_list: OpaqueBatchList,
+    pub render_task_id: RenderTaskId,
+    /// Address written into the instances created through `BatchBuilder`.
+    render_task_address: RenderTaskAddress,
+}
+
+impl AlphaBatchBuilder {
+    /// Creates a builder with empty batch lists.
+    ///
+    /// `screen_size` determines the area above which an opaque primitive is
+    /// only matched against the most recent opaque batch; `lookback_count`
+    /// limits how many opaque batches are searched otherwise.
+    pub fn new(
+        screen_size: DeviceIntSize,
+        break_advanced_blend_batches: bool,
+        lookback_count: usize,
+        render_task_id: RenderTaskId,
+        render_task_address: RenderTaskAddress,
+    ) -> Self {
+        // The threshold for creating a new batch is
+        // one quarter the screen size.
+        // Each dimension is converted to f32 before multiplying so that the
+        // product cannot overflow integer arithmetic for very large sizes.
+        let batch_area_threshold =
+            (screen_size.width as f32) * (screen_size.height as f32) / 4.0;
+
+        AlphaBatchBuilder {
+            alpha_batch_list: AlphaBatchList::new(break_advanced_blend_batches, 128),
+            opaque_batch_list: OpaqueBatchList::new(batch_area_threshold, lookback_count),
+            render_task_id,
+            render_task_address,
+        }
+    }
+
+    /// Clear all current batches in this builder. This is typically used
+    /// when a primitive is encountered that occludes all previous
+    /// content in this batch list.
+    fn clear(&mut self) {
+        self.alpha_batch_list.clear();
+        self.opaque_batch_list.clear();
+    }
+
+    /// Consumes the builder and hands over its batches.
+    ///
+    /// Without a scissor rect the batches are merged into `merged_batches`;
+    /// with one, they are pushed to `batch_containers` as a container of
+    /// their own.
+    pub fn build(
+        mut self,
+        batch_containers: &mut Vec<AlphaBatchContainer>,
+        merged_batches: &mut AlphaBatchContainer,
+        task_rect: DeviceIntRect,
+        task_scissor_rect: Option<DeviceIntRect>,
+    ) {
+        // Puts the opaque instances in their final (reversed) order.
+        self.opaque_batch_list.finalize();
+
+        if task_scissor_rect.is_none() {
+            merged_batches.merge(self, &task_rect);
+        } else {
+            batch_containers.push(AlphaBatchContainer {
+                alpha_batches: self.alpha_batch_list.batches,
+                opaque_batches: self.opaque_batch_list.batches,
+                task_scissor_rect,
+                task_rect,
+            });
+        }
+    }
+
+    /// Appends one instance to the batch selected for `key`.
+    pub fn push_single_instance(
+        &mut self,
+        key: BatchKey,
+        features: BatchFeatures,
+        bounding_rect: &PictureRect,
+        z_id: ZBufferId,
+        instance: PrimitiveInstanceData,
+    ) {
+        self.set_params_and_get_batch(key, features, bounding_rect, z_id)
+            .push(instance);
+    }
+
+    /// Returns the instance list of the batch selected for `key`, taken from
+    /// the opaque list for `BlendMode::None` and from the alpha list for all
+    /// other blend modes. `z_id` is only used by the alpha list.
+    pub fn set_params_and_get_batch(
+        &mut self,
+        key: BatchKey,
+        features: BatchFeatures,
+        bounding_rect: &PictureRect,
+        z_id: ZBufferId,
+    ) -> &mut Vec<PrimitiveInstanceData> {
+        // The match is kept exhaustive on purpose so that a new blend mode
+        // has to be classified explicitly.
+        match key.blend_mode {
+            BlendMode::None => {
+                self.opaque_batch_list
+                    .set_params_and_get_batch(key, features, bounding_rect)
+            }
+            BlendMode::Alpha |
+            BlendMode::PremultipliedAlpha |
+            BlendMode::PremultipliedDestOut |
+            BlendMode::SubpixelWithBgColor |
+            BlendMode::SubpixelDualSource |
+            BlendMode::Advanced(_) |
+            BlendMode::MultiplyDualSource |
+            BlendMode::Screen |
+            BlendMode::Exclusion |
+            BlendMode::PlusLighter => {
+                self.alpha_batch_list
+                    .set_params_and_get_batch(key, features, bounding_rect, z_id)
+            }
+        }
+    }
+}
+
+/// Supports (recursively) adding a list of primitives and pictures to an alpha batch
+/// builder. In future, it will support multiple dirty regions / slices, allowing the
+/// contents of a picture to be spliced into multiple batch builders.
+pub struct BatchBuilder {
+    /// A temporary buffer that is used during glyph fetching, stored here
+    /// to reduce memory allocations.
+    glyph_fetch_buffer: Vec<GlyphFetchResult>,
+
+    /// The alpha batch builder that receives every instance; handed back by
+    /// `finalize`.
+    batcher: AlphaBatchBuilder,
+}
+
+impl BatchBuilder {
+    /// Wraps `batcher`, starting with an empty glyph fetch buffer.
+    pub fn new(batcher: AlphaBatchBuilder) -> Self {
+        let glyph_fetch_buffer = Vec::new();
+
+        Self { glyph_fetch_buffer, batcher }
+    }
+
+    /// Consumes the builder and returns the wrapped alpha batch builder.
+    pub fn finalize(self) -> AlphaBatchBuilder {
+        let BatchBuilder { batcher, .. } = self;
+        batcher
+    }
+
+    /// Builds a `BrushInstance` from the given fields, using the batcher's
+    /// render task address, and pushes it to the batch selected for
+    /// `batch_key`.
+    fn add_brush_instance_to_batches(
+        &mut self,
+        batch_key: BatchKey,
+        features: BatchFeatures,
+        bounding_rect: &PictureRect,
+        z_id: ZBufferId,
+        segment_index: i32,
+        edge_flags: EdgeAaSegmentMask,
+        clip_task_address: RenderTaskAddress,
+        brush_flags: BrushFlags,
+        prim_header_index: PrimitiveHeaderIndex,
+        resource_address: i32,
+    ) {
+        let instance_data = PrimitiveInstanceData::from(BrushInstance {
+            segment_index,
+            edge_flags,
+            clip_task_address,
+            render_task_address: self.batcher.render_task_address,
+            brush_flags,
+            prim_header_index,
+            resource_address,
+        });
+
+        self.batcher.push_single_instance(
+            batch_key,
+            features,
+            bounding_rect,
+            z_id,
+            instance_data,
+        );
+    }
+
+    /// Builds a `SplitCompositeInstance` (with `z_id` as its `z` value and
+    /// the batcher's render task address) and pushes it to the batch
+    /// selected for `batch_key`.
+    fn add_split_composite_instance_to_batches(
+        &mut self,
+        batch_key: BatchKey,
+        features: BatchFeatures,
+        bounding_rect: &PictureRect,
+        z_id: ZBufferId,
+        prim_header_index: PrimitiveHeaderIndex,
+        polygons_address: i32,
+    ) {
+        let instance = SplitCompositeInstance {
+            prim_header_index,
+            render_task_address: self.batcher.render_task_address,
+            polygons_address,
+            z: z_id,
+        };
+
+        self.batcher.push_single_instance(
+            batch_key,
+            features,
+            bounding_rect,
+            z_id,
+            PrimitiveInstanceData::from(instance),
+        );
+    }
+
+    /// Clear all current batches. This is typically used when a primitive
+    /// is encountered that occludes all previous content in this batch list.
+    /// Both the alpha and the opaque batch lists of the wrapped builder are
+    /// emptied.
+    fn clear_batches(&mut self) {
+        self.batcher.clear();
+    }
+
+    // Adds a primitive to a batch.
+    // It can recursively call itself in some situations, for
+    // example if it encounters a picture where the items
+    // in that picture are being drawn into the same target.
+    pub fn add_prim_to_batch(
+        &mut self,
+        cmd: &PrimitiveCommand,
+        prim_spatial_node_index: SpatialNodeIndex,
+        ctx: &RenderTargetContext,
+        gpu_cache: &mut GpuCache,
+        render_tasks: &RenderTaskGraph,
+        prim_headers: &mut PrimitiveHeaders,
+        transforms: &mut TransformPalette,
+        root_spatial_node_index: SpatialNodeIndex,
+        surface_spatial_node_index: SpatialNodeIndex,
+        z_generator: &mut ZBufferIdGenerator,
+        prim_instances: &[PrimitiveInstance],
+        _gpu_buffer_builder: &mut GpuBufferBuilder,
+    ) {
+        let (prim_instance_index, extra_prim_gpu_address) = match cmd {
+            PrimitiveCommand::Simple { prim_instance_index } => {
+                (prim_instance_index, None)
+            }
+            PrimitiveCommand::Complex { prim_instance_index, gpu_address } => {
+                (prim_instance_index, Some(gpu_address.as_int()))
+            }
+            PrimitiveCommand::Instance { prim_instance_index, gpu_buffer_address } => {
+                (prim_instance_index, Some(gpu_buffer_address.as_int()))
+            }
+        };
+
+        let prim_instance = &prim_instances[prim_instance_index.0 as usize];
+        let is_anti_aliased = ctx.data_stores.prim_has_anti_aliasing(prim_instance);
+
+        let brush_flags = if is_anti_aliased {
+            BrushFlags::FORCE_AA
+        } else {
+            BrushFlags::empty()
+        };
+
+        let vis_flags = match prim_instance.vis.state {
+            VisibilityState::Culled => {
+                return;
+            }
+            VisibilityState::PassThrough |
+            VisibilityState::Unset => {
+                panic!("bug: invalid visibility state");
+            }
+            VisibilityState::Visible { vis_flags, .. } => {
+                vis_flags
+            }
+        };
+
+        // If this primitive is a backdrop, that means that it is known to cover
+        // the entire picture cache background. In that case, the renderer will
+        // use the backdrop color as a clear color, and so we can drop this
+        // primitive and any prior primitives from the batch lists for this
+        // picture cache slice.
+        if vis_flags.contains(PrimitiveVisibilityFlags::IS_BACKDROP) {
+            self.clear_batches();
+            return;
+        }
+
+        let transform_id = transforms
+            .get_id(
+                prim_spatial_node_index,
+                root_spatial_node_index,
+                ctx.spatial_tree,
+            );
+
+        // TODO(gw): Calculating this for every primitive is a bit
+        //           wasteful. We should probably cache this in
+        //           the scroll node...
+        let transform_kind = transform_id.transform_kind();
+        let prim_info = &prim_instance.vis;
+        let bounding_rect = &prim_info.clip_chain.pic_coverage_rect;
+
+        let z_id = z_generator.next();
+
+        let prim_rect = ctx.data_stores.get_local_prim_rect(
+            prim_instance,
+            &ctx.prim_store.pictures,
+            ctx.surfaces,
+        );
+
+        let mut batch_features = BatchFeatures::empty();
+        if ctx.data_stores.prim_may_need_repetition(prim_instance) {
+            batch_features |= BatchFeatures::REPETITION;
+        }
+
+        if transform_kind != TransformedRectKind::AxisAligned || is_anti_aliased {
+            batch_features |= BatchFeatures::ANTIALIASING;
+        }
+
+        // Check if the primitive might require a clip mask.
+        if prim_info.clip_task_index != ClipTaskIndex::INVALID {
+            batch_features |= BatchFeatures::CLIP_MASK;
+        }
+
+        if !bounding_rect.is_empty() {
+            debug_assert_eq!(prim_info.clip_chain.pic_spatial_node_index, surface_spatial_node_index,
+                "The primitive's bounding box is specified in a different coordinate system from the current batch!");
+        }
+
+        match prim_instance.kind {
+            PrimitiveInstanceKind::Clear { data_handle } => {
+                let prim_data = &ctx.data_stores.prim[data_handle];
+                let prim_cache_address = gpu_cache.get_address(&prim_data.gpu_cache_handle);
+
+                let (clip_task_address, clip_mask_texture_id) = ctx.get_prim_clip_task_and_texture(
+                    prim_info.clip_task_index,
+                    render_tasks,
+                ).unwrap();
+
+                // TODO(gw): We can abstract some of the common code below into
+                //           helper methods, as we port more primitives to make
+                //           use of interning.
+
+                let prim_header = PrimitiveHeader {
+                    local_rect: prim_rect,
+                    local_clip_rect: prim_info.clip_chain.local_clip_rect,
+                    specific_prim_address: prim_cache_address,
+                    transform_id,
+                };
+
+                let prim_header_index = prim_headers.push(
+                    &prim_header,
+                    z_id,
+                    [get_shader_opacity(1.0), 0, 0, 0],
+                );
+
+                let batch_key = BatchKey {
+                    blend_mode: BlendMode::PremultipliedDestOut,
+                    kind: BatchKind::Brush(BrushBatchKind::Solid),
+                    textures: BatchTextures::prim_untextured(clip_mask_texture_id),
+                };
+
+                self.add_brush_instance_to_batches(
+                    batch_key,
+                    batch_features,
+                    bounding_rect,
+                    z_id,
+                    INVALID_SEGMENT_INDEX,
+                    prim_data.edge_aa_mask,
+                    clip_task_address,
+                    brush_flags | BrushFlags::PERSPECTIVE_INTERPOLATION,
+                    prim_header_index,
+                    0,
+                );
+            }
+            PrimitiveInstanceKind::NormalBorder { data_handle, ref render_task_ids, .. } => {
+                let prim_data = &ctx.data_stores.normal_border[data_handle];
+                let common_data = &prim_data.common;
+                let prim_cache_address = gpu_cache.get_address(&common_data.gpu_cache_handle);
+                let task_ids = &ctx.scratch.border_cache_handles[*render_task_ids];
+                let specified_blend_mode = BlendMode::PremultipliedAlpha;
+                let mut segment_data: SmallVec<[SegmentInstanceData; 8]> = SmallVec::new();
+
+                // Collect the segment instance data from each render
+                // task for each valid edge / corner of the border.
+
+                for task_id in task_ids {
+                    if let Some((uv_rect_address, texture)) = render_tasks.resolve_location(*task_id, gpu_cache) {
+                        segment_data.push(
+                            SegmentInstanceData {
+                                textures: TextureSet::prim_textured(texture),
+                                specific_resource_address: uv_rect_address.as_int(),
+                            }
+                        );
+                    }
+                }
+
+                // TODO: it would be less error-prone to get this info from the texture cache.
+                let image_buffer_kind = ImageBufferKind::Texture2D;
+
+                let blend_mode = if !common_data.opacity.is_opaque ||
+                    prim_info.clip_task_index != ClipTaskIndex::INVALID ||
+                    transform_kind == TransformedRectKind::Complex ||
+                    is_anti_aliased
+                {
+                    specified_blend_mode
+                } else {
+                    BlendMode::None
+                };
+
+                let prim_header = PrimitiveHeader {
+                    local_rect: prim_rect,
+                    local_clip_rect: prim_info.clip_chain.local_clip_rect,
+                    specific_prim_address: prim_cache_address,
+                    transform_id,
+                };
+
+                let batch_params = BrushBatchParameters::instanced(
+                    BrushBatchKind::Image(image_buffer_kind),
+                    ImageBrushData {
+                        color_mode: ShaderColorMode::Image,
+                        alpha_type: AlphaType::PremultipliedAlpha,
+                        raster_space: RasterizationSpace::Local,
+                        opacity: 1.0,
+                    }.encode(),
+                    segment_data,
+                );
+
+                let prim_header_index = prim_headers.push(
+                    &prim_header,
+                    z_id,
+                    batch_params.prim_user_data,
+                );
+
+                let border_data = &prim_data.kind;
+                self.add_segmented_prim_to_batch(
+                    Some(border_data.brush_segments.as_slice()),
+                    common_data.opacity,
+                    &batch_params,
+                    blend_mode,
+                    batch_features,
+                    brush_flags,
+                    common_data.edge_aa_mask,
+                    prim_header_index,
+                    bounding_rect,
+                    transform_kind,
+                    z_id,
+                    prim_info.clip_task_index,
+                    ctx,
+                    render_tasks,
+                );
+            }
+            PrimitiveInstanceKind::TextRun { data_handle, run_index, .. } => {
+                let run = &ctx.prim_store.text_runs[run_index];
+                let subpx_dir = run.used_font.get_subpx_dir();
+
+                // The GPU cache data is stored in the template and reused across
+                // frames and display lists.
+                let prim_data = &ctx.data_stores.text_run[data_handle];
+                let prim_cache_address = gpu_cache.get_address(&prim_data.gpu_cache_handle);
+
+                // The local prim rect is only informative for text primitives, and
+                // thus is not directly necessary for any drawing of the text run.
+                // However the glyph offsets are relative to the prim rect origin
+                // less the unsnapped reference frame offset. We also want the
+                // snapped reference frame offset, because we cannot recalculate
+                // it as it ignores the animated components for the transform. As
+                // such, we adjust the prim rect origin here, and replace the size
+                // with the unsnapped and snapped offsets respectively. This has
+                // the added bonus of avoiding quantization effects when storing
+                // floats in the extra header integers.
+                let prim_header = PrimitiveHeader {
+                    local_rect: LayoutRect {
+                        min: prim_rect.min - run.reference_frame_relative_offset,
+                        max: run.snapped_reference_frame_relative_offset.to_point(),
+                    },
+                    local_clip_rect: prim_info.clip_chain.local_clip_rect,
+                    specific_prim_address: prim_cache_address,
+                    transform_id,
+                };
+
+                let glyph_keys = &ctx.scratch.glyph_keys[run.glyph_keys_range];
+                let prim_header_index = prim_headers.push(
+                    &prim_header,
+                    z_id,
+                    [
+                        (run.raster_scale * 65535.0).round() as i32,
+                        0,
+                        0,
+                        0,
+                    ],
+                );
+                let base_instance = GlyphInstance::new(
+                    prim_header_index,
+                );
+                let batcher = &mut self.batcher;
+
+                let (clip_task_address, clip_mask_texture_id) = ctx.get_prim_clip_task_and_texture(
+                    prim_info.clip_task_index,
+                    render_tasks,
+                ).unwrap();
+
+                // The run.used_font.clone() is here instead of inline in the `fetch_glyphs`
+                // function call to work around a miscompilation.
+                // https://github.com/rust-lang/rust/issues/80111
+                let font = run.used_font.clone();
+                ctx.resource_cache.fetch_glyphs(
+                    font,
+                    &glyph_keys,
+                    &mut self.glyph_fetch_buffer,
+                    gpu_cache,
+                    |texture_id, glyph_format, glyphs| {
+                        debug_assert_ne!(texture_id, TextureSource::Invalid);
+
+                        let subpx_dir = subpx_dir.limit_by(glyph_format);
+
+                        let textures = BatchTextures::prim_textured(
+                            texture_id,
+                            clip_mask_texture_id,
+                        );
+
+                        let kind = BatchKind::TextRun(glyph_format);
+
+                        let (blend_mode, color_mode) = match glyph_format {
+                            GlyphFormat::Subpixel |
+                            GlyphFormat::TransformedSubpixel => {
+                                if run.used_font.bg_color.a != 0 {
+                                    (
+                                        BlendMode::SubpixelWithBgColor,
+                                        ShaderColorMode::FromRenderPassMode,
+                                    )
+                                } else {
+                                    debug_assert!(ctx.use_dual_source_blending);
+                                    (
+                                        BlendMode::SubpixelDualSource,
+                                        ShaderColorMode::SubpixelDualSource,
+                                    )
+                                }
+                            }
+                            GlyphFormat::Alpha |
+                            GlyphFormat::TransformedAlpha |
+                            GlyphFormat::Bitmap => {
+                                (
+                                    BlendMode::PremultipliedAlpha,
+                                    ShaderColorMode::Alpha,
+                                )
+                            }
+                            GlyphFormat::ColorBitmap => {
+                                (
+                                    BlendMode::PremultipliedAlpha,
+                                    if run.shadow {
+                                        // Ignore color and only sample alpha when shadowing.
+                                        ShaderColorMode::BitmapShadow
+                                    } else {
+                                        ShaderColorMode::ColorBitmap
+                                    },
+                                )
+                            }
+                        };
+
+                        // Calculate a tighter bounding rect of just the glyphs passed to this
+                        // callback from fetch_glyphs(), rather than using the bounds of the
+                        // entire text run. This improves batching when glyphs are fragmented
+                        // over multiple textures in the texture cache.
+                        // This code is taken from the ps_text_run shader.
+                        let tight_bounding_rect = {
+                            let snap_bias = match subpx_dir {
+                                SubpixelDirection::None => DeviceVector2D::new(0.5, 0.5),
+                                SubpixelDirection::Horizontal => DeviceVector2D::new(0.125, 0.5),
+                                SubpixelDirection::Vertical => DeviceVector2D::new(0.5, 0.125),
+                                SubpixelDirection::Mixed => DeviceVector2D::new(0.125, 0.125),
+                            };
+                            let text_offset = prim_header.local_rect.max.to_vector();
+
+                            let pic_bounding_rect = if run.used_font.flags.contains(FontInstanceFlags::TRANSFORM_GLYPHS) {
+                                let mut device_bounding_rect = DeviceRect::default();
+
+                                let glyph_transform = ctx.spatial_tree.get_relative_transform(
+                                    prim_spatial_node_index,
+                                    root_spatial_node_index,
+                                ).into_transform()
+                                    .with_destination::<WorldPixel>()
+                                    .then(&euclid::Transform3D::from_scale(ctx.global_device_pixel_scale));
+
+                                let glyph_translation = DeviceVector2D::new(glyph_transform.m41, glyph_transform.m42);
+
+                                let mut use_tight_bounding_rect = true;
+                                for glyph in glyphs {
+                                    let glyph_offset = prim_data.glyphs[glyph.index_in_text_run as usize].point + prim_header.local_rect.min.to_vector();
+
+                                    let transformed_offset = match glyph_transform.transform_point2d(glyph_offset) {
+                                        Some(transformed_offset) => transformed_offset,
+                                        None => {
+                                            use_tight_bounding_rect = false;
+                                            break;
+                                        }
+                                    };
+                                    let raster_glyph_offset = (transformed_offset + snap_bias).floor();
+                                    let raster_text_offset = (
+                                        glyph_transform.transform_vector2d(text_offset) +
+                                        glyph_translation +
+                                        DeviceVector2D::new(0.5, 0.5)
+                                    ).floor() - glyph_translation;
+
+                                    let device_glyph_rect = DeviceRect::from_origin_and_size(
+                                        glyph.offset + raster_glyph_offset.to_vector() + raster_text_offset,
+                                        glyph.size.to_f32(),
+                                    );
+
+                                    device_bounding_rect = device_bounding_rect.union(&device_glyph_rect);
+                                }
+
+                                if use_tight_bounding_rect {
+                                    let map_device_to_surface: SpaceMapper<PicturePixel, DevicePixel> = SpaceMapper::new_with_target(
+                                        root_spatial_node_index,
+                                        surface_spatial_node_index,
+                                        device_bounding_rect,
+                                        ctx.spatial_tree,
+                                    );
+
+                                    match map_device_to_surface.unmap(&device_bounding_rect) {
+                                        Some(r) => r.intersection(bounding_rect),
+                                        None => Some(*bounding_rect),
+                                    }
+                                } else {
+                                    Some(*bounding_rect)
+                                }
+                            } else {
+                                let mut local_bounding_rect = LayoutRect::default();
+
+                                let glyph_raster_scale = run.raster_scale * ctx.global_device_pixel_scale.get();
+
+                                for glyph in glyphs {
+                                    let glyph_offset = prim_data.glyphs[glyph.index_in_text_run as usize].point + prim_header.local_rect.min.to_vector();
+                                    let glyph_scale = LayoutToDeviceScale::new(glyph_raster_scale / glyph.scale);
+                                    let raster_glyph_offset = (glyph_offset * LayoutToDeviceScale::new(glyph_raster_scale) + snap_bias).floor() / glyph.scale;
+                                    let local_glyph_rect = LayoutRect::from_origin_and_size(
+                                        (glyph.offset + raster_glyph_offset.to_vector()) / glyph_scale + text_offset,
+                                        glyph.size.to_f32() / glyph_scale,
+                                    );
+
+                                    local_bounding_rect = local_bounding_rect.union(&local_glyph_rect);
+                                }
+
+                                let map_prim_to_surface: SpaceMapper<LayoutPixel, PicturePixel> = SpaceMapper::new_with_target(
+                                    surface_spatial_node_index,
+                                    prim_spatial_node_index,
+                                    *bounding_rect,
+                                    ctx.spatial_tree,
+                                );
+                                map_prim_to_surface.map(&local_bounding_rect)
+                            };
+
+                            let intersected = match pic_bounding_rect {
+                                // The text run may have been clipped, for example if part of it is offscreen.
+                                // So intersect our result with the original bounding rect.
+                                Some(rect) => rect.intersection(bounding_rect).unwrap_or_else(PictureRect::zero),
+                                // If space mapping went off the rails, fall back to the old behavior.
+                                //TODO: consider skipping the glyph run completely in this case.
+                                None => *bounding_rect,
+                            };
+
+                            intersected
+                        };
+
+                        let key = BatchKey::new(kind, blend_mode, textures);
+
+                        let render_task_address = batcher.render_task_address;
+                        let batch = batcher.alpha_batch_list.set_params_and_get_batch(
+                            key,
+                            batch_features,
+                            &tight_bounding_rect,
+                            z_id,
+                        );
+
+                        batch.reserve(glyphs.len());
+                        for glyph in glyphs {
+                            batch.push(base_instance.build(
+                                render_task_address,
+                                clip_task_address,
+                                subpx_dir,
+                                glyph.index_in_text_run,
+                                glyph.uv_rect_address,
+                                color_mode,
+                            ));
+                        }
+                    },
+                );
+            }
+            PrimitiveInstanceKind::LineDecoration { data_handle, ref render_task, .. } => {
+                // The GPU cache data is stored in the template and reused across
+                // frames and display lists.
+                let common_data = &ctx.data_stores.line_decoration[data_handle].common;
+                let prim_cache_address = gpu_cache.get_address(&common_data.gpu_cache_handle);
+
+                let (clip_task_address, clip_mask_texture_id) = ctx.get_prim_clip_task_and_texture(
+                    prim_info.clip_task_index,
+                    render_tasks,
+                ).unwrap();
+
+                let (batch_kind, textures, prim_user_data, specific_resource_address) = match render_task {
+                    Some(task_id) => {
+                        let (uv_rect_address, texture) = render_tasks.resolve_location(*task_id, gpu_cache).unwrap();
+                        let textures = BatchTextures::prim_textured(
+                            texture,
+                            clip_mask_texture_id,
+                        );
+                        (
+                            BrushBatchKind::Image(texture.image_buffer_kind()),
+                            textures,
+                            ImageBrushData {
+                                color_mode: ShaderColorMode::Image,
+                                alpha_type: AlphaType::PremultipliedAlpha,
+                                raster_space: RasterizationSpace::Local,
+                                opacity: 1.0,
+                            }.encode(),
+                            uv_rect_address.as_int(),
+                        )
+                    }
+                    None => {
+                        (
+                            BrushBatchKind::Solid,
+                            BatchTextures::prim_untextured(clip_mask_texture_id),
+                            [get_shader_opacity(1.0), 0, 0, 0],
+                            0,
+                        )
+                    }
+                };
+
+                // TODO(gw): We can abstract some of the common code below into
+                //           helper methods, as we port more primitives to make
+                //           use of interning.
+                let blend_mode = if !common_data.opacity.is_opaque ||
+                    prim_info.clip_task_index != ClipTaskIndex::INVALID ||
+                    transform_kind == TransformedRectKind::Complex ||
+                    is_anti_aliased
+                {
+                    BlendMode::PremultipliedAlpha
+                } else {
+                    BlendMode::None
+                };
+
+                let prim_header = PrimitiveHeader {
+                    local_rect: prim_rect,
+                    local_clip_rect: prim_info.clip_chain.local_clip_rect,
+                    specific_prim_address: prim_cache_address,
+                    transform_id,
+                };
+
+                let prim_header_index = prim_headers.push(
+                    &prim_header,
+                    z_id,
+                    prim_user_data,
+                );
+
+                let batch_key = BatchKey {
+                    blend_mode,
+                    kind: BatchKind::Brush(batch_kind),
+                    textures,
+                };
+
+                self.add_brush_instance_to_batches(
+                    batch_key,
+                    batch_features,
+                    bounding_rect,
+                    z_id,
+                    INVALID_SEGMENT_INDEX,
+                    common_data.edge_aa_mask,
+                    clip_task_address,
+                    brush_flags | BrushFlags::PERSPECTIVE_INTERPOLATION,
+                    prim_header_index,
+                    specific_resource_address,
+                );
+            }
+            PrimitiveInstanceKind::Picture { pic_index, segment_instance_index, .. } => {
+                let picture = &ctx.prim_store.pictures[pic_index.0];
+                let blend_mode = BlendMode::PremultipliedAlpha;
+                let prim_cache_address = gpu_cache.get_address(&ctx.globals.default_image_handle);
+
+                match picture.raster_config {
+                    Some(ref raster_config) => {
+                        // If the child picture was rendered in local space, we can safely
+                        // interpolate the UV coordinates with perspective correction.
+                        let brush_flags = brush_flags | BrushFlags::PERSPECTIVE_INTERPOLATION;
+
+                        let surface = &ctx.surfaces[raster_config.surface_index.0];
+                        let mut local_clip_rect = prim_info.clip_chain.local_clip_rect;
+
+                        // If we are drawing with snapping enabled, form a simple transform that just applies
+                        // the scale / translation from the raster transform. Otherwise, in edge cases where the
+                        // intermediate surface has a non-identity but axis-aligned transform (e.g. a 180 degree
+                        // rotation) it can be applied twice.
+                        let transform_id = if surface.surface_spatial_node_index == surface.raster_spatial_node_index {
+                            transform_id
+                        } else {
+                            let map_local_to_raster = SpaceMapper::new_with_target(
+                                root_spatial_node_index,
+                                surface.surface_spatial_node_index,
+                                LayoutRect::max_rect(),
+                                ctx.spatial_tree,
+                            );
+
+                            let raster_rect = map_local_to_raster
+                                .map(&prim_rect)
+                                .unwrap();
+
+                            let sx = (raster_rect.max.x - raster_rect.min.x) / (prim_rect.max.x - prim_rect.min.x);
+                            let sy = (raster_rect.max.y - raster_rect.min.y) / (prim_rect.max.y - prim_rect.min.y);
+
+                            let tx = raster_rect.min.x - sx * prim_rect.min.x;
+                            let ty = raster_rect.min.y - sy * prim_rect.min.y;
+
+                            let transform = ScaleOffset::new(sx, sy, tx, ty);
+
+                            let raster_clip_rect = map_local_to_raster
+                                .map(&prim_info.clip_chain.local_clip_rect)
+                                .unwrap();
+                            local_clip_rect = transform.unmap_rect(&raster_clip_rect);
+
+                            transforms.get_custom(transform.to_transform())
+                        };
+
+                        let prim_header = PrimitiveHeader {
+                            local_rect: prim_rect,
+                            local_clip_rect,
+                            specific_prim_address: prim_cache_address,
+                            transform_id,
+                        };
+
+                        let mut is_opaque = prim_info.clip_task_index == ClipTaskIndex::INVALID
+                            && surface.is_opaque
+                            && transform_kind == TransformedRectKind::AxisAligned
+                            && !is_anti_aliased;
+
+                        let pic_task_id = picture.primary_render_task_id.unwrap();
+
+                        match raster_config.composite_mode {
+                            PictureCompositeMode::TileCache { .. } => {
+                                // TODO(gw): For now, TileCache is still a composite mode, even though
+                                //           it will only exist as a top level primitive and never
+                                //           be encountered during batching. Consider making TileCache
+                                //           a standalone type, not a picture.
+                            }
+                            PictureCompositeMode::IntermediateSurface { .. } => {
+                                // TODO(gw): As an optimization, support making this a pass-through
+                                //           and/or drawing directly from here when possible
+                                //           (e.g. if not wrapped by filters / different spatial node).
+                            }
+                            PictureCompositeMode::Filter(ref filter) => {
+                                assert!(filter.is_visible());
+                                match filter {
+                                    Filter::Blur { .. } => {
+                                        let (clip_task_address, clip_mask_texture_id) = ctx.get_prim_clip_task_and_texture(
+                                            prim_info.clip_task_index,
+                                            render_tasks,
+                                        ).unwrap();
+
+                                        let kind = BatchKind::Brush(
+                                            BrushBatchKind::Image(ImageBufferKind::Texture2D)
+                                        );
+
+                                        let (uv_rect_address, texture) = render_tasks.resolve_location(
+                                            pic_task_id,
+                                            gpu_cache,
+                                        ).unwrap();
+                                        let textures = BatchTextures::prim_textured(
+                                            texture,
+                                            clip_mask_texture_id,
+                                        );
+
+                                        let key = BatchKey::new(
+                                            kind,
+                                            blend_mode,
+                                            textures,
+                                        );
+                                        let prim_header_index = prim_headers.push(
+                                            &prim_header,
+                                            z_id,
+                                            ImageBrushData {
+                                                color_mode: ShaderColorMode::Image,
+                                                alpha_type: AlphaType::PremultipliedAlpha,
+                                                raster_space: RasterizationSpace::Screen,
+                                                opacity: 1.0,
+                                            }.encode(),
+                                        );
+
+                                        self.add_brush_instance_to_batches(
+                                            key,
+                                            batch_features,
+                                            bounding_rect,
+                                            z_id,
+                                            INVALID_SEGMENT_INDEX,
+                                            EdgeAaSegmentMask::all(),
+                                            clip_task_address,
+                                            brush_flags,
+                                            prim_header_index,
+                                            uv_rect_address.as_int(),
+                                        );
+                                    }
+                                    Filter::DropShadows(shadows) => {
+                                        let (clip_task_address, clip_mask_texture_id) = ctx.get_prim_clip_task_and_texture(
+                                            prim_info.clip_task_index,
+                                            render_tasks,
+                                        ).unwrap();
+
+                                        // Draw an instance per shadow first, followed by the content.
+
+                                        // The shadows and the content get drawn as a brush image.
+                                        let kind = BatchKind::Brush(
+                                            BrushBatchKind::Image(ImageBufferKind::Texture2D),
+                                        );
+
+                                        // Gets the saved render task ID of the content, which is
+                                        // deeper in the render task graph than the direct child.
+                                        let secondary_id = picture.secondary_render_task_id.expect("no secondary!?");
+                                        let content_source = {
+                                            let secondary_task = &render_tasks[secondary_id];
+                                            let texture_id = secondary_task.get_target_texture();
+                                            TextureSource::TextureCache(
+                                                texture_id,
+                                                Swizzle::default(),
+                                            )
+                                        };
+
+                                        // Retrieve the UV rect addresses for shadow/content.
+                                        let (shadow_uv_rect_address, shadow_texture) = render_tasks.resolve_location(
+                                            pic_task_id,
+                                            gpu_cache,
+                                        ).unwrap();
+                                        let shadow_textures = BatchTextures::prim_textured(
+                                            shadow_texture,
+                                            clip_mask_texture_id,
+                                        );
+
+                                        let content_uv_rect_address = render_tasks[secondary_id]
+                                            .get_texture_address(gpu_cache)
+                                            .as_int();
+
+                                        // Build BatchTextures for shadow/content
+                                        let content_textures = BatchTextures::prim_textured(
+                                            content_source,
+                                            clip_mask_texture_id,
+                                        );
+
+                                        // Build batch keys for shadow/content
+                                        let shadow_key = BatchKey::new(kind, blend_mode, shadow_textures);
+                                        let content_key = BatchKey::new(kind, blend_mode, content_textures);
+
+                                        for (shadow, shadow_gpu_data) in shadows.iter().zip(picture.extra_gpu_data_handles.iter()) {
+                                            // Get the GPU cache address of the extra data handle.
+                                            let shadow_prim_address = gpu_cache.get_address(shadow_gpu_data);
+
+                                            let shadow_rect = prim_header.local_rect.translate(shadow.offset);
+
+                                            let shadow_prim_header = PrimitiveHeader {
+                                                local_rect: shadow_rect,
+                                                specific_prim_address: shadow_prim_address,
+                                                ..prim_header
+                                            };
+
+                                            let shadow_prim_header_index = prim_headers.push(
+                                                &shadow_prim_header,
+                                                z_id,
+                                                ImageBrushData {
+                                                    color_mode: ShaderColorMode::Alpha,
+                                                    alpha_type: AlphaType::PremultipliedAlpha,
+                                                    raster_space: RasterizationSpace::Screen,
+                                                    opacity: 1.0,
+                                                }.encode(),
+                                            );
+
+                                            self.add_brush_instance_to_batches(
+                                                shadow_key,
+                                                batch_features,
+                                                bounding_rect,
+                                                z_id,
+                                                INVALID_SEGMENT_INDEX,
+                                                EdgeAaSegmentMask::all(),
+                                                clip_task_address,
+                                                brush_flags,
+                                                shadow_prim_header_index,
+                                                shadow_uv_rect_address.as_int(),
+                                            );
+                                        }
+                                        let z_id_content = z_generator.next();
+
+                                        let content_prim_header_index = prim_headers.push(
+                                            &prim_header,
+                                            z_id_content,
+                                            ImageBrushData {
+                                                color_mode: ShaderColorMode::Image,
+                                                alpha_type: AlphaType::PremultipliedAlpha,
+                                                raster_space: RasterizationSpace::Screen,
+                                                opacity: 1.0,
+                                            }.encode(),
+                                        );
+
+                                        self.add_brush_instance_to_batches(
+                                            content_key,
+                                            batch_features,
+                                            bounding_rect,
+                                            z_id_content,
+                                            INVALID_SEGMENT_INDEX,
+                                            EdgeAaSegmentMask::all(),
+                                            clip_task_address,
+                                            brush_flags,
+                                            content_prim_header_index,
+                                            content_uv_rect_address,
+                                        );
+                                    }
+                                    Filter::Opacity(_, amount) => {
+                                        let (clip_task_address, clip_mask_texture_id) = ctx.get_prim_clip_task_and_texture(
+                                            prim_info.clip_task_index,
+                                            render_tasks,
+                                        ).unwrap();
+
+                                        let amount = (amount * 65536.0) as i32;
+
+                                        let (uv_rect_address, texture) = render_tasks.resolve_location(
+                                            pic_task_id,
+                                            gpu_cache,
+                                        ).unwrap();
+                                        let textures = BatchTextures::prim_textured(
+                                            texture,
+                                            clip_mask_texture_id,
+                                        );
+
+
+                                        let key = BatchKey::new(
+                                            BatchKind::Brush(BrushBatchKind::Opacity),
+                                            BlendMode::PremultipliedAlpha,
+                                            textures,
+                                        );
+
+                                        let prim_header_index = prim_headers.push(&prim_header, z_id, [
+                                            uv_rect_address.as_int(),
+                                            amount,
+                                            0,
+                                            0,
+                                        ]);
+
+                                        self.add_brush_instance_to_batches(
+                                            key,
+                                            batch_features,
+                                            bounding_rect,
+                                            z_id,
+                                            INVALID_SEGMENT_INDEX,
+                                            EdgeAaSegmentMask::all(),
+                                            clip_task_address,
+                                            brush_flags,
+                                            prim_header_index,
+                                            0,
+                                        );
+                                    }
+                                    _ => {
+                                        let (clip_task_address, clip_mask_texture_id) = ctx.get_prim_clip_task_and_texture(
+                                            prim_info.clip_task_index,
+                                            render_tasks,
+                                        ).unwrap();
+
+                                        // Must be kept in sync with brush_blend.glsl
+                                        let filter_mode = filter.as_int();
+
+                                        let user_data = match filter {
+                                            Filter::Identity => 0x10000i32, // matches `Contrast(1)`
+                                            Filter::Contrast(amount) |
+                                            Filter::Grayscale(amount) |
+                                            Filter::Invert(amount) |
+                                            Filter::Saturate(amount) |
+                                            Filter::Sepia(amount) |
+                                            Filter::Brightness(amount) => {
+                                                (amount * 65536.0) as i32
+                                            }
+                                            Filter::SrgbToLinear | Filter::LinearToSrgb => 0,
+                                            Filter::HueRotate(angle) => {
+                                                (0.01745329251 * angle * 65536.0) as i32
+                                            }
+                                            Filter::ColorMatrix(_) => {
+                                                picture.extra_gpu_data_handles[0].as_int(gpu_cache)
+                                            }
+                                            Filter::Flood(_) => {
+                                                picture.extra_gpu_data_handles[0].as_int(gpu_cache)
+                                            }
+
+                                            // These filters are handled via different paths.
+                                            Filter::ComponentTransfer |
+                                            Filter::Blur { .. } |
+                                            Filter::DropShadows(..) |
+                                            Filter::Opacity(..) => unreachable!(),
+                                        };
+
+                                        // Other filters that may introduce opacity are handled via different
+                                        // paths.
+                                        if let Filter::ColorMatrix(..) = filter {
+                                            is_opaque = false;
+                                        }
+
+                                        let (uv_rect_address, texture) = render_tasks.resolve_location(
+                                            pic_task_id,
+                                            gpu_cache,
+                                        ).unwrap();
+                                        let textures = BatchTextures::prim_textured(
+                                            texture,
+                                            clip_mask_texture_id,
+                                        );
+
+                                        let blend_mode = if is_opaque {
+                                            BlendMode::None
+                                        } else {
+                                            BlendMode::PremultipliedAlpha
+                                        };
+
+                                        let key = BatchKey::new(
+                                            BatchKind::Brush(BrushBatchKind::Blend),
+                                            blend_mode,
+                                            textures,
+                                        );
+
+                                        let prim_header_index = prim_headers.push(&prim_header, z_id, [
+                                            uv_rect_address.as_int(),
+                                            filter_mode,
+                                            user_data,
+                                            0,
+                                        ]);
+
+                                        self.add_brush_instance_to_batches(
+                                            key,
+                                            batch_features,
+                                            bounding_rect,
+                                            z_id,
+                                            INVALID_SEGMENT_INDEX,
+                                            EdgeAaSegmentMask::all(),
+                                            clip_task_address,
+                                            brush_flags,
+                                            prim_header_index,
+                                            0,
+                                        );
+                                    }
+                                }
+                            }
+                            PictureCompositeMode::ComponentTransferFilter(handle) => {
+                                // This is basically the same as the general filter case above
+                                // except we store a little more data in the filter mode and
+                                // a gpu cache handle in the user data.
+                                let filter_data = &ctx.data_stores.filter_data[handle];
+                                let filter_mode : i32 = Filter::ComponentTransfer.as_int() |
+                                    ((filter_data.data.r_func.to_int() << 28 |
+                                      filter_data.data.g_func.to_int() << 24 |
+                                      filter_data.data.b_func.to_int() << 20 |
+                                      filter_data.data.a_func.to_int() << 16) as i32);
+
+                                let user_data = filter_data.gpu_cache_handle.as_int(gpu_cache);
+
+                                let (clip_task_address, clip_mask_texture_id) = ctx.get_prim_clip_task_and_texture(
+                                    prim_info.clip_task_index,
+                                    render_tasks,
+                                ).unwrap();
+
+                                let (uv_rect_address, texture) = render_tasks.resolve_location(
+                                    pic_task_id,
+                                    gpu_cache,
+                                ).unwrap();
+                                let textures = BatchTextures::prim_textured(
+                                    texture,
+                                    clip_mask_texture_id,
+                                );
+
+                                let key = BatchKey::new(
+                                    BatchKind::Brush(BrushBatchKind::Blend),
+                                    BlendMode::PremultipliedAlpha,
+                                    textures,
+                                );
+
+                                let prim_header_index = prim_headers.push(&prim_header, z_id, [
+                                    uv_rect_address.as_int(),
+                                    filter_mode,
+                                    user_data,
+                                    0,
+                                ]);
+
+                                self.add_brush_instance_to_batches(
+                                    key,
+                                    batch_features,
+                                    bounding_rect,
+                                    z_id,
+                                    INVALID_SEGMENT_INDEX,
+                                    EdgeAaSegmentMask::all(),
+                                    clip_task_address,
+                                    brush_flags,
+                                    prim_header_index,
+                                    0,
+                                );
+                            }
+                            PictureCompositeMode::MixBlend(mode) if BlendMode::from_mix_blend_mode(
+                                mode,
+                                ctx.use_advanced_blending,
+                                !ctx.break_advanced_blend_batches,
+                                ctx.use_dual_source_blending,
+                            ).is_some() => {
+                                let (clip_task_address, clip_mask_texture_id) = ctx.get_prim_clip_task_and_texture(
+                                    prim_info.clip_task_index,
+                                    render_tasks,
+                                ).unwrap();
+
+                                let (uv_rect_address, texture) = render_tasks.resolve_location(
+                                    pic_task_id,
+                                    gpu_cache,
+                                ).unwrap();
+                                let textures = BatchTextures::prim_textured(
+                                    texture,
+                                    clip_mask_texture_id,
+                                );
+
+
+                                let key = BatchKey::new(
+                                    BatchKind::Brush(
+                                        BrushBatchKind::Image(ImageBufferKind::Texture2D),
+                                    ),
+                                    BlendMode::from_mix_blend_mode(
+                                        mode,
+                                        ctx.use_advanced_blending,
+                                        !ctx.break_advanced_blend_batches,
+                                        ctx.use_dual_source_blending,
+                                    ).unwrap(),
+                                    textures,
+                                );
+                                let prim_header_index = prim_headers.push(
+                                    &prim_header,
+                                    z_id,
+                                    ImageBrushData {
+                                        color_mode: match key.blend_mode {
+                                            BlendMode::MultiplyDualSource => ShaderColorMode::MultiplyDualSource,
+                                            _ => ShaderColorMode::Image,
+                                        },
+                                        alpha_type: AlphaType::PremultipliedAlpha,
+                                        raster_space: RasterizationSpace::Screen,
+                                        opacity: 1.0,
+                                    }.encode(),
+                                );
+
+                                self.add_brush_instance_to_batches(
+                                    key,
+                                    batch_features,
+                                    bounding_rect,
+                                    z_id,
+                                    INVALID_SEGMENT_INDEX,
+                                    EdgeAaSegmentMask::all(),
+                                    clip_task_address,
+                                    brush_flags,
+                                    prim_header_index,
+                                    uv_rect_address.as_int(),
+                                );
+                            }
+                            PictureCompositeMode::MixBlend(mode) => {
+                                let (clip_task_address, clip_mask_texture_id) = ctx.get_prim_clip_task_and_texture(
+                                    prim_info.clip_task_index,
+                                    render_tasks,
+                                ).unwrap();
+                                let backdrop_id = picture.secondary_render_task_id.expect("no backdrop!?");
+
+                                let color0 = render_tasks[backdrop_id].get_target_texture();
+                                let color1 = render_tasks[pic_task_id].get_target_texture();
+
+                                // Create a separate brush instance for each batcher. For most cases,
+                                // there is only one batcher. However, in the case of drawing onto
+                                // a picture cache, there is one batcher per tile. Although not
+                                // currently used, the implementation of mix-blend-mode now supports
+                                // doing partial readbacks per-tile. In future, this will be enabled
+                                // and allow mix-blends to operate on picture cache surfaces without
+                                // a separate isolated intermediate surface.
+
+                                let render_task_address = self.batcher.render_task_address;
+
+                                let batch_key = BatchKey::new(
+                                    BatchKind::Brush(
+                                        BrushBatchKind::MixBlend {
+                                            task_id: self.batcher.render_task_id,
+                                            backdrop_id,
+                                        },
+                                    ),
+                                    BlendMode::PremultipliedAlpha,
+                                    BatchTextures {
+                                        input: TextureSet {
+                                            colors: [
+                                                TextureSource::TextureCache(
+                                                    color0,
+                                                    Swizzle::default(),
+                                                ),
+                                                TextureSource::TextureCache(
+                                                    color1,
+                                                    Swizzle::default(),
+                                                ),
+                                                TextureSource::Invalid,
+                                            ],
+                                        },
+                                        clip_mask: clip_mask_texture_id,
+                                    },
+                                );
+                                let src_uv_address = render_tasks[pic_task_id].get_texture_address(gpu_cache);
+                                let readback_uv_address = render_tasks[backdrop_id].get_texture_address(gpu_cache);
+                                let prim_header_index = prim_headers.push(&prim_header, z_id, [
+                                    mode as u32 as i32,
+                                    readback_uv_address.as_int(),
+                                    src_uv_address.as_int(),
+                                    0,
+                                ]);
+
+                                let instance = BrushInstance {
+                                    segment_index: INVALID_SEGMENT_INDEX,
+                                    edge_flags: EdgeAaSegmentMask::all(),
+                                    clip_task_address,
+                                    render_task_address,
+                                    brush_flags,
+                                    prim_header_index,
+                                    resource_address: 0,
+                                };
+
+                                self.batcher.push_single_instance(
+                                    batch_key,
+                                    batch_features,
+                                    bounding_rect,
+                                    z_id,
+                                    PrimitiveInstanceData::from(instance),
+                                );
+                            }
+                            PictureCompositeMode::Blit(_) => {
+                                match picture.context_3d {
+                                    Picture3DContext::In { root_data: Some(_), .. } => {
+                                        unreachable!("bug: should not have a raster_config");
+                                    }
+                                    Picture3DContext::In { root_data: None, .. } => {
+                                        // TODO(gw): Store this inside the split picture so that we
+                                        //           don't need to pass in extra_prim_gpu_address for
+                                        //           every prim instance.
+                                        // TODO(gw): Ideally we'd skip adding 3d child prims to batches
+                                        //           without gpu cache address but it's currently
+                                        //           used by the prepare pass. Refactor this!
+                                        let extra_prim_gpu_address = match extra_prim_gpu_address {
+                                            Some(prim_address) => prim_address,
+                                            None => return,
+                                        };
+
+                                        // Get clip task, if set, for the picture primitive.
+                                        let (child_clip_task_address, clip_mask_texture_id) = ctx.get_prim_clip_task_and_texture(
+                                            prim_info.clip_task_index,
+                                            render_tasks,
+                                        ).unwrap();
+
+                                        let prim_header = PrimitiveHeader {
+                                            local_rect: prim_rect,
+                                            local_clip_rect: prim_info.clip_chain.local_clip_rect,
+                                            specific_prim_address: GpuCacheAddress::INVALID,
+                                            transform_id: transforms
+                                                .get_id(
+                                                    prim_spatial_node_index,
+                                                    root_spatial_node_index,
+                                                    ctx.spatial_tree,
+                                                ),
+                                        };
+
+                                        let child_pic_task_id = picture
+                                            .primary_render_task_id
+                                            .unwrap();
+
+                                        let (uv_rect_address, texture) = render_tasks.resolve_location(
+                                            child_pic_task_id,
+                                            gpu_cache,
+                                        ).unwrap();
+                                        let textures = BatchTextures::prim_textured(
+                                            texture,
+                                            clip_mask_texture_id,
+                                        );
+
+                                        // Need a new z-id for each child preserve-3d context added
+                                        // by this inner loop.
+                                        let z_id = z_generator.next();
+
+                                        let prim_header_index = prim_headers.push(&prim_header, z_id, [
+                                            uv_rect_address.as_int(),
+                                            BrushFlags::PERSPECTIVE_INTERPOLATION.bits() as i32,
+                                            0,
+                                            child_clip_task_address.0 as i32,
+                                        ]);
+
+                                        let key = BatchKey::new(
+                                            BatchKind::SplitComposite,
+                                            BlendMode::PremultipliedAlpha,
+                                            textures,
+                                        );
+
+                                        self.add_split_composite_instance_to_batches(
+                                            key,
+                                            BatchFeatures::CLIP_MASK,
+                                            &prim_info.clip_chain.pic_coverage_rect,
+                                            z_id,
+                                            prim_header_index,
+                                            extra_prim_gpu_address,
+                                        );
+                                    }
+                                    Picture3DContext::Out { .. } => {
+                                        let uv_rect_address = render_tasks[pic_task_id]
+                                            .get_texture_address(gpu_cache)
+                                            .as_int();
+                                        let cache_render_task = &render_tasks[pic_task_id];
+                                        let texture_id = cache_render_task.get_target_texture();
+                                        let textures = TextureSet {
+                                            colors: [
+                                                TextureSource::TextureCache(
+                                                    texture_id,
+                                                    Swizzle::default(),
+                                                ),
+                                                TextureSource::Invalid,
+                                                TextureSource::Invalid,
+                                            ],
+                                        };
+                                        let batch_params = BrushBatchParameters::shared(
+                                            BrushBatchKind::Image(ImageBufferKind::Texture2D),
+                                            textures,
+                                            ImageBrushData {
+                                                color_mode: ShaderColorMode::Image,
+                                                alpha_type: AlphaType::PremultipliedAlpha,
+                                                raster_space: RasterizationSpace::Screen,
+                                                opacity: 1.0,
+                                            }.encode(),
+                                            uv_rect_address,
+                                        );
+
+                                        let is_segmented =
+                                            segment_instance_index != SegmentInstanceIndex::INVALID &&
+                                            segment_instance_index != SegmentInstanceIndex::UNUSED;
+
+                                        let (prim_cache_address, segments) = if is_segmented {
+                                            let segment_instance = &ctx.scratch.segment_instances[segment_instance_index];
+                                            let segments = Some(&ctx.scratch.segments[segment_instance.segments_range]);
+                                            (gpu_cache.get_address(&segment_instance.gpu_cache_handle), segments)
+                                        } else {
+                                            (prim_cache_address, None)
+                                        };
+
+                                        let prim_header = PrimitiveHeader {
+                                            specific_prim_address: prim_cache_address,
+                                            ..prim_header
+                                        };
+
+                                        let prim_header_index = prim_headers.push(
+                                            &prim_header,
+                                            z_id,
+                                            batch_params.prim_user_data,
+                                        );
+
+                                        let (opacity, blend_mode) = if is_opaque {
+                                            (PrimitiveOpacity::opaque(), BlendMode::None)
+                                        } else {
+                                            (PrimitiveOpacity::translucent(), BlendMode::PremultipliedAlpha)
+                                        };
+
+                                        self.add_segmented_prim_to_batch(
+                                            segments,
+                                            opacity,
+                                            &batch_params,
+                                            blend_mode,
+                                            batch_features,
+                                            brush_flags,
+                                            EdgeAaSegmentMask::all(),
+                                            prim_header_index,
+                                            bounding_rect,
+                                            transform_kind,
+                                            z_id,
+                                            prim_info.clip_task_index,
+                                            ctx,
+                                            render_tasks,
+                                        );
+                                    }
+                                }
+                            }
+                            PictureCompositeMode::SvgFilter(..) => {
+                                let (clip_task_address, clip_mask_texture_id) = ctx.get_prim_clip_task_and_texture(
+                                    prim_info.clip_task_index,
+                                    render_tasks,
+                                ).unwrap();
+
+                                let kind = BatchKind::Brush(
+                                    BrushBatchKind::Image(ImageBufferKind::Texture2D)
+                                );
+                                let (uv_rect_address, texture) = render_tasks.resolve_location(
+                                    pic_task_id,
+                                    gpu_cache,
+                                ).unwrap();
+                                let textures = BatchTextures::prim_textured(
+                                    texture,
+                                    clip_mask_texture_id,
+                                );
+                                let key = BatchKey::new(
+                                    kind,
+                                    blend_mode,
+                                    textures,
+                                );
+                                let prim_header_index = prim_headers.push(
+                                    &prim_header,
+                                    z_id,
+                                    ImageBrushData {
+                                        color_mode: ShaderColorMode::Image,
+                                        alpha_type: AlphaType::PremultipliedAlpha,
+                                        raster_space: RasterizationSpace::Screen,
+                                        opacity: 1.0,
+                                    }.encode(),
+                                );
+
+                                self.add_brush_instance_to_batches(
+                                    key,
+                                    batch_features,
+                                    bounding_rect,
+                                    z_id,
+                                    INVALID_SEGMENT_INDEX,
+                                    EdgeAaSegmentMask::all(),
+                                    clip_task_address,
+                                    brush_flags,
+                                    prim_header_index,
+                                    uv_rect_address.as_int(),
+                                );
+                            }
+                        }
+                    }
+                    None => {
+                        unreachable!();
+                    }
+                }
+            }
+            PrimitiveInstanceKind::ImageBorder { data_handle, .. } => { // Image border: submitted through the image brush together with the border's brush segments.
+                let prim_data = &ctx.data_stores.image_border[data_handle];
+                let common_data = &prim_data.common;
+                let border_data = &prim_data.kind;
+
+                let (uv_rect_address, texture) = match render_tasks.resolve_location(border_data.src_color, gpu_cache) { // resolve the border's source into a UV rect address and a texture
+                    Some(src) => src,
+                    None => {
+                        return; // source could not be resolved: leave without batching this primitive
+                    }
+                };
+
+                let textures = TextureSet::prim_textured(texture);
+                let prim_cache_address = gpu_cache.get_address(&common_data.gpu_cache_handle);
+                let blend_mode = if !common_data.opacity.is_opaque || // blending is skipped only when the primitive is opaque, has no clip task, is not complex-transformed and is not anti-aliased
+                    prim_info.clip_task_index != ClipTaskIndex::INVALID ||
+                    transform_kind == TransformedRectKind::Complex ||
+                    is_anti_aliased
+                {
+                    BlendMode::PremultipliedAlpha
+                } else {
+                    BlendMode::None
+                };
+
+                let prim_header = PrimitiveHeader {
+                    local_rect: prim_rect,
+                    local_clip_rect: prim_info.clip_chain.local_clip_rect,
+                    specific_prim_address: prim_cache_address,
+                    transform_id,
+                };
+
+                let batch_params = BrushBatchParameters::shared( // one shared parameter set; the batch kind follows the resolved texture's buffer kind
+                    BrushBatchKind::Image(texture.image_buffer_kind()),
+                    textures,
+                    ImageBrushData {
+                        color_mode: ShaderColorMode::Image,
+                        alpha_type: AlphaType::PremultipliedAlpha,
+                        raster_space: RasterizationSpace::Local,
+                        opacity: 1.0,
+                    }.encode(),
+                    uv_rect_address.as_int(),
+                );
+
+                let prim_header_index = prim_headers.push( // the header is stored with the same user data that the batch parameters carry
+                    &prim_header,
+                    z_id,
+                    batch_params.prim_user_data,
+                );
+
+                self.add_segmented_prim_to_batch(
+                    Some(border_data.brush_segments.as_slice()), // the segment list is always passed for image borders
+                    common_data.opacity,
+                    &batch_params,
+                    blend_mode,
+                    batch_features,
+                    brush_flags,
+                    common_data.edge_aa_mask,
+                    prim_header_index,
+                    bounding_rect,
+                    transform_kind,
+                    z_id,
+                    prim_info.clip_task_index,
+                    ctx,
+                    render_tasks,
+                );
+            }
+            PrimitiveInstanceKind::Rectangle { data_handle, segment_instance_index, .. } => { // Rectangle: submitted through the untextured solid brush, optionally with per-instance segments.
+                let prim_data = &ctx.data_stores.prim[data_handle];
+
+                let blend_mode = if !prim_data.opacity.is_opaque || // blending is skipped only when the primitive is opaque, has no clip task, is not complex-transformed and is not anti-aliased
+                    prim_info.clip_task_index != ClipTaskIndex::INVALID ||
+                    transform_kind == TransformedRectKind::Complex ||
+                    is_anti_aliased
+                {
+                    BlendMode::PremultipliedAlpha
+                } else {
+                    BlendMode::None
+                };
+
+                let batch_params = BrushBatchParameters::shared(
+                    BrushBatchKind::Solid,
+                    TextureSet::UNTEXTURED,
+                    [get_shader_opacity(1.0), 0, 0, 0], // prim user data: only the first slot is used, holding get_shader_opacity(1.0)
+                    0,
+                );
+
+                let (prim_cache_address, segments) = if segment_instance_index == SegmentInstanceIndex::UNUSED { // UNUSED: take the address from the primitive's own GPU cache handle and pass no segments
+                    (gpu_cache.get_address(&prim_data.gpu_cache_handle), None)
+                } else { // otherwise: address and segment slice both come from the scratch segment instance
+                    let segment_instance = &ctx.scratch.segment_instances[segment_instance_index];
+                    let segments = Some(&ctx.scratch.segments[segment_instance.segments_range]);
+                    (gpu_cache.get_address(&segment_instance.gpu_cache_handle), segments)
+                };
+
+                let prim_header = PrimitiveHeader {
+                    local_rect: prim_rect,
+                    local_clip_rect: prim_info.clip_chain.local_clip_rect,
+                    specific_prim_address: prim_cache_address,
+                    transform_id,
+                };
+
+                let prim_header_index = prim_headers.push( // the header is stored with the same user data that the batch parameters carry
+                    &prim_header,
+                    z_id,
+                    batch_params.prim_user_data,
+                );
+
+                self.add_segmented_prim_to_batch(
+                    segments,
+                    prim_data.opacity,
+                    &batch_params,
+                    blend_mode,
+                    batch_features,
+                    brush_flags,
+                    prim_data.edge_aa_mask,
+                    prim_header_index,
+                    bounding_rect,
+                    transform_kind,
+                    z_id,
+                    prim_info.clip_task_index,
+                    ctx,
+                    render_tasks,
+                );
+            }
+            PrimitiveInstanceKind::YuvImage { data_handle, segment_instance_index, is_compositor_surface, .. } => { // YUV image: one source texture per plane, submitted through the YuvImage brush.
+                debug_assert!(!is_compositor_surface); // debug-only check: compositor surfaces are not expected on this path
+
+                let yuv_image_data = &ctx.data_stores.yuv_image[data_handle].kind;
+                let mut textures = TextureSet::UNTEXTURED; // slots for present planes are overwritten in the loop below
+                let mut uv_rect_addresses = [0; 3]; // entries for planes that are not present stay 0
+
+                // Resolve the source texture and UV rect address of every plane of the YUV format.
+                let channel_count = yuv_image_data.format.get_plane_num();
+                debug_assert!(channel_count <= 3); // the fixed-size arrays above hold at most three planes
+                for channel in 0 .. channel_count {
+
+                    let src_channel = render_tasks.resolve_location(yuv_image_data.src_yuv[channel], gpu_cache);
+
+                    let (uv_rect_address, texture_source) = match src_channel {
+                        Some(src) => src,
+                        None => {
+                            warn!("Warnings: skip a PrimitiveKind::YuvImage");
+                            return; // any unresolved plane abandons the whole primitive
+                        }
+                    };
+
+                    textures.colors[channel] = texture_source;
+                    uv_rect_addresses[channel] = uv_rect_address.as_int();
+                }
+
+                // All YUV plane textures must have the same buffer kind: the batch kind below records only the first plane's.
+                let buffer_kind = textures.colors[0].image_buffer_kind();
+                assert!( // plain assert!, so this is also enforced in release builds
+                    textures.colors[1 .. yuv_image_data.format.get_plane_num()]
+                        .iter()
+                        .all(|&tid| buffer_kind == tid.image_buffer_kind())
+                );
+
+                let kind = BrushBatchKind::YuvImage(
+                    buffer_kind,
+                    yuv_image_data.format,
+                    yuv_image_data.color_depth,
+                    yuv_image_data.color_space,
+                    yuv_image_data.color_range,
+                );
+
+                let batch_params = BrushBatchParameters::shared(
+                    kind,
+                    textures,
+                    [ // prim user data: the UV rect address of each plane, then an unused 0
+                        uv_rect_addresses[0],
+                        uv_rect_addresses[1],
+                        uv_rect_addresses[2],
+                        0,
+                    ],
+                    0,
+                );
+
+                let prim_common_data = ctx.data_stores.as_common_data(&prim_instance);
+
+                let blend_mode = if !prim_common_data.opacity.is_opaque || // blending is skipped only when the primitive is opaque, has no clip task, is not complex-transformed and is not anti-aliased
+                    prim_info.clip_task_index != ClipTaskIndex::INVALID ||
+                    transform_kind == TransformedRectKind::Complex ||
+                    is_anti_aliased
+                {
+                    BlendMode::PremultipliedAlpha
+                } else {
+                    BlendMode::None
+                };
+
+                debug_assert_ne!(segment_instance_index, SegmentInstanceIndex::INVALID); // debug-only check: INVALID is not expected here; it would otherwise be used as an index below
+                let (prim_cache_address, segments) = if segment_instance_index == SegmentInstanceIndex::UNUSED { // UNUSED: take the address from the primitive's common GPU cache handle and pass no segments
+                    (gpu_cache.get_address(&prim_common_data.gpu_cache_handle), None)
+                } else { // otherwise: address and segment slice both come from the scratch segment instance
+                    let segment_instance = &ctx.scratch.segment_instances[segment_instance_index];
+                    let segments = Some(&ctx.scratch.segments[segment_instance.segments_range]);
+                    (gpu_cache.get_address(&segment_instance.gpu_cache_handle), segments)
+                };
+
+                let prim_header = PrimitiveHeader {
+                    local_rect: prim_rect,
+                    local_clip_rect: prim_info.clip_chain.local_clip_rect,
+                    specific_prim_address: prim_cache_address,
+                    transform_id,
+                };
+
+                let prim_header_index = prim_headers.push( // the header is stored with the same user data that the batch parameters carry
+                    &prim_header,
+                    z_id,
+                    batch_params.prim_user_data,
+                );
+
+                self.add_segmented_prim_to_batch(
+                    segments,
+                    prim_common_data.opacity,
+                    &batch_params,
+                    blend_mode,
+                    batch_features,
+                    brush_flags,
+                    prim_common_data.edge_aa_mask,
+                    prim_header_index,
+                    bounding_rect,
+                    transform_kind,
+                    z_id,
+                    prim_info.clip_task_index,
+                    ctx,
+                    render_tasks,
+                );
+            }
+            PrimitiveInstanceKind::Image { data_handle, image_instance_index, is_compositor_surface, .. } => { // Image: a single segmented submission when there are no visible tiles, otherwise one brush instance per visible tile.
+                debug_assert!(!is_compositor_surface); // debug-only check: compositor surfaces are not expected on this path
+
+                let image_data = &ctx.data_stores.image[data_handle].kind;
+                let common_data = &ctx.data_stores.image[data_handle].common;
+                let image_instance = &ctx.prim_store.images[image_instance_index];
+                let prim_user_data = ImageBrushData { // shared by both the untiled and the tiled path below
+                    color_mode: ShaderColorMode::Image,
+                    alpha_type: image_data.alpha_type,
+                    raster_space: RasterizationSpace::Local,
+                    opacity: 1.0,
+                }.encode();
+
+                let blend_mode = if !common_data.opacity.is_opaque || // blending is skipped only when the primitive is opaque, has no clip task, is not complex-transformed and is not anti-aliased
+                    prim_info.clip_task_index != ClipTaskIndex::INVALID ||
+                    transform_kind == TransformedRectKind::Complex ||
+                    is_anti_aliased
+                {
+                    match image_data.alpha_type { // when blending, the blend mode mirrors the image's alpha type
+                        AlphaType::PremultipliedAlpha => BlendMode::PremultipliedAlpha,
+                        AlphaType::Alpha => BlendMode::Alpha,
+                    }
+                } else {
+                    BlendMode::None
+                };
+
+                if image_instance.visible_tiles.is_empty() { // no visible tiles: draw the image from its single `src_color` source
+                    if cfg!(debug_assertions) { // debug builds only: sanity-check the tiling state reported by the resource cache
+                        match ctx.resource_cache.get_image_properties(image_data.key) {
+                            Some(ImageProperties { tiling: None, .. }) | None => (), // untiled, or no properties available: accepted
+                            other => panic!("Non-tiled image with no visible images detected! Properties {:?}", other), // NOTE(review): this arm is reached when `tiling` is Some(..), yet the message says "Non-tiled" - confirm the wording
+                        }
+                    }
+
+                    let src_color = render_tasks.resolve_location(image_instance.src_color, gpu_cache);
+
+                    let (uv_rect_address, texture_source) = match src_color {
+                        Some(src) => src,
+                        None => {
+                            return; // source could not be resolved: leave without batching this primitive
+                        }
+                    };
+
+                    let batch_params = BrushBatchParameters::shared(
+                        BrushBatchKind::Image(texture_source.image_buffer_kind()),
+                        TextureSet::prim_textured(texture_source),
+                        prim_user_data,
+                        uv_rect_address.as_int(),
+                    );
+
+                    debug_assert_ne!(image_instance.segment_instance_index, SegmentInstanceIndex::INVALID); // debug-only check: INVALID is not expected here; it would otherwise be used as an index below
+                    let (prim_cache_address, segments) = if image_instance.segment_instance_index == SegmentInstanceIndex::UNUSED { // UNUSED: take the address from the primitive's common GPU cache handle and pass no segments
+                        (gpu_cache.get_address(&common_data.gpu_cache_handle), None)
+                    } else { // otherwise: address and segment slice both come from the scratch segment instance
+                        let segment_instance = &ctx.scratch.segment_instances[image_instance.segment_instance_index];
+                        let segments = Some(&ctx.scratch.segments[segment_instance.segments_range]);
+                        (gpu_cache.get_address(&segment_instance.gpu_cache_handle), segments)
+                    };
+
+                    let prim_header = PrimitiveHeader {
+                        local_rect: prim_rect,
+                        local_clip_rect: prim_info.clip_chain.local_clip_rect,
+                        specific_prim_address: prim_cache_address,
+                        transform_id,
+                    };
+
+                    let prim_header_index = prim_headers.push(
+                        &prim_header,
+                        z_id,
+                        batch_params.prim_user_data,
+                    );
+
+                    self.add_segmented_prim_to_batch(
+                        segments,
+                        common_data.opacity,
+                        &batch_params,
+                        blend_mode,
+                        batch_features,
+                        brush_flags,
+                        common_data.edge_aa_mask,
+                        prim_header_index,
+                        bounding_rect,
+                        transform_kind,
+                        z_id,
+                        prim_info.clip_task_index,
+                        ctx,
+                        render_tasks,
+                    );
+                } else { // visible tiles present: tiles are grouped into chunks, each chunk sharing one primitive header
+                    const VECS_PER_SPECIFIC_BRUSH: usize = 3; // equals the number of fixed blocks pushed per chunk below (color, bg color, stretch size)
+                    let max_tiles_per_header = (MAX_VERTEX_TEXTURE_WIDTH - VECS_PER_SPECIFIC_BRUSH) / VECS_PER_SEGMENT; // used as the chunk size below, i.e. the maximum number of tiles per primitive header
+
+                    let (clip_task_address, clip_mask_texture_id) = ctx.get_prim_clip_task_and_texture(
+                        prim_info.clip_task_index,
+                        render_tasks,
+                    ).unwrap(); // panics if no clip task address / texture is returned
+
+                    // use temporary block storage since we don't know the number of visible tiles beforehand
+                    let mut gpu_blocks = Vec::<GpuBlockData>::with_capacity(3 + max_tiles_per_header * 2); // 3 fixed blocks plus 2 blocks per tile, matching the pushes below; the buffer is reused across chunks
+                    for chunk in image_instance.visible_tiles.chunks(max_tiles_per_header) {
+                        gpu_blocks.clear();
+                        gpu_blocks.push(image_data.color.premultiplied().into()); //color
+                        gpu_blocks.push(PremultipliedColorF::WHITE.into()); //bg color
+                        gpu_blocks.push([-1.0, 0.0, 0.0, 0.0].into()); //stretch size
+                        // (stretch size) a negative first value makes the shader code ignore it and use the local size instead
+                        for tile in chunk {
+                            let tile_rect = tile.local_rect.translate(-prim_rect.min.to_vector()); // tile rect expressed relative to the primitive rect's min corner
+                            gpu_blocks.push(tile_rect.into());
+                            gpu_blocks.push(GpuBlockData::EMPTY); // second block of each tile's pair is left empty
+                        }
+
+                        let gpu_handle = gpu_cache.push_per_frame_blocks(&gpu_blocks);
+                        let prim_header = PrimitiveHeader {
+                            local_rect: prim_rect,
+                            local_clip_rect: image_instance.tight_local_clip_rect, // the tiled path uses the image instance's tight clip rect rather than the clip chain's rect
+                            specific_prim_address: gpu_cache.get_address(&gpu_handle),
+                            transform_id,
+                        };
+                        let prim_header_index = prim_headers.push(&prim_header, z_id, prim_user_data);
+
+                        for (i, tile) in chunk.iter().enumerate() { // `i` is the tile's position within this chunk and is passed on as its segment index
+                            let (uv_rect_address, texture) = match render_tasks.resolve_location(tile.src_color, gpu_cache) {
+                                Some(result) => result,
+                                None => {
+                                    return; // unresolved tile source: stop here; no further tiles or chunks are processed
+                                }
+                            };
+
+                            let textures = BatchTextures::prim_textured(
+                                texture,
+                                clip_mask_texture_id,
+                            );
+
+                            let batch_key = BatchKey { // built per tile because the texture (and its buffer kind) is resolved per tile
+                                blend_mode,
+                                kind: BatchKind::Brush(BrushBatchKind::Image(texture.image_buffer_kind())),
+                                textures,
+                            };
+
+                            self.add_brush_instance_to_batches(
+                                batch_key,
+                                batch_features,
+                                bounding_rect,
+                                z_id,
+                                i as i32,
+                                tile.edge_flags,
+                                clip_task_address,
+                                brush_flags | BrushFlags::SEGMENT_RELATIVE | BrushFlags::PERSPECTIVE_INTERPOLATION,
+                                prim_header_index,
+                                uv_rect_address.as_int(),
+                            );
+                        }
+                    }
+                }
+            }
+            PrimitiveInstanceKind::LinearGradient { data_handle, ref visible_tiles_range, .. } => { // Linear gradient: submitted through the LinearGradient brush, either as one (optionally segmented) primitive or as one instance per visible tile.
+                let prim_data = &ctx.data_stores.linear_grad[data_handle];
+
+                let mut prim_header = PrimitiveHeader {
+                    local_rect: prim_rect,
+                    local_clip_rect: prim_info.clip_chain.local_clip_rect,
+                    specific_prim_address: GpuCacheAddress::INVALID, // placeholder: set below on the untiled path, overridden per tile on the tiled path
+                    transform_id,
+                };
+
+                let blend_mode = if !prim_data.opacity.is_opaque || // blending is skipped only when the primitive is opaque, has no clip task, is not complex-transformed and is not anti-aliased
+                    prim_info.clip_task_index != ClipTaskIndex::INVALID ||
+                    transform_kind == TransformedRectKind::Complex ||
+                    is_anti_aliased
+                {
+                    BlendMode::PremultipliedAlpha
+                } else {
+                    BlendMode::None
+                };
+
+                let user_data = [extra_prim_gpu_address.unwrap(), 0, 0, 0]; // panics if `extra_prim_gpu_address` is None; only the first user-data slot is used
+
+                if visible_tiles_range.is_empty() { // no visible tiles: submit the gradient as a single primitive
+                    let batch_params = BrushBatchParameters::shared(
+                        BrushBatchKind::LinearGradient,
+                        TextureSet::UNTEXTURED,
+                        user_data,
+                        0,
+                    );
+
+                    prim_header.specific_prim_address = gpu_cache.get_address(&prim_data.gpu_cache_handle);
+
+                    let prim_header_index = prim_headers.push(&prim_header, z_id, user_data);
+
+                    let segments = if prim_data.brush_segments.is_empty() { // an empty segment list is passed as None rather than as an empty slice
+                        None
+                    } else {
+                        Some(prim_data.brush_segments.as_slice())
+                    };
+                    self.add_segmented_prim_to_batch(
+                        segments,
+                        prim_data.opacity,
+                        &batch_params,
+                        blend_mode,
+                        batch_features,
+                        brush_flags,
+                        prim_data.edge_aa_mask,
+                        prim_header_index,
+                        bounding_rect,
+                        transform_kind,
+                        z_id,
+                        prim_info.clip_task_index,
+                        ctx,
+                        render_tasks,
+                    );
+                } else { // visible tiles present: one primitive header and one brush instance per tile
+                    let visible_tiles = &ctx.scratch.gradient_tiles[*visible_tiles_range];
+
+                    let (clip_task_address, clip_mask_texture_id) = ctx.get_prim_clip_task_and_texture(
+                        prim_info.clip_task_index,
+                        render_tasks,
+                    ).unwrap(); // panics if no clip task address / texture is returned
+
+                    let key = BatchKey { // identical for every tile, so it is built once outside the loop
+                        blend_mode,
+                        kind: BatchKind::Brush(BrushBatchKind::LinearGradient),
+                        textures: BatchTextures::prim_untextured(clip_mask_texture_id),
+                    };
+
+                    for tile in visible_tiles {
+                        let tile_prim_header = PrimitiveHeader { // per-tile address, rect and clip rect; the transform id is inherited from `prim_header`
+                            specific_prim_address: gpu_cache.get_address(&tile.handle),
+                            local_rect: tile.local_rect,
+                            local_clip_rect: tile.local_clip_rect,
+                            ..prim_header
+                        };
+                        let prim_header_index = prim_headers.push(&tile_prim_header, z_id, user_data);
+
+                        self.add_brush_instance_to_batches(
+                            key,
+                            batch_features,
+                            bounding_rect,
+                            z_id,
+                            INVALID_SEGMENT_INDEX, // tiles are not drawn as segments
+                            prim_data.edge_aa_mask,
+                            clip_task_address,
+                            brush_flags | BrushFlags::PERSPECTIVE_INTERPOLATION,
+                            prim_header_index,
+                            0,
+                        );
+                    }
+                }
+            }
+            PrimitiveInstanceKind::CachedLinearGradient { data_handle, ref visible_tiles_range, .. } => { // Cached linear gradient: drawn with the image brush from the resolved `src_color` texture, either as one primitive or as one instance per visible tile.
+                let prim_data = &ctx.data_stores.linear_grad[data_handle]; // reads from the same `linear_grad` data store as the LinearGradient kind
+                let common_data = &prim_data.common;
+
+                let src_color = render_tasks.resolve_location(prim_data.src_color, gpu_cache);
+
+                let (uv_rect_address, texture_source) = match src_color {
+                    Some(src) => src,
+                    None => {
+                        return; // source could not be resolved: leave without batching this primitive
+                    }
+                };
+
+                let textures = TextureSet::prim_textured(texture_source);
+
+                let prim_header = PrimitiveHeader {
+                    local_rect: prim_rect,
+                    local_clip_rect: prim_info.clip_chain.local_clip_rect,
+                    specific_prim_address: gpu_cache.get_address(&common_data.gpu_cache_handle),
+                    transform_id,
+                };
+
+                let prim_user_data = ImageBrushData { // shared by both the untiled and the tiled path below
+                    color_mode: ShaderColorMode::Image,
+                    alpha_type: AlphaType::PremultipliedAlpha,
+                    raster_space: RasterizationSpace::Local,
+                    opacity: 1.0,
+                }.encode();
+
+                let blend_mode = if !common_data.opacity.is_opaque || // blending is skipped only when the primitive is opaque, has no clip task, is not complex-transformed and is not anti-aliased
+                    prim_info.clip_task_index != ClipTaskIndex::INVALID ||
+                    transform_kind == TransformedRectKind::Complex ||
+                    is_anti_aliased
+                {
+                    BlendMode::PremultipliedAlpha
+                } else {
+                    BlendMode::None
+                };
+
+                let batch_kind = BrushBatchKind::Image(texture_source.image_buffer_kind());
+
+                if visible_tiles_range.is_empty() { // no visible tiles: submit the gradient as a single primitive
+                    let batch_params = BrushBatchParameters::shared(
+                        batch_kind,
+                        textures,
+                        prim_user_data,
+                        uv_rect_address.as_int(),
+                    );
+
+                    let segments = if prim_data.brush_segments.is_empty() { // an empty segment list is passed as None rather than as an empty slice
+                        None
+                    } else {
+                        Some(&prim_data.brush_segments[..])
+                    };
+
+                    let prim_header_index = prim_headers.push(
+                        &prim_header,
+                        z_id,
+                        batch_params.prim_user_data,
+                    );
+
+                    self.add_segmented_prim_to_batch(
+                        segments,
+                        common_data.opacity,
+                        &batch_params,
+                        blend_mode,
+                        batch_features,
+                        brush_flags,
+                        common_data.edge_aa_mask,
+                        prim_header_index,
+                        bounding_rect,
+                        transform_kind,
+                        z_id,
+                        prim_info.clip_task_index,
+                        ctx,
+                        render_tasks,
+                    );
+                } else { // visible tiles present: one primitive header and one brush instance per tile
+                    let visible_tiles = &ctx.scratch.gradient_tiles[*visible_tiles_range];
+
+                    let (clip_task_address, clip_mask) = ctx.get_prim_clip_task_and_texture(
+                        prim_info.clip_task_index,
+                        render_tasks,
+                    ).unwrap(); // panics if no clip task address / texture is returned
+
+                    let batch_key = BatchKey { // identical for every tile, so it is built once outside the loop
+                        blend_mode,
+                        kind: BatchKind::Brush(batch_kind),
+                        textures: BatchTextures {
+                            input: textures,
+                            clip_mask,
+                        },
+                    };
+
+                    for tile in visible_tiles {
+                        let tile_prim_header = PrimitiveHeader { // per-tile rect and clip rect; address and transform id are inherited from `prim_header`
+                            local_rect: tile.local_rect,
+                            local_clip_rect: tile.local_clip_rect,
+                            ..prim_header
+                        };
+                        let prim_header_index = prim_headers.push(&tile_prim_header, z_id, prim_user_data);
+
+                        self.add_brush_instance_to_batches(
+                            batch_key,
+                            batch_features,
+                            bounding_rect,
+                            z_id,
+                            INVALID_SEGMENT_INDEX, // tiles are not drawn as segments
+                            prim_data.edge_aa_mask, // NOTE(review): read via `prim_data` here but via `common_data` on the untiled path; presumably the same field - confirm
+                            clip_task_address,
+                            brush_flags | BrushFlags::PERSPECTIVE_INTERPOLATION,
+                            prim_header_index,
+                            uv_rect_address.as_int(), // every tile uses the UV rect address of the single resolved source
+                        );
+                    }
+                }
+            }
+            PrimitiveInstanceKind::RadialGradient { data_handle, ref visible_tiles_range, .. } => {
+                let prim_data = &ctx.data_stores.radial_grad[data_handle];
+                let common_data = &prim_data.common;
+
+                let src_color = render_tasks.resolve_location(prim_data.src_color, gpu_cache);
+
+                let (uv_rect_address, texture_source) = match src_color {
+                    Some(src) => src,
+                    None => {
+                        return;
+                    }
+                };
+
+                let textures = TextureSet::prim_textured(texture_source);
+
+                let prim_header = PrimitiveHeader {
+                    local_rect: prim_rect,
+                    local_clip_rect: prim_info.clip_chain.local_clip_rect,
+                    specific_prim_address: gpu_cache.get_address(&common_data.gpu_cache_handle),
+                    transform_id,
+                };
+
+                let prim_user_data = ImageBrushData {
+                    color_mode: ShaderColorMode::Image,
+                    alpha_type: AlphaType::PremultipliedAlpha,
+                    raster_space: RasterizationSpace::Local,
+                    opacity: 1.0,
+                }.encode();
+
+
+                let blend_mode = if !common_data.opacity.is_opaque ||
+                    prim_info.clip_task_index != ClipTaskIndex::INVALID ||
+                    transform_kind == TransformedRectKind::Complex ||
+                    is_anti_aliased
+                {
+                    BlendMode::PremultipliedAlpha
+                } else {
+                    BlendMode::None
+                };
+
+                let batch_kind = BrushBatchKind::Image(texture_source.image_buffer_kind());
+
+                if visible_tiles_range.is_empty() {
+                    let batch_params = BrushBatchParameters::shared(
+                        batch_kind,
+                        textures,
+                        prim_user_data,
+                        uv_rect_address.as_int(),
+                    );
+
+                    let segments = if prim_data.brush_segments.is_empty() {
+                        None
+                    } else {
+                        Some(&prim_data.brush_segments[..])
+                    };
+
+                    let prim_header_index = prim_headers.push(
+                        &prim_header,
+                        z_id,
+                        batch_params.prim_user_data,
+                    );
+
+                    self.add_segmented_prim_to_batch(
+                        segments,
+                        common_data.opacity,
+                        &batch_params,
+                        blend_mode,
+                        batch_features,
+                        brush_flags,
+                        prim_data.edge_aa_mask,
+                        prim_header_index,
+                        bounding_rect,
+                        transform_kind,
+                        z_id,
+                        prim_info.clip_task_index,
+                        ctx,
+                        render_tasks,
+                    );
+                } else {
+                    let visible_tiles = &ctx.scratch.gradient_tiles[*visible_tiles_range];
+
+                    let (clip_task_address, clip_mask) = ctx.get_prim_clip_task_and_texture(
+                        prim_info.clip_task_index,
+                        render_tasks,
+                    ).unwrap();
+
+                    let batch_key = BatchKey {
+                        blend_mode,
+                        kind: BatchKind::Brush(batch_kind),
+                        textures: BatchTextures {
+                            input: textures,
+                            clip_mask,
+                        },
+                    };
+
+                    for tile in visible_tiles {
+                        let tile_prim_header = PrimitiveHeader {
+                            local_rect: tile.local_rect,
+                            local_clip_rect: tile.local_clip_rect,
+                            ..prim_header
+                        };
+                        let prim_header_index = prim_headers.push(&tile_prim_header, z_id, prim_user_data);
+
+                        self.add_brush_instance_to_batches(
+                            batch_key,
+                            batch_features,
+                            bounding_rect,
+                            z_id,
+                            INVALID_SEGMENT_INDEX,
+                            prim_data.edge_aa_mask,
+                            clip_task_address,
+                            brush_flags | BrushFlags::PERSPECTIVE_INTERPOLATION,
+                            prim_header_index,
+                            uv_rect_address.as_int(),
+                        );
+                    }
+                }
+
+            }
+            PrimitiveInstanceKind::ConicGradient { data_handle, ref visible_tiles_range, .. } => {
+                let prim_data = &ctx.data_stores.conic_grad[data_handle];
+                let common_data = &prim_data.common;
+
+                let src_color = render_tasks.resolve_location(prim_data.src_color, gpu_cache);
+
+                let (uv_rect_address, texture_source) = match src_color {
+                    Some(src) => src,
+                    None => {
+                        return;
+                    }
+                };
+
+                let textures = TextureSet::prim_textured(texture_source);
+
+                let prim_header = PrimitiveHeader {
+                    local_rect: prim_rect,
+                    local_clip_rect: prim_info.clip_chain.local_clip_rect,
+                    specific_prim_address: gpu_cache.get_address(&common_data.gpu_cache_handle),
+                    transform_id,
+                };
+
+                let prim_user_data = ImageBrushData {
+                    color_mode: ShaderColorMode::Image,
+                    alpha_type: AlphaType::PremultipliedAlpha,
+                    raster_space: RasterizationSpace::Local,
+                    opacity: 1.0,
+                }.encode();
+
+
+                let blend_mode = if !common_data.opacity.is_opaque ||
+                    prim_info.clip_task_index != ClipTaskIndex::INVALID ||
+                    transform_kind == TransformedRectKind::Complex ||
+                    is_anti_aliased
+                {
+                    BlendMode::PremultipliedAlpha
+                } else {
+                    BlendMode::None
+                };
+
+                let batch_kind = BrushBatchKind::Image(texture_source.image_buffer_kind());
+
+                if visible_tiles_range.is_empty() {
+                    let batch_params = BrushBatchParameters::shared(
+                        batch_kind,
+                        textures,
+                        prim_user_data,
+                        uv_rect_address.as_int(),
+                    );
+
+                    let segments = if prim_data.brush_segments.is_empty() {
+                        None
+                    } else {
+                        Some(&prim_data.brush_segments[..])
+                    };
+
+                    let prim_header_index = prim_headers.push(
+                        &prim_header,
+                        z_id,
+                        batch_params.prim_user_data,
+                    );
+
+                    self.add_segmented_prim_to_batch(
+                        segments,
+                        common_data.opacity,
+                        &batch_params,
+                        blend_mode,
+                        batch_features,
+                        brush_flags,
+                        prim_data.edge_aa_mask,
+                        prim_header_index,
+                        bounding_rect,
+                        transform_kind,
+                        z_id,
+                        prim_info.clip_task_index,
+                        ctx,
+                        render_tasks,
+                    );
+                } else {
+                    let visible_tiles = &ctx.scratch.gradient_tiles[*visible_tiles_range];
+
+                    let (clip_task_address, clip_mask) = ctx.get_prim_clip_task_and_texture(
+                        prim_info.clip_task_index,
+                        render_tasks,
+                    ).unwrap();
+
+                    let batch_key = BatchKey {
+                        blend_mode,
+                        kind: BatchKind::Brush(batch_kind),
+                        textures: BatchTextures {
+                            input: textures,
+                            clip_mask,
+                        },
+                    };
+
+                    for tile in visible_tiles {
+                        let tile_prim_header = PrimitiveHeader {
+                            local_rect: tile.local_rect,
+                            local_clip_rect: tile.local_clip_rect,
+                            ..prim_header
+                        };
+                        let prim_header_index = prim_headers.push(&tile_prim_header, z_id, prim_user_data);
+
+                        self.add_brush_instance_to_batches(
+                            batch_key,
+                            batch_features,
+                            bounding_rect,
+                            z_id,
+                            INVALID_SEGMENT_INDEX,
+                            prim_data.edge_aa_mask,
+                            clip_task_address,
+                            brush_flags | BrushFlags::PERSPECTIVE_INTERPOLATION,
+                            prim_header_index,
+                            uv_rect_address.as_int(),
+                        );
+                    }
+                }
+            }
+            PrimitiveInstanceKind::BackdropCapture { .. } => {}
+            PrimitiveInstanceKind::BackdropRender { pic_index, .. } => {
+                let prim_cache_address = gpu_cache.get_address(&ctx.globals.default_image_handle);
+                let blend_mode = BlendMode::PremultipliedAlpha;
+                let pic_task_id = ctx.prim_store.pictures[pic_index.0].primary_render_task_id;
+
+                let prim_header = PrimitiveHeader {
+                    local_rect: prim_rect,
+                    local_clip_rect: prim_info.clip_chain.local_clip_rect,
+                    specific_prim_address: prim_cache_address,
+                    transform_id,
+                };
+
+                let (clip_task_address, clip_mask_texture_id) = ctx.get_prim_clip_task_and_texture(
+                    prim_info.clip_task_index,
+                    render_tasks,
+                ).unwrap();
+
+                let kind = BatchKind::Brush(
+                    BrushBatchKind::Image(ImageBufferKind::Texture2D)
+                );
+                let (_, texture) = render_tasks.resolve_location(
+                    pic_task_id,
+                    gpu_cache,
+                ).unwrap();
+                let textures = BatchTextures::prim_textured(
+                    texture,
+                    clip_mask_texture_id,
+                );
+                let key = BatchKey::new(
+                    kind,
+                    blend_mode,
+                    textures,
+                );
+                let prim_header_index = prim_headers.push(
+                    &prim_header,
+                    z_id,
+                    ImageBrushData {
+                        color_mode: ShaderColorMode::Image,
+                        alpha_type: AlphaType::PremultipliedAlpha,
+                        raster_space: RasterizationSpace::Screen,
+                        opacity: 1.0,
+                    }.encode(),
+                );
+
+                let pic_task = &render_tasks[pic_task_id.unwrap()];
+                let pic_info = match pic_task.kind {
+                    RenderTaskKind::Picture(ref info) => info,
+                    _ => panic!("bug: not a picture"),
+                };
+                let target_rect = pic_task.get_target_rect();
+
+                let backdrop_rect = DeviceRect::from_origin_and_size(
+                    pic_info.content_origin,
+                    target_rect.size().to_f32(),
+                );
+
+                let map_prim_to_backdrop = SpaceMapper::new_with_target(
+                    pic_info.surface_spatial_node_index,
+                    prim_spatial_node_index,
+                    WorldRect::max_rect(),
+                    ctx.spatial_tree,
+                );
+
+                let points = [
+                    map_prim_to_backdrop.map_point(prim_rect.top_left()),
+                    map_prim_to_backdrop.map_point(prim_rect.top_right()),
+                    map_prim_to_backdrop.map_point(prim_rect.bottom_left()),
+                    map_prim_to_backdrop.map_point(prim_rect.bottom_right()),
+                ];
+
+                if points.iter().any(|p| p.is_none()) {
+                    return;
+                }
+
+                let uvs = [
+                    calculate_screen_uv(points[0].unwrap() * pic_info.device_pixel_scale, backdrop_rect),
+                    calculate_screen_uv(points[1].unwrap() * pic_info.device_pixel_scale, backdrop_rect),
+                    calculate_screen_uv(points[2].unwrap() * pic_info.device_pixel_scale, backdrop_rect),
+                    calculate_screen_uv(points[3].unwrap() * pic_info.device_pixel_scale, backdrop_rect),
+                ];
+
+                // TODO (gw): This is a hack that provides the GPU cache blocks for an
+                //            ImageSource. We should update the GPU cache interfaces to
+                //            allow pushing per-frame blocks via a request interface.
+                let gpu_blocks = &[
+                    GpuBlockData::from([
+                        target_rect.min.x as f32,
+                        target_rect.min.y as f32,
+                        target_rect.max.x as f32,
+                        target_rect.max.y as f32,
+                    ]),
+                    GpuBlockData::from([0.0; 4]),
+                    GpuBlockData::from(uvs[0]),
+                    GpuBlockData::from(uvs[1]),
+                    GpuBlockData::from(uvs[2]),
+                    GpuBlockData::from(uvs[3]),
+                ];
+                let uv_rect_handle = gpu_cache.push_per_frame_blocks(gpu_blocks);
+
+                self.add_brush_instance_to_batches(
+                    key,
+                    batch_features,
+                    bounding_rect,
+                    z_id,
+                    INVALID_SEGMENT_INDEX,
+                    EdgeAaSegmentMask::all(),
+                    clip_task_address,
+                    brush_flags,
+                    prim_header_index,
+                    uv_rect_handle.as_int(gpu_cache),
+                );
+            }
+        }
+    }
+
+    /// Emit the brush instance for one segment of a primitive.
+    ///
+    /// `edge_aa_mask` lists the edges that are *permitted* to be anti-aliased; an
+    /// edge is only anti-aliased when the segment's own edge flags request it too.
+    /// Passing `EdgeAaSegmentMask::all()` therefore does not force anti-aliasing
+    /// on every edge, it merely allows it.
+    ///
+    /// Nothing is added when no clip task can be resolved for the segment.
+    fn add_segment_to_batch(
+        &mut self,
+        segment: &BrushSegment,
+        segment_data: &SegmentInstanceData,
+        segment_index: i32,
+        batch_kind: BrushBatchKind,
+        prim_header_index: PrimitiveHeaderIndex,
+        alpha_blend_mode: BlendMode,
+        features: BatchFeatures,
+        brush_flags: BrushFlags,
+        edge_aa_mask: EdgeAaSegmentMask,
+        bounding_rect: &PictureRect,
+        transform_kind: TransformedRectKind,
+        z_id: ZBufferId,
+        prim_opacity: PrimitiveOpacity,
+        clip_task_index: ClipTaskIndex,
+        ctx: &RenderTargetContext,
+        render_tasks: &RenderTaskGraph,
+    ) {
+        debug_assert!(clip_task_index != ClipTaskIndex::INVALID);
+
+        // Look up the GPU address of this segment's clip task. A `None` result
+        // means the whole segment is clipped out, so there is nothing to draw.
+        let (clip_task_address, clip_mask) = match ctx.get_clip_task_and_texture(
+            clip_task_index,
+            segment_index,
+            render_tasks,
+        ) {
+            Some(resolved) => resolved,
+            None => return,
+        };
+
+        // We got a valid (or OPAQUE) clip task address, so the segment is drawn.
+        // Blending is needed if the primitive is not opaque, if a real clip mask
+        // applies, if an edge segment is drawn with a complex transform, or if
+        // anti-aliasing is forced.
+        let touches_edge = !segment.edge_flags.is_empty();
+        let needs_blending = !prim_opacity.is_opaque ||
+            clip_task_address != OPAQUE_TASK_ADDRESS ||
+            (touches_edge && transform_kind == TransformedRectKind::Complex) ||
+            brush_flags.contains(BrushFlags::FORCE_AA);
+
+        let blend_mode = if needs_blending {
+            alpha_blend_mode
+        } else {
+            BlendMode::None
+        };
+
+        let batch_key = BatchKey {
+            blend_mode,
+            kind: BatchKind::Brush(batch_kind),
+            textures: BatchTextures {
+                input: segment_data.textures,
+                clip_mask,
+            },
+        };
+
+        // Only edges that are both flagged on the segment and allowed by the
+        // caller's mask remain candidates for anti-aliasing.
+        let aa_edges = segment.edge_flags & edge_aa_mask;
+        let instance_flags =
+            brush_flags | BrushFlags::PERSPECTIVE_INTERPOLATION | segment.brush_flags;
+
+        self.add_brush_instance_to_batches(
+            batch_key,
+            features,
+            bounding_rect,
+            z_id,
+            segment_index,
+            aa_edges,
+            clip_task_address,
+            instance_flags,
+            prim_header_index,
+            segment_data.specific_resource_address,
+        );
+    }
+
+    /// Add a brush primitive to batches, either as one instance or as one
+    /// instance per segment.
+    ///
+    /// * With `brush_segments == None` the primitive is drawn as a single
+    ///   instance using the shared segment data in `params`.
+    /// * With a segment list, every segment is forwarded to
+    ///   `add_segment_to_batch`, paired with either its own entry of the
+    ///   per-segment instance data or with the single shared entry.
+    ///
+    /// `edge_aa_mask` lists the edges that are *permitted* to be anti-aliased; an
+    /// edge is only anti-aliased when the segments enable it as well. Passing
+    /// `EdgeAaSegmentMask::all()` therefore does not force anti-aliasing on every
+    /// edge, it merely allows it.
+    ///
+    /// # Panics
+    ///
+    /// Panics if there are no segments but `params` carries per-segment data, or
+    /// if the clip task for an unsegmented primitive cannot be resolved.
+    fn add_segmented_prim_to_batch(
+        &mut self,
+        brush_segments: Option<&[BrushSegment]>,
+        prim_opacity: PrimitiveOpacity,
+        params: &BrushBatchParameters,
+        blend_mode: BlendMode,
+        features: BatchFeatures,
+        brush_flags: BrushFlags,
+        edge_aa_mask: EdgeAaSegmentMask,
+        prim_header_index: PrimitiveHeaderIndex,
+        bounding_rect: &PictureRect,
+        transform_kind: TransformedRectKind,
+        z_id: ZBufferId,
+        clip_task_index: ClipTaskIndex,
+        ctx: &RenderTargetContext,
+        render_tasks: &RenderTaskGraph,
+    ) {
+        let segments = match brush_segments {
+            Some(segments) => segments,
+            None => {
+                // No segments, and thus no per-segment instance data.
+                let shared = match params.segment_data {
+                    SegmentDataKind::Shared(ref shared) => shared,
+                    SegmentDataKind::Instanced(..) => {
+                        // We should never hit the case where there are no
+                        // segments, but a list of segment instance data.
+                        unreachable!();
+                    }
+                };
+
+                // Note: the caller's blend mode already takes opacity into
+                // account, so it is used as-is here.
+                let (clip_task_address, clip_mask) = ctx
+                    .get_prim_clip_task_and_texture(clip_task_index, render_tasks)
+                    .unwrap();
+
+                let batch_key = BatchKey {
+                    blend_mode,
+                    kind: BatchKind::Brush(params.batch_kind),
+                    textures: BatchTextures {
+                        input: shared.textures,
+                        clip_mask,
+                    },
+                };
+
+                self.add_brush_instance_to_batches(
+                    batch_key,
+                    features,
+                    bounding_rect,
+                    z_id,
+                    INVALID_SEGMENT_INDEX,
+                    edge_aa_mask,
+                    clip_task_address,
+                    brush_flags | BrushFlags::PERSPECTIVE_INTERPOLATION,
+                    prim_header_index,
+                    shared.specific_resource_address,
+                );
+
+                return;
+            }
+        };
+
+        // Everything except the segment, its data and its index is identical
+        // for each segment, so bind those arguments once.
+        let mut push_segment = |index: usize, segment: &BrushSegment, data: &SegmentInstanceData| {
+            self.add_segment_to_batch(
+                segment,
+                data,
+                index as i32,
+                params.batch_kind,
+                prim_header_index,
+                blend_mode,
+                features,
+                brush_flags,
+                edge_aa_mask,
+                bounding_rect,
+                transform_kind,
+                z_id,
+                prim_opacity,
+                clip_task_index,
+                ctx,
+                render_tasks,
+            );
+        };
+
+        match params.segment_data {
+            SegmentDataKind::Instanced(ref per_segment) => {
+                // We have both a list of segments and a list of per-segment
+                // instance data: walk them in lockstep.
+                debug_assert_eq!(segments.len(), per_segment.len());
+                for (index, (segment, data)) in segments
+                    .iter()
+                    .zip(per_segment.iter())
+                    .enumerate()
+                {
+                    push_segment(index, segment, data);
+                }
+            }
+            SegmentDataKind::Shared(ref shared) => {
+                // A list of segments that all use the same instance data.
+                for (index, segment) in segments.iter().enumerate() {
+                    push_segment(index, segment, shared);
+                }
+            }
+        }
+    }
+}
+
+/// Either a single texture / user data for all segments,
+/// or a list of one per segment.
+enum SegmentDataKind {
+    /// One `SegmentInstanceData` used for every segment, and also for a
+    /// primitive that is drawn without any segments.
+    Shared(SegmentInstanceData),
+    /// One `SegmentInstanceData` per brush segment. The batching code pairs
+    /// entries with segments by position and debug-asserts that both lists
+    /// have the same length.
+    Instanced(SmallVec<[SegmentInstanceData; 8]>),
+}
+
+/// The parameters that are specific to a kind of brush,
+/// used by the common method to add a brush to batches.
+struct BrushBatchParameters {
+    /// Brush kind; wrapped in `BatchKind::Brush` when the batch key is built.
+    batch_kind: BrushBatchKind,
+    /// Four integers of brush-specific user data that callers push together
+    /// with the primitive header.
+    prim_user_data: [i32; 4],
+    /// Texture set and resource address, either shared by all segments or
+    /// given once per segment.
+    segment_data: SegmentDataKind,
+}
+
+impl BrushBatchParameters {
+    /// Build parameters for a brush whose segments each carry their own
+    /// instance data (`segment_data` holds one entry per segment).
+    fn instanced(
+        batch_kind: BrushBatchKind,
+        prim_user_data: [i32; 4],
+        segment_data: SmallVec<[SegmentInstanceData; 8]>,
+    ) -> Self {
+        let segment_data = SegmentDataKind::Instanced(segment_data);
+
+        Self {
+            batch_kind,
+            prim_user_data,
+            segment_data,
+        }
+    }
+
+    /// Build parameters for a brush where a single texture set and resource
+    /// address are used by all segments.
+    fn shared(
+        batch_kind: BrushBatchKind,
+        textures: TextureSet,
+        prim_user_data: [i32; 4],
+        specific_resource_address: i32,
+    ) -> Self {
+        let shared_data = SegmentInstanceData {
+            textures,
+            specific_resource_address,
+        };
+
+        Self {
+            batch_kind,
+            prim_user_data,
+            segment_data: SegmentDataKind::Shared(shared_data),
+        }
+    }
+}
+
+/// A list of clip instances to be drawn into a target.
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ClipBatchList {
+    /// Rectangle draws fill up the rectangles with rounded corners.
+    pub slow_rectangles: Vec<ClipMaskInstanceRect>,
+    /// Rectangle instances of the same type as `slow_rectangles`.
+    /// NOTE(review): presumably drawn through a cheaper shader path; confirm
+    /// against the code that fills this list.
+    pub fast_rectangles: Vec<ClipMaskInstanceRect>,
+    /// Image draws apply the image masking.
+    /// Instances are grouped by source texture plus an optional device rect.
+    /// NOTE(review): the meaning of the optional rect is not visible here.
+    pub images: FastHashMap<(TextureSource, Option<DeviceIntRect>), Vec<ClipMaskInstanceImage>>,
+    /// Box-shadow clip instances, grouped by source texture.
+    pub box_shadows: FastHashMap<TextureSource, Vec<ClipMaskInstanceBoxShadow>>,
+}
+
+impl ClipBatchList {
+    /// Create a batch list with no rectangle, image or box-shadow instances.
+    fn new() -> Self {
+        let slow_rectangles = Vec::new();
+        let fast_rectangles = Vec::new();
+
+        Self {
+            slow_rectangles,
+            fast_rectangles,
+            images: Default::default(),
+            box_shadows: Default::default(),
+        }
+    }
+}
+
+/// Batcher managing draw calls into the clip mask (in the RT cache).
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ClipBatcher {
+    /// The first clip in each clip task. This will overwrite all pixels
+    /// in the clip region, so we can skip doing a clear and write with
+    /// blending disabled, which is a big performance win on Intel GPUs.
+    pub primary_clips: ClipBatchList,
+    /// Any subsequent clip masks (rare) for a clip task get drawn in
+    /// a second pass with multiplicative blending enabled.
+    pub secondary_clips: ClipBatchList,
+
+    /// Set at construction. When true, `get_batch_list` never selects
+    /// `primary_clips`: even the first clip of a task is appended to
+    /// `secondary_clips`.
+    gpu_supports_fast_clears: bool,
+}
+
+impl ClipBatcher {
+    /// Create a clip batcher with empty primary and secondary batch lists.
+    ///
+    /// `gpu_supports_fast_clears` is stored and later consulted when choosing
+    /// which list the first clip of a task is appended to.
+    pub fn new(
+        gpu_supports_fast_clears: bool,
+    ) -> Self {
+        let primary_clips = ClipBatchList::new();
+        let secondary_clips = ClipBatchList::new();
+
+        Self {
+            primary_clips,
+            secondary_clips,
+            gpu_supports_fast_clears,
+        }
+    }
+
+    /// Queue a single clip rectangle instance that uses identity clip and
+    /// primitive transforms.
+    ///
+    /// The instance is always appended to the slow rectangles of
+    /// `primary_clips`.
+    pub fn add_clip_region(
+        &mut self,
+        local_pos: LayoutPoint,
+        sub_rect: DeviceRect,
+        clip_data: ClipData,
+        task_origin: DevicePoint,
+        screen_origin: DevicePoint,
+        device_pixel_scale: f32,
+    ) {
+        // Neither the clip nor the primitive is transformed on this path.
+        let common = ClipMaskInstanceCommon {
+            clip_transform_id: TransformPaletteId::IDENTITY,
+            prim_transform_id: TransformPaletteId::IDENTITY,
+            sub_rect,
+            task_origin,
+            screen_origin,
+            device_pixel_scale,
+        };
+
+        self.primary_clips.slow_rectangles.push(ClipMaskInstanceRect {
+            common,
+            local_pos,
+            clip_data,
+        });
+    }
+
+    /// Try to draw a clip rectangle as a grid of small tiles instead of one
+    /// large rectangle, leaving out every tile that lies completely inside the
+    /// clip rect.
+    ///
+    /// Returns `true` when the tiled path was taken (tile instances may have
+    /// been appended to the batch list chosen by `is_first_clip`). Returns
+    /// `false` without adding anything when the mask is below the area
+    /// threshold, the clip is not in the root coordinate system, or the clip
+    /// rect cannot be projected into world space.
+    fn add_tiled_clip_mask(
+        &mut self,
+        mask_screen_rect: DeviceRect,
+        local_clip_rect: LayoutRect,
+        clip_spatial_node_index: SpatialNodeIndex,
+        spatial_tree: &SpatialTree,
+        world_rect: &WorldRect,
+        global_device_pixel_scale: DevicePixelScale,
+        common: &ClipMaskInstanceCommon,
+        is_first_clip: bool,
+    ) -> bool {
+        // Only try to draw in tiles if the clip mask is big enough.
+        if mask_screen_rect.area() < CLIP_RECTANGLE_AREA_THRESHOLD {
+            return false;
+        }
+
+        let mask_size = mask_screen_rect.size().to_i32();
+
+        // For now only clips that are axis-aligned to the root coordinate space
+        // are supported, which keeps the logic below simple. That covers the
+        // vast majority of real world cases and could be extended if needed.
+        let clip_node = spatial_tree.get_spatial_node(clip_spatial_node_index);
+        if clip_node.coordinate_system_id != CoordinateSystemId::root() {
+            return false;
+        }
+
+        // Project the clip rect into world space and scale it to device pixels.
+        // If the matrix does not allow that, report failure so the entire clip
+        // mask gets drawn instead.
+        let clip_to_world = spatial_tree
+            .get_world_transform(clip_spatial_node_index)
+            .into_transform();
+        let world_device_rect = match project_rect(
+            &clip_to_world,
+            &local_clip_rect,
+            world_rect,
+        ) {
+            Some(world_clip_rect) => world_clip_rect * global_device_pixel_scale,
+            None => return false,
+        };
+
+        // Number of tiles (rounded up) needed to cover the device rect of the
+        // primitive clip mask along each axis.
+        let tile_count = |extent: i32| {
+            (extent + CLIP_RECTANGLE_TILE_SIZE - 1) / CLIP_RECTANGLE_TILE_SIZE
+        };
+        let columns = tile_count(mask_size.width);
+        let rows = tile_count(mask_size.height);
+
+        // This path only runs for axis-aligned clips (see the root coordinate
+        // system check above) and only for plain rectangles (not rounded etc),
+        // so world_device_rect is not conservative: there is no inner rect and
+        // it should be the real, axis-aligned clip rect.
+        let mask_origin = mask_screen_rect.min.to_vector();
+        let batch_list = self.get_batch_list(is_first_clip);
+
+        for row in 0 .. rows {
+            let top = row * CLIP_RECTANGLE_TILE_SIZE;
+            let bottom = (top + CLIP_RECTANGLE_TILE_SIZE).min(mask_size.height);
+
+            for column in 0 .. columns {
+                let left = column * CLIP_RECTANGLE_TILE_SIZE;
+                let right = (left + CLIP_RECTANGLE_TILE_SIZE).min(mask_size.width);
+
+                // Tile rect relative to the mask origin, clamped to the mask size.
+                let tile_rect = DeviceIntRect {
+                    min: DeviceIntPoint::new(left, top),
+                    max: DeviceIntPoint::new(right, bottom),
+                }.to_f32();
+
+                // A tile that the clip rect completely contains cannot be
+                // affected by this clip, so drawing it would be redundant.
+                if world_device_rect.contains_box(&tile_rect.translate(mask_origin)) {
+                    continue;
+                }
+
+                batch_list.slow_rectangles.push(ClipMaskInstanceRect {
+                    common: ClipMaskInstanceCommon {
+                        sub_rect: tile_rect,
+                        ..*common
+                    },
+                    local_pos: local_clip_rect.min,
+                    clip_data: ClipData::uniform(local_clip_rect.size(), 0.0, ClipMode::Clip),
+                });
+            }
+        }
+
+        true
+    }
+
+    /// Select the clip batch list that new instances should be appended to.
+    ///
+    /// The first clip mask of a clip task goes to `primary_clips`, unless the
+    /// GPU supports fast clears; in that case, and for every later clip, the
+    /// instances go to `secondary_clips`.
+    fn get_batch_list(
+        &mut self,
+        is_first_clip: bool,
+    ) -> &mut ClipBatchList {
+        if !is_first_clip || self.gpu_supports_fast_clears {
+            return &mut self.secondary_clips;
+        }
+
+        &mut self.primary_clips
+    }
+
+    pub fn add( // Walks the clips in `clip_node_range` and batches the mask instances they need.
+        &mut self,
+        clip_node_range: ClipNodeRange,
+        root_spatial_node_index: SpatialNodeIndex,
+        render_tasks: &RenderTaskGraph,
+        gpu_cache: &GpuCache,
+        clip_store: &ClipStore,
+        transforms: &mut TransformPalette,
+        actual_rect: DeviceRect,
+        surface_device_pixel_scale: DevicePixelScale,
+        task_origin: DevicePoint,
+        screen_origin: DevicePoint,
+        ctx: &RenderTargetContext,
+    ) -> bool { // Returns `clear_to_one`: whether a clear was requested for this clip task.
+        let mut is_first_clip = true; // stays true until a clip arm below reports `added_clip`
+        let mut clear_to_one = false;
+
+        for i in 0 .. clip_node_range.count {
+            let clip_instance = clip_store.get_instance_from_range(&clip_node_range, i);
+            let clip_node = &ctx.data_stores.clip[clip_instance.handle];
+
+            let clip_transform_id = transforms.get_id(
+                clip_node.item.spatial_node_index,
+                ctx.root_spatial_node_index,
+                ctx.spatial_tree,
+            );
+
+            // For clip mask images, we need to map from the primitive's layout space to
+            // the target space, as the cs_clip_image shader needs to forward transform
+            // the local image bounds, rather than backwards transform the target bounds
+            // as is done in write_clip_tile_vertex.
+            let prim_transform_id = match clip_node.item.kind {
+                ClipItemKind::Image { .. } => {
+                    transforms.get_id(
+                        clip_node.item.spatial_node_index,
+                        root_spatial_node_index,
+                        ctx.spatial_tree,
+                    )
+                }
+                _ => {
+                    transforms.get_id(
+                        root_spatial_node_index,
+                        ctx.root_spatial_node_index,
+                        ctx.spatial_tree,
+                    )
+                }
+            };
+
+            let common = ClipMaskInstanceCommon { // fields shared by every instance kind below
+                sub_rect: DeviceRect::from_size(actual_rect.size()), // same size as `actual_rect`
+                task_origin,
+                screen_origin,
+                device_pixel_scale: surface_device_pixel_scale.0,
+                clip_transform_id,
+                prim_transform_id,
+            };
+
+            let added_clip = match clip_node.item.kind {
+                ClipItemKind::Image { image, rect, .. } => {
+                    let request = ImageRequest {
+                        key: image,
+                        rendering: ImageRendering::Auto,
+                        tile: None,
+                    };
+
+                    let map_local_to_raster = SpaceMapper::new_with_target(
+                        root_spatial_node_index,
+                        clip_node.item.spatial_node_index,
+                        WorldRect::max_rect(),
+                        ctx.spatial_tree,
+                    );
+
+                    let mut add_image = |request: ImageRequest, tile_rect: LayoutRect, sub_rect: DeviceRect| {
+                        let cache_item = match ctx.resource_cache.get_cached_image(request) {
+                            Ok(item) => item,
+                            Err(..) => {
+                                warn!("Warnings: skip a image mask");
+                                debug!("request: {:?}", request);
+                                return;
+                            }
+                        };
+
+                        // If the clip transform is axis-aligned, we can skip any need for scissoring
+                        // by clipping the local clip rect with the backwards transformed target bounds.
+                        // If it is not axis-aligned, then we pass the local clip rect through unmodified
+                        // to the shader and also set up a scissor rect for the overall target bounds to
+                        // ensure nothing is drawn outside the target. If for some reason we can't map the
+                        // rect back to local space, we also fall back to just using a scissor rectangle.
+                        let raster_rect =
+                            sub_rect.translate(actual_rect.min.to_vector()) / surface_device_pixel_scale;
+                        let (clip_transform_id, local_rect, scissor) = match map_local_to_raster.unmap(&raster_rect) {
+                            Some(local_rect)
+                                if clip_transform_id.transform_kind() == TransformedRectKind::AxisAligned &&
+                                   !map_local_to_raster.get_transform().has_perspective_component() => {
+                                    match local_rect.intersection(&rect) {
+                                        Some(local_rect) => (clip_transform_id, local_rect, None),
+                                        None => return,
+                                    }
+                            }
+                            _ => {
+                                // If for some reason inverting the transform failed, then don't consider
+                                // the transform to be axis-aligned if it was.
+                                (clip_transform_id.override_transform_kind(TransformedRectKind::Complex),
+                                 rect,
+                                 Some(common.sub_rect
+                                    .translate(task_origin.to_vector())
+                                    .round_out()
+                                    .to_i32()))
+                            }
+                        };
+
+                        self.get_batch_list(is_first_clip)
+                            .images
+                            .entry((cache_item.texture_id, scissor)) // grouped per texture and scissor rect
+                            .or_insert_with(Vec::new)
+                            .push(ClipMaskInstanceImage {
+                                common: ClipMaskInstanceCommon {
+                                    sub_rect,
+                                    clip_transform_id,
+                                    ..common
+                                },
+                                resource_address: gpu_cache.get_address(&cache_item.uv_rect_handle),
+                                tile_rect,
+                                local_rect,
+                            });
+                    };
+
+                    let clip_spatial_node = ctx.spatial_tree.get_spatial_node(clip_node.item.spatial_node_index);
+                    let clip_is_axis_aligned = clip_spatial_node.coordinate_system_id == CoordinateSystemId::root();
+
+                    if clip_instance.has_visible_tiles() {
+                        let sub_rect_bounds = actual_rect.size().into();
+
+                        for tile in clip_store.visible_mask_tiles(&clip_instance) {
+                            let tile_sub_rect = if clip_is_axis_aligned {
+                                let tile_raster_rect = map_local_to_raster
+                                    .map(&tile.tile_rect)
+                                    .expect("bug: should always map as axis-aligned");
+                                let tile_device_rect = tile_raster_rect * surface_device_pixel_scale;
+                                tile_device_rect
+                                    .translate(-actual_rect.min.to_vector())
+                                    .round_out()
+                                    .intersection(&sub_rect_bounds)
+                            } else {
+                                Some(common.sub_rect)
+                            };
+
+                            if let Some(tile_sub_rect) = tile_sub_rect {
+                                assert!(sub_rect_bounds.contains_box(&tile_sub_rect));
+                                add_image(
+                                    request.with_tile(tile.tile_offset),
+                                    tile.tile_rect,
+                                    tile_sub_rect,
+                                )
+                            }
+                        }
+                    } else {
+                        add_image(request, rect, common.sub_rect)
+                    }
+
+                    // If this is the first clip and either there is a transform or the image rect
+                    // doesn't cover the entire task, then request a clear so that pixels outside
+                    // the image boundaries will be properly initialized.
+                    if is_first_clip &&
+                        (!clip_is_axis_aligned ||
+                         !(map_local_to_raster.map(&rect).expect("bug: should always map as axis-aligned")
+                            * surface_device_pixel_scale).contains_box(&actual_rect)) {
+                        clear_to_one = true;
+                    }
+
+                    true
+                }
+                ClipItemKind::BoxShadow { ref source }  => {
+                    let task_id = source
+                        .render_task
+                        .expect("bug: render task handle not allocated");
+                    let (uv_rect_address, texture) = render_tasks.resolve_location(task_id, gpu_cache).unwrap();
+
+                    self.get_batch_list(is_first_clip)
+                        .box_shadows
+                        .entry(texture)
+                        .or_insert_with(Vec::new)
+                        .push(ClipMaskInstanceBoxShadow {
+                            common,
+                            resource_address: uv_rect_address,
+                            shadow_data: BoxShadowData {
+                                src_rect_size: source.original_alloc_size,
+                                clip_mode: source.clip_mode as i32,
+                                stretch_mode_x: source.stretch_mode_x as i32,
+                                stretch_mode_y: source.stretch_mode_y as i32,
+                                dest_rect: source.prim_shadow_rect,
+                            },
+                        });
+
+                    true
+                }
+                ClipItemKind::Rectangle { rect, mode: ClipMode::ClipOut } => {
+                    self.get_batch_list(is_first_clip)
+                        .slow_rectangles
+                        .push(ClipMaskInstanceRect {
+                            common,
+                            local_pos: rect.min,
+                            clip_data: ClipData::uniform(rect.size(), 0.0, ClipMode::ClipOut),
+                        });
+
+                    true
+                }
+                ClipItemKind::Rectangle { rect, mode: ClipMode::Clip } => {
+                    if clip_instance.flags.contains(ClipNodeFlags::SAME_COORD_SYSTEM) {
+                        false // nothing is batched for this clip, so `is_first_clip` is left as is
+                    } else {
+                        if self.add_tiled_clip_mask(
+                            actual_rect,
+                            rect,
+                            clip_node.item.spatial_node_index,
+                            ctx.spatial_tree,
+                            &ctx.screen_world_rect,
+                            ctx.global_device_pixel_scale,
+                            &common,
+                            is_first_clip,
+                        ) {
+                            clear_to_one |= is_first_clip; // a tiled mask as the first clip requests a clear
+                        } else {
+                            self.get_batch_list(is_first_clip)
+                                .slow_rectangles
+                                .push(ClipMaskInstanceRect {
+                                    common,
+                                    local_pos: rect.min,
+                                    clip_data: ClipData::uniform(rect.size(), 0.0, ClipMode::Clip),
+                                });
+                        }
+
+                        true
+                    }
+                }
+                ClipItemKind::RoundedRectangle { rect, ref radius, mode, .. } => {
+                    let batch_list = self.get_batch_list(is_first_clip);
+                    let instance = ClipMaskInstanceRect {
+                        common,
+                        local_pos: rect.min,
+                        clip_data: ClipData::rounded_rect(rect.size(), radius, mode),
+                    };
+                    if clip_instance.flags.contains(ClipNodeFlags::USE_FAST_PATH) {
+                        batch_list.fast_rectangles.push(instance);
+                    } else {
+                        batch_list.slow_rectangles.push(instance);
+                    }
+
+                    true
+                }
+            };
+
+            is_first_clip &= !added_clip; // cleared once any arm above reports an added clip
+        }
+
+        clear_to_one
+    }
+}
+
+impl<'a, 'rc> RenderTargetContext<'a, 'rc> {
+    /// Resolve the clip mask recorded for one clip task instance, where
+    /// `offset` selects the instance relative to `clip_task_index`.
+    ///
+    /// * `None` - the segment was completely clipped out.
+    /// * `Some((OPAQUE_TASK_ADDRESS, TextureSource::Invalid))` - no mask is needed.
+    /// * `Some((address, texture))` - a valid mask task and its target texture.
+    fn get_clip_task_and_texture(
+        &self,
+        clip_task_index: ClipTaskIndex,
+        offset: i32,
+        render_tasks: &RenderTaskGraph,
+    ) -> Option<(RenderTaskAddress, TextureSource)> {
+        let instance_index = clip_task_index.0 as usize + offset as usize;
+
+        // Handle the two mask-less outcomes first, leaving only real masks below.
+        let mask_task_id = match self.scratch.clip_mask_instances[instance_index] {
+            ClipMaskKind::Clipped => return None,
+            ClipMaskKind::None => {
+                return Some((OPAQUE_TASK_ADDRESS, TextureSource::Invalid));
+            }
+            ClipMaskKind::Mask(task_id) => task_id,
+        };
+
+        let address: RenderTaskAddress = mask_task_id.into();
+        let texture = TextureSource::TextureCache(
+            render_tasks[mask_task_id].get_target_texture(),
+            Swizzle::default(),
+        );
+
+        Some((address, texture))
+    }
+
+    /// Helper for a non-segmented primitive: resolves the clip task
+    /// instance stored at offset zero from `clip_task_index`.
+    fn get_prim_clip_task_and_texture(
+        &self,
+        clip_task_index: ClipTaskIndex,
+        render_tasks: &RenderTaskGraph,
+    ) -> Option<(RenderTaskAddress, TextureSource)> {
+        const FIRST_INSTANCE: i32 = 0;
+        self.get_clip_task_and_texture(clip_task_index, FIRST_INSTANCE, render_tasks)
+    }
+}
diff --git a/gfx/wr/webrender/src/border.rs b/gfx/wr/webrender/src/border.rs
new file mode 100644
index 0000000000..bbd8a26ef7
--- /dev/null
+++ b/gfx/wr/webrender/src/border.rs
@@ -0,0 +1,1484 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{BorderRadius, BorderSide, BorderStyle, ColorF, ColorU};
+use api::{NormalBorder as ApiNormalBorder, RepeatMode};
+use api::units::*;
+use crate::clip::ClipNodeId;
+use crate::ellipse::Ellipse;
+use euclid::vec2;
+use crate::scene_building::SceneBuilder;
+use crate::spatial_tree::SpatialNodeIndex;
+use crate::gpu_types::{BorderInstance, BorderSegment, BrushFlags};
+use crate::prim_store::{BorderSegmentInfo, BrushSegment, NinePatchDescriptor};
+use crate::prim_store::borders::{NormalBorderPrim, NormalBorderData};
+use crate::util::{lerp, RectHelpers};
+use crate::internal_types::LayoutPrimitiveInfo;
+use crate::segment::EdgeAaSegmentMask;
+
+// Using 2048 as the maximum radius in device space before we start
+// stretching is up for debate.
+// The value must be chosen so that the corners do not use an
+// unreasonable amount of memory, while still allowing crisp corners in
+// the common cases.
+
+/// Maximum resolution in device pixels at which borders are rasterized.
+pub const MAX_BORDER_RESOLUTION: u32 = 2048;
+/// Maximum number of dots or dashes per segment, to avoid freezing and filling up
+/// memory with unreasonable inputs. It would be better to address this by not building
+/// a list of per-dot information in the first place.
+pub const MAX_DASH_COUNT: u32 = 2048;
+
+// TODO(gw): Perhaps there is a better way to store
+//           the border cache key than duplicating
+//           all the border structs with hashable
+//           variants...
+
+#[derive(Copy, Clone, Debug, Hash, MallocSizeOf, PartialEq, Eq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct BorderRadiusAu { // Hashable counterpart of `BorderRadius`, with sizes held as `LayoutSizeAu`.
+    pub top_left: LayoutSizeAu, // one radius size per corner
+    pub top_right: LayoutSizeAu,
+    pub bottom_left: LayoutSizeAu,
+    pub bottom_right: LayoutSizeAu,
+}
+
+impl From<BorderRadius> for BorderRadiusAu {
+    fn from(radius: BorderRadius) -> Self {
+        // Convert the corners one by one into their `Au` representation.
+        let top_left = radius.top_left.to_au();
+        let top_right = radius.top_right.to_au();
+        let bottom_right = radius.bottom_right.to_au();
+        let bottom_left = radius.bottom_left.to_au();
+        Self { top_left, top_right, bottom_left, bottom_right }
+    }
+}
+
+impl From<BorderRadiusAu> for BorderRadius {
+    fn from(radius: BorderRadiusAu) -> Self {
+        // Convert the corners one by one back from their `Au` representation.
+        let top_left = LayoutSize::from_au(radius.top_left);
+        let top_right = LayoutSize::from_au(radius.top_right);
+        let bottom_right = LayoutSize::from_au(radius.bottom_right);
+        let bottom_left = LayoutSize::from_au(radius.bottom_left);
+        Self { top_left, top_right, bottom_left, bottom_right }
+    }
+}
+
+#[derive(Clone, Debug, Hash, MallocSizeOf, PartialEq, Eq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct BorderSideAu { // Hashable counterpart of `BorderSide`.
+    pub color: ColorU, // the side's color, converted to `ColorU` with `into()`
+    pub style: BorderStyle, // copied unchanged from the `BorderSide`
+}
+
+impl From<BorderSide> for BorderSideAu {
+    fn from(side: BorderSide) -> Self {
+        // Only the color changes representation; the style carries over untouched.
+        let BorderSide { color, style } = side;
+        let color = color.into();
+        Self { color, style }
+    }
+}
+
+impl From<BorderSideAu> for BorderSide {
+    fn from(side: BorderSideAu) -> Self {
+        // Only the color needs converting back; the style is moved over as is.
+        let BorderSideAu { color, style } = side;
+        let color = color.into();
+        Self { color, style }
+    }
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Clone, Hash, Eq, MallocSizeOf, PartialEq)]
+pub struct NormalBorderAu { // Hashable counterpart of `ApiNormalBorder`, built from the `*Au` types.
+    pub left: BorderSideAu, // color and style of each of the four sides
+    pub right: BorderSideAu,
+    pub top: BorderSideAu,
+    pub bottom: BorderSideAu,
+    pub radius: BorderRadiusAu, // corner radii
+    /// Whether to apply anti-aliasing on the border corners.
+    ///
+    /// Note that setting this to `false` only works when the borders are
+    /// solid and there is no border-radius.
+    pub do_aa: bool,
+}
+
+impl NormalBorderAu {
+    /// Returns a copy of this border with all four sides recolored to `color`.
+    pub fn with_color(&self, color: ColorU) -> Self {
+        let mut recolored = self.clone();
+        let NormalBorderAu { left, right, top, bottom, .. } = &mut recolored;
+        for side in [left, right, top, bottom].iter_mut() {
+            side.color = color;
+        }
+        recolored
+    }
+}
+
+impl From<ApiNormalBorder> for NormalBorderAu {
+    fn from(border: ApiNormalBorder) -> Self {
+        // The sides and the radius go through their own `From` impls;
+        // `do_aa` is carried over unchanged.
+        let ApiNormalBorder { left, right, top, bottom, radius, do_aa } = border;
+        let (left, right) = (left.into(), right.into());
+        let (top, bottom) = (top.into(), bottom.into());
+        let radius = radius.into();
+
+        Self { left, right, top, bottom, radius, do_aa }
+    }
+}
+
+impl From<NormalBorderAu> for ApiNormalBorder {
+    fn from(border: NormalBorderAu) -> Self {
+        // The sides and the radius go through their own `From` impls;
+        // `do_aa` is carried over unchanged.
+        let NormalBorderAu { left, right, top, bottom, radius, do_aa } = border;
+        let (left, right) = (left.into(), right.into());
+        let (top, bottom) = (top.into(), bottom.into());
+        let radius = radius.into();
+
+        Self { left, right, top, bottom, radius, do_aa }
+    }
+}
+
+/// Cache key that uniquely identifies a border
+/// segment in the render task cache.
+#[derive(Clone, Debug, Hash, MallocSizeOf, PartialEq, Eq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct BorderSegmentCacheKey {
+    pub size: LayoutSizeAu, // NOTE(review): presumably the size of the segment - confirm
+    pub radius: LayoutSizeAu, // NOTE(review): presumably the corner radius, if any - confirm
+    pub side0: BorderSideAu, // NOTE(review): presumably the border side(s) meeting at this segment - confirm
+    pub side1: BorderSideAu,
+    pub segment: BorderSegment, // which border segment (e.g. a corner such as `TopLeft`) is keyed
+    pub do_aa: bool,
+    pub h_adjacent_corner_outer: LayoutPointAu, // NOTE(review): data of the horizontally adjacent corner - confirm
+    pub h_adjacent_corner_radius: LayoutSizeAu,
+    pub v_adjacent_corner_outer: LayoutPointAu, // NOTE(review): data of the vertically adjacent corner - confirm
+    pub v_adjacent_corner_radius: LayoutSizeAu,
+}
+
+/// Uniformly shrink the corner radii so that adjacent corners never overlap.
+///
+/// Along each edge of a box of the given `size`, the radii of the two corners
+/// on that edge are summed. Whenever an edge is shorter than its sum,
+/// `edge / sum` becomes a candidate scale factor, and the smallest candidate
+/// is applied to the width and height of all four radii. If no edge is too
+/// short, `radius` is left untouched.
+pub fn ensure_no_corner_overlap(
+    radius: &mut BorderRadius,
+    size: LayoutSize,
+) {
+    // Pairs of (edge length, summed radii along that edge), in the order:
+    // top edge, bottom edge, left edge, right edge.
+    let edges = [
+        (size.width, radius.top_left.width + radius.top_right.width),
+        (size.width, radius.bottom_left.width + radius.bottom_right.width),
+        (size.height, radius.top_left.height + radius.bottom_left.height),
+        (size.height, radius.top_right.height + radius.bottom_right.height),
+    ];
+
+    // Starting from 1.0, keep the smallest ratio of any edge that is too short.
+    let ratio = edges.iter().fold(1.0, |ratio, &(available, sum)| {
+        if available < sum {
+            f32::min(ratio, available / sum)
+        } else {
+            ratio
+        }
+    });
+
+    // A ratio of 1.0 means nothing overlaps, so only ever scale downwards.
+    if ratio < 1. {
+        let mut corners = [
+            &mut radius.top_left,
+            &mut radius.top_right,
+            &mut radius.bottom_left,
+            &mut radius.bottom_right,
+        ];
+
+        for corner in corners.iter_mut() {
+            corner.width *= ratio;
+            corner.height *= ratio;
+        }
+    }
+}
+
+impl<'a> SceneBuilder<'a> {
+    /// Add a normal border primitive to the scene.
+    ///
+    /// The corner radii of a copy of `border` are first scaled down so they
+    /// cannot overlap within `info.rect`; the border and `widths` are then
+    /// converted with `into()` / `to_au()` and handed to `add_primitive`.
+    pub fn add_normal_border(
+        &mut self,
+        info: &LayoutPrimitiveInfo,
+        border: &ApiNormalBorder,
+        widths: LayoutSideOffsets,
+        spatial_node_index: SpatialNodeIndex,
+        clip_node_id: ClipNodeId,
+    ) {
+        let mut adjusted = *border;
+        ensure_no_corner_overlap(&mut adjusted.radius, info.rect.size());
+
+        let prim = NormalBorderPrim {
+            border: adjusted.into(),
+            widths: widths.to_au(),
+        };
+        self.add_primitive(spatial_node_index, clip_node_id, info, Vec::new(), prim);
+    }
+}
+
+pub trait BorderSideHelpers { // Helper methods for border sides.
+    fn border_color(&self, is_inner_border: bool) -> ColorF; // color to use for the inner or outer part of the side
+}
+
+impl BorderSideHelpers for BorderSide {
+    /// Color used to draw this side. Inset and outset styles are shaded
+    /// lighter or darker depending on `is_inner_border`; every other style
+    /// returns the side's own color unchanged.
+    fn border_color(&self, is_inner_border: bool) -> ColorF {
+        let is_inset = match self.style {
+            BorderStyle::Inset => true,
+            BorderStyle::Outset => false,
+            _ => return self.color,
+        };
+        // Inset is lighter on the inner border, outset on the outer one.
+        let lighter = is_inset == is_inner_border;
+
+        // The factors below are not part of the specification. They come from the Gecko source
+        // and experimentation, and generate the inset/outset and groove/ridge border colors.
+        // NOTE(emilio): Gecko at least takes the background color into
+        // account, should we do the same? Looks a bit annoying for this.
+        // NOTE(emilio): If you change this algorithm, do the same change on
+        // get_colors_for_side in cs_border_segment.glsl.
+        let is_black = self.color.r == 0.0 && self.color.g == 0.0 && self.color.b == 0.0;
+        if is_black {
+            let black = if lighter { 0.7 } else { 0.3 };
+            return ColorF::new(black, black, black, self.color.a);
+        }
+        self.color.scale_rgb(if lighter { 1.0 } else { 2.0 / 3.0 })
+    }
+}
+
+/// The kind of border corner clip; the value is packed into an instance's `flags`, shifted left by 24.
+#[repr(C)]
+#[derive(Copy, Debug, Clone, PartialEq)]
+pub enum BorderClipKind {
+    DashCorner = 1, // used for the instances of dashed corners
+    DashEdge = 2,
+    Dot = 3, // used for the instances of dotted corners
+}
+
+/// Locate the outer point of a corner and the sign of its clip on each axis.
+///
+/// The outer point is zero on an axis for corners on the top / left and
+/// `radius` for corners on the bottom / right. The clip sign is `1.0` on the
+/// former axes and `-1.0` on the latter. Panics for a non-corner segment.
+fn compute_outer_and_clip_sign(
+    corner_segment: BorderSegment,
+    radius: DeviceSize,
+) -> (DevicePoint, DeviceVector2D) {
+    // Per-axis factor: 1.0 when the corner is on the right / bottom side.
+    let (scale_x, scale_y): (f32, f32) = match corner_segment {
+        BorderSegment::TopLeft => (0.0, 0.0),
+        BorderSegment::TopRight => (1.0, 0.0),
+        BorderSegment::BottomRight => (1.0, 1.0),
+        BorderSegment::BottomLeft => (0.0, 1.0),
+        _ => panic!("bug: expected a corner segment"),
+    };
+
+    let outer = DevicePoint::new(scale_x * radius.width, scale_y * radius.height);
+    let clip_sign = DeviceVector2D::new(1.0 - 2.0 * scale_x, 1.0 - 2.0 * scale_y);
+
+    (outer, clip_sign)
+}
+
+fn write_dashed_corner_instances( // Appends `DashCorner` clip instances for one dashed corner to `instances`.
+    corner_radius: DeviceSize,
+    widths: DeviceSize,
+    segment: BorderSegment,
+    base_instance: &BorderInstance, // template: every field except `flags` and `clip_params` is copied from it
+    instances: &mut Vec<BorderInstance>,
+) -> Result<(), ()> { // Err(()) when no half-dash is computed for the corner arc
+    let ellipse = Ellipse::new(corner_radius);
+
+    let average_border_width = 0.5 * (widths.width + widths.height);
+
+    let (_half_dash, num_half_dashes) = // only the count is used; the lengths are recomputed below
+        compute_half_dash(average_border_width, ellipse.total_arc_length);
+
+    if num_half_dashes == 0 {
+        return Err(());
+    }
+
+    let num_half_dashes = num_half_dashes.min(MAX_DASH_COUNT); // cap the amount of work for huge inputs
+
+    let (outer, clip_sign) = compute_outer_and_clip_sign(segment, corner_radius);
+
+    let instance_count = num_half_dashes / 4 + 1; // each later instance advances the arc by four half-dashes
+    instances.reserve(instance_count as usize);
+
+    let half_dash_arc_length = // spread the (capped) half-dashes evenly over the whole arc
+        ellipse.total_arc_length / num_half_dashes as f32;
+    let dash_length = 2. * half_dash_arc_length;
+
+    let mut current_length = 0.;
+    for i in 0..instance_count {
+        let arc_length0 = current_length;
+        current_length += if i == 0 { // the first span is only half a dash long
+            half_dash_arc_length
+        } else {
+            dash_length
+        };
+
+        let arc_length1 = current_length;
+        current_length += dash_length; // skip one dash length before the next instance starts
+
+        let alpha = ellipse.find_angle_for_arc_length(arc_length0);
+        let beta = ellipse.find_angle_for_arc_length(arc_length1);
+
+        let (point0, tangent0) = ellipse.get_point_and_tangent(alpha);
+        let (point1, tangent1) = ellipse.get_point_and_tangent(beta);
+
+        let point0 = DevicePoint::new( // re-express the ellipse point relative to this corner's outer point
+            outer.x + clip_sign.x * (corner_radius.width - point0.x),
+            outer.y + clip_sign.y * (corner_radius.height - point0.y),
+        );
+
+        let tangent0 = DeviceVector2D::new( // tangents are negated and flipped per axis by the clip sign
+            -tangent0.x * clip_sign.x,
+            -tangent0.y * clip_sign.y,
+        );
+
+        let point1 = DevicePoint::new(
+            outer.x + clip_sign.x * (corner_radius.width - point1.x),
+            outer.y + clip_sign.y * (corner_radius.height - point1.y),
+        );
+
+        let tangent1 = DeviceVector2D::new(
+            -tangent1.x * clip_sign.x,
+            -tangent1.y * clip_sign.y,
+        );
+
+        instances.push(BorderInstance {
+            flags: base_instance.flags | ((BorderClipKind::DashCorner as i32) << 24), // clip kind goes in bits 24+
+            clip_params: [ // point and tangent at the start of the span, then at its end
+                point0.x,
+                point0.y,
+                tangent0.x,
+                tangent0.y,
+                point1.x,
+                point1.y,
+                tangent1.x,
+                tangent1.y,
+            ],
+            .. *base_instance
+        });
+    }
+
+    Ok(())
+}
+
+fn write_dotted_corner_instances( // Appends `Dot` clip instances for one dotted corner to `instances`.
+    corner_radius: DeviceSize,
+    widths: DeviceSize,
+    segment: BorderSegment,
+    base_instance: &BorderInstance, // template: every field except `flags` and `clip_params` is copied from it
+    instances: &mut Vec<BorderInstance>,
+) -> Result<(), ()> { // Err(()) when the computed dot count is zero
+    let mut corner_radius = corner_radius; // a radius below half the border width is treated as zero
+    if corner_radius.width < (widths.width / 2.0) {
+        corner_radius.width = 0.0;
+    }
+    if corner_radius.height < (widths.height / 2.0) {
+        corner_radius.height = 0.0;
+    }
+
+    let (ellipse, max_dot_count) =
+        if corner_radius.width == 0. && corner_radius.height == 0. {
+            (Ellipse::new(corner_radius), 1) // no radius at all: a single dot is used
+        } else {
+            // The centers of dots follow an ellipse along the middle of the
+            // border radius.
+            let inner_radius = (corner_radius - widths * 0.5).abs();
+            let ellipse = Ellipse::new(inner_radius);
+
+            // Allocate a "worst case" number of dot clips. This can be
+            // calculated by taking the minimum edge radius, since that
+            // will result in the maximum number of dots along the path.
+            let min_diameter = widths.width.min(widths.height);
+
+            // Get the number of circles (assuming spacing of one diameter
+            // between dots).
+            let max_dot_count = 0.5 * ellipse.total_arc_length / min_diameter;
+
+            // Add space for one extra dot since they are centered at the
+            // start of the arc.
+            (ellipse, max_dot_count.ceil() as usize) // NOTE(review): only `ceil()` here, no explicit "+ 1" - confirm
+        };
+
+    if max_dot_count == 0 {
+        return Err(());
+    }
+
+    if max_dot_count == 1 {
+        let dot_diameter = lerp(widths.width, widths.height, 0.5); // one dot, sized between the two widths
+        instances.push(BorderInstance {
+            flags: base_instance.flags | ((BorderClipKind::Dot as i32) << 24),
+            clip_params: [
+                widths.width / 2.0, widths.height / 2.0, 0.5 * dot_diameter, 0.,
+                0., 0., 0., 0.,
+            ],
+            .. *base_instance
+        });
+        return Ok(());
+    }
+
+    let max_dot_count = max_dot_count.min(MAX_DASH_COUNT as usize); // cap the amount of work for huge inputs
+
+    // FIXME(emilio): Should probably use SmallVec.
+    let mut forward_dots = Vec::with_capacity(max_dot_count / 2 + 1);
+    let mut back_dots = Vec::with_capacity(max_dot_count / 2 + 1);
+    let mut leftover_arc_length = 0.0;
+
+    // Alternate between adding dots at the start and end of the
+    // ellipse arc. This ensures that we always end up with an exact
+    // half dot at each end of the arc, to match up with the edges.
+    forward_dots.push(DotInfo::new(widths.width, widths.width));
+    back_dots.push(DotInfo::new(
+        ellipse.total_arc_length - widths.height,
+        widths.height,
+    ));
+
+    let (outer, clip_sign) = compute_outer_and_clip_sign(segment, corner_radius);
+    for dot_index in 0 .. max_dot_count {
+        let prev_forward_pos = *forward_dots.last().unwrap(); // never empty: both lists were seeded above
+        let prev_back_pos = *back_dots.last().unwrap();
+
+        // Select which end of the arc to place a dot from.
+        // This just alternates between the start and end of
+        // the arc, which ensures that there is always an
+        // exact half-dot at each end of the ellipse.
+        let going_forward = dot_index & 1 == 0;
+
+        let (next_dot_pos, leftover) = if going_forward { // `leftover`: gap to the last dot on the other side
+            let next_dot_pos =
+                prev_forward_pos.arc_pos + 2.0 * prev_forward_pos.diameter;
+            (next_dot_pos, prev_back_pos.arc_pos - next_dot_pos)
+        } else {
+            let next_dot_pos = prev_back_pos.arc_pos - 2.0 * prev_back_pos.diameter;
+            (next_dot_pos, next_dot_pos - prev_forward_pos.arc_pos)
+        };
+
+        // Use a lerp between each edge's dot
+        // diameter, based on the linear distance
+        // along the arc to get the diameter of the
+        // dot at this arc position.
+        let t = next_dot_pos / ellipse.total_arc_length;
+        let dot_diameter = lerp(widths.width, widths.height, t);
+
+        // If we can't fit a dot, bail out.
+        if leftover < dot_diameter {
+            leftover_arc_length = leftover;
+            break;
+        }
+
+        // We can place a dot!
+        let dot = DotInfo::new(next_dot_pos, dot_diameter);
+        if going_forward {
+            forward_dots.push(dot);
+        } else {
+            back_dots.push(dot);
+        }
+    }
+
+    // Now step through the dots, and distribute any extra
+    // leftover space on the arc between them evenly. Once
+    // the final arc position is determined, generate the correct
+    // arc positions and angles that get passed to the clip shader.
+    let number_of_dots = forward_dots.len() + back_dots.len();
+    let extra_space_per_dot = leftover_arc_length / (number_of_dots - 1) as f32; // at least two dots exist here
+
+    let create_dot_data = |arc_length: f32, dot_radius: f32| -> [f32; 8] {
+        // Represents the GPU data for drawing a single dot to a clip mask. The order
+        // these are specified must stay in sync with the way this data is read in the
+        // dot clip shader.
+        let theta = ellipse.find_angle_for_arc_length(arc_length);
+        let (center, _) = ellipse.get_point_and_tangent(theta);
+
+        let center = DevicePoint::new( // re-express the ellipse point relative to this corner's outer point
+            outer.x + clip_sign.x * (corner_radius.width - center.x),
+            outer.y + clip_sign.y * (corner_radius.height - center.y),
+        );
+
+        [center.x, center.y, dot_radius, 0.0, 0.0, 0.0, 0.0, 0.0]
+    };
+
+    instances.reserve(number_of_dots);
+    for (i, dot) in forward_dots.iter().enumerate() {
+        let extra_dist = i as f32 * extra_space_per_dot; // forward dots are pushed further along the arc
+        instances.push(BorderInstance {
+            flags: base_instance.flags | ((BorderClipKind::Dot as i32) << 24),
+            clip_params: create_dot_data(dot.arc_pos + extra_dist, 0.5 * dot.diameter),
+            .. *base_instance
+        });
+    }
+
+    for (i, dot) in back_dots.iter().enumerate() {
+        let extra_dist = i as f32 * extra_space_per_dot; // back dots are pulled back towards the arc start
+        instances.push(BorderInstance {
+            flags: base_instance.flags | ((BorderClipKind::Dot as i32) << 24),
+            clip_params: create_dot_data(dot.arc_pos - extra_dist, 0.5 * dot.diameter),
+            .. *base_instance
+        });
+    }
+
+    Ok(())
+}
+
+/// Placement of a single dot on a corner arc: where it sits along the
+/// arc (`arc_pos`) and how wide it is (`diameter`).
+#[derive(Copy, Clone, Debug)]
+struct DotInfo { arc_pos: f32, diameter: f32 }
+
+impl DotInfo {
+    /// Builds a `DotInfo` from its arc position and diameter.
+    fn new(arc_pos: f32, diameter: f32) -> Self {
+        Self { arc_pos, diameter }
+    }
+}
+
+/// Placement and tiling parameters for one straight border edge.
+///
+/// All three values are lengths along the edge's own direction (x for
+/// top/bottom edges, y for left/right edges), in local space.
+#[derive(Debug)]
+struct EdgeInfo {
+    /// Distance past the preceding corner at which the edge segment
+    /// starts.
+    local_offset: f32,
+    /// Length of the edge segment.
+    local_size: f32,
+    /// Length of one repeat of the edge pattern; the pattern tiles
+    /// beyond this length.
+    stretch_size: f32,
+}
+
+impl EdgeInfo {
+    /// Packs the three measurements into an `EdgeInfo`.
+    ///
+    /// Arguments are taken in field order: offset, size, stretch size.
+    fn new(local_offset: f32, local_size: f32, stretch_size: f32) -> EdgeInfo {
+        EdgeInfo { local_offset, local_size, stretch_size }
+    }
+}
+
+/// Splits a dashed edge into equally sized half-dashes.
+///
+/// Starts from a nominal half-dash of 1.5x `side_width`, rounds the number
+/// of half-dashes needed to cover `total_size` up to a multiple of four,
+/// and returns `(half_dash_length, count)` with the length rescaled so that
+/// `count` half-dashes span `total_size` exactly. Yields `(0.0, 0)` when
+/// the initial count comes out as zero.
+fn compute_half_dash(side_width: f32, total_size: f32) -> (f32, u32) {
+    let nominal_len = side_width * 1.5;
+    let raw_count = (total_size / nominal_len).ceil() as u32;
+
+    // TODO(emilio): Gecko has some other heuristics here to start with a full
+    // dash when the border side is zero, for example. We might consider those
+    // in the future.
+    let count = match (raw_count, raw_count % 4) {
+        (0, _) => return (0., 0),
+        (n, 0) => n,
+        (n, rem) => n + 4 - rem,
+    };
+
+    (total_size / count as f32, count)
+}
+
+
+/// Works out where a straight edge is placed and how its pattern repeats,
+/// given the edge's border style, its thickness (`side_width`) and the
+/// length available to it (`avail_size`).
+///
+/// Callers use the returned stretch size as the length of the edge's render
+/// task. An all-zero `EdgeInfo` is returned when there is nothing to place.
+fn get_edge_info(
+    style: BorderStyle,
+    side_width: f32,
+    avail_size: f32,
+) -> EdgeInfo {
+    // Degenerate inputs would lead to a division by zero further down.
+    if side_width <= 0.0 || avail_size <= 0.0 {
+        return EdgeInfo::new(0.0, 0.0, 0.0);
+    }
+
+    match style {
+        BorderStyle::Dashed => {
+            // The pattern repeats every two dash lengths (four half-dashes).
+            let (half_dash, _) = compute_half_dash(side_width, avail_size);
+            EdgeInfo::new(0., avail_size, 4.0 * half_dash)
+        }
+        BorderStyle::Dotted => {
+            // A dot together with its spacing takes twice the side width.
+            let period = 2.0 * side_width;
+            if avail_size < period * 0.75 {
+                return EdgeInfo::new(0.0, 0.0, 0.0);
+            }
+
+            // Fit a whole number of repeats (at least one) and centre them:
+            // half of the unused length, rounded, becomes the start offset.
+            let repeats = (avail_size / period).floor().max(1.0);
+            let covered = repeats * period;
+            let lead_in = ((avail_size - covered) * 0.5).round();
+            EdgeInfo::new(lead_in, covered, period)
+        }
+        _ => EdgeInfo::new(0.0, avail_size, 8.0),
+    }
+}
+
+/// Create the set of border segments and render task cache keys for a given
+/// CSS border. Edges are handled first (left, top, right, bottom), then corners.
+pub fn create_border_segments(
+    size: LayoutSize,
+    border: &ApiNormalBorder,
+    widths: &LayoutSideOffsets,
+    border_segments: &mut Vec<BorderSegmentInfo>,
+    brush_segments: &mut Vec<BrushSegment>,
+) {
+    let rect = LayoutRect::from_size(size);
+    // How far opposing side widths overrun the box on each axis (zero if they fit).
+    let overlap = LayoutSize::new(
+        (widths.left + widths.right - size.width).max(0.0),
+        (widths.top + widths.bottom - size.height).max(0.0),
+    );
+    let non_overlapping_widths = LayoutSideOffsets::new( // each side gives up half of the overrun on its axis
+        widths.top - overlap.height / 2.0,
+        widths.right - overlap.width / 2.0,
+        widths.bottom - overlap.height / 2.0,
+        widths.left - overlap.width / 2.0,
+    );
+    // Each corner box spans the larger of its radius and the adjoining side width, per axis.
+    let local_size_tl = LayoutSize::new(
+        border.radius.top_left.width.max(widths.left),
+        border.radius.top_left.height.max(widths.top),
+    );
+    let local_size_tr = LayoutSize::new(
+        border.radius.top_right.width.max(widths.right),
+        border.radius.top_right.height.max(widths.top),
+    );
+    let local_size_br = LayoutSize::new(
+        border.radius.bottom_right.width.max(widths.right),
+        border.radius.bottom_right.height.max(widths.bottom),
+    );
+    let local_size_bl = LayoutSize::new(
+        border.radius.bottom_left.width.max(widths.left),
+        border.radius.bottom_left.height.max(widths.bottom),
+    );
+    // Each edge gets the length that remains between the two corner boxes at its ends.
+    let top_edge_info = get_edge_info(
+        border.top.style,
+        widths.top,
+        rect.width() - local_size_tl.width - local_size_tr.width,
+    );
+    let bottom_edge_info = get_edge_info(
+        border.bottom.style,
+        widths.bottom,
+        rect.width() - local_size_bl.width - local_size_br.width,
+    );
+
+    let left_edge_info = get_edge_info(
+        border.left.style,
+        widths.left,
+        rect.height() - local_size_tl.height - local_size_bl.height,
+    );
+    let right_edge_info = get_edge_info(
+        border.right.style,
+        widths.right,
+        rect.height() - local_size_tr.height - local_size_br.height,
+    );
+    // Edge segments: left, top, right, bottom.
+    add_edge_segment(
+        LayoutRect::from_floats(
+            rect.min.x,
+            rect.min.y + local_size_tl.height + left_edge_info.local_offset,
+            rect.min.x + non_overlapping_widths.left,
+            rect.min.y + local_size_tl.height + left_edge_info.local_offset + left_edge_info.local_size,
+        ),
+        &left_edge_info,
+        border.left,
+        non_overlapping_widths.left,
+        BorderSegment::Left,
+        EdgeAaSegmentMask::LEFT | EdgeAaSegmentMask::RIGHT,
+        brush_segments,
+        border_segments,
+        border.do_aa,
+    );
+    add_edge_segment(
+        LayoutRect::from_floats(
+            rect.min.x + local_size_tl.width + top_edge_info.local_offset,
+            rect.min.y,
+            rect.min.x + local_size_tl.width + top_edge_info.local_offset + top_edge_info.local_size,
+            rect.min.y + non_overlapping_widths.top,
+        ),
+        &top_edge_info,
+        border.top,
+        non_overlapping_widths.top,
+        BorderSegment::Top,
+        EdgeAaSegmentMask::TOP | EdgeAaSegmentMask::BOTTOM,
+        brush_segments,
+        border_segments,
+        border.do_aa,
+    );
+    add_edge_segment(
+        LayoutRect::from_floats(
+            rect.min.x + rect.width() - non_overlapping_widths.right,
+            rect.min.y + local_size_tr.height + right_edge_info.local_offset,
+            rect.min.x + rect.width(),
+            rect.min.y + local_size_tr.height + right_edge_info.local_offset + right_edge_info.local_size,
+        ),
+        &right_edge_info,
+        border.right,
+        non_overlapping_widths.right,
+        BorderSegment::Right,
+        EdgeAaSegmentMask::RIGHT | EdgeAaSegmentMask::LEFT,
+        brush_segments,
+        border_segments,
+        border.do_aa,
+    );
+    add_edge_segment(
+        LayoutRect::from_floats(
+            rect.min.x + local_size_bl.width + bottom_edge_info.local_offset,
+            rect.min.y + rect.height() - non_overlapping_widths.bottom,
+            rect.min.x + local_size_bl.width + bottom_edge_info.local_offset + bottom_edge_info.local_size,
+            rect.min.y + rect.height(),
+        ),
+        &bottom_edge_info,
+        border.bottom,
+        non_overlapping_widths.bottom,
+        BorderSegment::Bottom,
+        EdgeAaSegmentMask::BOTTOM | EdgeAaSegmentMask::TOP,
+        brush_segments,
+        border_segments,
+        border.do_aa,
+    );
+    // Corner segments: top-left, top-right, bottom-right, bottom-left.
+    add_corner_segment(
+        LayoutRect::from_floats( // image_rect: the full corner box
+            rect.min.x,
+            rect.min.y,
+            rect.min.x + local_size_tl.width,
+            rect.min.y + local_size_tl.height,
+        ),
+        LayoutRect::from_floats( // non_overlapping_rect: box minus the opposite sides' widths
+            rect.min.x,
+            rect.min.y,
+            rect.max.x - non_overlapping_widths.right,
+            rect.max.y - non_overlapping_widths.bottom
+        ),
+        border.left,
+        border.top,
+        LayoutSize::new(widths.left, widths.top),
+        border.radius.top_left,
+        BorderSegment::TopLeft,
+        EdgeAaSegmentMask::TOP | EdgeAaSegmentMask::LEFT,
+        rect.top_right(), // horizontally adjacent corner (top-right): outer point
+        border.radius.top_right, // ... and its radius
+        rect.bottom_left(), // vertically adjacent corner (bottom-left): outer point
+        border.radius.bottom_left, // ... and its radius
+        brush_segments,
+        border_segments,
+        border.do_aa,
+    );
+    add_corner_segment(
+        LayoutRect::from_floats( // image_rect: the full corner box
+            rect.min.x + rect.width() - local_size_tr.width,
+            rect.min.y,
+            rect.min.x + rect.width(),
+            rect.min.y + local_size_tr.height,
+        ),
+        LayoutRect::from_floats( // non_overlapping_rect: box minus the opposite sides' widths
+            rect.min.x + non_overlapping_widths.left,
+            rect.min.y,
+            rect.max.x,
+            rect.max.y - non_overlapping_widths.bottom,
+        ),
+        border.top,
+        border.right,
+        LayoutSize::new(widths.right, widths.top),
+        border.radius.top_right,
+        BorderSegment::TopRight,
+        EdgeAaSegmentMask::TOP | EdgeAaSegmentMask::RIGHT,
+        rect.min, // horizontally adjacent corner (top-left): outer point
+        border.radius.top_left, // ... and its radius
+        rect.max, // vertically adjacent corner (bottom-right): outer point
+        border.radius.bottom_right, // ... and its radius
+        brush_segments,
+        border_segments,
+        border.do_aa,
+    );
+    add_corner_segment(
+        LayoutRect::from_floats( // image_rect: the full corner box
+            rect.min.x + rect.width() - local_size_br.width,
+            rect.min.y + rect.height() - local_size_br.height,
+            rect.min.x + rect.width(),
+            rect.min.y + rect.height(),
+        ),
+        LayoutRect::from_floats( // non_overlapping_rect: box minus the opposite sides' widths
+            rect.min.x + non_overlapping_widths.left,
+            rect.min.y + non_overlapping_widths.top,
+            rect.max.x,
+            rect.max.y,
+        ),
+        border.right,
+        border.bottom,
+        LayoutSize::new(widths.right, widths.bottom),
+        border.radius.bottom_right,
+        BorderSegment::BottomRight,
+        EdgeAaSegmentMask::BOTTOM | EdgeAaSegmentMask::RIGHT,
+        rect.bottom_left(), // horizontally adjacent corner (bottom-left): outer point
+        border.radius.bottom_left, // ... and its radius
+        rect.top_right(), // vertically adjacent corner (top-right): outer point
+        border.radius.top_right, // ... and its radius
+        brush_segments,
+        border_segments,
+        border.do_aa,
+    );
+    add_corner_segment(
+        LayoutRect::from_floats( // image_rect: the full corner box
+            rect.min.x,
+            rect.min.y + rect.height() - local_size_bl.height,
+            rect.min.x + local_size_bl.width,
+            rect.min.y + rect.height(),
+        ),
+        LayoutRect::from_floats( // non_overlapping_rect: box minus the opposite sides' widths
+            rect.min.x,
+            rect.min.y + non_overlapping_widths.top,
+            rect.max.x - non_overlapping_widths.right,
+            rect.max.y,
+        ),
+        border.bottom,
+        border.left,
+        LayoutSize::new(widths.left, widths.bottom),
+        border.radius.bottom_left,
+        BorderSegment::BottomLeft,
+        EdgeAaSegmentMask::BOTTOM | EdgeAaSegmentMask::LEFT,
+        rect.max, // horizontally adjacent corner (bottom-right): outer point
+        border.radius.bottom_right, // ... and its radius
+        rect.min, // vertically adjacent corner (top-left): outer point
+        border.radius.top_left, // ... and its radius
+        brush_segments,
+        border_segments,
+        border.do_aa,
+    );
+}
+
+/// Returns the largest layout-to-device scale permitted for this border.
+///
+/// The scale is `MAX_BORDER_RESOLUTION` divided by the longest side of any
+/// segment's local task size (the divisor starts at 1.0). Capping the
+/// scale means very large corners are rendered at a lower resolution and then
+/// stretched: the shape stays right, but the result is blurrier.
+pub fn get_max_scale_for_border(
+    border_data: &NormalBorderData,
+) -> LayoutToDeviceScale {
+    let longest_side = (&border_data.border_segments)
+        .into_iter()
+        .map(|info| info.local_task_size)
+        .fold(1.0, |longest, task| task.width.max(task.height.max(longest)));
+    LayoutToDeviceScale::new(MAX_BORDER_RESOLUTION as f32 / longest_side)
+}
+
+/// Appends the GPU instance(s) that draw one border segment into its render
+/// task.
+///
+/// `style0`/`color0` and `style1`/`color1` describe the two sides that meet
+/// in the segment. For a corner, `style0` selects the dashed or dotted
+/// corner writer; when neither applies (or the writer returns `Err`), one
+/// instance is pushed whose `clip_params` carry the outer point and radius of
+/// the horizontally and vertically adjacent corners. A straight edge pushes
+/// exactly one instance, with dash/dot clip data when `style0` asks for it.
+fn add_segment(
+    task_rect: DeviceRect,
+    style0: BorderStyle,
+    style1: BorderStyle,
+    color0: ColorF,
+    color1: ColorF,
+    segment: BorderSegment,
+    instances: &mut Vec<BorderInstance>,
+    widths: DeviceSize,
+    radius: DeviceSize,
+    do_aa: bool,
+    h_adjacent_corner_outer: DevicePoint,
+    h_adjacent_corner_radius: DeviceSize,
+    v_adjacent_corner_outer: DevicePoint,
+    v_adjacent_corner_radius: DeviceSize,
+) {
+    let base_flags = (segment as i32) |
+                     ((style0 as i32) << 8) |
+                     ((style1 as i32) << 16) |
+                     ((do_aa as i32) << 28);
+
+    let base_instance = BorderInstance {
+        task_origin: DevicePoint::zero(),
+        local_rect: task_rect,
+        flags: base_flags,
+        color0: color0.premultiplied(),
+        color1: color1.premultiplied(),
+        widths,
+        radius,
+        clip_params: [0.0; 8],
+    };
+
+    match segment {
+        BorderSegment::TopLeft |
+        BorderSegment::TopRight |
+        BorderSegment::BottomLeft |
+        BorderSegment::BottomRight => {
+            // TODO(gw): Similarly to the old border code, we don't correctly handle a corner
+            //           that is dashed on one edge, and dotted on another. We can handle this
+            //           in the future by submitting two instances, each one with one side
+            //           color set to have an alpha of 0.
+            if (style0 == BorderStyle::Dotted && style1 == BorderStyle::Dashed) ||
+               (style0 == BorderStyle::Dashed && style1 == BorderStyle::Dotted) {
+                warn!("TODO: Handle a corner with dotted / dashed transition.");
+            }
+
+            let dashed_or_dotted_corner = match style0 {
+                BorderStyle::Dashed => {
+                    write_dashed_corner_instances(
+                        radius,
+                        widths,
+                        segment,
+                        &base_instance,
+                        instances,
+                    )
+                }
+                BorderStyle::Dotted => {
+                    write_dotted_corner_instances(
+                        radius,
+                        widths,
+                        segment,
+                        &base_instance,
+                        instances,
+                    )
+                }
+                _ => Err(()),
+            };
+
+            // Plain corner, or no dashed/dotted instances could be written.
+            if dashed_or_dotted_corner.is_err() {
+                let clip_params = [
+                    h_adjacent_corner_outer.x,
+                    h_adjacent_corner_outer.y,
+                    h_adjacent_corner_radius.width,
+                    h_adjacent_corner_radius.height,
+                    v_adjacent_corner_outer.x,
+                    v_adjacent_corner_outer.y,
+                    v_adjacent_corner_radius.width,
+                    v_adjacent_corner_radius.height,
+                ];
+
+                instances.push(BorderInstance {
+                    clip_params,
+                    ..base_instance
+                });
+            }
+        }
+        BorderSegment::Top |
+        BorderSegment::Bottom |
+        BorderSegment::Right |
+        BorderSegment::Left => {
+            let is_vertical = segment == BorderSegment::Left ||
+                              segment == BorderSegment::Right;
+
+            match style0 {
+                BorderStyle::Dashed => {
+                    // A half-dash is a quarter of the task's length along the edge.
+                    let (x, y) = if is_vertical {
+                        (0., task_rect.height() * 0.25)
+                    } else {
+                        (task_rect.width() * 0.25, 0.)
+                    };
+
+                    instances.push(BorderInstance {
+                        flags: base_flags | ((BorderClipKind::DashEdge as i32) << 24),
+                        clip_params: [x, y, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
+                        ..base_instance
+                    });
+                }
+                BorderStyle::Dotted => {
+                    // Dot centre (x, y) and radius, all derived from the side width.
+                    let (x, y, r) = if is_vertical {
+                        (widths.width * 0.5, widths.width, widths.width * 0.5)
+                    } else {
+                        (widths.height, widths.height * 0.5, widths.height * 0.5)
+                    };
+
+                    instances.push(BorderInstance {
+                        flags: base_flags | ((BorderClipKind::Dot as i32) << 24),
+                        clip_params: [x, y, r, 0.0, 0.0, 0.0, 0.0, 0.0],
+                        ..base_instance
+                    });
+                }
+                _ => instances.push(base_instance),
+            }
+        }
+    }
+}
+
+/// Add a corner segment (if valid): pushes one `BrushSegment` and one
+/// `BorderSegmentInfo` (with its cache key), or nothing if the corner is skipped.
+fn add_corner_segment(
+    image_rect: LayoutRect,
+    non_overlapping_rect: LayoutRect,
+    side0: BorderSide,
+    side1: BorderSide,
+    widths: LayoutSize,
+    radius: LayoutSize,
+    segment: BorderSegment,
+    edge_flags: EdgeAaSegmentMask,
+    h_adjacent_corner_outer: LayoutPoint,
+    h_adjacent_corner_radius: LayoutSize,
+    v_adjacent_corner_outer: LayoutPoint,
+    v_adjacent_corner_radius: LayoutSize,
+    brush_segments: &mut Vec<BrushSegment>,
+    border_segments: &mut Vec<BorderSegmentInfo>,
+    do_aa: bool,
+) {
+    if side0.color.a <= 0.0 && side1.color.a <= 0.0 { // both sides are fully transparent
+        return;
+    }
+
+    if widths.width <= 0.0 && widths.height <= 0.0 { // both side widths are zero (or negative)
+        return;
+    }
+
+    if side0.style.is_hidden() && side1.style.is_hidden() { // both sides have a hidden style
+        return;
+    }
+    // Only the part of the corner inside `non_overlapping_rect` is drawn; skip the corner if there is none.
+    let segment_rect = match image_rect.intersection(&non_overlapping_rect) {
+        Some(rect) => rect,
+        None => {
+            return;
+        }
+    };
+    // The drawn part, in coordinates normalized to the full corner box (`image_rect`).
+    let texture_rect = segment_rect
+        .translate(-image_rect.min.to_vector())
+        .scale(1.0 / image_rect.width(), 1.0 / image_rect.height());
+
+    brush_segments.push(
+        BrushSegment::new(
+            segment_rect,
+            /* may_need_clip_mask = */ true,
+            edge_flags,
+            [texture_rect.min.x, texture_rect.min.y, texture_rect.max.x, texture_rect.max.y],
+            BrushFlags::SEGMENT_RELATIVE | BrushFlags::SEGMENT_TEXEL_RECT,
+        )
+    );
+
+    // If the radii of the adjacent corners do not overlap with this segment,
+    // then set the outer position to this segment's corner and the radii to zero.
+    // That way the cache key is unaffected by non-overlapping corners, resulting
+    // in fewer misses.
+    let (h_corner_outer, h_corner_radius) = match segment {
+        BorderSegment::TopLeft => {
+            if h_adjacent_corner_outer.x - h_adjacent_corner_radius.width < image_rect.max.x {
+                (h_adjacent_corner_outer, h_adjacent_corner_radius)
+            } else {
+                (LayoutPoint::new(image_rect.max.x, image_rect.min.y), LayoutSize::zero())
+            }
+        }
+        BorderSegment::TopRight => {
+            if h_adjacent_corner_outer.x + h_adjacent_corner_radius.width > image_rect.min.x {
+                (h_adjacent_corner_outer, h_adjacent_corner_radius)
+            } else {
+                (LayoutPoint::new(image_rect.min.x, image_rect.min.y), LayoutSize::zero())
+            }
+        }
+        BorderSegment::BottomRight => {
+            if h_adjacent_corner_outer.x + h_adjacent_corner_radius.width > image_rect.min.x {
+                (h_adjacent_corner_outer, h_adjacent_corner_radius)
+            } else {
+                (LayoutPoint::new(image_rect.min.x, image_rect.max.y), LayoutSize::zero())
+            }
+        }
+        BorderSegment::BottomLeft => {
+            if h_adjacent_corner_outer.x - h_adjacent_corner_radius.width < image_rect.max.x {
+                (h_adjacent_corner_outer, h_adjacent_corner_radius)
+            } else {
+                (image_rect.max, LayoutSize::zero())
+            }
+        }
+        _ => unreachable!()
+    };
+    // Same test along y for the vertically adjacent corner.
+    let (v_corner_outer, v_corner_radius) = match segment {
+        BorderSegment::TopLeft => {
+            if v_adjacent_corner_outer.y - v_adjacent_corner_radius.height < image_rect.max.y {
+                (v_adjacent_corner_outer, v_adjacent_corner_radius)
+            } else {
+                (LayoutPoint::new(image_rect.min.x, image_rect.max.y), LayoutSize::zero())
+            }
+        }
+        BorderSegment::TopRight => {
+            if v_adjacent_corner_outer.y - v_adjacent_corner_radius.height < image_rect.max.y {
+                (v_adjacent_corner_outer, v_adjacent_corner_radius)
+            } else {
+                (image_rect.max, LayoutSize::zero())
+            }
+        }
+        BorderSegment::BottomRight => {
+            if v_adjacent_corner_outer.y + v_adjacent_corner_radius.height > image_rect.min.y {
+                (v_adjacent_corner_outer, v_adjacent_corner_radius)
+            } else {
+                (LayoutPoint::new(image_rect.max.x, image_rect.min.y), LayoutSize::zero())
+            }
+        }
+        BorderSegment::BottomLeft => {
+            if v_adjacent_corner_outer.y + v_adjacent_corner_radius.height > image_rect.min.y {
+                (v_adjacent_corner_outer, v_adjacent_corner_radius)
+            } else {
+                (LayoutPoint::new(image_rect.min.x, image_rect.min.y), LayoutSize::zero())
+            }
+        }
+        _ => unreachable!()
+    };
+    // The adjacent corner outer points are stored relative to this corner's origin (`image_rect.min`).
+    border_segments.push(BorderSegmentInfo {
+        local_task_size: image_rect.size(),
+        cache_key: BorderSegmentCacheKey {
+            do_aa,
+            side0: side0.into(),
+            side1: side1.into(),
+            segment,
+            radius: radius.to_au(),
+            size: widths.to_au(),
+            h_adjacent_corner_outer: (h_corner_outer - image_rect.min).to_point().to_au(),
+            h_adjacent_corner_radius: h_corner_radius.to_au(),
+            v_adjacent_corner_outer: (v_corner_outer - image_rect.min).to_point().to_au(),
+            v_adjacent_corner_radius: v_corner_radius.to_au(),
+        },
+    });
+}
+
+/// Registers a straight border edge, unless there is nothing to draw.
+///
+/// The edge is dropped when its side is fully transparent, has a hidden
+/// style, or when `image_rect` has no area. Otherwise a repeating brush
+/// segment covering `image_rect` is pushed, together with a
+/// `BorderSegmentInfo` whose task is one pattern repeat
+/// (`edge_info.stretch_size`) long and `width` thick.
+fn add_edge_segment(
+    image_rect: LayoutRect,
+    edge_info: &EdgeInfo,
+    side: BorderSide,
+    width: f32,
+    segment: BorderSegment,
+    edge_flags: EdgeAaSegmentMask,
+    brush_segments: &mut Vec<BrushSegment>,
+    border_segments: &mut Vec<BorderSegmentInfo>,
+    do_aa: bool,
+) {
+    if side.color.a <= 0.0 || side.style.is_hidden() {
+        return;
+    }
+
+    // Vertical edges repeat along y, horizontal ones along x.
+    let stretch = edge_info.stretch_size;
+    let (size, repeat_flag) = match segment {
+        BorderSegment::Left | BorderSegment::Right =>
+            (LayoutSize::new(width, stretch), BrushFlags::SEGMENT_REPEAT_Y),
+        BorderSegment::Top | BorderSegment::Bottom =>
+            (LayoutSize::new(stretch, width), BrushFlags::SEGMENT_REPEAT_X),
+        _ => unreachable!(),
+    };
+
+    if image_rect.width() <= 0. || image_rect.height() <= 0. {
+        return;
+    }
+
+    let may_need_clip_mask = true;
+    let texel_rect = [0.0, 0.0, size.width, size.height];
+    brush_segments.push(BrushSegment::new(
+        image_rect,
+        may_need_clip_mask,
+        edge_flags,
+        texel_rect,
+        BrushFlags::SEGMENT_RELATIVE | repeat_flag,
+    ));
+
+    // A straight edge has no radius and no adjacent-corner data, so those
+    // parts of the cache key are zeroed.
+    let cache_key = BorderSegmentCacheKey {
+        do_aa,
+        side0: side.into(),
+        side1: side.into(),
+        radius: LayoutSizeAu::zero(),
+        size: size.to_au(),
+        segment,
+        h_adjacent_corner_outer: LayoutPointAu::zero(),
+        h_adjacent_corner_radius: LayoutSizeAu::zero(),
+        v_adjacent_corner_outer: LayoutPointAu::zero(),
+        v_adjacent_corner_radius: LayoutSizeAu::zero(),
+    };
+
+    border_segments.push(BorderSegmentInfo { local_task_size: size, cache_key });
+}
+
+/// Produces the GPU instances that draw the segment described by
+/// `cache_key` into a render task of `cache_size` device pixels.
+///
+/// Styles and colors come from `border`; the sizes and positions stored in
+/// the key are converted from their `Au` form and scaled by `scale`.
+pub fn build_border_instances(
+    cache_key: &BorderSegmentCacheKey,
+    cache_size: DeviceIntSize,
+    border: &ApiNormalBorder,
+    scale: LayoutToDeviceScale,
+) -> Vec<BorderInstance> {
+    // The two sides that meet in the segment, plus the flag handed to
+    // `border_color` for each of them.
+    let (side0, side1, flip0, flip1) = match cache_key.segment {
+        BorderSegment::TopLeft => (&border.left, &border.top, false, false),
+        BorderSegment::TopRight => (&border.top, &border.right, false, true),
+        BorderSegment::BottomRight => (&border.right, &border.bottom, true, true),
+        BorderSegment::BottomLeft => (&border.bottom, &border.left, true, false),
+        BorderSegment::Left => (&border.left, &border.left, false, false),
+        BorderSegment::Top => (&border.top, &border.top, false, false),
+        BorderSegment::Right => (&border.right, &border.right, true, true),
+        BorderSegment::Bottom => (&border.bottom, &border.bottom, true, true),
+    };
+
+    // A hidden side takes over the style of the side it meets.
+    let pick_style = |own: BorderStyle, other: BorderStyle| {
+        if own.is_hidden() { other } else { own }
+    };
+    let style0 = pick_style(side0.style, side1.style);
+    let style1 = pick_style(side1.style, side0.style);
+
+    let color0 = side0.border_color(flip0);
+    let color1 = side1.border_color(flip1);
+
+    // Once scaled to device space, sizes are rounded up and positions are
+    // rounded to the nearest whole value.
+    let widths = (LayoutSize::from_au(cache_key.size) * scale).ceil();
+    let radius = (LayoutSize::from_au(cache_key.radius) * scale).ceil();
+    let h_outer = (LayoutPoint::from_au(cache_key.h_adjacent_corner_outer) * scale).round();
+    let h_radius = (LayoutSize::from_au(cache_key.h_adjacent_corner_radius) * scale).ceil();
+    let v_outer = (LayoutPoint::from_au(cache_key.v_adjacent_corner_outer) * scale).round();
+    let v_radius = (LayoutSize::from_au(cache_key.v_adjacent_corner_radius) * scale).ceil();
+
+    let mut instances = Vec::new();
+    add_segment(
+        DeviceRect::from_size(cache_size.to_f32()),
+        style0,
+        style1,
+        color0,
+        color1,
+        cache_key.segment,
+        &mut instances,
+        widths,
+        radius,
+        border.do_aa,
+        h_outer,
+        h_radius,
+        v_outer,
+        v_radius,
+    );
+    instances
+}
+
+impl NinePatchDescriptor {
+    pub fn create_segments(
+        &self,
+        size: LayoutSize,
+    ) -> Vec<BrushSegment> {
+        let rect = LayoutRect::from_size(size);
+        // Returns the brush segments of the nine-patch: corners, centre (only if `fill`), then edges.
+        // Calculate the modified rect as specified by border-image-outset
+        let origin = LayoutPoint::new(
+            rect.min.x - self.outset.left,
+            rect.min.y - self.outset.top,
+        );
+        let size = LayoutSize::new(
+            rect.width() + self.outset.left + self.outset.right,
+            rect.height() + self.outset.top + self.outset.bottom,
+        );
+        let rect = LayoutRect::from_origin_and_size(origin, size);
+
+        // Calculate the local texel coords of the slices, normalized by `self.width` / `self.height`.
+        let px0 = 0.0;
+        let px1 = self.slice.left as f32 / self.width as f32;
+        let px2 = (self.width as f32 - self.slice.right as f32) / self.width as f32;
+        let px3 = 1.0;
+
+        let py0 = 0.0;
+        let py1 = self.slice.top as f32 / self.height as f32;
+        let py2 = (self.height as f32 - self.slice.bottom as f32) / self.height as f32;
+        let py3 = 1.0;
+        // Outer points are the corners of the rect; inner points are inset by the border widths.
+        let tl_outer = LayoutPoint::new(rect.min.x, rect.min.y);
+        let tl_inner = tl_outer + vec2(self.widths.left, self.widths.top);
+
+        let tr_outer = LayoutPoint::new(rect.min.x + rect.width(), rect.min.y);
+        let tr_inner = tr_outer + vec2(-self.widths.right, self.widths.top);
+
+        let bl_outer = LayoutPoint::new(rect.min.x, rect.min.y + rect.height());
+        let bl_inner = bl_outer + vec2(self.widths.left, -self.widths.bottom);
+
+        let br_outer = rect.max;
+
+        let br_inner = br_outer - vec2(self.widths.right, self.widths.bottom);
+        // Appends one segment to `segments`, unless its uv rect is empty or inverted.
+        fn add_segment(
+            segments: &mut Vec<BrushSegment>,
+            rect: LayoutRect,
+            uv_rect: TexelRect,
+            repeat_horizontal: RepeatMode,
+            repeat_vertical: RepeatMode,
+            extra_flags: BrushFlags,
+        ) {
+            if uv_rect.uv1.x <= uv_rect.uv0.x || uv_rect.uv1.y <= uv_rect.uv0.y {
+                return;
+            }
+
+            // Use segment relative interpolation for all
+            // instances in this primitive.
+            let mut brush_flags =
+                BrushFlags::SEGMENT_RELATIVE |
+                BrushFlags::SEGMENT_TEXEL_RECT |
+                extra_flags;
+
+            // Enable repeat modes on the segment.
+            if repeat_horizontal == RepeatMode::Repeat {
+                brush_flags |= BrushFlags::SEGMENT_REPEAT_X;
+            } else if repeat_horizontal == RepeatMode::Round {
+                brush_flags |= BrushFlags::SEGMENT_REPEAT_X | BrushFlags::SEGMENT_REPEAT_X_ROUND;
+            }
+
+            if repeat_vertical == RepeatMode::Repeat {
+                brush_flags |= BrushFlags::SEGMENT_REPEAT_Y;
+            } else if repeat_vertical == RepeatMode::Round {
+                brush_flags |= BrushFlags::SEGMENT_REPEAT_Y | BrushFlags::SEGMENT_REPEAT_Y_ROUND;
+            }
+
+            let segment = BrushSegment::new(
+                rect,
+                true, // may_need_clip_mask
+                EdgeAaSegmentMask::empty(),
+                [
+                    uv_rect.uv0.x,
+                    uv_rect.uv0.y,
+                    uv_rect.uv1.x,
+                    uv_rect.uv1.y,
+                ],
+                brush_flags,
+            );
+
+            segments.push(segment);
+        }
+
+        // Build the list of image segments
+        let mut segments = Vec::new();
+
+        // Top left
+        add_segment(
+            &mut segments,
+            LayoutRect::from_floats(tl_outer.x, tl_outer.y, tl_inner.x, tl_inner.y),
+            TexelRect::new(px0, py0, px1, py1),
+            RepeatMode::Stretch,
+            RepeatMode::Stretch,
+            BrushFlags::empty(),
+        );
+        // Top right
+        add_segment(
+            &mut segments,
+            LayoutRect::from_floats(tr_inner.x, tr_outer.y, tr_outer.x, tr_inner.y),
+            TexelRect::new(px2, py0, px3, py1),
+            RepeatMode::Stretch,
+            RepeatMode::Stretch,
+            BrushFlags::empty(),
+        );
+        // Bottom right
+        add_segment(
+            &mut segments,
+            LayoutRect::from_floats(br_inner.x, br_inner.y, br_outer.x, br_outer.y),
+            TexelRect::new(px2, py2, px3, py3),
+            RepeatMode::Stretch,
+            RepeatMode::Stretch,
+            BrushFlags::empty(),
+        );
+        // Bottom left
+        add_segment(
+            &mut segments,
+            LayoutRect::from_floats(bl_outer.x, bl_inner.y, bl_inner.x, bl_outer.y),
+            TexelRect::new(px0, py2, px1, py3),
+            RepeatMode::Stretch,
+            RepeatMode::Stretch,
+            BrushFlags::empty(),
+        );
+
+        // Center
+        if self.fill {
+            add_segment(
+                &mut segments,
+                LayoutRect::from_floats(tl_inner.x, tl_inner.y, tr_inner.x, bl_inner.y),
+                TexelRect::new(px1, py1, px2, py2),
+                self.repeat_horizontal,
+                self.repeat_vertical,
+                BrushFlags::SEGMENT_NINEPATCH_MIDDLE,
+            );
+        }
+
+        // Add edge segments.
+
+        // Top
+        add_segment(
+            &mut segments,
+            LayoutRect::from_floats(tl_inner.x, tl_outer.y, tr_inner.x, tl_inner.y),
+            TexelRect::new(px1, py0, px2, py1),
+            self.repeat_horizontal,
+            RepeatMode::Stretch,
+            BrushFlags::empty(),
+        );
+        // Bottom
+        add_segment(
+            &mut segments,
+            LayoutRect::from_floats(bl_inner.x, bl_inner.y, br_inner.x, bl_outer.y),
+            TexelRect::new(px1, py2, px2, py3),
+            self.repeat_horizontal,
+            RepeatMode::Stretch,
+            BrushFlags::empty(),
+        );
+        // Left
+        add_segment(
+            &mut segments,
+            LayoutRect::from_floats(tl_outer.x, tl_inner.y, tl_inner.x, bl_inner.y),
+            TexelRect::new(px0, py1, px1, py2),
+            RepeatMode::Stretch,
+            self.repeat_vertical,
+            BrushFlags::empty(),
+        );
+        // Right
+        add_segment(
+            &mut segments,
+            LayoutRect::from_floats(tr_inner.x, tr_inner.y, br_outer.x, br_inner.y),
+            TexelRect::new(px2, py1, px3, py2),
+            RepeatMode::Stretch,
+            self.repeat_vertical,
+            BrushFlags::empty(),
+        );
+
+        segments
+    }
+}
diff --git a/gfx/wr/webrender/src/box_shadow.rs b/gfx/wr/webrender/src/box_shadow.rs
new file mode 100644
index 0000000000..b24363addb
--- /dev/null
+++ b/gfx/wr/webrender/src/box_shadow.rs
@@ -0,0 +1,280 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{BorderRadius, BoxShadowClipMode, ClipMode, ColorF, PrimitiveKeyKind};
+use api::PropertyBinding;
+use api::units::*;
+use crate::clip::{ClipItemKey, ClipItemKeyKind, ClipNodeId};
+use crate::scene_building::SceneBuilder;
+use crate::spatial_tree::SpatialNodeIndex;
+use crate::gpu_types::BoxShadowStretchMode;
+use crate::render_task_graph::RenderTaskId;
+use crate::internal_types::LayoutPrimitiveInfo;
+
+/// State for a box-shadow clip: the fixed parameters that describe the
+/// shadow, cache / render-task bookkeeping, and the local-space sizes and
+/// rects used to draw and stretch it.
+#[derive(Debug, Clone, MallocSizeOf)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct BoxShadowClipSource {
+    // Parameters that define the shadow and are constant.
+    pub shadow_radius: BorderRadius,
+    pub blur_radius: f32,
+    pub clip_mode: BoxShadowClipMode,
+    // Stretch mode is tracked independently for each axis.
+    pub stretch_mode_x: BoxShadowStretchMode,
+    pub stretch_mode_y: BoxShadowStretchMode,
+
+    // The current cache key (in device-pixels), and handles
+    // to the cached clip region and blurred texture.
+    // NOTE(review): both are `Option`s; presumably `None` until the shadow
+    // has been prepared for rendering -- confirm against the clip code.
+    pub cache_key: Option<(DeviceIntSize, BoxShadowCacheKey)>,
+    pub render_task: Option<RenderTaskId>,
+
+    // Local-space size of the required render task size.
+    pub shadow_rect_alloc_size: LayoutSize,
+
+    // Local-space size of the required render task size without any downscaling
+    // applied. This is needed to stretch the shadow properly.
+    pub original_alloc_size: LayoutSize,
+
+    // The minimal shadow rect for the parameters above,
+    // used when drawing the shadow rect to be blurred.
+    pub minimal_shadow_rect: LayoutRect,
+
+    // Local space rect for the shadow to be drawn or
+    // stretched in the shadow primitive.
+    pub prim_shadow_rect: LayoutRect,
+}
+
+/// The blur shader samples `BLUR_SAMPLE_SCALE * blur_radius` surrounding texels.
+///
+/// `SceneBuilder::add_box_shadow` inflates the shadow rect by the ceiling of
+/// that product to make room for the blurred region.
+pub const BLUR_SAMPLE_SCALE: f32 = 3.0;
+
+/// Maximum blur radius for box-shadows (different than blur filters).
+/// Taken from nsCSSRendering.cpp in Gecko.
+///
+/// `SceneBuilder::add_box_shadow` clamps larger blur radii down to this value.
+pub const MAX_BLUR_RADIUS: f32 = 300.;
+
+/// A cache key that uniquely identifies a minimally sized
+/// and blurred box-shadow rect that can be stored in the
+/// texture cache and applied to clip-masks.
+///
+/// Every field is an integer-based type so the key can derive `Eq` and `Hash`.
+#[derive(Debug, Clone, Eq, Hash, MallocSizeOf, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct BoxShadowCacheKey {
+    // NOTE(review): the `_dp` suffix presumably means device pixels -- confirm.
+    pub blur_radius_dp: i32,
+    pub clip_mode: BoxShadowClipMode,
+    // NOTE(emilio): Only the original allocation size needs to be in the cache
+    // key, since the actual size is derived from that.
+    pub original_alloc_size: DeviceIntSize,
+    // NOTE(review): presumably the four corner radii of the shadow, in
+    // clockwise order from the top-left corner -- confirm against the
+    // code that builds the key.
+    pub br_top_left: DeviceIntSize,
+    pub br_top_right: DeviceIntSize,
+    pub br_bottom_right: DeviceIntSize,
+    pub br_bottom_left: DeviceIntSize,
+    pub device_pixel_scale: Au,
+}
+
+impl<'a> SceneBuilder<'a> {
+    /// Adds the primitive and clip items that render a box shadow for the
+    /// primitive described by `prim_info`.
+    ///
+    /// The shadow itself is always a solid `color` rectangle; its visible
+    /// shape comes from the clips attached to it:
+    ///
+    /// * with a zero blur radius, only rounded-rect clips are used;
+    /// * otherwise a box-shadow clip item is added (when the shadow rect is
+    ///   not empty) alongside a rounded-rect clip of the original primitive.
+    ///
+    /// `blur_radius` is clamped to [`MAX_BLUR_RADIUS`]. The shadow rect is
+    /// `prim_info.rect` translated by `box_offset` and then inflated by
+    /// `spread_radius` for outset shadows or deflated by it for inset ones.
+    ///
+    /// Nothing is added when the shadow cannot be visible: a fully
+    /// transparent color, an unblurred shadow with neither offset nor spread,
+    /// an outset shadow with an empty rect, or a blurred inset shadow on a
+    /// square-cornered primitive whose blur-deflated shadow rect still
+    /// contains the whole primitive rect.
+    pub fn add_box_shadow(
+        &mut self,
+        spatial_node_index: SpatialNodeIndex,
+        clip_node_id: ClipNodeId,
+        prim_info: &LayoutPrimitiveInfo,
+        box_offset: &LayoutVector2D,
+        color: ColorF,
+        mut blur_radius: f32,
+        spread_radius: f32,
+        border_radius: BorderRadius,
+        clip_mode: BoxShadowClipMode,
+    ) {
+        // A fully transparent shadow contributes nothing.
+        if color.a == 0.0 {
+            return;
+        }
+
+        // The spread grows an outset shadow rect and shrinks an inset one.
+        // The mode also selects how the original primitive rect clips a
+        // blurred shadow.
+        let (spread_amount, prim_clip_mode) = match clip_mode {
+            BoxShadowClipMode::Outset => (spread_radius, ClipMode::ClipOut),
+            BoxShadowClipMode::Inset => (-spread_radius, ClipMode::Clip),
+        };
+
+        // Keep the blur radius within a sensible bound.
+        blur_radius = blur_radius.min(MAX_BLUR_RADIUS);
+
+        // Corner radii of the shadow shape, adjusted for the spread per the
+        // CSS spec.
+        let shadow_radius = adjust_border_radius_for_box_shadow(border_radius, spread_amount);
+
+        // Place the shadow rect in the primitive's local space, then snap it.
+        let unsnapped_shadow_rect = prim_info
+            .rect
+            .translate(*box_offset)
+            .inflate(spread_amount, spread_amount);
+        let shadow_rect = self.snap_rect(&unsnapped_shadow_rect, spatial_node_index);
+
+        // Every rounded-rect clip built here lives on the same spatial node.
+        let rounded_clip = |rect: LayoutRect, radius: BorderRadius, mode: ClipMode| ClipItemKey {
+            kind: ClipItemKeyKind::rounded_rect(rect, radius, mode),
+            spatial_node_index,
+        };
+
+        // Fast path: with no blur the shadow is just a clipped solid rect.
+        if blur_radius == 0.0 {
+            // Without offset or spread the shadow is not visible at all.
+            if box_offset.x == 0.0 && box_offset.y == 0.0 && spread_amount == 0.0 {
+                return;
+            }
+
+            let mut clips = Vec::with_capacity(2);
+            let (final_prim_rect, clip_radius) = match clip_mode {
+                BoxShadowClipMode::Outset => {
+                    if shadow_rect.is_empty() {
+                        return;
+                    }
+
+                    // TODO(gw): Add a fast path for ClipOut + zero border radius!
+                    clips.push(rounded_clip(prim_info.rect, border_radius, ClipMode::ClipOut));
+
+                    (shadow_rect, shadow_radius)
+                }
+                BoxShadowClipMode::Inset => {
+                    // An empty inner rect leaves nothing to cut out of the
+                    // primitive.
+                    if !shadow_rect.is_empty() {
+                        clips.push(rounded_clip(shadow_rect, shadow_radius, ClipMode::ClipOut));
+                    }
+
+                    (prim_info.rect, border_radius)
+                }
+            };
+
+            // Finally, clip to the rounded shape of the drawn rect itself.
+            clips.push(rounded_clip(final_prim_rect, clip_radius, ClipMode::Clip));
+
+            self.add_primitive(
+                spatial_node_index,
+                clip_node_id,
+                &LayoutPrimitiveInfo::with_clip_rect(final_prim_rect, prim_info.clip_rect),
+                clips,
+                PrimitiveKeyKind::Rectangle {
+                    color: PropertyBinding::Value(color.into()),
+                },
+            );
+            return;
+        }
+
+        // Normal path for box-shadows with a valid blur radius.
+        let blur_offset = (BLUR_SAMPLE_SCALE * blur_radius).ceil();
+
+        // Start with a normal clip mask that clips out (or to) the contents
+        // of the surrounding primitive.
+        let mut extra_clips = vec![rounded_clip(prim_info.rect, border_radius, prim_clip_mode)];
+
+        // Local rect of where the shadow will be drawn, expanded to include
+        // room for the blurred region.
+        let dest_rect = shadow_rect.inflate(blur_offset, blur_offset);
+
+        // The box-shadow is drawn as a solid rect, shaped by a box-shadow
+        // clip mask item.
+        let prim = PrimitiveKeyKind::Rectangle {
+            color: PropertyBinding::Value(color.into()),
+        };
+        let shadow_clip_source = ClipItemKey {
+            kind: ClipItemKeyKind::box_shadow(
+                shadow_rect,
+                shadow_radius,
+                dest_rect,
+                blur_radius,
+                clip_mode,
+            ),
+            spatial_node_index,
+        };
+
+        let shadow_prim_info = match clip_mode {
+            BoxShadowClipMode::Outset => {
+                // Certain spread-radii make the shadow invalid.
+                if shadow_rect.is_empty() {
+                    return;
+                }
+
+                extra_clips.push(shadow_clip_source);
+
+                // Outset shadows are expanded by the shadow region from the
+                // original primitive.
+                LayoutPrimitiveInfo::with_clip_rect(dest_rect, prim_info.clip_rect)
+            }
+            BoxShadowClipMode::Inset => {
+                // If the inner shadow rect contains the prim rect, no pixels
+                // will be shadowed.
+                let deflated_shadow_rect = shadow_rect.inflate(-blur_radius, -blur_radius);
+                if border_radius.is_zero() && deflated_shadow_rect.contains_box(&prim_info.rect) {
+                    return;
+                }
+
+                // Inset shadows are still visible even if the inset shadow
+                // rect becomes invalid (they just look like a solid rect).
+                if !shadow_rect.is_empty() {
+                    extra_clips.push(shadow_clip_source);
+                }
+
+                // Inset shadows draw inside the original primitive.
+                prim_info.clone()
+            }
+        };
+
+        self.add_primitive(
+            spatial_node_index,
+            clip_node_id,
+            &shadow_prim_info,
+            extra_clips,
+            prim,
+        );
+    }
+}
+
+/// Applies the box-shadow spread to each of the four corners of `radius`.
+fn adjust_border_radius_for_box_shadow(radius: BorderRadius, spread_amount: f32) -> BorderRadius {
+    let spread_corner = |corner| adjust_corner_for_box_shadow(corner, spread_amount);
+
+    BorderRadius {
+        top_left: spread_corner(radius.top_left),
+        top_right: spread_corner(radius.top_right),
+        bottom_right: spread_corner(radius.bottom_right),
+        bottom_left: spread_corner(radius.bottom_left),
+    }
+}
+
+/// Applies the box-shadow spread to the width and height of a single corner
+/// radius, each adjusted independently.
+fn adjust_corner_for_box_shadow(corner: LayoutSize, spread_amount: f32) -> LayoutSize {
+    let width = adjust_radius_for_box_shadow(corner.width, spread_amount);
+    let height = adjust_radius_for_box_shadow(corner.height, spread_amount);
+
+    LayoutSize::new(width, height)
+}
+
+/// Applies the box-shadow spread to a single radius component.
+///
+/// A radius that is not strictly positive stays at zero regardless of the
+/// spread; otherwise the spread is added and the result is clamped so that
+/// it never goes below zero.
+fn adjust_radius_for_box_shadow(border_radius: f32, spread_amount: f32) -> f32 {
+    let is_rounded = border_radius > 0.0;
+    if !is_rounded {
+        return 0.0;
+    }
+
+    (border_radius + spread_amount).max(0.0)
+}
diff --git a/gfx/wr/webrender/src/capture.rs b/gfx/wr/webrender/src/capture.rs
new file mode 100644
index 0000000000..5cc1f90bab
--- /dev/null
+++ b/gfx/wr/webrender/src/capture.rs
@@ -0,0 +1,290 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use std::fs::File;
+use std::path::{Path, PathBuf};
+
+use api::{ExternalImageData, ImageDescriptor};
+#[cfg(feature = "png")]
+use api::ImageFormat;
+use api::units::TexelRect;
+#[cfg(feature = "png")]
+use api::units::DeviceIntSize;
+#[cfg(feature = "capture")]
+use crate::print_tree::{PrintableTree, PrintTree};
+use crate::render_api::CaptureBits;
+use ron;
+use serde;
+
+
+/// Settings for a capture (or replay) session: the directory captures live
+/// in, what to capture, and the sequence ids that select the per-scene,
+/// per-frame and per-resource sub-directories.
+#[derive(Clone)]
+pub struct CaptureConfig {
+    /// Base directory; all scene/frame/resource paths are joined onto it.
+    pub root: PathBuf,
+    /// Capture flags.
+    // NOTE(review): presumably selects which parts of the state are captured
+    // -- confirm against `CaptureBits`.
+    pub bits: CaptureBits,
+    /// Scene sequence ID when capturing multiple frames. Zero for a single frame capture.
+    pub scene_id: u32,
+    /// Frame sequence ID when capturing multiple frames. Zero for a single frame capture.
+    pub frame_id: u32,
+    /// Resource sequence ID when capturing multiple frames. Zero for a single frame capture.
+    pub resource_id: u32,
+    /// RON pretty-printer settings used when serializing captured data.
+    #[cfg(feature = "capture")]
+    pretty: ron::ser::PrettyConfig,
+}
+
+impl CaptureConfig {
+    /// Creates a configuration rooted at `root`.
+    ///
+    /// All sequence ids start at zero, so until one of them is advanced
+    /// every `*_root()` accessor returns `root` itself.
+    #[cfg(any(feature = "capture", feature = "replay"))]
+    pub fn new(root: PathBuf, bits: CaptureBits) -> Self {
+        CaptureConfig {
+            root,
+            bits,
+            scene_id: 0,
+            frame_id: 0,
+            resource_id: 0,
+            #[cfg(feature = "capture")]
+            pretty: ron::ser::PrettyConfig::new()
+                .enumerate_arrays(true)
+                .indentor(" ".to_string()),
+        }
+    }
+
+    /// Advances to the next scene id and creates that scene's directory.
+    ///
+    /// A failure to create the directory is ignored here.
+    #[cfg(feature = "capture")]
+    pub fn prepare_scene(&mut self) {
+        use std::fs::create_dir_all;
+        self.scene_id += 1;
+        let _ = create_dir_all(self.scene_root());
+    }
+
+    /// Advances to the next frame id and creates that frame's directory.
+    ///
+    /// A failure to create the directory is ignored here.
+    #[cfg(feature = "capture")]
+    pub fn prepare_frame(&mut self) {
+        use std::fs::create_dir_all;
+        self.frame_id += 1;
+        let _ = create_dir_all(self.frame_root());
+    }
+
+    /// Advances to the next resource id and creates that resource directory.
+    ///
+    /// A failure to create the directory is ignored here.
+    #[cfg(feature = "capture")]
+    pub fn prepare_resource(&mut self) {
+        use std::fs::create_dir_all;
+        self.resource_id += 1;
+        let _ = create_dir_all(self.resource_root());
+    }
+
+    /// Directory for the current scene: `<root>/scenes/NNNNN` (zero-padded
+    /// to five digits), or `root` itself while `scene_id` is zero.
+    #[cfg(any(feature = "capture", feature = "replay"))]
+    pub fn scene_root(&self) -> PathBuf {
+        if self.scene_id > 0 {
+            let path = format!("scenes/{:05}", self.scene_id);
+            self.root.join(path)
+        } else {
+            self.root.clone()
+        }
+    }
+
+    /// Directory for the current frame: `<scene_root>/frames/NNNNN`.
+    ///
+    /// While `frame_id` is zero this returns `root` (not `scene_root()`).
+    #[cfg(any(feature = "capture", feature = "replay"))]
+    pub fn frame_root(&self) -> PathBuf {
+        if self.frame_id > 0 {
+            let path = format!("frames/{:05}", self.frame_id);
+            self.scene_root().join(path)
+        } else {
+            self.root.clone()
+        }
+    }
+
+    /// Directory for the current resources: `<root>/resources/NNNNN`, or
+    /// `root` itself while `resource_id` is zero.
+    #[cfg(any(feature = "capture", feature = "replay"))]
+    pub fn resource_root(&self) -> PathBuf {
+        if self.resource_id > 0 {
+            let path = format!("resources/{:05}", self.resource_id);
+            self.root.join(path)
+        } else {
+            self.root.clone()
+        }
+    }
+
+    /// Serializes `data` to `<scene_root>/<name>.ron`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if serialization, file creation or writing fails.
+    #[cfg(feature = "capture")]
+    pub fn serialize_for_scene<T, P>(&self, data: &T, name: P)
+    where
+        T: serde::Serialize,
+        P: AsRef<Path>,
+    {
+        self.serialize(data, self.scene_root(), name)
+    }
+
+    /// Serializes `data` to `<frame_root>/<name>.ron`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if serialization, file creation or writing fails.
+    #[cfg(feature = "capture")]
+    pub fn serialize_for_frame<T, P>(&self, data: &T, name: P)
+    where
+        T: serde::Serialize,
+        P: AsRef<Path>,
+    {
+        self.serialize(data, self.frame_root(), name)
+    }
+
+    /// Serializes `data` to `<resource_root>/<name>.ron`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if serialization, file creation or writing fails.
+    #[cfg(feature = "capture")]
+    pub fn serialize_for_resource<T, P>(&self, data: &T, name: P)
+    where
+        T: serde::Serialize,
+        P: AsRef<Path>,
+    {
+        self.serialize(data, self.resource_root(), name)
+    }
+
+    /// Path of `name` inside the current frame directory, with its extension
+    /// set to `ext`. Nothing is created on disk.
+    #[cfg(feature = "capture")]
+    pub fn file_path_for_frame<P>(&self, name: P, ext: &str) -> PathBuf
+    where P: AsRef<Path> {
+        self.frame_root().join(name).with_extension(ext)
+    }
+
+    /// Pretty-prints `data` as RON into `<path>/<name>.ron`, followed by a
+    /// trailing newline. Any extension already on `name` is replaced.
+    ///
+    /// # Panics
+    ///
+    /// Panics if serialization, file creation or writing fails.
+    #[cfg(feature = "capture")]
+    fn serialize<T, P>(&self, data: &T, path: PathBuf, name: P)
+    where
+        T: serde::Serialize,
+        P: AsRef<Path>,
+    {
+        use std::io::Write;
+        let ron = ron::ser::to_string_pretty(data, self.pretty.clone())
+            .unwrap();
+        let mut file = File::create(path.join(name).with_extension("ron"))
+            .unwrap();
+        writeln!(file, "{}", ron)
+            .unwrap();
+    }
+
+    /// Prints `data` as a tree into `<root>/<name>.tree`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the output file cannot be created.
+    #[cfg(feature = "capture")]
+    fn serialize_tree<T, P>(data: &T, root: PathBuf, name: P)
+    where
+        T: PrintableTree,
+        P: AsRef<Path>
+    {
+        let path = root
+            .join(name)
+            .with_extension("tree");
+        let file = File::create(path)
+            .unwrap();
+        let mut pt = PrintTree::new_with_sink("", file);
+        data.print_with(&mut pt);
+    }
+
+    /// Prints `data` as a tree into `<frame_root>/<name>.tree`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the output file cannot be created.
+    #[cfg(feature = "capture")]
+    pub fn serialize_tree_for_frame<T, P>(&self, data: &T, name: P)
+    where
+        T: PrintableTree,
+        P: AsRef<Path>
+    {
+        Self::serialize_tree(data, self.frame_root(), name)
+    }
+
+    /// Reads `<root>/<name>.ron` and deserializes it from RON.
+    ///
+    /// Returns `None` when the file cannot be opened, which lets callers
+    /// treat a missing file as "nothing captured".
+    ///
+    /// # Panics
+    ///
+    /// Panics if the file opens but cannot be read as UTF-8 text, or if its
+    /// contents fail to deserialize as `T`.
+    #[cfg(feature = "replay")]
+    fn deserialize<T, P>(root: &Path, name: P) -> Option<T>
+    where
+        T: for<'a> serde::Deserialize<'a>,
+        P: AsRef<Path>,
+    {
+        use std::io::Read;
+
+        let mut string = String::new();
+        let path = root
+            .join(name.as_ref())
+            .with_extension("ron");
+        File::open(path)
+            .ok()?
+            .read_to_string(&mut string)
+            .unwrap();
+        match ron::de::from_str(&string) {
+            Ok(out) => Some(out),
+            Err(e) => panic!("File {:?} deserialization failed: {:?}", name.as_ref(), e),
+        }
+    }
+
+    /// Deserializes `<scene_root>/<name>.ron`; `None` if it cannot be opened.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the file is unreadable or fails to deserialize.
+    #[cfg(feature = "replay")]
+    pub fn deserialize_for_scene<T, P>(&self, name: P) -> Option<T>
+    where
+        T: for<'a> serde::Deserialize<'a>,
+        P: AsRef<Path>,
+    {
+        Self::deserialize(&self.scene_root(), name)
+    }
+
+    /// Deserializes `<frame_root>/<name>.ron`; `None` if it cannot be opened.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the file is unreadable or fails to deserialize.
+    #[cfg(feature = "replay")]
+    pub fn deserialize_for_frame<T, P>(&self, name: P) -> Option<T>
+    where
+        T: for<'a> serde::Deserialize<'a>,
+        P: AsRef<Path>,
+    {
+        Self::deserialize(&self.frame_root(), name)
+    }
+
+    /// Deserializes `<resource_root>/<name>.ron`; `None` if it cannot be
+    /// opened.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the file is unreadable or fails to deserialize.
+    #[cfg(feature = "replay")]
+    pub fn deserialize_for_resource<T, P>(&self, name: P) -> Option<T>
+    where
+        T: for<'a> serde::Deserialize<'a>,
+        P: AsRef<Path>,
+    {
+        Self::deserialize(&self.resource_root(), name)
+    }
+
+    /// Writes `data` as an 8-bit PNG of dimensions `size` to `path`.
+    ///
+    /// `stride` is the distance in bytes between the starts of consecutive
+    /// rows in `data`; `None` means the rows are tightly packed.
+    ///
+    /// `BGRA8` data is written as-is under an RGBA color type (a warning is
+    /// logged), so red and blue end up swapped in the file. Formats other
+    /// than `RGBA8`, `BGRA8`, `R8` and `RG8` log an error and write nothing.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `data` is shorter than `stride` and `size` imply, if the
+    /// file cannot be created, or if PNG encoding fails.
+    #[cfg(feature = "png")]
+    pub fn save_png(
+        path: PathBuf, size: DeviceIntSize, format: ImageFormat, stride: Option<i32>, data: &[u8],
+    ) {
+        use png::{BitDepth, ColorType, Encoder};
+        use std::io::BufWriter;
+        use std::borrow::Cow;
+
+        // `png` expects tightly packed rows, so when the source has padding
+        // between rows, copy just the pixel bytes of each row into a
+        // contiguous buffer. Otherwise the data is borrowed unchanged.
+        let data = match stride {
+            Some(stride) if stride != format.bytes_per_pixel() * size.width => {
+                let mut unstrided = Vec::new();
+                for y in 0..size.height {
+                    let start = (y * stride) as usize;
+                    unstrided.extend_from_slice(&data[start..start+(size.width * format.bytes_per_pixel()) as usize]);
+                }
+                Cow::from(unstrided)
+            }
+            _ => Cow::from(data),
+        };
+
+        let color_type = match format {
+            ImageFormat::RGBA8 => ColorType::RGBA,
+            ImageFormat::BGRA8 => {
+                warn!("Unable to swizzle PNG of BGRA8 type");
+                ColorType::RGBA
+            },
+            ImageFormat::R8 => ColorType::Grayscale,
+            ImageFormat::RG8 => ColorType::GrayscaleAlpha,
+            _ => {
+                error!("Unable to save PNG of {:?}", format);
+                return;
+            }
+        };
+        let w = BufWriter::new(File::create(path).unwrap());
+        let mut enc = Encoder::new(w, size.width as u32, size.height as u32);
+        enc.set_color(color_type);
+        enc.set_depth(BitDepth::Eight);
+        enc
+            .write_header()
+            .unwrap()
+            .write_image_data(&*data)
+            .unwrap();
+    }
+}
+
+/// An image that `ResourceCache` is unable to resolve during a capture.
+/// The image has to be transferred to `Renderer` and locked with the
+/// external image handler to get the actual contents and serialize them.
+#[derive(Deserialize, Serialize)]
+pub struct ExternalCaptureImage {
+    // NOTE(review): presumably the capture-relative path the image contents
+    // are saved under -- confirm against the capture code.
+    pub short_path: String,
+    /// Descriptor of the image.
+    pub descriptor: ImageDescriptor,
+    /// External image data source.
+    pub external: ExternalImageData,
+}
+
+/// A short description of an external image, saved separately as
+/// "externals/XX.ron". It redirects into a specific texture/blob (`data`)
+/// together with the corresponding UV rectangle (`uv`).
+#[derive(Deserialize, Serialize)]
+pub struct PlainExternalImage {
+    /// Path to the RON file describing the texel data.
+    pub data: String,
+    /// External image data source.
+    pub external: ExternalImageData,
+    /// UV sub-rectangle of the image.
+    pub uv: TexelRect,
+}
diff --git a/gfx/wr/webrender/src/clip.rs b/gfx/wr/webrender/src/clip.rs
new file mode 100644
index 0000000000..f6bf55e967
--- /dev/null
+++ b/gfx/wr/webrender/src/clip.rs
@@ -0,0 +1,2310 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! Internal representation of clips in WebRender.
+//!
+//! # Data structures
+//!
+//! There are a number of data structures involved in the clip module:
+//!
+//! - ClipStore - Main interface used by other modules.
+//!
+//! - ClipItem - A single clip item (e.g. a rounded rect, or a box shadow).
+//!              These are an exposed API type, stored inline in a ClipNode.
+//!
+//! - ClipNode - A ClipItem with an attached GPU handle. The GPU handle is populated
+//!              when a ClipNodeInstance is built from this node (which happens while
+//!              preparing primitives for render).
+//!
+//! - ClipNodeInstance - A ClipNode with attached positioning information (a spatial
+//!                      node index). This is stored as a contiguous array of nodes
+//!                      within the ClipStore.
+//!
+//! ```ascii
+//! +-----------------------+-----------------------+-----------------------+
+//! | ClipNodeInstance      | ClipNodeInstance      | ClipNodeInstance      |
+//! +-----------------------+-----------------------+-----------------------+
+//! | ClipItem              | ClipItem              | ClipItem              |
+//! | Spatial Node Index    | Spatial Node Index    | Spatial Node Index    |
+//! | GPU cache handle      | GPU cache handle      | GPU cache handle      |
+//! | ...                   | ...                   | ...                   |
+//! +-----------------------+-----------------------+-----------------------+
+//!            0                        1                       2
+//!    +----------------+    |                                              |
+//!    | ClipNodeRange  |____|                                              |
+//!    |    index: 1    |                                                   |
+//!    |    count: 2    |___________________________________________________|
+//!    +----------------+
+//! ```
+//!
+//! - ClipNodeRange - A clip node range identifies a range of clip node instances.
+//!                   It is stored as an (index, count).
+//!
+//! - ClipChainNode - A clip chain node contains a handle to an interned clip item,
+//!                   positioning information (from where the clip was defined), and
+//!                   an optional parent link to another ClipChainNode. ClipChainId
+//!                   is an index into an array, or ClipChainId::NONE for no parent.
+//!
+//! ```ascii
+//! +----------------+    ____+----------------+    ____+----------------+   /---> ClipChainId::NONE
+//! | ClipChainNode  |   |    | ClipChainNode  |   |    | ClipChainNode  |   |
+//! +----------------+   |    +----------------+   |    +----------------+   |
+//! | ClipDataHandle |   |    | ClipDataHandle |   |    | ClipDataHandle |   |
+//! | Spatial index  |   |    | Spatial index  |   |    | Spatial index  |   |
+//! | Parent Id      |___|    | Parent Id      |___|    | Parent Id      |___|
+//! | ...            |        | ...            |        | ...            |
+//! +----------------+        +----------------+        +----------------+
+//! ```
+//!
+//! - ClipChainInstance - A ClipChain that has been built for a specific primitive + positioning node.
+//!
+//!    When given a clip chain ID, and a local primitive rect and its spatial node, the clip module
+//!    creates a clip chain instance. This is a struct with various pieces of useful information
+//!    (such as a local clip rect). It also contains a (index, count)
+//!    range specifier into an index buffer of the ClipNodeInstance structures that are actually relevant
+//!    for this clip chain instance. The index buffer structure allows a single array to be used for
+//!    all of the clip-chain instances built in a single frame. Each entry in the index buffer
+//!    also stores some flags relevant to the clip node in this positioning context.
+//!
+//! ```ascii
+//! +----------------------+
+//! | ClipChainInstance    |
+//! +----------------------+
+//! | ...                  |
+//! | local_clip_rect      |________________________________________________________________________
+//! | clips_range          |_______________                                                        |
+//! +----------------------+              |                                                        |
+//!                                       |                                                        |
+//! +------------------+------------------+------------------+------------------+------------------+
+//! | ClipNodeInstance | ClipNodeInstance | ClipNodeInstance | ClipNodeInstance | ClipNodeInstance |
+//! +------------------+------------------+------------------+------------------+------------------+
+//! | flags            | flags            | flags            | flags            | flags            |
+//! | ...              | ...              | ...              | ...              | ...              |
+//! +------------------+------------------+------------------+------------------+------------------+
+//! ```
+//!
+//! # Rendering clipped primitives
+//!
+//! See the [`segment` module documentation][segment.rs].
+//!
+//!
+//! [segment.rs]: ../segment/index.html
+//!
+
+use api::{BorderRadius, ClipMode, ComplexClipRegion, ImageMask, ClipId, ClipChainId};
+use api::{BoxShadowClipMode, FillRule, ImageKey, ImageRendering};
+use api::units::*;
+use crate::image_tiling::{self, Repetition};
+use crate::border::{ensure_no_corner_overlap, BorderRadiusAu};
+use crate::box_shadow::{BLUR_SAMPLE_SCALE, BoxShadowClipSource, BoxShadowCacheKey};
+use crate::spatial_tree::{SpatialTree, SpatialNodeIndex};
+use crate::ellipse::Ellipse;
+use crate::gpu_cache::GpuCache;
+use crate::gpu_types::{BoxShadowStretchMode};
+use crate::intern;
+use crate::internal_types::{FastHashMap, FastHashSet, LayoutPrimitiveInfo};
+use crate::prim_store::{VisibleMaskImageTile};
+use crate::prim_store::{PointKey, SizeKey, RectangleKey, PolygonKey};
+use crate::render_task_cache::to_cache_size;
+use crate::resource_cache::{ImageRequest, ResourceCache};
+use crate::scene_builder_thread::Interners;
+use crate::space::SpaceMapper;
+use crate::util::{clamp_to_scale_factor, MaxRect, extract_inner_rect_safe, project_rect, ScaleOffset};
+use euclid::approxeq::ApproxEq;
+use std::{iter, ops, u32, mem};
+
+/// A (non-leaf) node inside a clip-tree
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+pub struct ClipTreeNode {
+    /// Clip handle for this node. When clips are added to the tree, a child
+    /// whose handle equals the clip being added is reused rather than
+    /// duplicated. The root node built by `ClipTree::new` holds
+    /// `ClipDataHandle::INVALID`.
+    pub handle: ClipDataHandle,
+    /// Id of the node this one was added under. The root node's parent is
+    /// `ClipNodeId::NONE`.
+    pub parent: ClipNodeId,
+
+    /// Ids of the nodes added directly under this one, in insertion order.
+    children: Vec<ClipNodeId>,
+
+    // TODO(gw): Consider adding a default leaf for cases when the local_clip_rect is not relevant,
+    //           that can be shared among primitives (to reduce amount of clip-chain building).
+}
+
+/// A leaf node in a clip-tree. Any primitive that is clipped will have a handle to
+/// a clip-tree leaf.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+pub struct ClipTreeLeaf {
+    /// The clip-tree node this leaf is attached to.
+    pub node_id: ClipNodeId,
+
+    // TODO(gw): For now, this preserves the ability to build a culling rect
+    //           from the supplied leaf local clip rect on the primitive. In
+    //           future, we'll expand this to be more efficient by combining
+    //           it with compatible clip rects from the `node_id`.
+    /// Local clip rect supplied with the primitive.
+    pub local_clip_rect: LayoutRect,
+}
+
+/// ID for a ClipTreeNode
+///
+/// The wrapped value is used as an index into the clip-tree's node array.
+#[derive(Debug, Copy, Clone, PartialEq, MallocSizeOf, Eq, Hash)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ClipNodeId(u32);
+
+impl ClipNodeId {
+    /// Index 0. `ClipTree::new` places the root node at this index, and uses
+    /// this id both as the root's parent and as the initial clip root.
+    pub const NONE: ClipNodeId = ClipNodeId(0);
+}
+
+/// ID for a ClipTreeLeaf
+///
+/// The wrapped value is used as an index into the clip-tree's leaf array.
+#[derive(Debug, Copy, Clone, PartialEq, MallocSizeOf, Eq, Hash)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ClipLeafId(u32);
+
+/// A clip-tree built during scene building and used during frame-building to apply clips to primitives.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ClipTree {
+    /// All nodes of the tree, indexed by `ClipNodeId`. Index 0 is the root.
+    nodes: Vec<ClipTreeNode>,
+    /// All leaves of the tree, indexed by `ClipLeafId`.
+    leaves: Vec<ClipTreeLeaf>,
+    /// Stack of clip roots; the top entry is the current clip root.
+    clip_root_stack: Vec<ClipNodeId>,
+}
+
+impl ClipTree {
+    /// Creates a clip-tree that contains only the root node.
+    ///
+    /// The root sits at index 0 (`ClipNodeId::NONE`), carries an invalid clip
+    /// handle, has no leaves, and is the initial entry on the clip-root stack.
+    pub fn new() -> Self {
+        let root_node = ClipTreeNode {
+            handle: ClipDataHandle::INVALID,
+            parent: ClipNodeId::NONE,
+            children: Vec::new(),
+        };
+
+        ClipTree {
+            nodes: vec![root_node],
+            leaves: Vec::new(),
+            clip_root_stack: vec![ClipNodeId::NONE],
+        }
+    }
+
+    /// Inserts the chain `clips` below node `id`, sharing existing nodes
+    /// wherever a child with the same handle is already present.
+    ///
+    /// The clips are consumed in order: each one either descends into the
+    /// matching child of the current node or appends a new child for it.
+    /// Returns the id of the node reached after the last clip, which is `id`
+    /// itself when `clips` is empty.
+    fn add_impl(
+        id: ClipNodeId,
+        clips: &[ClipDataHandle],
+        nodes: &mut Vec<ClipTreeNode>,
+    ) -> ClipNodeId {
+        let mut current = id;
+
+        for &handle in clips {
+            // Look for a child of the current node that already has this clip.
+            let existing_child = nodes[current.0 as usize]
+                .children
+                .iter()
+                .copied()
+                .find(|child| nodes[child.0 as usize].handle == handle);
+
+            current = match existing_child {
+                Some(child) => child,
+                None => {
+                    // The new node's id is its index once it has been pushed.
+                    let new_child = ClipNodeId(nodes.len() as u32);
+                    nodes[current.0 as usize].children.push(new_child);
+                    nodes.push(ClipTreeNode {
+                        handle,
+                        children: Vec::new(),
+                        parent: current,
+                    });
+                    new_child
+                }
+            };
+        }
+
+        current
+    }
+
+    /// Inserts the chain `clips` below `root`, reusing nodes that already
+    /// exist in the tree, and returns the id of the node for the last clip
+    /// (or `root` when `clips` is empty).
+    pub fn add(
+        &mut self,
+        root: ClipNodeId,
+        clips: &[ClipDataHandle],
+    ) -> ClipNodeId {
+        let nodes = &mut self.nodes;
+        Self::add_impl(root, clips, nodes)
+    }
+
+    /// Returns the clip root on top of the stack (the node in the clip-tree
+    /// where clips can be ignored when building the clip-chain instance for a
+    /// primitive).
+    ///
+    /// Panics if the clip-root stack is empty.
+    pub fn current_clip_root(&self) -> ClipNodeId {
+        let top = self.clip_root_stack.last();
+        *top.unwrap()
+    }
+
+    /// Pushes the node that leaf `clip_leaf_id` is attached to as the new clip
+    /// root (e.g. when a surface is encountered), which prevents clips from
+    /// that node and above being applied to primitives within the root.
+    ///
+    /// Panics if `clip_leaf_id` does not refer to a leaf of this tree.
+    pub fn push_clip_root_leaf(&mut self, clip_leaf_id: ClipLeafId) {
+        let root_node_id = self.leaves[clip_leaf_id.0 as usize].node_id;
+        self.clip_root_stack.push(root_node_id);
+    }
+
+    /// Push a clip root (e.g. when a surface is encountered) that prevents clips
+    /// from this node and above being applied to primitives within the root.
+    ///
+    /// Unlike `push_clip_root_leaf`, the node id is given directly and is
+    /// pushed as-is.
+    pub fn push_clip_root_node(&mut self, clip_node_id: ClipNodeId) {
+        self.clip_root_stack.push(clip_node_id);
+    }
+
+    /// Pop a clip root, when exiting a surface.
+    ///
+    /// Panics if the clip-root stack is already empty.
+    pub fn pop_clip_root(&mut self) {
+        self.clip_root_stack.pop().unwrap();
+    }
+
+    /// Retrieve a clip tree node by id
+    ///
+    /// # Panics
+    ///
+    /// Panics if `id` is `ClipNodeId::NONE` (the root node cannot be fetched
+    /// through this accessor), or if `id` is out of range for this tree.
+    pub fn get_node(&self, id: ClipNodeId) -> &ClipTreeNode {
+        assert!(id != ClipNodeId::NONE);
+
+        &self.nodes[id.0 as usize]
+    }
+
+    /// Retrieve a clip tree leaf by id (the id is used directly as an index into `leaves`)
+    pub fn get_leaf(&self, id: ClipLeafId) -> &ClipTreeLeaf {
+        &self.leaves[id.0 as usize]
+    }
+
+    /// Debug helper: dump all nodes (recursing from `ClipNodeId::NONE`), then all leaves
+    #[allow(unused)]
+    pub fn print(&self) {
+        use crate::print_tree::PrintTree;
+
+        // Emit one leaf as its own level.
+        fn print_leaf<T: crate::print_tree::PrintTreePrinter>(
+            id: ClipLeafId,
+            leaves: &[ClipTreeLeaf],
+            pt: &mut T,
+        ) {
+            let entry = &leaves[id.0 as usize];
+            pt.new_level(format!("{:?}", id));
+            pt.add_item(format!("node_id: {:?}", entry.node_id));
+            pt.add_item(format!("local_clip_rect: {:?}", entry.local_clip_rect));
+            pt.end_level();
+        }
+
+        // Emit a node, then each of its children inside this node's level.
+        fn print_node<T: crate::print_tree::PrintTreePrinter>(
+            id: ClipNodeId,
+            nodes: &[ClipTreeNode],
+            pt: &mut T,
+        ) {
+            let entry = &nodes[id.0 as usize];
+            pt.new_level(format!("{:?}", id));
+            pt.add_item(format!("{:?}", entry.handle));
+
+            for &child_id in entry.children.iter() {
+                print_node(child_id, nodes, pt);
+            }
+
+            pt.end_level();
+        }
+
+        let mut printer = PrintTree::new("clip tree");
+        print_node(ClipNodeId::NONE, &self.nodes, &mut printer);
+
+        for (index, _) in self.leaves.iter().enumerate() {
+            print_leaf(ClipLeafId(index as u32), &self.leaves, &mut printer);
+        }
+    }
+
+    /// Return the deepest node that is an ancestor of (or equal to) both `node1` and
+    /// `node2`, e.g. to find clips shared by primitives attached to different clip-leaves.
+    pub fn find_lowest_common_ancestor(
+        &self,
+        mut node1: ClipNodeId,
+        mut node2: ClipNodeId,
+    ) -> ClipNodeId {
+        let nodes = &self.nodes;
+
+        // Number of parent links to follow from `id` until `ClipNodeId::NONE` is reached.
+        // TODO(gw): Consider caching / storing the depth in the node?
+        let depth_of = |id: ClipNodeId| -> usize {
+            let mut steps = 0;
+            let mut cursor = id;
+
+            while cursor != ClipNodeId::NONE {
+                cursor = nodes[cursor.0 as usize].parent;
+                steps += 1;
+            }
+
+            steps
+        };
+
+        let mut depth1 = depth_of(node1);
+        let mut depth2 = depth_of(node2);
+
+        // Lift the deeper node until both sit at the same depth...
+        while depth1 > depth2 {
+            node1 = nodes[node1.0 as usize].parent;
+            depth1 -= 1;
+        }
+        while depth2 > depth1 {
+            node2 = nodes[node2.0 as usize].parent;
+            depth2 -= 1;
+        }
+
+        // ...then climb both in lock-step until they meet.
+        while node1 != node2 {
+            node1 = nodes[node1.0 as usize].parent;
+            node2 = nodes[node2.0 as usize].parent;
+        }
+
+        node1
+    }
+}
+
+/// Represents a clip-chain as defined by the public API that we decompose into
+/// the clip-tree. In future, we would like to remove this and have Gecko directly
+/// build the clip-tree.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ClipChain {
+    parent: Option<usize>, // index of the parent chain in `ClipTreeBuilder::clip_chains`, if any
+    clips: Vec<ClipDataHandle>, // handles of the clips listed by this chain itself
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ClipStackEntry { // one level of `ClipTreeBuilder::clip_stack`
+    /// Cache of the previous clip-chain build at this level (clip-chain id and the node it produced), since this is a common case
+    last_clip_chain_cache: Option<(ClipChainId, ClipNodeId)>,
+
+    /// Set of clips that were already seen and included in clip_node_id
+    seen_clips: FastHashSet<ClipDataHandle>,
+
+    /// The built clip_node_id for this level of the stack
+    clip_node_id: ClipNodeId,
+}
+
+/// Used by the scene builder to build the clip-tree that is part of the built scene.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ClipTreeBuilder {
+    /// Clips defined by the display list, keyed by their `ClipId`
+    clip_map: FastHashMap<ClipId, ClipDataHandle>,
+
+    /// Clip-chains defined by the display list, in definition order
+    clip_chains: Vec<ClipChain>,
+    clip_chain_map: FastHashMap<ClipChainId, usize>, // clip-chain id -> index into `clip_chains`
+
+    /// List of clips pushed/popped by grouping items, such as stacking contexts and iframes
+    clip_stack: Vec<ClipStackEntry>,
+
+    /// The tree we are building (handed out by `finalize`)
+    tree: ClipTree,
+
+    /// A temporary buffer stored here to avoid constant heap allocs/frees
+    clip_handles_buffer: Vec<ClipDataHandle>,
+}
+
+impl ClipTreeBuilder {
+    /// Create a builder whose clip stack holds one entry rooted at `ClipNodeId::NONE`.
+    pub fn new() -> Self {
+        let base_entry = ClipStackEntry {
+            last_clip_chain_cache: None,
+            seen_clips: FastHashSet::default(),
+            clip_node_id: ClipNodeId::NONE,
+        };
+        ClipTreeBuilder {
+            clip_map: FastHashMap::default(),
+            clip_chains: Vec::new(),
+            clip_chain_map: FastHashMap::default(),
+            clip_stack: vec![base_entry],
+            tree: ClipTree::new(),
+            clip_handles_buffer: Vec::new(),
+        }
+    }
+
+    /// Define a new rect clip: record `handle` in `clip_map` under the clip's `id`
+    pub fn define_rect_clip(
+        &mut self,
+        id: ClipId,
+        handle: ClipDataHandle,
+    ) {
+        self.clip_map.insert(id, handle);
+    }
+
+    /// Define a new rounded rect clip: record `handle` in `clip_map` under the clip's `id`
+    pub fn define_rounded_rect_clip(
+        &mut self,
+        id: ClipId,
+        handle: ClipDataHandle,
+    ) {
+        self.clip_map.insert(id, handle);
+    }
+
+    /// Define an image mask clip: record `handle` in `clip_map` under the clip's `id`
+    pub fn define_image_mask_clip(
+        &mut self,
+        id: ClipId,
+        handle: ClipDataHandle,
+    ) {
+        self.clip_map.insert(id, handle);
+    }
+
+    /// Record a clip-chain under `id`, resolving its parent to a chain index and its
+    /// clip ids to handles. Panics if the parent chain or a clip id was never defined.
+    pub fn define_clip_chain<I: Iterator<Item = ClipId>>(
+        &mut self,
+        id: ClipChainId,
+        parent: Option<ClipChainId>,
+        clips: I,
+    ) {
+        let clip_map = &self.clip_map;
+        let chain = ClipChain {
+            parent: parent.map(|parent_id| self.clip_chain_map[&parent_id]),
+            clips: clips.map(|clip_id| clip_map[&clip_id]).collect(),
+        };
+        // The new chain's index is the current length of the chain list.
+        let index = self.clip_chains.len();
+        self.clip_chains.push(chain);
+        self.clip_chain_map.insert(id, index);
+    }
+
+    /// Push a stack entry for a grouping item. Starting from the current top entry,
+    /// the not-yet-seen clips of the given (valid) clip-chain are added to the tree;
+    /// the new entry applies to any prims built prior to the next pop.
+    pub fn push_clip_chain(
+        &mut self,
+        clip_chain_id: Option<ClipChainId>,
+        reset_seen: bool,
+    ) {
+        let prev = self.clip_stack.last().unwrap();
+        let mut seen_clips = prev.seen_clips.clone();
+        let mut clip_node_id = prev.clip_node_id;
+
+        // `None` and `ClipChainId::INVALID` both add no clips to the tree.
+        let valid_chain_id = clip_chain_id.filter(|id| *id != ClipChainId::INVALID);
+
+        if let Some(chain_id) = valid_chain_id {
+            self.clip_handles_buffer.clear();
+
+            let clip_chain_index = self.clip_chain_map[&chain_id];
+            ClipTreeBuilder::add_clips(
+                clip_chain_index,
+                &mut seen_clips,
+                &mut self.clip_handles_buffer,
+                &self.clip_chains,
+            );
+
+            clip_node_id = self.tree.add(clip_node_id, &self.clip_handles_buffer);
+        }
+
+        // Optionally forget which clips were seen, after the chain was applied.
+        if reset_seen {
+            seen_clips.clear();
+        }
+
+        self.clip_stack.push(ClipStackEntry {
+            last_clip_chain_cache: None,
+            clip_node_id,
+            seen_clips,
+        });
+    }
+
+    /// Push a stack entry that applies the clip `clip_id` to any prims built prior
+    /// to the next pop. The clip only extends the tree path if it was not already
+    /// seen at the current stack level. Panics if `clip_id` was never defined.
+    pub fn push_clip_id(
+        &mut self,
+        clip_id: ClipId,
+    ) {
+        let prev = self.clip_stack.last().unwrap();
+        let parent_node_id = prev.clip_node_id;
+        let mut seen_clips = prev.seen_clips.clone();
+
+        self.clip_handles_buffer.clear();
+        let clip_handle = self.clip_map[&clip_id];
+
+        // `insert` reports whether the handle is new to the seen-set.
+        let is_new = seen_clips.insert(clip_handle);
+        if is_new {
+            self.clip_handles_buffer.push(clip_handle);
+        }
+
+        let clip_node_id = self.tree.add(parent_node_id, &self.clip_handles_buffer);
+
+        self.clip_stack.push(ClipStackEntry {
+            last_clip_chain_cache: None,
+            seen_clips,
+            clip_node_id,
+        });
+    }
+
+    /// Pop a clip off the clip_stack, when exiting a grouping item. Panics if the stack is empty.
+    pub fn pop_clip(&mut self) {
+        self.clip_stack.pop().unwrap();
+    }
+
+    /// Append the clips of a clip-chain to `output` (ancestor chains first, each chain's
+    /// own clips in reverse order), skipping handles that are already in `seen_clips`
+    fn add_clips(
+        clip_chain_index: usize,
+        seen_clips: &mut FastHashSet<ClipDataHandle>,
+        output: &mut Vec<ClipDataHandle>,
+        clip_chains: &[ClipChain],
+    ) {
+        // TODO(gw): It's possible that we may see clip outputs that include identical clips
+        //           (e.g. if there is a clip positioned by two spatial nodes, where one spatial
+        //           node is a child of the other, and has an identity transform). If we ever
+        //           see this in real-world cases, it might be worth checking for that here and
+        //           excluding them, to ensure the shape of the tree matches what we need for
+        //           finding shared_clips for tile caches etc.
+
+        let ClipChain { parent, clips } = &clip_chains[clip_chain_index];
+
+        if let Some(parent_index) = *parent {
+            ClipTreeBuilder::add_clips(
+                parent_index,
+                seen_clips,
+                output,
+                clip_chains,
+            );
+        }
+
+        // `insert` returns true only for handles not seen before; only those are kept.
+        output.extend(
+            clips.iter().rev().copied().filter(|handle| seen_clips.insert(*handle))
+        );
+    }
+
+    /// Main entry point to build a path in the clip-tree for a given primitive.
+    /// Returns the node at the end of that path, which starts at the node of the
+    /// current clip-stack entry.
+    pub fn build_clip_set(
+        &mut self,
+        clip_chain_id: ClipChainId,
+    ) -> ClipNodeId {
+        let top = self.clip_stack.last_mut().unwrap();
+
+        // No clip-chain: the primitive just uses the node of the current stack entry.
+        if clip_chain_id == ClipChainId::INVALID {
+            return top.clip_node_id;
+        }
+
+        // Fast path: same clip-chain as the previous build at this stack level.
+        match top.last_clip_chain_cache {
+            Some((cached_chain, cached_node)) if cached_chain == clip_chain_id => {
+                return cached_node;
+            }
+            _ => {}
+        }
+
+        let clip_chain_index = self.clip_chain_map[&clip_chain_id];
+        let handles = &mut self.clip_handles_buffer;
+        handles.clear();
+
+        ClipTreeBuilder::add_clips(
+            clip_chain_index,
+            &mut top.seen_clips,
+            handles,
+            &self.clip_chains,
+        );
+
+        // `add_clips` recorded every emitted handle in `top.seen_clips` in order to
+        // filter out duplicates. Take them back out so the next `build_clip_set` call
+        // for a different clip-chain sees the unmodified set; undoing in place avoids
+        // cloning (and heap-allocating) the set on every call.
+        for handle in handles.iter() {
+            top.seen_clips.remove(handle);
+        }
+
+        let clip_node_id = self.tree.add(top.clip_node_id, handles.as_slice());
+        top.last_clip_chain_cache = Some((clip_chain_id, clip_node_id));
+
+        clip_node_id
+    }
+
+    /// Recursively test whether a clip-chain, or any of its ancestor chains,
+    /// contains a complex (non-rectangular) clip
+    fn has_complex_clips_impl(
+        &self,
+        clip_chain_index: usize,
+        interners: &Interners,
+    ) -> bool {
+        let ClipChain { parent, clips } = &self.clip_chains[clip_chain_index];
+
+        // Check this chain's own clips first...
+        let own_complex = clips.iter().any(|clip_handle| {
+            let kind = interners.clip[*clip_handle].key.kind.node_kind();
+            matches!(kind, ClipNodeKind::Complex)
+        });
+        if own_complex {
+            return true;
+        }
+
+        // ...and only then walk up to the parent chain, if there is one.
+        parent.map_or(false, |parent_index| self.has_complex_clips_impl(parent_index, interners))
+    }
+
+    /// Check if the clip-chain with the given id has complex (non-rectangular) clips
+    pub fn clip_chain_has_complex_clips(
+        &self,
+        clip_chain_id: ClipChainId,
+        interners: &Interners,
+    ) -> bool {
+        // Resolve the id to its index; panics if the chain was never defined.
+        self.has_complex_clips_impl(self.clip_chain_map[&clip_chain_id], interners)
+    }
+
+    /// Check if a clip-node, or any node on its parent path before `ClipNodeId::NONE`
+    /// is reached, holds a complex (non-rectangular) clip
+    pub fn clip_node_has_complex_clips(
+        &self,
+        clip_node_id: ClipNodeId,
+        interners: &Interners,
+    ) -> bool {
+        let nodes = &self.tree.nodes;
+        let mut cursor = clip_node_id;
+
+        loop {
+            if cursor == ClipNodeId::NONE {
+                return false;
+            }
+            let ClipTreeNode { handle, parent, .. } = &nodes[cursor.0 as usize];
+            if let ClipNodeKind::Complex = interners.clip[*handle].key.kind.node_kind() {
+                return true;
+            }
+            cursor = *parent;
+        }
+    }
+
+    /// Finalize building: consume the builder and return the clip-tree it built
+    pub fn finalize(self) -> ClipTree {
+        self.tree
+    }
+
+    /// Get a clip node of the tree being built. Panics if `id` is `ClipNodeId::NONE`.
+    pub fn get_node(&self, id: ClipNodeId) -> &ClipTreeNode {
+        assert!(id != ClipNodeId::NONE);
+        let index = id.0 as usize;
+        &self.tree.nodes[index]
+    }
+
+    /// Get a clip leaf of the tree being built (the id is used directly as an index)
+    pub fn get_leaf(&self, id: ClipLeafId) -> &ClipTreeLeaf {
+        &self.tree.leaves[id.0 as usize]
+    }
+
+    /// Build a clip-leaf for a tile-cache: `extra_clips` are added beneath
+    /// `clip_node_id`, and the leaf uses `LayoutRect::max_rect()` as its local clip rect
+    pub fn build_for_tile_cache(
+        &mut self,
+        clip_node_id: ClipNodeId,
+        extra_clips: &[ClipId],
+    ) -> ClipLeafId {
+        // Resolve the clip ids to their handles (panics on an undefined id).
+        let clip_map = &self.clip_map;
+        self.clip_handles_buffer.clear();
+        self.clip_handles_buffer.extend(
+            extra_clips.iter().map(|clip_id| clip_map[clip_id])
+        );
+
+        let node_id = self.tree.add(clip_node_id, &self.clip_handles_buffer);
+
+        // The new leaf's id is its index in the leaf list.
+        let leaves = &mut self.tree.leaves;
+        let clip_leaf_id = ClipLeafId(leaves.len() as u32);
+
+        leaves.push(ClipTreeLeaf {
+            node_id,
+            local_clip_rect: LayoutRect::max_rect(),
+        });
+
+        clip_leaf_id
+    }
+
+    /// Build a clip-leaf for a picture: no extra clips are passed to the tree, and
+    /// the leaf uses `LayoutRect::max_rect()` as its local clip rect
+    pub fn build_for_picture(
+        &mut self,
+        clip_node_id: ClipNodeId,
+    ) -> ClipLeafId {
+        let tree = &mut self.tree;
+        let node_id = tree.add(clip_node_id, &[]);
+
+        // The new leaf's id is its index in the leaf list.
+        let clip_leaf_id = ClipLeafId(tree.leaves.len() as u32);
+        let leaf = ClipTreeLeaf {
+            node_id,
+            local_clip_rect: LayoutRect::max_rect(),
+        };
+        tree.leaves.push(leaf);
+
+        clip_leaf_id
+    }
+
+    /// Build a clip-leaf for a normal primitive. Any `extra_clips` are interned and
+    /// added beneath `clip_node_id`; the leaf's local clip rect is `info.clip_rect`.
+    pub fn build_for_prim(
+        &mut self,
+        clip_node_id: ClipNodeId,
+        info: &LayoutPrimitiveInfo,
+        extra_clips: &[ClipItemKey],
+        interners: &mut Interners,
+    ) -> ClipLeafId {
+        let mut node_id = clip_node_id;
+
+        // Without extra clips the leaf refers to `clip_node_id` itself.
+        if !extra_clips.is_empty() {
+            // TODO(gw): Cache the previous build of clip-node / clip-leaf to handle cases where we get a
+            //           lot of primitives referencing the same clip set (e.g. dl_mutate and similar tests)
+            self.clip_handles_buffer.clear();
+
+            for item in extra_clips {
+                // Intern this clip item, and collect its handle for the
+                // tree path added below.
+                let handle = interners.clip.intern(item, || ClipInternData {
+                    key: item.clone(),
+                });
+                self.clip_handles_buffer.push(handle);
+            }
+
+            node_id = self.tree.add(
+                node_id,
+                &self.clip_handles_buffer,
+            );
+        }
+
+        // The new leaf's id is its index in the leaf list.
+        let leaves = &mut self.tree.leaves;
+        let clip_leaf_id = ClipLeafId(leaves.len() as u32);
+
+        leaves.push(ClipTreeLeaf {
+            node_id,
+            local_clip_rect: info.clip_rect,
+        });
+
+        clip_leaf_id
+    }
+
+    /// Find the LCA (lowest common ancestor) for two given clip nodes in the tree being built
+    pub fn find_lowest_common_ancestor(
+        &self,
+        node1: ClipNodeId,
+        node2: ClipNodeId,
+    ) -> ClipNodeId {
+        self.tree.find_lowest_common_ancestor(node1, node2) // delegates to `ClipTree`
+    }
+}
+
+// Type definitions for interning clip nodes.
+
+#[derive(Copy, Clone, Debug, MallocSizeOf, PartialEq, Eq, Hash)]
+#[cfg_attr(any(feature = "serde"), derive(Deserialize, Serialize))]
+pub enum ClipIntern {} // no variants: only used as the type parameter of the aliases below
+
+pub type ClipDataStore = intern::DataStore<ClipIntern>; // data store specialised for clips
+pub type ClipDataHandle = intern::Handle<ClipIntern>; // handle to an interned clip, e.g. as returned by `interners.clip.intern`
+
+/// Helper to distinguish simple clips (normal rects) from other kinds of clips;
+/// simple clips can often be handled via fast code paths.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Copy, Clone, MallocSizeOf)]
+pub enum ClipNodeKind {
+    /// A normal clip rectangle, with Clip mode.
+    Rectangle,
+    /// A rectangle with ClipOut, or any other kind of clip.
+    Complex, // the kind the `*_has_complex_clips` queries of `ClipTreeBuilder` test for
+}
+
+/// Result of comparing a clip node instance against a local rect.
+#[derive(Debug)]
+enum ClipResult {
+    /// The clip does not affect the region at all.
+    Accept,
+    /// The clip prevents the region from being drawn.
+    Reject,
+    /// The clip affects part of the region. This may
+    /// require a clip mask, depending on other factors.
+    Partial,
+}
+
+/// A clip node is a single clip source, along with some
+/// positioning information and implementation details
+/// that control where the GPU data for this clip source
+/// can be found.
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+pub struct ClipNode {
+    pub item: ClipItem, // the clip kind plus the spatial node index that positions it
+}
+
+// Build the clip node that is cached in the document from
+// the interning key of a clip item, variant by variant.
+impl From<ClipItemKey> for ClipNode {
+    fn from(item: ClipItemKey) -> Self {
+        let spatial_node_index = item.spatial_node_index;
+        let kind = match item.kind {
+            ClipItemKeyKind::Rectangle(key_rect, mode) => ClipItemKind::Rectangle {
+                rect: key_rect.into(),
+                mode,
+            },
+            ClipItemKeyKind::RoundedRectangle(key_rect, key_radius, mode) => ClipItemKind::RoundedRectangle {
+                rect: key_rect.into(),
+                radius: key_radius.into(),
+                mode,
+            },
+            ClipItemKeyKind::ImageMask(key_rect, image, polygon_handle) => ClipItemKind::Image {
+                image,
+                rect: key_rect.into(),
+                polygon_handle,
+            },
+            ClipItemKeyKind::BoxShadow(
+                fract_offset,
+                rect_size,
+                radius,
+                prim_rect,
+                blur_radius,
+                clip_mode,
+            ) => ClipItemKind::new_box_shadow(
+                fract_offset.into(),
+                rect_size.into(),
+                radius.into(),
+                prim_rect.into(),
+                blur_radius.to_f32_px(),
+                clip_mode,
+            ),
+        };
+
+        ClipNode {
+            item: ClipItem { kind, spatial_node_index },
+        }
+    }
+}
+
+// Flags that are attached to instances of clip nodes.
+bitflags! {
+    #[cfg_attr(feature = "capture", derive(Serialize))]
+    #[cfg_attr(feature = "replay", derive(Deserialize))]
+    #[derive(MallocSizeOf)]
+    pub struct ClipNodeFlags: u8 {
+        const SAME_SPATIAL_NODE = 0x1; // set for `ClipSpaceConversion::Local`
+        const SAME_COORD_SYSTEM = 0x2; // set for `Local` and `ScaleOffset` conversions
+        const USE_FAST_PATH = 0x4; // set in `create_instance` when the clip kind supports fast path rendering
+    }
+}
+
+/// When a clip node is found to be valid for a
+/// clip chain instance, it's stored in an index
+/// buffer style structure. This struct contains
+/// an index to the node data itself, as well as
+/// some flags describing how this clip node instance
+/// is positioned.
+#[derive(Debug, MallocSizeOf)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ClipNodeInstance {
+    pub handle: ClipDataHandle, // handle of the interned clip this instance refers to
+    pub flags: ClipNodeFlags,
+    pub visible_tiles: Option<ops::Range<usize>>, // range into the mask tile list; only set for tiled image masks
+}
+
+impl ClipNodeInstance {
+    pub fn has_visible_tiles(&self) -> bool { // true iff a tile range was recorded (it may be empty)
+        self.visible_tiles.is_some()
+    }
+}
+
+/// A run of `count` consecutive clip node instances starting at index `first`,
+/// as found by building a clip chain instance.
+#[derive(Debug, Copy, Clone)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ClipNodeRange {
+    pub first: u32,
+    pub count: u32,
+}
+
+impl ClipNodeRange {
+    /// Convert to the half-open `usize` range `first .. first + count`, usable
+    /// for slicing the instance list.
+    pub fn to_range(&self) -> ops::Range<usize> {
+        // Widen to usize before adding, so the sum is not computed in u32.
+        let first = self.first as usize;
+        let count = self.count as usize;
+
+        first .. first + count
+    }
+}
+
+/// A helper enum for converting between coordinate systems
+/// of clip sources and primitives.
+// TODO(gw): optimize:
+//  separate arrays for matrices
+//  cache and only build as needed.
+// TODO: merge with `CoordinateSpaceMapping`?
+#[derive(Debug, MallocSizeOf)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+enum ClipSpaceConversion {
+    Local, // clip and primitive use the same spatial node
+    ScaleOffset(ScaleOffset), // different spatial nodes within the same coordinate system
+    Transform(LayoutToWorldTransform), // otherwise: world transform of the clip's spatial node
+}
+
+impl ClipSpaceConversion {
+    /// Construct a new clip space converter between two spatial nodes: `Local` for the
+    /// same node, `ScaleOffset` within one coordinate system, `Transform` otherwise.
+    fn new(
+        prim_spatial_node_index: SpatialNodeIndex,
+        clip_spatial_node_index: SpatialNodeIndex,
+        spatial_tree: &SpatialTree,
+    ) -> Self {
+        //Note: this code is different from `get_relative_transform` in a way that we only try
+        // getting the relative transform if it's Local or ScaleOffset,
+        // falling back to the world transform otherwise.
+        let clip_spatial_node = spatial_tree.get_spatial_node(clip_spatial_node_index);
+        let prim_spatial_node = spatial_tree.get_spatial_node(prim_spatial_node_index);
+
+        if prim_spatial_node_index == clip_spatial_node_index {
+            return ClipSpaceConversion::Local;
+        }
+
+        if prim_spatial_node.coordinate_system_id == clip_spatial_node.coordinate_system_id {
+            let scale_offset = prim_spatial_node.content_transform
+                .inverse()
+                .accumulate(&clip_spatial_node.content_transform);
+            return ClipSpaceConversion::ScaleOffset(scale_offset);
+        }
+
+        let world_transform = spatial_tree.get_world_transform(clip_spatial_node_index);
+        ClipSpaceConversion::Transform(world_transform.into_transform())
+    }
+
+    /// Flags implied by this conversion: both "same" flags for `Local`, only
+    /// `SAME_COORD_SYSTEM` for `ScaleOffset`, and no flags for `Transform`.
+    fn to_flags(&self) -> ClipNodeFlags {
+        match self {
+            ClipSpaceConversion::Transform(..) => ClipNodeFlags::empty(),
+            ClipSpaceConversion::ScaleOffset(..) => {
+                ClipNodeFlags::SAME_COORD_SYSTEM
+            }
+            ClipSpaceConversion::Local => {
+                ClipNodeFlags::SAME_SPATIAL_NODE | ClipNodeFlags::SAME_COORD_SYSTEM
+            }
+        }
+    }
+}
+
+/// Temporary information that is cached and reused
+/// during building of a clip chain instance.
+#[derive(MallocSizeOf)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+struct ClipNodeInfo {
+    conversion: ClipSpaceConversion, // source of the instance's initial flags (via `to_flags`)
+    handle: ClipDataHandle, // copied into the `ClipNodeInstance` built by `create_instance`
+}
+
+impl ClipNodeInfo {
+    fn create_instance(
+        &self,
+        node: &ClipNode,
+        clipped_rect: &LayoutRect,
+        gpu_cache: &mut GpuCache,
+        resource_cache: &mut ResourceCache,
+        mask_tiles: &mut Vec<VisibleMaskImageTile>,
+        spatial_tree: &SpatialTree,
+        request_resources: bool,
+    ) -> Option<ClipNodeInstance> { // `None` only when an image mask's key is missing from the resource cache
+        // Calculate some flags that are required for the segment
+        // building logic.
+        let mut flags = self.conversion.to_flags();
+
+        // Some clip shaders support a fast path mode for simple clips.
+        // TODO(gw): We could also apply fast path when segments are created, since we only write
+        //           the mask for a single corner at a time then, so can always consider radii uniform.
+        let is_raster_2d =
+            flags.contains(ClipNodeFlags::SAME_COORD_SYSTEM) ||
+            spatial_tree
+                .get_world_viewport_transform(node.item.spatial_node_index)
+                .is_2d_axis_aligned();
+        if is_raster_2d && node.item.kind.supports_fast_path_rendering() {
+            flags |= ClipNodeFlags::USE_FAST_PATH;
+        }
+
+        let mut visible_tiles = None; // stays `None` unless the clip is a tiled image mask
+
+        if let ClipItemKind::Image { rect, image, .. } = node.item.kind {
+            let request = ImageRequest {
+                key: image,
+                rendering: ImageRendering::Auto,
+                tile: None,
+            };
+
+            if let Some(props) = resource_cache.get_image_properties(image) {
+                if let Some(tile_size) = props.tiling {
+                    let tile_range_start = mask_tiles.len(); // tiles pushed below form this instance's range
+
+                    // Bug 1648323 - It is unclear why on rare occasions we get
+                    // a clipped_rect that does not intersect the clip's mask rect.
+                    // Defaulting to clipped_rect here results in zero repetitions,
+                    // which clips the primitive entirely.
+                    let visible_rect =
+                        clipped_rect.intersection(&rect).unwrap_or(*clipped_rect);
+
+                    let repetitions = image_tiling::repetitions(
+                        &rect,
+                        &visible_rect,
+                        rect.size(),
+                    );
+
+                    for Repetition { origin, .. } in repetitions {
+                        let layout_image_rect = LayoutRect::from_origin_and_size(
+                            origin,
+                            rect.size(),
+                        );
+                        let tiles = image_tiling::tiles(
+                            &layout_image_rect,
+                            &visible_rect,
+                            &props.visible_rect,
+                            tile_size as i32,
+                        );
+                        for tile in tiles {
+                            if request_resources { // the tile is recorded below either way
+                                resource_cache.request_image(
+                                    request.with_tile(tile.offset),
+                                    gpu_cache,
+                                );
+                            }
+                            mask_tiles.push(VisibleMaskImageTile {
+                                tile_offset: tile.offset,
+                                tile_rect: tile.rect,
+                            });
+                        }
+                    }
+                    visible_tiles = Some(tile_range_start..mask_tiles.len());
+                } else if request_resources { // untiled image: a single request for the whole image
+                    resource_cache.request_image(request, gpu_cache);
+                }
+            } else {
+                // If the supplied image key doesn't exist in the resource cache,
+                // skip the clip node since there is nothing to mask with.
+                warn!("Clip mask with missing image key {:?}", request.key);
+                return None;
+            }
+        }
+
+        Some(ClipNodeInstance {
+            handle: self.handle,
+            flags,
+            visible_tiles,
+        })
+    }
+}
+
+impl ClipNode {
+    pub fn update(
+        &mut self,
+        device_pixel_scale: DevicePixelScale,
+    ) {
+        // Only box-shadow clips carry a cache key that is (re)computed here.
+        let source = match self.item.kind {
+            ClipItemKind::BoxShadow { ref mut source } => source,
+            ClipItemKind::Image { .. } |
+            ClipItemKind::Rectangle { .. } |
+            ClipItemKind::RoundedRectangle { .. } => return,
+        };
+
+        // Quote from https://drafts.csswg.org/css-backgrounds-3/#shadow-blur
+        // "the image that would be generated by applying to the shadow a
+        // Gaussian blur with a standard deviation equal to half the blur radius."
+        let blur_radius_dp = source.blur_radius * 0.5;
+
+        // Create scaling from requested size to cache size.
+        let mut content_scale = LayoutToWorldScale::new(1.0) * device_pixel_scale;
+        content_scale.0 = clamp_to_scale_factor(content_scale.0, false);
+
+        // Size first: `to_cache_size` takes the scale mutably, and the key below reads it.
+        let cache_size = to_cache_size(source.shadow_rect_alloc_size, &mut content_scale);
+
+        let key = BoxShadowCacheKey {
+            blur_radius_dp: (blur_radius_dp * content_scale.0).round() as i32,
+            clip_mode: source.clip_mode,
+            original_alloc_size: (source.original_alloc_size * content_scale).round().to_i32(),
+            br_top_left: (source.shadow_radius.top_left * content_scale).round().to_i32(),
+            br_top_right: (source.shadow_radius.top_right * content_scale).round().to_i32(),
+            br_bottom_right: (source.shadow_radius.bottom_right * content_scale).round().to_i32(),
+            br_bottom_left: (source.shadow_radius.bottom_left * content_scale).round().to_i32(),
+            device_pixel_scale: Au::from_f32_px(content_scale.0),
+        };
+
+        source.cache_key = Some((cache_size, key));
+    }
+}
+
+#[derive(Default)]
+pub struct ClipStoreScratchBuffer { // NOTE(review): fields mirror `ClipStore`'s vectors; presumably used to recycle their allocations - confirm
+    clip_node_instances: Vec<ClipNodeInstance>,
+    mask_tiles: Vec<VisibleMaskImageTile>,
+}
+
+/// The main clipping public interface that other modules access.
+#[derive(MallocSizeOf)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+pub struct ClipStore {
+    pub clip_node_instances: Vec<ClipNodeInstance>, // indexed via `ClipNodeRange` (see `get_instance_from_range`)
+    mask_tiles: Vec<VisibleMaskImageTile>,
+
+    active_clip_node_info: Vec<ClipNodeInfo>, // cleared at the start of `set_active_clips`
+    active_local_clip_rect: Option<LayoutRect>, // `None` until `set_active_clips` completes its walk
+    active_pic_coverage_rect: PictureRect, // reset to `PictureRect::max_rect()` by `set_active_clips`
+}
+
+/// A clip chain instance is what gets built for a given clip
+/// chain id + local primitive region + positioning node.
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+pub struct ClipChainInstance {
+    pub clips_range: ClipNodeRange,
+    /// Combined clip rect for clips that are in the
+    /// same coordinate system as the primitive.
+    pub local_clip_rect: LayoutRect,
+    pub has_non_local_clips: bool,
+    /// If true, this clip chain requires allocation
+    /// of a clip mask.
+    pub needs_mask: bool,
+    /// Combined clip rect in picture space (may
+    /// be more conservative than local_clip_rect).
+    pub pic_coverage_rect: PictureRect,
+    /// Space in which the `pic_coverage_rect` is defined.
+    pub pic_spatial_node_index: SpatialNodeIndex,
+}
+
+impl ClipChainInstance {
+    /// An instance with no clips, zero rects, no mask and an invalid spatial node index
+    pub fn empty() -> Self {
+        let clips_range = ClipNodeRange { first: 0, count: 0 };
+
+        ClipChainInstance {
+            clips_range,
+            local_clip_rect: LayoutRect::zero(),
+            pic_coverage_rect: PictureRect::zero(),
+            has_non_local_clips: false,
+            needs_mask: false,
+            pic_spatial_node_index: SpatialNodeIndex::INVALID,
+        }
+    }
+}
+
+impl ClipStore {
+    pub fn new() -> Self { // empty store: no instances, no tiles, no active clips
+        Self {
+            active_pic_coverage_rect: PictureRect::max_rect(),
+            active_local_clip_rect: None,
+            active_clip_node_info: Vec::new(),
+            mask_tiles: Vec::new(),
+            clip_node_instances: Vec::new(),
+        }
+    }
+
+    /// Returns the `index`-th clip node instance of `node_range`, i.e. the
+    /// entry at position `node_range.first + index` of `clip_node_instances`.
+    ///
+    /// `index` is not checked against `node_range.count`; indexing past the
+    /// end of the vector panics.
+    pub fn get_instance_from_range(
+        &self,
+        node_range: &ClipNodeRange,
+        index: u32,
+    ) -> &ClipNodeInstance {
+        let absolute_index = (node_range.first + index) as usize;
+        &self.clip_node_instances[absolute_index]
+    }
+
+    /// Setup the active clip chains for building a clip chain instance.
+    ///
+    /// Walks the clip tree from the node of `clip_leaf_id` up to (but not
+    /// including) the current clip root, handing every visited node to
+    /// `add_clip_node_to_current_chain`. If that helper returns `false` the
+    /// walk is abandoned and the active local clip rect stays `None`, which
+    /// makes a subsequent `build_clip_chain_instance` return `None`.
+    pub fn set_active_clips(
+        &mut self,
+        prim_spatial_node_index: SpatialNodeIndex,
+        pic_spatial_node_index: SpatialNodeIndex,
+        clip_leaf_id: ClipLeafId,
+        spatial_tree: &SpatialTree,
+        clip_data_store: &ClipDataStore,
+        clip_tree: &ClipTree,
+    ) {
+        // Start from a clean slate; the local clip rect is only published
+        // once the whole walk has succeeded.
+        self.active_pic_coverage_rect = PictureRect::max_rect();
+        self.active_local_clip_rect = None;
+        self.active_clip_node_info.clear();
+
+        let root_id = clip_tree.current_clip_root();
+        let leaf = clip_tree.get_leaf(clip_leaf_id);
+
+        let mut accumulated_clip_rect = leaf.local_clip_rect;
+        let mut node_id = leaf.node_id;
+
+        loop {
+            if node_id == root_id {
+                break;
+            }
+
+            let node = clip_tree.get_node(node_id);
+
+            let keep_walking = add_clip_node_to_current_chain(
+                node.handle,
+                prim_spatial_node_index,
+                pic_spatial_node_index,
+                &mut accumulated_clip_rect,
+                &mut self.active_clip_node_info,
+                &mut self.active_pic_coverage_rect,
+                clip_data_store,
+                spatial_tree,
+            );
+            if !keep_walking {
+                return;
+            }
+
+            node_id = node.parent;
+        }
+
+        self.active_local_clip_rect = Some(accumulated_clip_rect);
+    }
+
+    /// Setup the active clip chains, based on an existing primitive clip chain instance.
+    ///
+    /// The local clip rect and picture coverage rect are copied from
+    /// `prim_clip_chain`, and one `ClipNodeInfo` is rebuilt for every clip
+    /// node instance inside its `clips_range`.
+    pub fn set_active_clips_from_clip_chain(
+        &mut self,
+        prim_clip_chain: &ClipChainInstance,
+        prim_spatial_node_index: SpatialNodeIndex,
+        spatial_tree: &SpatialTree,
+        clip_data_store: &ClipDataStore,
+    ) {
+        // TODO(gw): Although this does less work than set_active_clips(), it does
+        //           still do some unnecessary work (such as the clip space conversion).
+        //           We could consider optimizing this if it ever shows up in a profile.
+
+        self.active_pic_coverage_rect = prim_clip_chain.pic_coverage_rect;
+        self.active_local_clip_rect = Some(prim_clip_chain.local_clip_rect);
+        self.active_clip_node_info.clear();
+
+        let instance_range = prim_clip_chain.clips_range.to_range();
+        let rebuilt_infos = self.clip_node_instances[instance_range]
+            .iter()
+            .map(|instance| {
+                let handle = instance.handle;
+                // Conversion from the clip's own space to the primitive's space.
+                let conversion = ClipSpaceConversion::new(
+                    prim_spatial_node_index,
+                    clip_data_store[handle].item.spatial_node_index,
+                    spatial_tree,
+                );
+                ClipNodeInfo { handle, conversion }
+            });
+
+        self.active_clip_node_info.extend(rebuilt_infos);
+    }
+
+    /// Given a clip-chain instance, return a safe rect within the visible region
+    /// that can be assumed to be unaffected by clip radii. Returns None if it
+    /// encounters any complex cases, just handling rounded rects in the same
+    /// coordinate system as the clip-chain for now.
+    pub fn get_inner_rect_for_clip_chain(
+        &self,
+        clip_chain: &ClipChainInstance,
+        clip_data_store: &ClipDataStore,
+        spatial_tree: &SpatialTree,
+    ) -> Option<PictureRect> {
+        let instance_range = clip_chain.clips_range.to_range();
+        let mut safe_rect = clip_chain.pic_coverage_rect;
+
+        for instance in &self.clip_node_instances[instance_range] {
+            // Mapping between coordinate systems is not handled for now.
+            let same_coord_system = instance.flags.contains(ClipNodeFlags::SAME_COORD_SYSTEM);
+            if !same_coord_system {
+                return None;
+            }
+
+            let node = &clip_data_store[instance.handle];
+
+            let (rect, radius) = match node.item.kind {
+                // Plain clip rects are already accounted for by the
+                // clip-chain pic_coverage_rect, so there is nothing to do.
+                ClipItemKind::Rectangle { mode: ClipMode::Clip, .. } => continue,
+                // The only kind that shrinks the safe rect.
+                ClipItemKind::RoundedRectangle { mode: ClipMode::Clip, rect, radius } => {
+                    (rect, radius)
+                }
+                // Clips which are complex or impossible to calculate an
+                // inner rect for are not supported for now.
+                ClipItemKind::RoundedRectangle { mode: ClipMode::ClipOut, .. } |
+                ClipItemKind::Rectangle { mode: ClipMode::ClipOut, .. } |
+                ClipItemKind::BoxShadow { .. } |
+                ClipItemKind::Image { .. } => return None,
+            };
+
+            // Inner rect of the rounded-rect clip, in the clip's local space.
+            let local_inner_rect = extract_inner_rect_safe(&rect, &radius)?;
+
+            // Local space -> picture space.
+            let mapper = SpaceMapper::new_with_target(
+                clip_chain.pic_spatial_node_index,
+                node.item.spatial_node_index,
+                PictureRect::max_rect(),
+                spatial_tree,
+            );
+
+            // Accumulate, since several rounded-rect clips may apply. A rect
+            // that fails to map leaves the accumulated rect untouched.
+            if let Some(pic_inner_rect) = mapper.map(&local_inner_rect) {
+                safe_rect = safe_rect
+                    .intersection(&pic_inner_rect)
+                    .unwrap_or_else(PictureRect::zero);
+            }
+        }
+
+        Some(safe_rect)
+    }
+
+    /// The main interface external code uses. Given a local primitive, positioning
+    /// information, and a clip chain id, build an optimized clip chain instance.
+    ///
+    /// Operates on the active clip state prepared by `set_active_clips` or
+    /// `set_active_clips_from_clip_chain`, and drains `active_clip_node_info`
+    /// in the process.
+    ///
+    /// Returns `None` when:
+    /// - no active local clip rect is set,
+    /// - `local_prim_rect` does not intersect the active local clip rect,
+    /// - either mapper fails to map the resulting rect,
+    /// - any active clip node rejects the region, or
+    /// - a mask is needed but the picture coverage rect does not intersect
+    ///   the active picture coverage rect.
+    ///
+    /// On success, the instances created for partially-clipping nodes have been
+    /// appended to `self.clip_node_instances` and are referenced by the
+    /// returned `clips_range`.
+    pub fn build_clip_chain_instance(
+        &mut self,
+        local_prim_rect: LayoutRect,
+        prim_to_pic_mapper: &SpaceMapper<LayoutPixel, PicturePixel>,
+        pic_to_world_mapper: &SpaceMapper<PicturePixel, WorldPixel>,
+        spatial_tree: &SpatialTree,
+        gpu_cache: &mut GpuCache,
+        resource_cache: &mut ResourceCache,
+        device_pixel_scale: DevicePixelScale,
+        world_rect: &WorldRect,
+        clip_data_store: &mut ClipDataStore,
+        request_resources: bool,
+    ) -> Option<ClipChainInstance> {
+        // `None` here means `set_active_clips` bailed out early (or was never run).
+        let local_clip_rect = match self.active_local_clip_rect {
+            Some(rect) => rect,
+            None => return None,
+        };
+        profile_scope!("build_clip_chain_instance");
+
+        // Reduce the primitive to the part that survives the local clip rect,
+        // then express that region in picture space and world space.
+        let local_bounding_rect = local_prim_rect.intersection(&local_clip_rect)?;
+        let mut pic_coverage_rect = prim_to_pic_mapper.map(&local_bounding_rect)?;
+        let world_clip_rect = pic_to_world_mapper.map(&pic_coverage_rect)?;
+
+        // Now, we've collected all the clip nodes that *potentially* affect this
+        // primitive region, and reduced the size of the prim region as much as possible.
+
+        // Run through the clip nodes, and see which ones affect this prim region.
+
+        // Instances pushed below start at this index of `clip_node_instances`.
+        let first_clip_node_index = self.clip_node_instances.len() as u32;
+        let mut has_non_local_clips = false;
+        let mut needs_mask = false;
+
+        // For each potential clip node
+        for node_info in self.active_clip_node_info.drain(..) {
+            let node = &mut clip_data_store[node_info.handle];
+
+            // See how this clip affects the prim region.
+            let clip_result = match node_info.conversion {
+                ClipSpaceConversion::Local => {
+                    node.item.kind.get_clip_result(&local_bounding_rect)
+                }
+                ClipSpaceConversion::ScaleOffset(ref scale_offset) => {
+                    has_non_local_clips = true;
+                    node.item.kind.get_clip_result(&scale_offset.unmap_rect(&local_bounding_rect))
+                }
+                ClipSpaceConversion::Transform(ref transform) => {
+                    has_non_local_clips = true;
+                    node.item.kind.get_clip_result_complex(
+                        transform,
+                        &world_clip_rect,
+                        world_rect,
+                    )
+                }
+            };
+
+            match clip_result {
+                ClipResult::Accept => {
+                    // Doesn't affect the primitive at all, so skip adding to list
+                }
+                ClipResult::Reject => {
+                    // Completely clips the supplied prim rect
+                    // NOTE: instances already pushed for earlier nodes stay in
+                    // `clip_node_instances`; no range referencing them is returned.
+                    return None;
+                }
+                ClipResult::Partial => {
+                    // Needs a mask -> add to clip node indices
+
+                    // TODO(gw): Ensure this only runs once on each node per frame?
+                    node.update(device_pixel_scale);
+
+                    // Create the clip node instance for this clip node
+                    if let Some(instance) = node_info.create_instance(
+                        node,
+                        &local_bounding_rect,
+                        gpu_cache,
+                        resource_cache,
+                        &mut self.mask_tiles,
+                        spatial_tree,
+                        request_resources,
+                    ) {
+                        // As a special case, a partial accept of a clip rect that is
+                        // in the same coordinate system as the primitive doesn't need
+                        // a clip mask. Instead, it can be handled by the primitive
+                        // vertex shader as part of the local clip rect. This is an
+                        // important optimization for reducing the number of clip
+                        // masks that are allocated on common pages.
+                        needs_mask |= match node.item.kind {
+                            ClipItemKind::Rectangle { mode: ClipMode::ClipOut, .. } |
+                            ClipItemKind::RoundedRectangle { .. } |
+                            ClipItemKind::Image { .. } |
+                            ClipItemKind::BoxShadow { .. } => {
+                                true
+                            }
+
+                            ClipItemKind::Rectangle { mode: ClipMode::Clip, .. } => {
+                                !instance.flags.contains(ClipNodeFlags::SAME_COORD_SYSTEM)
+                            }
+                        };
+
+                        // Store this in the index buffer for this clip chain instance.
+                        self.clip_node_instances.push(instance);
+                    }
+                }
+            }
+        }
+
+        // Get the range identifying the clip nodes in the index buffer.
+        let clips_range = ClipNodeRange {
+            first: first_clip_node_index,
+            count: self.clip_node_instances.len() as u32 - first_clip_node_index,
+        };
+
+        // If this clip chain needs a mask, reduce the size of the mask allocation
+        // by any clips that were in the same space as the picture. This can result
+        // in much smaller clip mask allocations in some cases. Note that the ordering
+        // here is important - the reduction must occur *after* the clip item accept
+        // reject checks above, so that we don't eliminate masks accidentally (since
+        // we currently only support a local clip rect in the vertex shader).
+        if needs_mask {
+            pic_coverage_rect = pic_coverage_rect.intersection(&self.active_pic_coverage_rect)?;
+        }
+
+        // Return a valid clip chain instance
+        Some(ClipChainInstance {
+            clips_range,
+            has_non_local_clips,
+            local_clip_rect,
+            pic_coverage_rect,
+            pic_spatial_node_index: prim_to_pic_mapper.ref_spatial_node_index,
+            needs_mask,
+        })
+    }
+
+    /// Takes the vectors held by `scratch` (handing the store's current ones
+    /// to `scratch` in exchange) and empties them, so the frame starts with
+    /// no clip node instances and no mask tiles.
+    pub fn begin_frame(&mut self, scratch: &mut ClipStoreScratchBuffer) {
+        mem::swap(&mut scratch.clip_node_instances, &mut self.clip_node_instances);
+        self.clip_node_instances.clear();
+
+        mem::swap(&mut scratch.mask_tiles, &mut self.mask_tiles);
+        self.mask_tiles.clear();
+    }
+
+    /// Exchanges the store's clip node instance and mask tile vectors with
+    /// the ones held by `scratch`; nothing is cleared here.
+    pub fn end_frame(&mut self, scratch: &mut ClipStoreScratchBuffer) {
+        mem::swap(&mut scratch.mask_tiles, &mut self.mask_tiles);
+        mem::swap(&mut scratch.clip_node_instances, &mut self.clip_node_instances);
+    }
+
+    /// Returns the mask tiles referenced by `instance.visible_tiles`, or an
+    /// empty slice when the instance has no visible tile range.
+    pub fn visible_mask_tiles(&self, instance: &ClipNodeInstance) -> &[VisibleMaskImageTile] {
+        match &instance.visible_tiles {
+            Some(tile_range) => &self.mask_tiles[tile_range.clone()],
+            None => &[],
+        }
+    }
+}
+
+/// Iterator adaptor that shifts the `rect` of every `ComplexClipRegion`
+/// produced by `source` by `offset`.
+pub struct ComplexTranslateIter<I> {
+    // Underlying iterator of clip regions.
+    source: I,
+    // Translation applied to each region's rect.
+    offset: LayoutVector2D,
+}
+
+impl<I> Iterator for ComplexTranslateIter<I>
+where
+    I: Iterator<Item = ComplexClipRegion>,
+{
+    type Item = ComplexClipRegion;
+
+    /// Pulls the next region from `source` and returns it with its rect
+    /// translated by `offset`; ends when `source` ends.
+    fn next(&mut self) -> Option<Self::Item> {
+        let mut region = self.source.next()?;
+        region.rect = region.rect.translate(self.offset);
+        Some(region)
+    }
+}
+
+// The ClipItemKey is a hashable representation of the contents
+// of a clip item. It is used during interning to de-duplicate
+// clip nodes between frames and display lists. This allows quick
+// comparison of clip node equality by handle, and also allows
+// the uploaded GPU cache handle to be retained between display lists.
+// TODO(gw): Maybe we should consider constructing these directly
+//           in the DL builder?
+#[derive(Copy, Debug, Clone, Eq, MallocSizeOf, PartialEq, Hash)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum ClipItemKeyKind {
+    // (clip rect, clip mode)
+    Rectangle(RectangleKey, ClipMode),
+    // (clip rect, corner radii, clip mode)
+    RoundedRectangle(RectangleKey, BorderRadiusAu, ClipMode),
+    // (mask rect, mask image, optional polygon)
+    ImageMask(RectangleKey, ImageKey, Option<PolygonDataHandle>),
+    // (fractional offset of the shadow rect origin, shadow rect size,
+    //  shadow radius, primitive shadow rect, blur radius, clip mode)
+    BoxShadow(PointKey, SizeKey, BorderRadiusAu, RectangleKey, Au, BoxShadowClipMode),
+}
+
+impl ClipItemKeyKind {
+    /// Key for a plain rectangular clip.
+    pub fn rectangle(rect: LayoutRect, mode: ClipMode) -> Self {
+        let rect_key: RectangleKey = rect.into();
+        Self::Rectangle(rect_key, mode)
+    }
+
+    /// Key for a rounded-rectangle clip. When every radius is zero this
+    /// degrades to a plain rectangle key; otherwise the radii are first
+    /// adjusted so that corners do not overlap within `rect`.
+    pub fn rounded_rect(rect: LayoutRect, mut radii: BorderRadius, mode: ClipMode) -> Self {
+        if radii.is_zero() {
+            return Self::rectangle(rect, mode);
+        }
+
+        ensure_no_corner_overlap(&mut radii, rect.size());
+        Self::RoundedRectangle(rect.into(), radii.into(), mode)
+    }
+
+    /// Key for an image mask clip covering `mask_rect`, using the image of
+    /// `image_mask` and an optional polygon.
+    pub fn image_mask(
+        image_mask: &ImageMask,
+        mask_rect: LayoutRect,
+        polygon_handle: Option<PolygonDataHandle>,
+    ) -> Self {
+        let image_key = image_mask.image;
+        Self::ImageMask(mask_rect.into(), image_key, polygon_handle)
+    }
+
+    /// Key for a box-shadow clip. Only the fractional part of the shadow
+    /// rect origin is stored, together with the shadow rect size.
+    pub fn box_shadow(
+        shadow_rect: LayoutRect,
+        shadow_radius: BorderRadius,
+        prim_shadow_rect: LayoutRect,
+        blur_radius: f32,
+        clip_mode: BoxShadowClipMode,
+    ) -> Self {
+        // Fractional offsets required to match the source rect with a
+        // minimal rect.
+        let origin = shadow_rect.min;
+        let fract_offset = LayoutPoint::new(origin.x.fract().abs(), origin.y.fract().abs());
+        let blur_radius_au = Au::from_f32_px(blur_radius);
+
+        Self::BoxShadow(
+            fract_offset.into(),
+            shadow_rect.size().into(),
+            shadow_radius.into(),
+            prim_shadow_rect.into(),
+            blur_radius_au,
+            clip_mode,
+        )
+    }
+
+    /// Classifies the key: only a rectangle in `ClipMode::Clip` counts as
+    /// `ClipNodeKind::Rectangle`, everything else is `ClipNodeKind::Complex`.
+    pub fn node_kind(&self) -> ClipNodeKind {
+        match *self {
+            Self::RoundedRectangle(..) |
+            Self::ImageMask(..) |
+            Self::BoxShadow(..) |
+            Self::Rectangle(_, ClipMode::ClipOut) => ClipNodeKind::Complex,
+
+            Self::Rectangle(_, ClipMode::Clip) => ClipNodeKind::Rectangle,
+        }
+    }
+}
+
+/// Interning key for a clip item: the hashable description of the clip
+/// together with the spatial node index it is associated with.
+#[derive(Debug, Copy, Clone, Eq, MallocSizeOf, PartialEq, Hash)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ClipItemKey {
+    pub kind: ClipItemKeyKind,
+    pub spatial_node_index: SpatialNodeIndex,
+}
+
+/// The data available about an interned clip node during scene building
+#[derive(Debug, MallocSizeOf)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ClipInternData {
+    // The key the clip node was interned with.
+    pub key: ClipItemKey,
+}
+
+// Empty impl: `ClipItemKey` relies entirely on whatever defaults the
+// `InternDebug` trait provides.
+impl intern::InternDebug for ClipItemKey {}
+
+// Describes the types used when interning clips: items are keyed by
+// `ClipItemKey`, stored as `ClipNode`, and carry `ClipInternData`.
+impl intern::Internable for ClipIntern {
+    type Key = ClipItemKey;
+    type StoreData = ClipNode;
+    type InternData = ClipInternData;
+    // Profiler counter slot associated with interned clips.
+    const PROFILE_COUNTER: usize = crate::profiler::INTERNED_CLIPS;
+}
+
+/// The different kinds of clip that a `ClipItem` can hold.
+#[derive(Debug, MallocSizeOf)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum ClipItemKind {
+    // Axis-aligned rectangle, clipping in or out depending on `mode`.
+    Rectangle {
+        rect: LayoutRect,
+        mode: ClipMode,
+    },
+    // Rectangle with per-corner radii, clipping in or out depending on `mode`.
+    RoundedRectangle {
+        rect: LayoutRect,
+        radius: BorderRadius,
+        mode: ClipMode,
+    },
+    // Image mask applied over `rect`, with an optional polygon.
+    Image {
+        image: ImageKey,
+        rect: LayoutRect,
+        polygon_handle: Option<PolygonDataHandle>,
+    },
+    // Box-shadow clip; see `ClipItemKind::new_box_shadow` for how `source`
+    // is computed.
+    BoxShadow {
+        source: BoxShadowClipSource,
+    },
+}
+
+/// A clip kind paired with the spatial node index it is associated with.
+#[derive(Debug, MallocSizeOf)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ClipItem {
+    pub kind: ClipItemKind,
+    pub spatial_node_index: SpatialNodeIndex,
+}
+
+/// Computes the `BoxShadowClipSource` for a box-shadow clip.
+///
+/// The result describes a minimal rect to blur (`minimal_shadow_rect`), the
+/// size to allocate for it including room for the blur
+/// (`shadow_rect_alloc_size`, also stored as `original_alloc_size`), and, per
+/// axis, whether the blurred result is stretched across the primitive or
+/// drawn as a simple blit. `render_task` and `cache_key` start out as `None`.
+fn compute_box_shadow_parameters(
+    shadow_rect_fract_offset: LayoutPoint,
+    shadow_rect_size: LayoutSize,
+    mut shadow_radius: BorderRadius,
+    prim_shadow_rect: LayoutRect,
+    blur_radius: f32,
+    clip_mode: BoxShadowClipMode,
+) -> BoxShadowClipSource {
+    // Corners must not overlap before any sizes are derived from them.
+    ensure_no_corner_overlap(&mut shadow_radius, shadow_rect_size);
+
+    // Maximum distance that can be affected by the given blur radius.
+    let blur_region = (BLUR_SAMPLE_SCALE * blur_radius).ceil();
+
+    // Every corner of the minimal mask gets the same size, which keeps the
+    // shader logic that stretches the blurred result simple.
+    let widest_corner = shadow_radius.top_left.width
+        .max(shadow_radius.bottom_left.width)
+        .max(shadow_radius.top_right.width)
+        .max(shadow_radius.bottom_right.width);
+    let tallest_corner = shadow_radius.top_left.height
+        .max(shadow_radius.bottom_left.height)
+        .max(shadow_radius.top_right.height)
+        .max(shadow_radius.bottom_right.height);
+
+    // A corner smaller than the blur region is grown to it, so that the
+    // corners cannot affect the middle segments.
+    let corner_width = widest_corner.max(blur_region);
+    let corner_height = tallest_corner.max(blur_region);
+
+    // Minimal nine-patch extent (corner + internal + corner), plus the
+    // fractional part of the shadow rect size.
+    let patch_width = 2.0 * corner_width + blur_region + shadow_rect_size.width.fract().abs();
+    let patch_height = 2.0 * corner_height + blur_region + shadow_rect_size.height.fract().abs();
+
+    // The minimal rect to blur, inset by the blur region.
+    let mut minimal_shadow_rect = LayoutRect::from_origin_and_size(
+        LayoutPoint::new(
+            blur_region + shadow_rect_fract_offset.x,
+            blur_region + shadow_rect_fract_offset.y,
+        ),
+        LayoutSize::new(patch_width, patch_height),
+    );
+
+    // When the minimal rect would be larger than the original shadow rect
+    // along an axis, blur the whole rect along that axis and draw it as a
+    // simple blit. This is required for correctness, since the blur of one
+    // corner may affect the blur in another corner.
+    let stretch_mode_x = if shadow_rect_size.width < minimal_shadow_rect.width() {
+        minimal_shadow_rect.max.x = minimal_shadow_rect.min.x + shadow_rect_size.width;
+        BoxShadowStretchMode::Simple
+    } else {
+        BoxShadowStretchMode::Stretch
+    };
+
+    let stretch_mode_y = if shadow_rect_size.height < minimal_shadow_rect.height() {
+        minimal_shadow_rect.max.y = minimal_shadow_rect.min.y + shadow_rect_size.height;
+        BoxShadowStretchMode::Simple
+    } else {
+        BoxShadowStretchMode::Stretch
+    };
+
+    // Leave enough room around the rect for the blur to take effect.
+    let shadow_rect_alloc_size = LayoutSize::new(
+        2.0 * blur_region + minimal_shadow_rect.width().ceil(),
+        2.0 * blur_region + minimal_shadow_rect.height().ceil(),
+    );
+
+    BoxShadowClipSource {
+        original_alloc_size: shadow_rect_alloc_size,
+        shadow_rect_alloc_size,
+        minimal_shadow_rect,
+        stretch_mode_x,
+        stretch_mode_y,
+        shadow_radius,
+        prim_shadow_rect,
+        blur_radius,
+        clip_mode,
+        render_task: None,
+        cache_key: None,
+    }
+}
+
+impl ClipItemKind {
+    /// Builds a `ClipItemKind::BoxShadow`.
+    ///
+    /// The parameters are computed once at full size. If the larger dimension
+    /// of the resulting allocation exceeds `MAX_SIZE`, the radii, fractional
+    /// offset, shadow rect size and blur radius are scaled down by
+    /// `MAX_SIZE / max_dimension` and the parameters are recomputed; the
+    /// full-size allocation is then kept in `original_alloc_size`.
+    pub fn new_box_shadow(
+        shadow_rect_fract_offset: LayoutPoint,
+        shadow_rect_size: LayoutSize,
+        mut shadow_radius: BorderRadius,
+        prim_shadow_rect: LayoutRect,
+        blur_radius: f32,
+        clip_mode: BoxShadowClipMode,
+    ) -> Self {
+        // This size is fairly arbitrary, but it's the same as the size that
+        // we use to avoid caching big blurred stacking contexts.
+        //
+        // If you change it, ensure that the reftests
+        // box-shadow-large-blur-radius-* still hit the downscaling path,
+        // and that they render correctly.
+        const MAX_SIZE: f32 = 2048.;
+
+        let full_size_source = compute_box_shadow_parameters(
+            shadow_rect_fract_offset,
+            shadow_rect_size,
+            shadow_radius,
+            prim_shadow_rect,
+            blur_radius,
+            clip_mode,
+        );
+
+        let original_alloc_size = full_size_source.shadow_rect_alloc_size;
+        let max_dimension = original_alloc_size.width.max(original_alloc_size.height);
+
+        let source = if max_dimension > MAX_SIZE {
+            let downscale = MAX_SIZE / max_dimension;
+
+            shadow_radius.top_left.width *= downscale;
+            shadow_radius.top_left.height *= downscale;
+            shadow_radius.top_right.width *= downscale;
+            shadow_radius.top_right.height *= downscale;
+            shadow_radius.bottom_left.width *= downscale;
+            shadow_radius.bottom_left.height *= downscale;
+            shadow_radius.bottom_right.width *= downscale;
+            shadow_radius.bottom_right.height *= downscale;
+
+            let mut downscaled_source = compute_box_shadow_parameters(
+                shadow_rect_fract_offset * downscale,
+                shadow_rect_size * downscale,
+                shadow_radius,
+                prim_shadow_rect,
+                blur_radius * downscale,
+                clip_mode,
+            );
+            // Remember the allocation size from before downscaling.
+            downscaled_source.original_alloc_size = original_alloc_size;
+            downscaled_source
+        } else {
+            full_size_source
+        };
+
+        ClipItemKind::BoxShadow { source }
+    }
+
+    /// Returns true if this clip mask can run through the fast path
+    /// for the given clip item type.
+    ///
+    /// Note: this logic has to match `ClipBatcher::add` behavior.
+    fn supports_fast_path_rendering(&self) -> bool {
+        match *self {
+            // The rounded clip rect fast path shader can only work
+            // if the radii are uniform.
+            ClipItemKind::RoundedRectangle { ref radius, .. } => radius.is_uniform().is_some(),
+
+            ClipItemKind::BoxShadow { .. } |
+            ClipItemKind::Image { .. } |
+            ClipItemKind::Rectangle { .. } => false,
+        }
+    }
+
+    // Get an optional clip rect that a clip source can provide to
+    // reduce the size of a primitive region. This is typically
+    // used to eliminate redundant clips, and reduce the size of
+    // any clip mask that eventually gets drawn.
+    //
+    // Rectangles and rounded rectangles in `ClipMode::Clip` and image
+    // masks report their rect; clip-out rects and box shadows report
+    // nothing.
+    pub fn get_local_clip_rect(&self) -> Option<LayoutRect> {
+        match *self {
+            ClipItemKind::Rectangle { rect, mode: ClipMode::Clip } |
+            ClipItemKind::RoundedRectangle { rect, mode: ClipMode::Clip, .. } |
+            ClipItemKind::Image { rect, .. } => Some(rect),
+
+            ClipItemKind::Rectangle { mode: ClipMode::ClipOut, .. } |
+            ClipItemKind::RoundedRectangle { mode: ClipMode::ClipOut, .. } |
+            ClipItemKind::BoxShadow { .. } => None,
+        }
+    }
+
+    // Check how a clip source affects a primitive whose world rect is
+    // `prim_world_rect`, when the clip has to be taken through `transform`.
+    //
+    // - Reject if the primitive does not intersect `world_rect` at all.
+    // - Box shadows are always Partial.
+    // - If the clip's inner rect (when it has one) contains the visible part
+    //   of the primitive once projected, the clip either has no effect (Clip)
+    //   or removes everything (ClipOut).
+    // - Otherwise ClipOut is Partial, and Clip is Partial unless the
+    //   projected outer clip rect misses the primitive, which is a Reject.
+    fn get_clip_result_complex(
+        &self,
+        transform: &LayoutToWorldTransform,
+        prim_world_rect: &WorldRect,
+        world_rect: &WorldRect,
+    ) -> ClipResult {
+        let visible_rect = if let Some(rect) = prim_world_rect.intersection(world_rect) {
+            rect
+        } else {
+            return ClipResult::Reject;
+        };
+
+        let (clip_rect, inner_rect, mode) = match *self {
+            ClipItemKind::BoxShadow { .. } => return ClipResult::Partial,
+            ClipItemKind::Image { rect, .. } => (rect, None, ClipMode::Clip),
+            ClipItemKind::Rectangle { rect, mode } => (rect, Some(rect), mode),
+            ClipItemKind::RoundedRectangle { rect, ref radius, mode } => {
+                (rect, extract_inner_rect_safe(&rect, radius), mode)
+            }
+        };
+
+        let visible_part_inside_clip = inner_rect.map_or(false, |inner_clip_rect| {
+            projected_rect_contains(&inner_clip_rect, transform, &visible_rect).is_some()
+        });
+
+        match (mode, visible_part_inside_clip) {
+            (ClipMode::Clip, true) => ClipResult::Accept,
+            (ClipMode::ClipOut, true) => ClipResult::Reject,
+            (ClipMode::ClipOut, false) => ClipResult::Partial,
+            (ClipMode::Clip, false) => {
+                // A clip rect that cannot be projected is treated as Partial.
+                let may_touch_prim = project_rect(transform, &clip_rect, world_rect)
+                    .map_or(true, |outer_clip_rect| {
+                        outer_clip_rect.intersection(prim_world_rect).is_some()
+                    });
+
+                if may_touch_prim {
+                    ClipResult::Partial
+                } else {
+                    ClipResult::Reject
+                }
+            }
+        }
+    }
+
+    // Check how a given clip source affects a local primitive region.
+    //
+    // For rectangles and rounded rectangles:
+    //   - prim fully inside the clip shape: Accept (Clip) / Reject (ClipOut)
+    //   - prim overlapping the clip rect:   Partial
+    //   - prim disjoint from the clip rect: Reject (Clip) / Accept (ClipOut)
+    // Image masks are Partial when overlapping and Reject otherwise, and box
+    // shadows are always Partial.
+    fn get_clip_result(
+        &self,
+        prim_rect: &LayoutRect,
+    ) -> ClipResult {
+        let overlaps_prim = |clip_rect: LayoutRect| clip_rect.intersection(prim_rect).is_some();
+
+        match *self {
+            ClipItemKind::Rectangle { rect, mode } => {
+                let prim_inside = rect.contains_box(prim_rect);
+
+                match mode {
+                    ClipMode::Clip if prim_inside => ClipResult::Accept,
+                    ClipMode::Clip if overlaps_prim(rect) => ClipResult::Partial,
+                    ClipMode::Clip => ClipResult::Reject,
+
+                    ClipMode::ClipOut if prim_inside => ClipResult::Reject,
+                    ClipMode::ClipOut if overlaps_prim(rect) => ClipResult::Partial,
+                    ClipMode::ClipOut => ClipResult::Accept,
+                }
+            }
+            ClipItemKind::RoundedRectangle { rect, ref radius, mode } => {
+                // TODO(gw): Consider caching this in the ClipNode
+                //           if it ever shows in profiles.
+                let prim_inside = rounded_rectangle_contains_box_quick(&rect, radius, prim_rect);
+
+                match mode {
+                    ClipMode::Clip if prim_inside => ClipResult::Accept,
+                    ClipMode::Clip if overlaps_prim(rect) => ClipResult::Partial,
+                    ClipMode::Clip => ClipResult::Reject,
+
+                    ClipMode::ClipOut if prim_inside => ClipResult::Reject,
+                    ClipMode::ClipOut if overlaps_prim(rect) => ClipResult::Partial,
+                    ClipMode::ClipOut => ClipResult::Accept,
+                }
+            }
+            ClipItemKind::Image { rect, .. } => {
+                if overlaps_prim(rect) {
+                    ClipResult::Partial
+                } else {
+                    ClipResult::Reject
+                }
+            }
+            ClipItemKind::BoxShadow { .. } => ClipResult::Partial,
+        }
+    }
+}
+
+/// Represents a local rect together with a device-space rectangle
+/// (which may be either outside or inside bounds).
+#[derive(Clone, Debug, PartialEq)]
+pub struct Geometry {
+    // Rect in local (layout) space.
+    pub local_rect: LayoutRect,
+    // Integer rect in device space; `From<LayoutRect>` initializes it to zero.
+    pub device_rect: DeviceIntRect,
+}
+
+impl From<LayoutRect> for Geometry {
+    /// Wraps `local_rect`, leaving the device rect as `DeviceIntRect::zero()`.
+    fn from(local_rect: LayoutRect) -> Self {
+        let device_rect = DeviceIntRect::zero();
+        Self { device_rect, local_rect }
+    }
+}
+
+/// Returns true if `point` lies inside the rounded rectangle described by
+/// `rect` and `radii`.
+///
+/// The point must be inside `rect`. In addition, for each corner, a point
+/// located in the quadrant between that corner and the corner's ellipse
+/// center must also be inside the corner's ellipse.
+pub fn rounded_rectangle_contains_point(
+    point: &LayoutPoint,
+    rect: &LayoutRect,
+    radii: &BorderRadius
+) -> bool {
+    if !rect.contains(*point) {
+        return false;
+    }
+
+    // Top-left corner.
+    let tl_center = rect.min + radii.top_left.to_vector();
+    let in_tl_quadrant = point.x < tl_center.x && point.y < tl_center.y;
+    if in_tl_quadrant &&
+       !Ellipse::new(radii.top_left).contains(*point - tl_center.to_vector()) {
+        return false;
+    }
+
+    // Bottom-right corner.
+    let br_center = rect.bottom_right() - radii.bottom_right.to_vector();
+    let in_br_quadrant = point.x > br_center.x && point.y > br_center.y;
+    if in_br_quadrant &&
+       !Ellipse::new(radii.bottom_right).contains(*point - br_center.to_vector()) {
+        return false;
+    }
+
+    // Top-right corner: the center sits left of and below the corner.
+    let tr_offset = LayoutVector2D::new(-radii.top_right.width, radii.top_right.height);
+    let tr_center = rect.top_right() + tr_offset;
+    let in_tr_quadrant = point.x > tr_center.x && point.y < tr_center.y;
+    if in_tr_quadrant &&
+       !Ellipse::new(radii.top_right).contains(*point - tr_center.to_vector()) {
+        return false;
+    }
+
+    // Bottom-left corner: the center sits right of and above the corner.
+    let bl_offset = LayoutVector2D::new(radii.bottom_left.width, -radii.bottom_left.height);
+    let bl_center = rect.bottom_left() + bl_offset;
+    let in_bl_quadrant = point.x < bl_center.x && point.y > bl_center.y;
+    if in_bl_quadrant &&
+       !Ellipse::new(radii.bottom_left).contains(*point - bl_center.to_vector()) {
+        return false;
+    }
+
+    true
+}
+
+/// Conservative containment test: returns true only when the rounded rectangle
+/// given by `container` and `radii` is certain to contain `containee`. A false
+/// result does not prove the opposite (false negatives are possible), but a
+/// true result is never wrong.
+fn rounded_rectangle_contains_box_quick(
+    container: &LayoutRect,
+    radii: &BorderRadius,
+    containee: &LayoutRect,
+) -> bool {
+    if !container.contains_box(containee) {
+        return false;
+    }
+
+    /// Mirror `pt` horizontally (negate `x`).
+    fn mirror_x(pt: LayoutPoint) -> LayoutPoint {
+        LayoutPoint::new(-pt.x, pt.y)
+    }
+
+    /// Mirror `pt` vertically (negate `y`).
+    fn mirror_y(pt: LayoutPoint) -> LayoutPoint {
+        LayoutPoint::new(pt.x, -pt.y)
+    }
+
+    // Each pair is (containee corner, far end of the container's corner region),
+    // mirrored where needed so that every case looks like the upper-left one.
+    let corner_pairs = [
+        (containee.top_left(), container.top_left() + radii.top_left),
+        (mirror_x(containee.top_right()), mirror_x(container.top_right()) + radii.top_right),
+        (mirror_y(containee.bottom_left()), mirror_y(container.bottom_left()) + radii.bottom_left),
+        (-containee.bottom_right(), -container.bottom_right() + radii.bottom_right),
+    ];
+
+    // The box is rejected as soon as one of its corners pokes into a square
+    // corner region, even if it would still be inside the actual curve.
+    corner_pairs
+        .iter()
+        .all(|&(pt, limit)| !(pt.x < limit.x && pt.y < limit.y))
+}
+
+/// Returns the 2D cross product `(p1 - p0) x (p - p0)`, whose sign tells where point p
+/// lies relative to the infinite line through p0 and p1. Point p is on the "left" of the
+/// line if the triangle (p0, p1, p) forms a counter-clockwise triangle.
+/// - `> 0`: p is left of the line
+/// - `< 0`: p is right of the line
+/// - `== 0`: p is on the line
+pub fn is_left_of_line(
+    p_x: f32,
+    p_y: f32,
+    p0_x: f32,
+    p0_y: f32,
+    p1_x: f32,
+    p1_y: f32,
+) -> f32 {
+    (p1_x - p0_x) * (p_y - p0_y) - (p_x - p0_x) * (p1_y - p0_y)
+}
+
+/// Returns true if `point` is inside `polygon` under the polygon's fill rule.
+/// Points outside `rect` are never inside.
+pub fn polygon_contains_point(
+    point: &LayoutPoint,
+    rect: &LayoutRect,
+    polygon: &PolygonKey,
+) -> bool {
+    if !rect.contains(*point) {
+        return false;
+    }
+
+    // p is a LayoutPoint (made relative to the rect origin) that we'll be
+    // comparing to dimensionless PointKeys, which were created from
+    // LayoutPoints, so it all works out.
+    let p = LayoutPoint::new(point.x - rect.min.x, point.y - rect.min.y);
+    let count = polygon.point_count as usize;
+
+    // Calculate a winding number for this point by summing the signed
+    // crossings of every polygon edge (a -> b), wrapping around at the end.
+    let winding_number: i32 = (0..count)
+        .map(|i| {
+            let a = polygon.points[i];
+            let b = polygon.points[(i + 1) % count];
+            let side = || is_left_of_line(p.x, p.y, a.x, a.y, b.x, b.y);
+
+            if a.y <= p.y {
+                // Crossing towards larger y counts +1 when p is left of the edge.
+                (b.y > p.y && side() > 0.0) as i32
+            } else {
+                // Crossing towards smaller y counts -1 when p is right of the edge.
+                -((b.y <= p.y && side() < 0.0) as i32)
+            }
+        })
+        .sum();
+
+    match polygon.fill_rule {
+        FillRule::Nonzero => winding_number != 0,
+        FillRule::Evenodd => winding_number.abs() % 2 == 1,
+    }
+}
+
+/// Fast-path check of whether `source_rect`, mapped through `transform`, fully
+/// covers `target_rect`. Returns `None` whenever that cannot be established.
+pub fn projected_rect_contains(
+    source_rect: &LayoutRect,
+    transform: &LayoutToWorldTransform,
+    target_rect: &WorldRect,
+) -> Option<()> {
+    let points = [
+        transform.transform_point2d(source_rect.top_left())?,
+        transform.transform_point2d(source_rect.top_right())?,
+        transform.transform_point2d(source_rect.bottom_right())?,
+        transform.transform_point2d(source_rect.bottom_left())?,
+    ];
+    let target_points = [
+        target_rect.top_left(),
+        target_rect.top_right(),
+        target_rect.bottom_right(),
+        target_rect.bottom_left(),
+    ];
+    // iterate the edges (a -> b) of the transformed polygon, wrapping around at the end
+    for (i, &a) in points.iter().enumerate() {
+        let b = points[(i + 1) % points.len()];
+        // If this edge is degenerate, it's a weird case, and we shouldn't go to great
+        // lengths trying to take the fast path (e.g. when the whole rectangle is a point).
+        let degenerate = a.approx_eq(&b);
+        // If any corner of the target rectangle lies on the negative side of the edge,
+        // it's not completely inside our transformed polygon either.
+        if degenerate || target_points.iter().any(|&c| (b - a).cross(c - a) < 0.0) {
+            return None;
+        }
+    }
+
+    Some(())
+}
+
+
+/// Add a clip node into the list of clips to be processed
+/// for the current clip chain. Returns false if the clip
+/// results in the entire primitive being culled out.
+fn add_clip_node_to_current_chain(
+    handle: ClipDataHandle,
+    prim_spatial_node_index: SpatialNodeIndex,
+    pic_spatial_node_index: SpatialNodeIndex,
+    local_clip_rect: &mut LayoutRect,
+    clip_node_info: &mut Vec<ClipNodeInfo>,
+    pic_coverage_rect: &mut PictureRect,
+    clip_data_store: &ClipDataStore,
+    spatial_tree: &SpatialTree,
+) -> bool {
+    let clip_node = &clip_data_store[handle];
+
+    // Determine the most efficient way to convert between coordinate
+    // systems of the primitive and clip node.
+    let conversion = ClipSpaceConversion::new(
+        prim_spatial_node_index,
+        clip_node.item.spatial_node_index,
+        spatial_tree,
+    );
+
+    // If the clip has a local clip rect, use it to reduce the size of the
+    // region requested; the conversion itself is cached below for the next step.
+    if let Some(clip_rect) = clip_node.item.kind.get_local_clip_rect() {
+        match conversion {
+            ClipSpaceConversion::Local => {
+                *local_clip_rect = match local_clip_rect.intersection(&clip_rect) {
+                    Some(rect) => rect,
+                    None => return false, // no overlap with the clip: the primitive is culled
+                };
+            }
+            ClipSpaceConversion::ScaleOffset(ref scale_offset) => {
+                let clip_rect = scale_offset.map_rect(&clip_rect);
+                *local_clip_rect = match local_clip_rect.intersection(&clip_rect) {
+                    Some(rect) => rect,
+                    None => return false, // no overlap with the clip: the primitive is culled
+                };
+            }
+            ClipSpaceConversion::Transform(..) => {
+                // Map the local clip rect directly into the same space as the picture
+                // surface. This will often be the same space as the clip itself, which
+                // results in a reduction in allocated clip mask size.
+
+                // For simplicity, only apply this optimization if the clip is in the
+                // same coord system as the picture. There are some 'advanced' perspective
+                // clip tests in wrench that break without this check. Those cases are
+                // never used in Gecko, and we aim to remove support in WR for that
+                // in future to simplify the clipping pipeline.
+                let pic_coord_system = spatial_tree
+                    .get_spatial_node(pic_spatial_node_index)
+                    .coordinate_system_id;
+
+                let clip_coord_system = spatial_tree
+                    .get_spatial_node(clip_node.item.spatial_node_index)
+                    .coordinate_system_id;
+
+                if pic_coord_system == clip_coord_system {
+                    let mapper = SpaceMapper::new_with_target(
+                        pic_spatial_node_index,
+                        clip_node.item.spatial_node_index,
+                        PictureRect::max_rect(),
+                        spatial_tree,
+                    );
+
+                    if let Some(pic_clip_rect) = mapper.map(&clip_rect) {
+                        *pic_coverage_rect = pic_clip_rect
+                            .intersection(pic_coverage_rect)
+                            .unwrap_or(PictureRect::zero()); // disjoint: coverage becomes empty
+                    }
+                }
+            }
+        }
+    }
+
+    clip_node_info.push(ClipNodeInfo {
+        conversion,
+        handle,
+    });
+
+    true
+}
+
+#[cfg(test)]
+mod tests {
+    use super::projected_rect_contains;
+    use euclid::{Transform3D, rect};
+
+    #[test]
+    fn test_empty_projected_rect() {
+        assert_eq!(
+            None,
+            projected_rect_contains(
+                &rect(10.0, 10.0, 0.0, 0.0).to_box2d(), // zero-sized source rect
+                &Transform3D::identity(),
+                &rect(20.0, 20.0, 10.0, 10.0).to_box2d(), // non-empty 10x10 target rect
+            ),
+            "Empty rectangle is considered to include a non-empty!"
+        );
+    }
+}
+
+/// PolygonKeys get interned, because it's a convenient way to move the data
+/// for the polygons out of the ClipItemKind and ClipItemKeyKind enums. The
+/// polygon data is both interned and retrieved by the scene builder, and not
+/// accessed at all by the frame builder. Another oddity is that the
+/// PolygonKey contains the totality of the information about the polygon, so
+/// the InternData and StoreData types are both PolygonKey.
+#[derive(Copy, Clone, Debug, Hash, MallocSizeOf, PartialEq, Eq)]
+#[cfg_attr(any(feature = "serde"), derive(Deserialize, Serialize))]
+pub enum PolygonIntern {}
+
+pub type PolygonDataHandle = intern::Handle<PolygonIntern>;
+
+impl intern::InternDebug for PolygonKey {}
+
+impl intern::Internable for PolygonIntern {
+    type Key = PolygonKey;
+    type StoreData = PolygonKey;
+    type InternData = PolygonKey;
+    const PROFILE_COUNTER: usize = crate::profiler::INTERNED_POLYGONS;
+}
diff --git a/gfx/wr/webrender/src/command_buffer.rs b/gfx/wr/webrender/src/command_buffer.rs
new file mode 100644
index 0000000000..0a3572c357
--- /dev/null
+++ b/gfx/wr/webrender/src/command_buffer.rs
@@ -0,0 +1,312 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use crate::{spatial_tree::SpatialNodeIndex, render_task_graph::RenderTaskId, surface::SurfaceTileDescriptor, picture::TileKey, renderer::GpuBufferAddress, FastHashMap, prim_store::PrimitiveInstanceIndex, gpu_cache::GpuCacheAddress};
+
+/// A tightly packed command stored in a command buffer (opcode bits plus param bits in one `u32`)
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Copy, Clone)]
+pub struct Command(u32);
+
+impl Command {
+    /// Draw a simple primitive that needs prim instance index only.
+    const CMD_DRAW_SIMPLE_PRIM: u32 = 0x00000000;
+    /// Change the current spatial node.
+    const CMD_SET_SPATIAL_NODE: u32 = 0x10000000;
+    /// Draw a complex (3d-split) primitive, that has multiple GPU cache addresses.
+    const CMD_DRAW_COMPLEX_PRIM: u32 = 0x20000000;
+    /// Draw a primitive, that has a single GPU buffer address.
+    const CMD_DRAW_INSTANCE: u32 = 0x40000000;
+
+    /// Bitmask for command bits of the command (the top 4 bits).
+    const CMD_MASK: u32 = 0xf0000000;
+    /// Bitmask for param bits of the command (the low 28 bits).
+    const PARAM_MASK: u32 = 0x0fffffff;
+
+    /// Encode drawing a simple primitive. NOTE(review): the index is OR-ed in unmasked.
+    fn draw_simple_prim(prim_instance_index: PrimitiveInstanceIndex) -> Self {
+        Command(Command::CMD_DRAW_SIMPLE_PRIM | prim_instance_index.0)
+    }
+
+    /// Encode changing spatial node. NOTE(review): the index is OR-ed in unmasked.
+    fn set_spatial_node(spatial_node_index: SpatialNodeIndex) -> Self {
+        Command(Command::CMD_SET_SPATIAL_NODE | spatial_node_index.0)
+    }
+
+    /// Encode drawing a complex prim. NOTE(review): the index is OR-ed in unmasked.
+    fn draw_complex_prim(prim_instance_index: PrimitiveInstanceIndex) -> Self {
+        Command(Command::CMD_DRAW_COMPLEX_PRIM | prim_instance_index.0)
+    }
+
+    fn draw_instance(prim_instance_index: PrimitiveInstanceIndex) -> Self {
+        Command(Command::CMD_DRAW_INSTANCE | prim_instance_index.0)
+    }
+
+    /// Encode arbitrary data word (stored as-is, without any command bits).
+    fn data(data: u32) -> Self {
+        Command(data)
+    }
+}
+
+/// The unpacked equivalent to a `Command`.
+///
+/// Each variant corresponds to one kind of draw command.
+pub enum PrimitiveCommand {
+    /// A primitive described by its instance index alone.
+    Simple {
+        prim_instance_index: PrimitiveInstanceIndex,
+    },
+    /// A primitive that also carries a GPU cache address.
+    Complex {
+        prim_instance_index: PrimitiveInstanceIndex,
+        gpu_address: GpuCacheAddress,
+    },
+    /// A primitive that also carries a GPU buffer address.
+    Instance {
+        prim_instance_index: PrimitiveInstanceIndex,
+        gpu_buffer_address: GpuBufferAddress,
+    },
+}
+
+impl PrimitiveCommand {
+    /// Build a `Simple` command for the given primitive instance.
+    pub fn simple(
+        prim_instance_index: PrimitiveInstanceIndex,
+    ) -> Self {
+        Self::Simple { prim_instance_index }
+    }
+
+    /// Build a `Complex` command from a primitive instance and its GPU cache address.
+    pub fn complex(
+        prim_instance_index: PrimitiveInstanceIndex,
+        gpu_address: GpuCacheAddress,
+    ) -> Self {
+        Self::Complex { prim_instance_index, gpu_address }
+    }
+
+    /// Build an `Instance` command from a primitive instance and its GPU buffer address.
+    pub fn instance(
+        prim_instance_index: PrimitiveInstanceIndex,
+        gpu_buffer_address: GpuBufferAddress,
+    ) -> Self {
+        Self::Instance { prim_instance_index, gpu_buffer_address }
+    }
+}
+
+
+/// A list of commands describing how to draw a primitive list.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct CommandBuffer {
+    /// The encoded drawing commands.
+    commands: Vec<Command>,
+    /// Cached current spatial node.
+    current_spatial_node_index: SpatialNodeIndex,
+}
+
+impl CommandBuffer {
+    /// Construct a new cmd buffer.
+    pub fn new() -> Self {
+        CommandBuffer {
+            commands: Vec::new(),
+            current_spatial_node_index: SpatialNodeIndex::INVALID,
+        }
+    }
+
+    /// Add a primitive to the command buffer.
+    ///
+    /// Indices are packed into the low bits of a command word next to the opcode.
+    /// Debug builds assert that they fit, as an oversized index would otherwise
+    /// silently overwrite the opcode bits and corrupt the command stream.
+    pub fn add_prim(
+        &mut self,
+        prim_cmd: &PrimitiveCommand,
+        spatial_node_index: SpatialNodeIndex,
+    ) {
+        if self.current_spatial_node_index != spatial_node_index {
+            debug_assert_eq!(spatial_node_index.0 & Command::CMD_MASK, 0, "spatial node index overflows param bits");
+            self.commands.push(Command::set_spatial_node(spatial_node_index));
+            self.current_spatial_node_index = spatial_node_index;
+        }
+
+        match *prim_cmd {
+            PrimitiveCommand::Simple { prim_instance_index } => {
+                debug_assert_eq!(prim_instance_index.0 & Command::CMD_MASK, 0, "prim index overflows param bits");
+                self.commands.push(Command::draw_simple_prim(prim_instance_index));
+            }
+            PrimitiveCommand::Complex { prim_instance_index, gpu_address } => {
+                debug_assert_eq!(prim_instance_index.0 & Command::CMD_MASK, 0, "prim index overflows param bits");
+                self.commands.push(Command::draw_complex_prim(prim_instance_index));
+                self.commands.push(Command::data((gpu_address.u as u32) << 16 | gpu_address.v as u32));
+            }
+            PrimitiveCommand::Instance { prim_instance_index, gpu_buffer_address } => {
+                debug_assert_eq!(prim_instance_index.0 & Command::CMD_MASK, 0, "prim index overflows param bits");
+                self.commands.push(Command::draw_instance(prim_instance_index));
+                self.commands.push(Command::data((gpu_buffer_address.u as u32) << 16 | gpu_buffer_address.v as u32));
+            }
+        }
+    }
+
+    /// Iterate the command list, calling a provided closure for each primitive draw command.
+    pub fn iter_prims<F>(
+        &self,
+        f: &mut F,
+    ) where F: FnMut(&PrimitiveCommand, SpatialNodeIndex) {
+        let mut current_spatial_node_index = SpatialNodeIndex::INVALID;
+        let mut cmd_iter = self.commands.iter();
+
+        while let Some(cmd) = cmd_iter.next() {
+            let command = cmd.0 & Command::CMD_MASK;
+            let param = cmd.0 & Command::PARAM_MASK;
+
+            match command {
+                Command::CMD_DRAW_SIMPLE_PRIM => {
+                    let prim_instance_index = PrimitiveInstanceIndex(param);
+                    let prim_cmd = PrimitiveCommand::simple(prim_instance_index);
+                    f(&prim_cmd, current_spatial_node_index);
+                }
+                Command::CMD_SET_SPATIAL_NODE => {
+                    current_spatial_node_index = SpatialNodeIndex(param);
+                }
+                Command::CMD_DRAW_COMPLEX_PRIM => {
+                    let prim_instance_index = PrimitiveInstanceIndex(param);
+                    let data = cmd_iter.next().expect("complex prim command is missing its data word");
+                    let gpu_address = GpuCacheAddress {
+                        u: (data.0 >> 16) as u16,
+                        v: (data.0 & 0xffff) as u16,
+                    };
+                    let prim_cmd = PrimitiveCommand::complex(prim_instance_index, gpu_address);
+                    f(&prim_cmd, current_spatial_node_index);
+                }
+                Command::CMD_DRAW_INSTANCE => {
+                    let prim_instance_index = PrimitiveInstanceIndex(param);
+                    let data = cmd_iter.next().expect("instance prim command is missing its data word");
+                    let gpu_buffer_address = GpuBufferAddress {
+                        u: (data.0 >> 16) as u16,
+                        v: (data.0 & 0xffff) as u16,
+                    };
+                    let prim_cmd = PrimitiveCommand::instance(prim_instance_index, gpu_buffer_address);
+                    f(&prim_cmd, current_spatial_node_index);
+                }
+                _ => unreachable!("unknown command opcode {:#x}", command),
+            }
+        }
+    }
+}
+
+/// Abstracts whether a command buffer is being built for a tiled (picture cache)
+/// or a simple (child surface) target.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum CommandBufferBuilderKind {
+    Tiled {
+        // TODO(gw): It might be worth storing this as a 2d-array instead
+        //           of a hash map if it ever shows up in profiles. This is
+        //           slightly complicated by the sub_slice_index in the
+        //           TileKey structure - could have a 2 level array?
+        tiles: FastHashMap<TileKey, SurfaceTileDescriptor>,
+    },
+    Simple {
+        render_task_id: RenderTaskId,
+        root_task_id: Option<RenderTaskId>,
+    },
+    Invalid, // the kind that `CommandBufferBuilder::empty()` is constructed with
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct CommandBufferBuilder {
+    pub kind: CommandBufferBuilderKind,
+
+    /// If a command buffer establishes a sub-graph, then at the end of constructing
+    /// the surface, the parent surface is supplied as an input dependency, and the
+    /// parent surface gets a duplicated (existing) task with the same location, and
+    /// with the sub-graph output as an input dependency.
+    pub establishes_sub_graph: bool,
+
+    /// If this surface builds a sub-graph, it will mark a task in the filter sub-graph
+    /// as a resolve source for the input from the parent surface.
+    pub resolve_source: Option<RenderTaskId>,
+
+    /// List of render tasks that depend on the task that will be created for this builder.
+    pub extra_dependencies: Vec<RenderTaskId>,
+}
+
+impl CommandBufferBuilder {
+    /// Construct an `Invalid` builder: no sub-graph, resolve source or extra dependencies.
+    pub fn empty() -> Self {
+        Self {
+            kind: CommandBufferBuilderKind::Invalid,
+            establishes_sub_graph: false,
+            resolve_source: None,
+            extra_dependencies: Vec::new(),
+        }
+    }
+
+    /// Construct a tiled command buffer builder.
+    ///
+    /// Every field other than `kind` takes its value from `empty()`.
+    pub fn new_tiled(
+        tiles: FastHashMap<TileKey, SurfaceTileDescriptor>,
+    ) -> Self {
+        Self {
+            kind: CommandBufferBuilderKind::Tiled { tiles },
+            ..Self::empty()
+        }
+    }
+
+    /// Construct a simple command buffer builder.
+    ///
+    /// `resolve_source` and `extra_dependencies` take their values from `empty()`.
+    pub fn new_simple(
+        render_task_id: RenderTaskId,
+        establishes_sub_graph: bool,
+        root_task_id: Option<RenderTaskId>,
+    ) -> Self {
+        Self {
+            kind: CommandBufferBuilderKind::Simple {
+                render_task_id,
+                root_task_id,
+            },
+            establishes_sub_graph,
+            ..Self::empty()
+        }
+    }
+}
+
+/// Index into a command buffer stored in a `CommandBufferList`.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Copy, Clone)]
+pub struct CommandBufferIndex(pub u32);
+
+/// Container for a list of command buffers that are built for a frame.
+pub struct CommandBufferList {
+    cmd_buffers: Vec<CommandBuffer>,
+}
+
+impl CommandBufferList {
+    /// Construct an empty list.
+    pub fn new() -> Self {
+        Self { cmd_buffers: Vec::new() }
+    }
+
+    /// Append a fresh, empty command buffer and return its index.
+    pub fn create_cmd_buffer(&mut self) -> CommandBufferIndex {
+        let slot = self.cmd_buffers.len() as u32;
+        self.cmd_buffers.push(CommandBuffer::new());
+        CommandBufferIndex(slot)
+    }
+
+    /// Borrow the command buffer at `index`. Panics if `index` is out of range.
+    pub fn get(&self, index: CommandBufferIndex) -> &CommandBuffer {
+        &self.cmd_buffers[index.0 as usize]
+    }
+
+    /// Mutably borrow the command buffer at `index`. Panics if `index` is out of range.
+    pub fn get_mut(&mut self, index: CommandBufferIndex) -> &mut CommandBuffer {
+        &mut self.cmd_buffers[index.0 as usize]
+    }
+}
diff --git a/gfx/wr/webrender/src/composite.rs b/gfx/wr/webrender/src/composite.rs
new file mode 100644
index 0000000000..9fde23e8d6
--- /dev/null
+++ b/gfx/wr/webrender/src/composite.rs
@@ -0,0 +1,1466 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{ColorF, YuvRangedColorSpace, YuvFormat, ImageRendering, ExternalImageId, ImageBufferKind};
+use api::units::*;
+use api::ColorDepth;
+use crate::image_source::resolve_image;
+use euclid::{Box2D, Transform3D};
+use crate::gpu_cache::GpuCache;
+use crate::gpu_types::{ZBufferId, ZBufferIdGenerator};
+use crate::internal_types::TextureSource;
+use crate::picture::{ImageDependency, ResolvedSurfaceTexture, TileCacheInstance, TileId, TileSurface};
+use crate::prim_store::DeferredResolve;
+use crate::resource_cache::{ImageRequest, ResourceCache};
+use crate::util::{Preallocator, ScaleOffset};
+use crate::tile_cache::PictureCacheDebugInfo;
+use std::{ops, u64, os::raw::c_void};
+
+/*
+ Types and definitions related to compositing picture cache tiles
+ and/or OS compositor integration.
+ */
+
+/// Describes details of an operation to apply to a native surface
+#[derive(Debug, Clone)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum NativeSurfaceOperationDetails {
+    CreateSurface {
+        id: NativeSurfaceId,
+        virtual_offset: DeviceIntPoint,
+        tile_size: DeviceIntSize,
+        is_opaque: bool,
+    },
+    CreateExternalSurface {
+        id: NativeSurfaceId,
+        is_opaque: bool,
+    },
+    CreateBackdropSurface {
+        id: NativeSurfaceId,
+        color: ColorF,
+    },
+    DestroySurface {
+        id: NativeSurfaceId,
+    },
+    CreateTile {
+        id: NativeTileId,
+    },
+    DestroyTile {
+        id: NativeTileId,
+    },
+    AttachExternalImage {
+        id: NativeSurfaceId,
+        external_image: ExternalImageId,
+    }
+}
+
+/// Describes an operation to apply to a native surface
+#[derive(Debug, Clone)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct NativeSurfaceOperation {
+    pub details: NativeSurfaceOperationDetails,
+}
+
+/// Describes the source surface information for a tile to be composited. This
+/// is the analog of the TileSurface type, with target surface information
+/// resolved such that it can be used by the renderer.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Clone)]
+pub enum CompositeTileSurface {
+    Texture {
+        surface: ResolvedSurfaceTexture,
+    },
+    Color {
+        color: ColorF,
+    },
+    Clear,
+    ExternalSurface {
+        external_surface_index: ResolvedExternalSurfaceIndex,
+    },
+}
+
+/// The surface format for a tile being composited.
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub enum CompositeSurfaceFormat {
+    Rgba,
+    Yuv,
+}
+
+bitflags! {
+    /// Optional features that can be opted-out of when compositing,
+    /// possibly allowing a fast path to be selected.
+    pub struct CompositeFeatures: u8 {
+        // UV coordinates do not require clamping, for example because the
+        // entire texture is being composited.
+        const NO_UV_CLAMP = 1 << 0;
+        // The texture sample should not be modulated by a specified color.
+        const NO_COLOR_MODULATION = 1 << 1;
+    }
+}
+
+#[derive(Copy, Clone, Debug, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum TileKind {
+    Opaque,
+    Alpha,
+    Clear,
+}
+
+/// Index into the compositor transforms stored in `CompositeState`
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Copy, Clone)]
+pub struct CompositorTransformIndex(usize);
+
+impl CompositorTransformIndex {
+    pub const INVALID: CompositorTransformIndex = CompositorTransformIndex(!0); // all bits set
+}
+
+/// Describes the geometry and surface of a tile to be composited
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Clone)]
+pub struct CompositeTile {
+    pub surface: CompositeTileSurface,
+    pub local_rect: PictureRect,
+    pub local_valid_rect: PictureRect,
+    pub local_dirty_rect: PictureRect,
+    pub device_clip_rect: DeviceRect,
+    pub z_id: ZBufferId,
+    pub kind: TileKind,
+    pub transform_index: CompositorTransformIndex,
+}
+
+/// Select the `TileKind` bucket that a tile with the given surface belongs to.
+///
+/// `is_opaque` only affects texture-backed and external surfaces; color and
+/// clear tiles always map to a fixed kind.
+pub fn tile_kind(surface: &CompositeTileSurface, is_opaque: bool) -> TileKind {
+    // Texture surfaces get bucketed by opaque/alpha, for z-rejection
+    // on the Draw compositor mode.
+    let texture_kind = if is_opaque { TileKind::Opaque } else { TileKind::Alpha };
+
+    match *surface {
+        // Color tiles are, by definition, opaque. We might support non-opaque color
+        // tiles if we ever find pages that have a lot of these.
+        CompositeTileSurface::Color { .. } => TileKind::Opaque,
+        // Clear tiles have a special bucket
+        CompositeTileSurface::Clear => TileKind::Clear,
+        CompositeTileSurface::Texture { .. } |
+        CompositeTileSurface::ExternalSurface { .. } => texture_kind,
+    }
+}
+
+pub enum ExternalSurfaceDependency {
+    Yuv {
+        image_dependencies: [ImageDependency; 3],
+        color_space: YuvRangedColorSpace,
+        format: YuvFormat,
+        channel_bit_depth: u32,
+    },
+    Rgb {
+        image_dependency: ImageDependency,
+    },
+}
+
+/// Describes information about drawing a primitive as a compositor surface.
+/// The surface's image source is described by `dependency`, which is either
+/// a set of YUV planes or a single RGB image.
+pub struct ExternalSurfaceDescriptor {
+    // Normalized rectangle of this surface in local coordinate space
+    // TODO(gw): Fix up local_rect unit kinds in ExternalSurfaceDescriptor (many flow on effects)
+    pub local_surface_size: LayoutSize,
+    pub local_rect: PictureRect,
+    pub local_clip_rect: PictureRect,
+    pub clip_rect: DeviceRect,
+    pub transform_index: CompositorTransformIndex,
+    pub image_rendering: ImageRendering,
+    pub z_id: ZBufferId,
+    pub dependency: ExternalSurfaceDependency,
+    /// If native compositing is enabled, the native compositor surface handle.
+    /// Otherwise, this will be None
+    pub native_surface_id: Option<NativeSurfaceId>,
+    /// If the native surface needs to be updated, this will contain the size
+    /// of the native surface as Some(size). If not dirty, this is None.
+    pub update_params: Option<DeviceIntSize>,
+}
+
+/// Information about a plane in a YUV or RGB surface.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Copy, Clone)]
+pub struct ExternalPlaneDescriptor {
+    pub texture: TextureSource,
+    pub uv_rect: TexelRect,
+}
+
+impl ExternalPlaneDescriptor {
+    fn invalid() -> Self {
+        ExternalPlaneDescriptor {
+            texture: TextureSource::Invalid,
+            uv_rect: TexelRect::invalid(),
+        }
+    }
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub struct ResolvedExternalSurfaceIndex(pub usize);
+
+impl ResolvedExternalSurfaceIndex {
+    pub const INVALID: ResolvedExternalSurfaceIndex = ResolvedExternalSurfaceIndex(usize::MAX);
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum ResolvedExternalSurfaceColorData {
+    Yuv {
+        // YUV specific information
+        image_dependencies: [ImageDependency; 3],
+        planes: [ExternalPlaneDescriptor; 3],
+        color_space: YuvRangedColorSpace,
+        format: YuvFormat,
+        channel_bit_depth: u32,
+    },
+    Rgb {
+        image_dependency: ImageDependency,
+        plane: ExternalPlaneDescriptor,
+    },
+}
+
+/// An ExternalSurfaceDescriptor that has had image keys
+/// resolved to texture handles. This contains all the
+/// information that the compositor step in renderer
+/// needs to know.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ResolvedExternalSurface {
+    pub color_data: ResolvedExternalSurfaceColorData,
+    pub image_buffer_kind: ImageBufferKind,
+    // Update information for a native surface if it's dirty
+    pub update_params: Option<(NativeSurfaceId, DeviceIntSize)>,
+}
+
+/// Public interface specified in `WebRenderOptions` that configures
+/// how WR compositing will operate.
+///
+/// The variants own the client supplied trait objects. `CompositorKind` is the
+/// representation of this configuration without those trait objects.
+pub enum CompositorConfig {
+    /// Let WR draw tiles via normal batching. This requires no special OS support.
+    Draw {
+        /// If this is zero, a full screen present occurs at the end of the
+        /// frame. This is the simplest and default mode. If this is non-zero,
+        /// then the operating system supports a form of 'partial present' where
+        /// only dirty regions of the framebuffer need to be updated.
+        max_partial_present_rects: usize,
+        /// If this is true, WR must draw the previous frame's dirty regions when
+        /// doing a partial present. This is used for EGL which requires the front
+        /// buffer to always be fully consistent.
+        draw_previous_partial_present_regions: bool,
+        /// A client provided interface to a compositor handling partial present.
+        /// Required if webrender must query the backbuffer's age.
+        partial_present: Option<Box<dyn PartialPresentCompositor>>,
+    },
+    /// Use a native OS compositor to draw tiles. This requires clients to implement
+    /// the Compositor trait, but can be significantly more power efficient on operating
+    /// systems that support it.
+    Native {
+        /// A client provided interface to a native / OS compositor.
+        compositor: Box<dyn Compositor>,
+    }
+}
+
+impl CompositorConfig {
+    /// Mutable access to the client supplied native compositor. Yields `None`
+    /// when the configuration is `Draw`.
+    pub fn compositor(&mut self) -> Option<&mut Box<dyn Compositor>> {
+        if let CompositorConfig::Native { compositor, .. } = self {
+            Some(compositor)
+        } else {
+            None
+        }
+    }
+
+    /// Mutable access to the client supplied partial present interface. Yields
+    /// `None` when the configuration is `Native`, or when `Draw` was configured
+    /// without such an interface.
+    pub fn partial_present(&mut self) -> Option<&mut Box<dyn PartialPresentCompositor>> {
+        if let CompositorConfig::Draw { partial_present, .. } = self {
+            partial_present.as_mut()
+        } else {
+            None
+        }
+    }
+}
+
+impl Default for CompositorConfig {
+    /// The default configuration lets WR draw the tiles itself, presenting the
+    /// full screen each frame (no partial present).
+    fn default() -> Self {
+        Self::Draw {
+            partial_present: None,
+            draw_previous_partial_present_regions: false,
+            max_partial_present_rects: 0,
+        }
+    }
+}
+
+/// This is a representation of `CompositorConfig` without the `Compositor` trait
+/// present. This allows it to be freely copied to other threads, such as the render
+/// backend where the frame builder can access it.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub enum CompositorKind {
+    /// WR handles compositing via drawing.
+    Draw {
+        /// Partial present support. Zero means partial present is not in use;
+        /// see `CompositorConfig::Draw::max_partial_present_rects`.
+        max_partial_present_rects: usize,
+        /// Draw previous regions when doing partial present.
+        draw_previous_partial_present_regions: bool,
+    },
+    /// Native OS compositor.
+    Native {
+        /// The capabilities of the underlying platform.
+        capabilities: CompositorCapabilities,
+    },
+}
+
+impl Default for CompositorKind {
+    /// The default compositor kind is `Draw` with a full present (no partial
+    /// present rects, no redraw of previous regions).
+    fn default() -> Self {
+        Self::Draw {
+            draw_previous_partial_present_regions: false,
+            max_partial_present_rects: 0,
+        }
+    }
+}
+
+impl CompositorKind {
+    /// The virtual surface size reported by the native compositor's
+    /// capabilities, or zero when WR draws the tiles itself.
+    pub fn get_virtual_surface_size(&self) -> i32 {
+        match *self {
+            CompositorKind::Native { capabilities, .. } => capabilities.virtual_surface_size,
+            CompositorKind::Draw { .. } => 0,
+        }
+    }
+
+    /// Whether an invalidation must trigger a redraw. A native compositor
+    /// decides this through its capabilities; the Draw compositor needs it
+    /// whenever partial present is enabled.
+    pub fn should_redraw_on_invalidation(&self) -> bool {
+        match *self {
+            CompositorKind::Native { capabilities, .. } => capabilities.redraw_on_invalidation,
+            // Partial present is enabled when at least one rect is supported,
+            // and in that case a redraw has to be forced.
+            CompositorKind::Draw { max_partial_present_rects, .. } => {
+                max_partial_present_rects > 0
+            }
+        }
+    }
+}
+
+/// The backing surface kind for a tile. Same as `TileSurface`, minus
+/// the texture cache handles, visibility masks etc.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(PartialEq, Clone)]
+pub enum TileSurfaceKind {
+    /// Counterpart of `TileSurface::Texture`, without its payload.
+    Texture,
+    /// Counterpart of `TileSurface::Color`, carrying the same color.
+    Color {
+        color: ColorF,
+    },
+    /// Counterpart of `TileSurface::Clear`.
+    Clear,
+}
+
+impl From<&TileSurface> for TileSurfaceKind {
+    /// Reduce a tile surface to its kind, keeping only the color payload.
+    fn from(surface: &TileSurface) -> Self {
+        match *surface {
+            TileSurface::Clear => TileSurfaceKind::Clear,
+            TileSurface::Color { color } => TileSurfaceKind::Color { color },
+            TileSurface::Texture { .. } => TileSurfaceKind::Texture,
+        }
+    }
+}
+
+/// Describes properties that identify a tile composition uniquely.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(PartialEq, Clone)]
+pub struct CompositeTileDescriptor {
+    /// The id of the tile being composited.
+    pub tile_id: TileId,
+    /// The backing surface for this tile.
+    pub surface_kind: TileSurfaceKind,
+}
+
+/// Describes the properties that identify a surface composition uniquely.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(PartialEq, Clone)]
+pub struct CompositeSurfaceDescriptor {
+    /// The native surface backing this composition, if there is one.
+    pub surface_id: Option<NativeSurfaceId>,
+    /// The clip rect applied to the surface, in device space.
+    pub clip_rect: DeviceRect,
+    /// The transform applied to the surface when compositing.
+    pub transform: CompositorSurfaceTransform,
+    /// A list of image keys and generations that this compositor surface
+    /// depends on. This avoids composites being skipped when the only
+    /// thing that has changed is the generation of a compositor surface
+    /// image dependency. Unused slots hold `ImageDependency::INVALID`.
+    pub image_dependencies: [ImageDependency; 3],
+    /// The image rendering mode used when compositing this surface.
+    pub image_rendering: ImageRendering,
+    /// List of the surface information for each tile added to this virtual surface
+    pub tile_descriptors: Vec<CompositeTileDescriptor>,
+}
+
+/// Describes surface properties used to composite a frame. This
+/// is used to compare compositions between frames.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(PartialEq, Clone)]
+pub struct CompositeDescriptor {
+    /// One descriptor per surface pushed for this frame, in push order.
+    pub surfaces: Vec<CompositeSurfaceDescriptor>,
+    /// The union of the device rects of all external (compositor) surfaces
+    /// pushed for this frame.
+    pub external_surfaces_rect: DeviceRect,
+}
+
+impl CompositeDescriptor {
+    /// Create a descriptor with no surfaces and a zero external surfaces rect.
+    pub fn empty() -> Self {
+        Self {
+            external_surfaces_rect: DeviceRect::zero(),
+            surfaces: vec![],
+        }
+    }
+}
+
+/// Holds one `Preallocator` per vector of a `CompositeState`. The vectors of a
+/// state are recorded with `record` and the vectors of another state are
+/// preallocated with `preallocate`.
+// NOTE(review): presumably this is used to size the vectors of a new frame's
+// state from the previous frame's state - confirm against the callers.
+pub struct CompositeStatePreallocator {
+    // For `CompositeState::tiles`.
+    tiles: Preallocator,
+    // For `CompositeState::external_surfaces`.
+    external_surfaces: Preallocator,
+    // For the `occluders`, `events` and `active` vectors of
+    // `CompositeState::occluders`.
+    occluders: Preallocator,
+    occluders_events: Preallocator,
+    occluders_active: Preallocator,
+    // For `CompositeState::descriptor.surfaces`.
+    descriptor_surfaces: Preallocator,
+}
+
+impl CompositeStatePreallocator {
+    /// Record each vector of `state` with its matching preallocator.
+    pub fn record(&mut self, state: &CompositeState) {
+        let occluders = &state.occluders;
+
+        self.tiles.record_vec(&state.tiles);
+        self.external_surfaces.record_vec(&state.external_surfaces);
+        self.occluders.record_vec(&occluders.occluders);
+        self.occluders_events.record_vec(&occluders.events);
+        self.occluders_active.record_vec(&occluders.active);
+        self.descriptor_surfaces.record_vec(&state.descriptor.surfaces);
+    }
+
+    /// Preallocate each vector of `state` using its matching preallocator.
+    pub fn preallocate(&self, state: &mut CompositeState) {
+        self.tiles.preallocate_vec(&mut state.tiles);
+        self.external_surfaces.preallocate_vec(&mut state.external_surfaces);
+
+        let occluders = &mut state.occluders;
+        self.occluders.preallocate_vec(&mut occluders.occluders);
+        self.occluders_events.preallocate_vec(&mut occluders.events);
+        self.occluders_active.preallocate_vec(&mut occluders.active);
+
+        self.descriptor_surfaces.preallocate_vec(&mut state.descriptor.surfaces);
+    }
+}
+
+impl Default for CompositeStatePreallocator {
+    /// Create the preallocators with their initial values, one per vector.
+    fn default() -> Self {
+        let tiles = Preallocator::new(56);
+        let external_surfaces = Preallocator::new(0);
+        let occluders = Preallocator::new(16);
+        let occluders_events = Preallocator::new(32);
+        let occluders_active = Preallocator::new(16);
+        let descriptor_surfaces = Preallocator::new(8);
+
+        Self {
+            tiles,
+            external_surfaces,
+            occluders,
+            occluders_events,
+            occluders_active,
+            descriptor_surfaces,
+        }
+    }
+}
+
+/// A transform for either a picture cache or external compositor surface, stored
+/// in the `CompositeState` structure. This allows conversions from local rects
+/// to raster or device rects, without access to the spatial tree (e.g. during
+/// the render step where dirty rects are calculated). Since we know that we only
+/// handle scale and offset transforms for these types, we can store a single
+/// ScaleOffset rather than 4x4 matrix here for efficiency.
+///
+/// Instances are created by `CompositeState::register_transform`, which also
+/// derives `local_to_device` from the two supplied transforms.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct CompositorTransform {
+    // Map from local rect of a composite tile to the real backing surface coords
+    local_to_surface: ScaleOffset,
+    // Map from surface coords to the final device space position
+    surface_to_device: ScaleOffset,
+    // Combined local -> surface -> device transform
+    local_to_device: ScaleOffset,
+}
+
+/// The list of tiles to be drawn this frame
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct CompositeState {
+    // TODO(gw): Consider splitting up CompositeState into separate struct types depending
+    //           on the selected compositing mode. Many of the fields in this state struct
+    //           are only applicable to either Native or Draw compositing mode.
+    /// List of tiles to be drawn by the Draw compositor.
+    /// Tiles are accumulated in this vector and sorted from front to back at the end of the
+    /// frame.
+    pub tiles: Vec<CompositeTile>,
+    /// List of primitives that were promoted to be compositor surfaces.
+    pub external_surfaces: Vec<ResolvedExternalSurface>,
+    /// Used to generate z-id values for tiles in the Draw compositor mode.
+    pub z_generator: ZBufferIdGenerator,
+    /// If false, we can't rely on the dirty rects in the CompositeTile
+    /// instances. This currently occurs during a scroll event, as a
+    /// signal to refresh the whole screen. This is only a temporary
+    /// measure until we integrate with OS compositors. In the meantime
+    /// it gives us the ability to partial present for any non-scroll
+    /// case as a simple win (e.g. video, animation etc).
+    pub dirty_rects_are_valid: bool,
+    /// The kind of compositor for picture cache tiles (e.g. drawn by WR, or OS compositor)
+    pub compositor_kind: CompositorKind,
+    /// List of registered occluders
+    pub occluders: Occluders,
+    /// Description of the surfaces and properties that are being composited.
+    pub descriptor: CompositeDescriptor,
+    /// Debugging information about the state of the pictures cached for regression testing.
+    pub picture_cache_debug: PictureCacheDebugInfo,
+    /// List of registered transforms used by picture cache or external surfaces.
+    /// Indexed by the `CompositorTransformIndex` returned from `register_transform`.
+    pub transforms: Vec<CompositorTransform>,
+    /// Whether we have low quality pinch zoom enabled. When set, picture cache
+    /// surfaces are composited with `ImageRendering::Auto` rather than
+    /// `ImageRendering::CrispEdges`.
+    low_quality_pinch_zoom: bool,
+}
+
+impl CompositeState {
+    /// Build an empty state for compositing picture tiles. A state is created
+    /// during each frame construction and passed to the renderer.
+    ///
+    /// `max_depth_ids` is handed to the z-buffer id generator; the remaining
+    /// arguments are stored as-is.
+    pub fn new(
+        compositor_kind: CompositorKind,
+        max_depth_ids: i32,
+        dirty_rects_are_valid: bool,
+        low_quality_pinch_zoom: bool,
+    ) -> Self {
+        let z_generator = ZBufferIdGenerator::new(max_depth_ids);
+
+        Self {
+            tiles: Vec::new(),
+            external_surfaces: Vec::new(),
+            z_generator,
+            dirty_rects_are_valid,
+            compositor_kind,
+            occluders: Occluders::new(),
+            descriptor: CompositeDescriptor::empty(),
+            picture_cache_debug: PictureCacheDebugInfo::new(),
+            transforms: Vec::new(),
+            low_quality_pinch_zoom,
+        }
+    }
+
+    /// Register a transform used by a picture cache tile or an external surface.
+    ///
+    /// The combined local -> device transform is computed once here and stored
+    /// next to the two supplied transforms. Returns the index under which the
+    /// transform can be looked up again.
+    pub fn register_transform(
+        &mut self,
+        local_to_surface: ScaleOffset,
+        surface_to_device: ScaleOffset,
+    ) -> CompositorTransformIndex {
+        let local_to_device = local_to_surface.accumulate(&surface_to_device);
+
+        // The new entry is appended, so its index is the current length.
+        let slot = self.transforms.len();
+        self.transforms.push(CompositorTransform {
+            local_to_surface,
+            surface_to_device,
+            local_to_device,
+        });
+
+        CompositorTransformIndex(slot)
+    }
+
+    /// Map a local compositor surface rect into device space, using the
+    /// registered local -> device transform, and round the result.
+    pub fn get_device_rect(
+        &self,
+        local_rect: &PictureRect,
+        transform_index: CompositorTransformIndex,
+    ) -> DeviceRect {
+        self.transforms[transform_index.0]
+            .local_to_device
+            .map_rect(local_rect)
+            .round()
+    }
+
+    /// Calculate the rect that a local sub-rect covers on its backing surface,
+    /// relative to the origin of that surface.
+    ///
+    /// Both `local_sub_rect` and `local_bounds` are mapped through the registered
+    /// local -> surface transform. The mapped sub-rect is then expressed relative
+    /// to the origin of the mapped bounds, rounded out, and clipped to the rounded
+    /// size of the mapped bounds. A zero rect is returned if nothing remains after
+    /// clipping.
+    pub fn get_surface_rect<T>(
+        &self,
+        local_sub_rect: &Box2D<f32, T>,
+        local_bounds: &Box2D<f32, T>,
+        transform_index: CompositorTransformIndex,
+    ) -> DeviceRect {
+        let transform = &self.transforms[transform_index.0];
+
+        let surface_bounds = transform.local_to_surface.map_rect(&local_bounds);
+        let surface_rect = transform.local_to_surface.map_rect(&local_sub_rect);
+
+        surface_rect
+            // Make the rect relative to the origin of the surface bounds.
+            .translate(-surface_bounds.min.to_vector())
+            .round_out()
+            // Clip to a rect at the origin with the (rounded) size of the bounds.
+            .intersection(&surface_bounds.size().round().into())
+            .unwrap_or_else(DeviceRect::zero)
+    }
+
+    /// Look up the combined local -> device transform registered under
+    /// `transform_index`.
+    pub fn get_device_transform(
+        &self,
+        transform_index: CompositorTransformIndex,
+    ) -> ScaleOffset {
+        self.transforms[transform_index.0].local_to_device
+    }
+
+    /// Look up the surface -> device transform registered under
+    /// `transform_index`.
+    pub fn get_compositor_transform(
+        &self,
+        transform_index: CompositorTransformIndex,
+    ) -> ScaleOffset {
+        self.transforms[transform_index.0].surface_to_device
+    }
+
+    /// Register an occluder during picture cache updates that can be
+    /// used during frame building to occlude tiles. The world rect is
+    /// rounded and converted to integer coordinates before it is stored.
+    pub fn register_occluder(
+        &mut self,
+        z_id: ZBufferId,
+        rect: WorldRect,
+    ) {
+        self.occluders.push(rect.round().to_i32(), z_id);
+    }
+
+    /// Add a picture cache to be composited.
+    ///
+    /// Surface descriptors are appended to `self.descriptor.surfaces` in this order:
+    /// the backdrop surface (if the tile cache has one), then for each sub-slice the
+    /// opaque surface, the alpha surface and one surface per promoted compositor
+    /// surface. The composite tiles of each sub-slice, plus one tile per promoted
+    /// compositor surface, are appended to `self.tiles`.
+    ///
+    /// * `tile_cache` - the picture cache slice to add.
+    /// * `device_clip_rect` - clip rect that every surface of this slice is
+    ///   intersected with.
+    /// * `resource_cache`, `gpu_cache`, `deferred_resolves` - used to resolve the
+    ///   images of promoted compositor surfaces.
+    pub fn push_surface(
+        &mut self,
+        tile_cache: &TileCacheInstance,
+        device_clip_rect: DeviceRect,
+        resource_cache: &ResourceCache,
+        gpu_cache: &mut GpuCache,
+        deferred_resolves: &mut Vec<DeferredResolve>,
+    ) {
+        let slice_transform = self.get_compositor_transform(tile_cache.transform_index).to_transform();
+
+        let image_rendering = if self.low_quality_pinch_zoom {
+            ImageRendering::Auto
+        } else {
+            ImageRendering::CrispEdges
+        };
+
+        if let Some(backdrop_surface) = &tile_cache.backdrop_surface {
+            // Use the backdrop native surface we created and add that to the composite state.
+            self.descriptor.surfaces.push(
+                CompositeSurfaceDescriptor {
+                    surface_id: Some(backdrop_surface.id),
+                    clip_rect: backdrop_surface.device_rect,
+                    transform: slice_transform,
+                    image_dependencies: [ImageDependency::INVALID; 3],
+                    image_rendering,
+                    tile_descriptors: Vec::new(),
+                }
+            );
+        }
+
+        for sub_slice in &tile_cache.sub_slices {
+            let mut surface_device_rect = DeviceRect::zero();
+
+            for tile in sub_slice.tiles.values() {
+                if !tile.is_visible {
+                    // This can occur when a tile is found to be occluded during frame building.
+                    continue;
+                }
+
+                // Accumulate this tile into the overall surface bounds. This is used below
+                // to clamp the size of the supplied clip rect to a reasonable value.
+                // NOTE: This clip rect must include the device_valid_rect rather than
+                //       the tile device rect. This ensures that in the case of a picture
+                //       cache slice that is smaller than a single tile, the clip rect in
+                //       the composite descriptor will change if the position of that slice
+                //       is changed. Otherwise, WR may conclude that no composite is needed
+                //       if the tile itself was not invalidated due to changing content.
+                //       See bug #1675414 for more detail.
+                surface_device_rect = surface_device_rect.union(&tile.device_valid_rect);
+            }
+
+            // Append the visible tiles from this sub-slice
+            self.tiles.extend_from_slice(&sub_slice.composite_tiles);
+
+            // If the clip rect is too large, it can cause accuracy and correctness problems
+            // for some native compositors (specifically, CoreAnimation in this case). To
+            // work around that, intersect the supplied clip rect with the current bounds
+            // of the native surface, which ensures it is a reasonable size.
+            let surface_clip_rect = device_clip_rect
+                .intersection(&surface_device_rect)
+                .unwrap_or_else(DeviceRect::zero);
+
+            // Only push tiles if they have valid clip rects.
+            if !surface_clip_rect.is_empty() {
+                // Add opaque surface before any compositor surfaces
+                if !sub_slice.opaque_tile_descriptors.is_empty() {
+                    self.descriptor.surfaces.push(
+                        CompositeSurfaceDescriptor {
+                            surface_id: sub_slice.native_surface.as_ref().map(|s| s.opaque),
+                            clip_rect: surface_clip_rect,
+                            transform: slice_transform,
+                            image_dependencies: [ImageDependency::INVALID; 3],
+                            image_rendering,
+                            tile_descriptors: sub_slice.opaque_tile_descriptors.clone(),
+                        }
+                    );
+                }
+
+                // Add alpha tiles after opaque surfaces
+                if !sub_slice.alpha_tile_descriptors.is_empty() {
+                    self.descriptor.surfaces.push(
+                        CompositeSurfaceDescriptor {
+                            surface_id: sub_slice.native_surface.as_ref().map(|s| s.alpha),
+                            clip_rect: surface_clip_rect,
+                            transform: slice_transform,
+                            image_dependencies: [ImageDependency::INVALID; 3],
+                            image_rendering,
+                            tile_descriptors: sub_slice.alpha_tile_descriptors.clone(),
+                        }
+                    );
+                }
+            }
+
+            // For each compositor surface that was promoted, build the
+            // information required for the compositor to draw it
+            for compositor_surface in &sub_slice.compositor_surfaces {
+                let external_surface = &compositor_surface.descriptor;
+
+                let clip_rect = external_surface
+                    .clip_rect
+                    .intersection(&device_clip_rect)
+                    .unwrap_or_else(DeviceRect::zero);
+
+                // Skip compositor surfaces with empty clip rects.
+                if clip_rect.is_empty() {
+                    continue;
+                }
+
+                // Collect the image dependencies of the planes this surface needs.
+                // Slots past `required_plane_count` keep the INVALID marker.
+                let mut image_dependencies = [ImageDependency::INVALID; 3];
+                let required_plane_count = match external_surface.dependency {
+                    ExternalSurfaceDependency::Yuv {
+                        image_dependencies: plane_dependencies,
+                        format,
+                        ..
+                    } => {
+                        let plane_count = format.get_plane_num();
+                        image_dependencies[.. plane_count]
+                            .copy_from_slice(&plane_dependencies[.. plane_count]);
+                        plane_count
+                    }
+                    ExternalSurfaceDependency::Rgb { image_dependency, .. } => {
+                        image_dependencies[0] = image_dependency;
+                        1
+                    }
+                };
+
+                // The Draw compositor always needs the resolved external surface in
+                // order to draw it. A native compositor only needs it when the surface
+                // reports update params, so that compositing of the required updates
+                // into the surface can be scheduled.
+                let needs_external_surface_update = match self.compositor_kind {
+                    CompositorKind::Draw { .. } => true,
+                    CompositorKind::Native { .. } => external_surface.update_params.is_some(),
+                };
+                let external_surface_index = if needs_external_surface_update {
+                    let external_surface_index = self.compute_external_surface_dependencies(
+                        external_surface,
+                        &image_dependencies,
+                        required_plane_count,
+                        resource_cache,
+                        gpu_cache,
+                        deferred_resolves,
+                    );
+                    // The images could not all be resolved, skip this surface.
+                    if external_surface_index == ResolvedExternalSurfaceIndex::INVALID {
+                        continue;
+                    }
+                    external_surface_index
+                } else {
+                    ResolvedExternalSurfaceIndex::INVALID
+                };
+
+                let surface = CompositeTileSurface::ExternalSurface { external_surface_index };
+                let local_rect = external_surface.local_surface_size.cast_unit().into();
+
+                let tile = CompositeTile {
+                    kind: tile_kind(&surface, compositor_surface.is_opaque),
+                    surface,
+                    local_rect,
+                    local_valid_rect: local_rect,
+                    local_dirty_rect: local_rect,
+                    device_clip_rect: clip_rect,
+                    z_id: external_surface.z_id,
+                    transform_index: external_surface.transform_index,
+                };
+
+                // Add a surface descriptor for each compositor surface. For the Draw
+                // compositor, this is used to avoid composites being skipped by adding
+                // a dependency on the compositor surface external image keys / generations.
+                self.descriptor.surfaces.push(
+                    CompositeSurfaceDescriptor {
+                        surface_id: external_surface.native_surface_id,
+                        clip_rect,
+                        transform: self.get_compositor_transform(external_surface.transform_index).to_transform(),
+                        image_dependencies,
+                        image_rendering: external_surface.image_rendering,
+                        tile_descriptors: Vec::new(),
+                    }
+                );
+
+                // Grow the area covered by external surfaces by this surface's device rect.
+                let device_rect =
+                    self.get_device_rect(&local_rect, external_surface.transform_index);
+                self.descriptor.external_surfaces_rect =
+                    self.descriptor.external_surfaces_rect.union(&device_rect);
+
+                self.tiles.push(tile);
+            }
+        }
+    }
+
+    /// Compare this state against the descriptor of a previous frame, and mark
+    /// the dirty rects as invalid if the number of surfaces changed or if the
+    /// area covered by external surfaces no longer contains the previous area.
+    pub fn update_dirty_rect_validity(
+        &mut self,
+        old_descriptor: &CompositeDescriptor,
+    ) {
+        // TODO(gw): Make this more robust in other cases - there are other situations where
+        //           the surface count may be the same but we still need to invalidate the
+        //           dirty rects (e.g. if the surface ordering changed, or the external
+        //           surface itself is animated?)
+        let current = &self.descriptor;
+
+        // The entire area of external surfaces is treated as dirty. That is not
+        // enough once a surface has moved or shrunk, because the area the surface
+        // used to occupy must be composited as well.
+        let invalidated = current.surfaces.len() != old_descriptor.surfaces.len()
+            || !current
+                .external_surfaces_rect
+                .contains_box(&old_descriptor.external_surfaces_rect);
+
+        if invalidated {
+            self.dirty_rects_are_valid = false;
+        }
+    }
+
+    /// Resolve the images of an external surface to textures and record the
+    /// result in `self.external_surfaces`.
+    ///
+    /// The first `required_plane_count` entries of `image_dependencies` are
+    /// resolved. If any of them fails to resolve to a valid texture, a warning
+    /// is logged, nothing is recorded and `ResolvedExternalSurfaceIndex::INVALID`
+    /// is returned. Otherwise the index of the newly recorded surface is returned.
+    fn compute_external_surface_dependencies(
+        &mut self,
+        external_surface: &ExternalSurfaceDescriptor,
+        image_dependencies: &[ImageDependency; 3],
+        required_plane_count: usize,
+        resource_cache: &ResourceCache,
+        gpu_cache: &mut GpuCache,
+        deferred_resolves: &mut Vec<DeferredResolve>,
+    ) -> ResolvedExternalSurfaceIndex {
+        let mut planes = [
+            ExternalPlaneDescriptor::invalid(),
+            ExternalPlaneDescriptor::invalid(),
+            ExternalPlaneDescriptor::invalid(),
+        ];
+
+        let mut resolved_planes = 0;
+        for i in 0 .. required_plane_count {
+            let cache_item = resolve_image(
+                ImageRequest {
+                    key: image_dependencies[i].key,
+                    rendering: external_surface.image_rendering,
+                    tile: None,
+                },
+                resource_cache,
+                gpu_cache,
+                deferred_resolves,
+            );
+
+            // Planes that did not resolve keep the invalid descriptor.
+            if cache_item.texture_id == TextureSource::Invalid {
+                continue;
+            }
+
+            resolved_planes += 1;
+            planes[i] = ExternalPlaneDescriptor {
+                texture: cache_item.texture_id,
+                uv_rect: cache_item.uv_rect.into(),
+            };
+        }
+
+        // Every required plane needs a valid image, otherwise the surface is skipped.
+        if resolved_planes < required_plane_count {
+            warn!("Warnings: skip a YUV/RGB compositor surface, found {}/{} valid images",
+                resolved_planes,
+                required_plane_count,
+            );
+            return ResolvedExternalSurfaceIndex::INVALID;
+        }
+
+        // The surface is appended below, so its index is the current length.
+        let external_surface_index = ResolvedExternalSurfaceIndex(self.external_surfaces.len());
+
+        // When the descriptor reports that its native surface needs updating,
+        // pair the native surface id with the reported size for the renderer.
+        let update_params = external_surface.update_params.map(|surface_size| {
+            (
+                external_surface.native_surface_id.expect("bug: no native surface!"),
+                surface_size
+            )
+        });
+
+        // Both YUV and RGB surfaces take the buffer kind from the first plane.
+        let image_buffer_kind = planes[0].texture.image_buffer_kind();
+
+        let color_data = match external_surface.dependency {
+            ExternalSurfaceDependency::Yuv { color_space, format, channel_bit_depth, .. } => {
+                ResolvedExternalSurfaceColorData::Yuv {
+                    image_dependencies: *image_dependencies,
+                    planes,
+                    color_space,
+                    format,
+                    channel_bit_depth,
+                }
+            }
+            ExternalSurfaceDependency::Rgb { .. } => {
+                ResolvedExternalSurfaceColorData::Rgb {
+                    image_dependency: image_dependencies[0],
+                    plane: planes[0],
+                }
+            }
+        };
+
+        self.external_surfaces.push(ResolvedExternalSurface {
+            color_data,
+            image_buffer_kind,
+            update_params,
+        });
+
+        external_surface_index
+    }
+
+    /// Finish accumulating tiles for this frame by ordering them by z-id.
+    pub fn end_frame(&mut self) {
+        // Sort tiles from front to back. The sort is stable, so tiles that
+        // share a z-id keep their insertion order.
+        self.tiles.sort_by(|a, b| a.z_id.0.cmp(&b.z_id.0));
+    }
+}
+
+/// An arbitrary identifier for a native (OS compositor) surface.
+///
+/// The value `u64::MAX` is reserved for `NativeSurfaceId::DEBUG_OVERLAY`.
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Hash, Eq, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct NativeSurfaceId(pub u64);
+
+impl NativeSurfaceId {
+    /// Reserved id of the native surface that hosts the debug / profiler overlays.
+    pub const DEBUG_OVERLAY: Self = Self(u64::MAX);
+}
+
+/// Identifies a tile of a native (OS compositor) surface.
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Hash, Eq, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct NativeTileId {
+    /// The native surface that the tile belongs to.
+    pub surface_id: NativeSurfaceId,
+    // NOTE(review): `x` / `y` are presumably the tile's coordinates within the
+    // surface's tile grid - confirm against the code that creates tile ids.
+    pub x: i32,
+    pub y: i32,
+}
+
+impl NativeTileId {
+    /// A special id for the native tile that is used for debug / profiler overlays.
+    /// It is the tile at (0, 0) of the `NativeSurfaceId::DEBUG_OVERLAY` surface.
+    pub const DEBUG_OVERLAY: Self = Self {
+        surface_id: NativeSurfaceId::DEBUG_OVERLAY,
+        x: 0,
+        y: 0,
+    };
+}
+
+/// Information about a bound surface that the native compositor
+/// returns to WR.
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct NativeSurfaceInfo {
+    /// An offset into the surface at which WR should draw. Some compositing
+    /// implementations (notably, DirectComposition) use texture atlases
+    /// when the surface sizes are small. In this case, an offset can
+    /// be returned into the larger texture where WR should draw. This
+    /// can be (0, 0) if texture atlases are not used.
+    pub origin: DeviceIntPoint,
+    /// The ID of the FBO that WR should bind to, in order to draw to
+    /// the bound surface. On Windows (ANGLE) this will always be 0,
+    /// since creating a p-buffer sets the default framebuffer to
+    /// be the DirectComposition surface. On Mac, this will be non-zero,
+    /// since it identifies the IOSurface that has been bound to draw to.
+    // TODO(gw): This may need to be a larger / different type for WR
+    //           backends that are not GL.
+    pub fbo_id: u32,
+}
+
+/// The capabilities of a native compositor. This is the payload of
+/// `CompositorKind::Native`.
+#[repr(C)]
+#[derive(Debug, Copy, Clone, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct CompositorCapabilities {
+    /// The virtual surface size used by the underlying platform.
+    pub virtual_surface_size: i32,
+    /// Whether the compositor requires redrawing on invalidation.
+    pub redraw_on_invalidation: bool,
+    /// The maximum number of dirty rects that can be provided per compositor
+    /// surface update. If this is zero, the entire compositor surface for
+    /// a given tile will be drawn if it's dirty.
+    pub max_update_rects: usize,
+    /// Whether or not this compositor will create surfaces for backdrops.
+    pub supports_surface_for_backdrop: bool,
+}
+
+impl Default for CompositorCapabilities {
+    /// The default set of compositor capabilities for a given platform.
+    ///
+    /// A compositor should only override the values in which it specifically
+    /// diverges from the default behavior. That way compositors do not have to
+    /// track changes to this structure unless necessary.
+    fn default() -> Self {
+        Self {
+            // Assume compositors can do at least partial update of surfaces. If not,
+            // the native compositor should override this to be 0.
+            max_update_rects: 1,
+            virtual_surface_size: 0,
+            redraw_on_invalidation: false,
+            supports_surface_for_backdrop: false,
+        }
+    }
+}
+
+/// The size mode of a window, as carried by `WindowVisibility`.
+// NOTE(review): `Invalid` is presumably reported when the mode could not be
+// determined - confirm against the client code that produces these values.
+#[repr(C)]
+#[derive(Copy, Clone, Debug)]
+pub enum WindowSizeMode {
+    Normal,
+    Minimized,
+    Maximized,
+    Fullscreen,
+    Invalid,
+}
+
+/// Describes how visible a window is: its size mode and whether it is
+/// fully occluded.
+#[repr(C)]
+#[derive(Copy, Clone, Debug)]
+pub struct WindowVisibility {
+    /// The current size mode of the window.
+    pub size_mode: WindowSizeMode,
+    /// Whether the window is fully occluded.
+    pub is_fully_occluded: bool,
+}
+
+impl Default for WindowVisibility {
+    /// Defaults to a `Normal` size mode that is not fully occluded.
+    fn default() -> Self {
+        Self {
+            is_fully_occluded: false,
+            size_mode: WindowSizeMode::Normal,
+        }
+    }
+}
+
+/// The transform type to apply to Compositor surfaces. Both the source and
+/// the destination space are currently expressed in `DevicePixel` units.
+// TODO: Should transform from CompositorSurfacePixel instead, but this requires a cleanup of the
+// Compositor API to use CompositorSurface-space geometry instead of Device-space where necessary
+// to avoid a bunch of noisy cast_unit calls and make it actually type-safe. May be difficult due
+// to pervasive use of Device-space nomenclature inside WR. The intended definitions would be:
+// pub struct CompositorSurfacePixel;
+// pub type CompositorSurfaceTransform = Transform3D<f32, CompositorSurfacePixel, DevicePixel>;
+pub type CompositorSurfaceTransform = Transform3D<f32, DevicePixel, DevicePixel>;
+
+/// Defines an interface to a native (OS level) compositor. If supplied
+/// by the client application, then picture cache slices will be
+/// composited by the OS compositor, rather than drawn via WR batches.
+pub trait Compositor {
+    /// Create a new OS compositor surface with the given properties.
+    fn create_surface(
+        &mut self,
+        id: NativeSurfaceId,
+        virtual_offset: DeviceIntPoint,
+        tile_size: DeviceIntSize,
+        is_opaque: bool,
+    );
+
+    /// Create a new OS compositor surface that can be used with an
+    /// existing ExternalImageId, instead of being drawn to by WebRender.
+    /// Surfaces created by this can only be used with attach_external_image,
+    /// and not create_tile/destroy_tile/bind/unbind.
+    fn create_external_surface(
+        &mut self,
+        id: NativeSurfaceId,
+        is_opaque: bool,
+    );
+
+    /// Create a new OS backdrop surface that will display a color.
+    fn create_backdrop_surface(
+        &mut self,
+        id: NativeSurfaceId,
+        color: ColorF,
+    );
+
+    /// Destroy the surface with the specified id. WR may call this
+    /// at any time the surface is no longer required (including during
+    /// renderer deinit). It's the responsibility of the embedder
+    /// to ensure that the surface is only freed once the GPU is
+    /// no longer using the surface (if this isn't already handled
+    /// by the operating system).
+    fn destroy_surface(
+        &mut self,
+        id: NativeSurfaceId,
+    );
+
+    /// Create a new OS compositor tile with the given properties.
+    fn create_tile(
+        &mut self,
+        id: NativeTileId,
+    );
+
+    /// Destroy an existing compositor tile.
+    fn destroy_tile(
+        &mut self,
+        id: NativeTileId,
+    );
+
+    /// Attaches an ExternalImageId to an OS compositor surface created
+    /// by create_external_surface, and uses that as the contents of
+    /// the surface. It is expected that a single surface will have
+    /// many different images attached (like one for each video frame).
+    fn attach_external_image(
+        &mut self,
+        id: NativeSurfaceId,
+        external_image: ExternalImageId
+    );
+
+    /// Mark a tile as invalid before any surfaces are queued for
+    /// composition and before it is updated with bind. This is useful
+    /// for early composition, allowing for dependency tracking of which
+    /// surfaces can be composited early while others are still updating.
+    /// The default implementation does nothing.
+    fn invalidate_tile(
+        &mut self,
+        _id: NativeTileId,
+        _valid_rect: DeviceIntRect
+    ) {}
+
+    /// Bind this surface such that WR can issue OpenGL commands
+    /// that will target the surface. Returns an (x, y) offset
+    /// where WR should draw into the surface. This can be set
+    /// to (0, 0) if the OS doesn't use texture atlases. The dirty
+    /// rect is a local surface rect that specifies which part
+    /// of the surface needs to be updated. If max_update_rects
+    /// in CompositorCapabilities is 0, this will always be the size
+    /// of the entire surface. The returned offset is only
+    /// relevant to compositors that store surfaces in a texture
+    /// atlas (that is, WR expects that the dirty rect doesn't
+    /// affect the coordinates of the returned origin).
+    fn bind(
+        &mut self,
+        id: NativeTileId,
+        dirty_rect: DeviceIntRect,
+        valid_rect: DeviceIntRect,
+    ) -> NativeSurfaceInfo;
+
+    /// Unbind the surface. This is called by WR when it has
+    /// finished issuing OpenGL commands on the current surface.
+    fn unbind(
+        &mut self,
+    );
+
+    /// Begin the frame
+    fn begin_frame(&mut self);
+
+    /// Add a surface to the visual tree to be composited. Visuals must
+    /// be added every frame, between the begin/end transaction call. The
+    /// z-order of the surfaces is determined by the order they are added
+    /// to the visual tree.
+    // TODO(gw): Adding visuals every frame makes the interface simple,
+    //           but may have performance implications on some compositors?
+    //           We might need to change the interface to maintain a visual
+    //           tree that can be mutated?
+    // TODO(gw): We might need to add a concept of a hierarchy in future.
+    fn add_surface(
+        &mut self,
+        id: NativeSurfaceId,
+        transform: CompositorSurfaceTransform,
+        clip_rect: DeviceIntRect,
+        image_rendering: ImageRendering,
+    );
+
+    /// Notify the compositor that all tiles have been invalidated and all
+    /// native surfaces have been added, thus it is safe to start compositing
+    /// valid surfaces. The dirty rects array allows native compositors that
+    /// support partial present to skip copying unchanged areas.
+    /// Optionally provides a set of rectangles for the areas known to be
+    /// opaque, this is currently only computed if the caller is SwCompositor.
+    /// The default implementation does nothing.
+    fn start_compositing(
+        &mut self,
+        _clear_color: ColorF,
+        _dirty_rects: &[DeviceIntRect],
+        _opaque_rects: &[DeviceIntRect],
+    ) {}
+
+    /// Commit any changes in the compositor tree for this frame. WR calls
+    /// this once when all surface and visual updates are complete, to signal
+    /// that the OS composite transaction should be applied.
+    fn end_frame(&mut self);
+
+    /// Enable/disable native compositor usage
+    fn enable_native_compositor(&mut self, enable: bool);
+
+    /// Safely deinitialize any remaining resources owned by the compositor.
+    fn deinit(&mut self);
+
+    /// Get the capabilities struct for this compositor. This is used to
+    /// specify what features a compositor supports, depending on the
+    /// underlying platform
+    fn get_capabilities(&self) -> CompositorCapabilities;
+
+    /// Get the current `WindowVisibility` (size mode and whether the
+    /// window is fully occluded).
+    fn get_window_visibility(&self) -> WindowVisibility;
+}
+
+/// Information about the underlying data buffer of a mapped tile.
+/// Returned by `MappableCompositor::map_tile`.
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct MappedTileInfo {
+    /// Raw pointer to the tile's mapped buffer.
+    pub data: *mut c_void,
+    /// Stride of the mapped buffer.
+    // NOTE(review): presumably the distance between rows in bytes - confirm.
+    pub stride: i32,
+}
+
+/// Descriptor for a locked surface that will be directly composited by SWGL.
+/// A pointer to this struct is passed to
+/// `MappableCompositor::lock_composite_surface`.
+#[repr(C)]
+pub struct SWGLCompositeSurfaceInfo {
+    /// The number of YUV planes in the surface. 0 indicates non-YUV BGRA.
+    /// 1 is interleaved YUV. 2 is NV12. 3 is planar YUV.
+    pub yuv_planes: u32,
+    /// Textures for planes of the surface, or 0 if not applicable.
+    pub textures: [u32; 3],
+    /// Color space of surface if using a YUV format.
+    pub color_space: YuvRangedColorSpace,
+    /// Color depth of surface if using a YUV format.
+    pub color_depth: ColorDepth,
+    /// The actual source surface size before transformation.
+    pub size: DeviceIntSize,
+}
+
+/// A Compositor variant that supports mapping tiles into CPU memory.
+pub trait MappableCompositor: Compositor {
+    /// Map a tile's underlying buffer so it can be used as the backing for
+    /// a SWGL framebuffer. This is intended to be a replacement for 'bind'
+    /// in any compositors that intend to directly interoperate with SWGL
+    /// while supporting some form of native layers.
+    fn map_tile(
+        &mut self,
+        id: NativeTileId,
+        dirty_rect: DeviceIntRect,
+        valid_rect: DeviceIntRect,
+    ) -> Option<MappedTileInfo>;
+
+    /// Unmap a tile that was previously mapped via map_tile to signal
+    /// that SWGL is done rendering to the buffer.
+    fn unmap_tile(&mut self);
+
+    /// Lock the external image identified by `external_image_id` so it can
+    /// be composited directly by SWGL.
+    // NOTE(review): presumably fills in `*composite_info` and returns true
+    // on success - confirm against an implementation.
+    fn lock_composite_surface(
+        &mut self,
+        ctx: *mut c_void,
+        external_image_id: ExternalImageId,
+        composite_info: *mut SWGLCompositeSurfaceInfo,
+    ) -> bool;
+    /// Release an external image previously passed to
+    /// `lock_composite_surface`.
+    fn unlock_composite_surface(&mut self, ctx: *mut c_void, external_image_id: ExternalImageId);
+}
+
+/// Defines an interface to a non-native (application-level) Compositor which handles
+/// partial present. This is required if webrender must query the backbuffer's age.
+/// TODO: Use the Compositor trait for native and non-native compositors, and integrate
+/// this functionality there.
+pub trait PartialPresentCompositor {
+    /// Allows webrender to specify the total region that will be rendered to this frame,
+    /// i.e. the frame's dirty region and some previous frames' dirty regions, if applicable
+    /// (calculated using the buffer age). Must be called before anything has been rendered
+    /// to the main framebuffer.
+    fn set_buffer_damage_region(&mut self, rects: &[DeviceIntRect]);
+}
+
+/// Information about an opaque surface used to occlude tiles.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct Occluder {
+    /// Z id of the occluder; compared against the z id of the queried tile
+    /// to decide whether this occluder takes part in an occlusion query.
+    z_id: ZBufferId,
+    /// Integer world-space rectangle covered by the occluder.
+    world_rect: WorldIntRect,
+}
+
+/// Whether this event is the start or end of a rectangle
+#[derive(Debug)]
+enum OcclusionEventKind {
+    /// The rectangle's x-range becomes active at the event's y.
+    Begin,
+    /// The rectangle's x-range stops being active at the event's y.
+    End,
+}
+
+/// An event on the y-axis, with the rectangle range that it affects on the x-axis
+#[derive(Debug)]
+struct OcclusionEvent {
+    /// Position of the event on the y-axis.
+    y: i32,
+    /// Half-open x-range of the rectangle this event belongs to.
+    x_range: ops::Range<i32>,
+    /// Whether the rectangle begins or ends at `y`.
+    kind: OcclusionEventKind,
+}
+
+impl OcclusionEvent {
+    /// Build an event at `y` of the given `kind`, affecting the x-range
+    /// that starts at `x0` and ends at `x1`.
+    fn new(y: i32, kind: OcclusionEventKind, x0: i32, x1: i32) -> Self {
+        let x_range = x0..x1;
+        Self { y, x_range, kind }
+    }
+}
+
+/// List of registered occluders.
+///
+/// Also store a couple of vectors for reuse.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct Occluders {
+    /// The occluders registered so far.
+    occluders: Vec<Occluder>,
+
+    // The two vectors below are kept to avoid unnecessary reallocations in area().
+    // Both are cleared at the start of every area() call, and are skipped by
+    // serde when the "serde" feature is enabled.
+
+    /// Scratch list of y-axis begin/end events.
+    #[cfg_attr(feature = "serde", serde(skip))]
+    events: Vec<OcclusionEvent>,
+
+    /// Scratch list of the x-ranges that are active during the sweep.
+    #[cfg_attr(feature = "serde", serde(skip))]
+    active: Vec<ops::Range<i32>>,
+}
+
+impl Occluders {
+    /// Create an empty occluder list.
+    fn new() -> Self {
+        Self {
+            occluders: Vec::new(),
+            events: Vec::new(),
+            active: Vec::new(),
+        }
+    }
+
+    /// Register an occluder covering `world_rect` with the given `z_id`.
+    fn push(&mut self, world_rect: WorldIntRect, z_id: ZBufferId) {
+        let occluder = Occluder { world_rect, z_id };
+        self.occluders.push(occluder);
+    }
+
+    /// Returns true if a tile with the specified rectangle and z_id
+    /// is occluded by an opaque surface in front of it.
+    pub fn is_tile_occluded(
+        &mut self,
+        z_id: ZBufferId,
+        world_rect: WorldRect,
+    ) -> bool {
+        // A tile is frequently hidden only by the combination of several
+        // picture caches in front of it (for example, background tiles are
+        // often covered by the content slice plus the scrollbar slices), so
+        // a per-occluder containment test is not enough.
+        //
+        // Instead:
+        //    - clip every occluder in front of the tile to the tile rect,
+        //    - compute the total non-overlapping area of those clipped rects,
+        //    - if that equals the tile's own area, the whole tile is covered
+        //      and can be skipped during rasterization and compositing.
+
+        // The reference rect and area to compare against.
+        let tile_rect = world_rect.round().to_i32();
+        let tile_area = tile_rect.area();
+
+        // The non-overlapping area of the valid occluders.
+        let covered_area = self.area(z_id, &tile_rect);
+        debug_assert!(covered_area <= tile_area);
+
+        // Occluded only if the tile area is completely covered.
+        covered_area == tile_area
+    }
+
+    /// Return the total area covered by a set of occluders, accounting for
+    /// overlapping areas between those rectangles.
+    fn area(
+        &mut self,
+        z_id: ZBufferId,
+        clip_rect: &WorldIntRect,
+    ) -> i32 {
+        // This implementation is based on the article https://leetcode.com/articles/rectangle-area-ii/.
+        // This is not a particularly efficient implementation (it skips building segment trees), however
+        // we typically use this where the length of the rectangles array is < 10, so simplicity is more important.
+
+        self.events.clear();
+        self.active.clear();
+
+        // Only occluders in front of this rect are considered, and each one is
+        // clipped to the rectangle we care about, since we only want to record
+        // area for the tile we are comparing to.
+        let clipped_rects = self.occluders
+            .iter()
+            .filter(|occluder| occluder.z_id.0 < z_id.0)
+            .filter_map(|occluder| occluder.world_rect.intersection(clip_rect));
+
+        // Emit a begin and an end event on the y-axis for every clipped rect.
+        for rect in clipped_rects {
+            let x0 = rect.min.x;
+            let x1 = x0 + rect.width();
+            let y0 = rect.min.y;
+            let y1 = y0 + rect.height();
+            self.events.push(OcclusionEvent::new(y0, OcclusionEventKind::Begin, x0, x1));
+            self.events.push(OcclusionEvent::new(y1, OcclusionEventKind::End, x0, x1));
+        }
+
+        // Without any valid events, the area must be 0
+        if self.events.is_empty() {
+            return 0;
+        }
+
+        // Sort the events by y-value. The sort is stable, so the begin event
+        // of a rect always stays ahead of its own end event.
+        self.events.sort_by_key(|event| event.y);
+
+        let mut total = 0;
+        let mut prev_y = self.events[0].y;
+
+        // Sweep through each y interval
+        for event in &self.events {
+            // Height of the y-interval we are accumulating area for
+            let span_y = event.y - prev_y;
+
+            // Only intervals with a non-zero height and at least one active
+            // x-range contribute any area.
+            if span_y != 0 && !self.active.is_empty() {
+                assert!(span_y > 0);
+
+                // Walk the x-ranges ordered by their start, summing up the
+                // part of each range that lies beyond what is already covered.
+                self.active.sort_by_key(|range| range.start);
+                let leftmost = self.active[0].start;
+                let (span_x, _) = self.active.iter().fold(
+                    (0, leftmost),
+                    |(covered, cursor), range| {
+                        let from = range.start.max(cursor);
+                        (covered + (range.end - from).max(0), from.max(range.end))
+                    },
+                );
+
+                // Accumulate total area for this y-interval
+                total += span_x * span_y;
+            }
+
+            // Update the active x-range list
+            match event.kind {
+                OcclusionEventKind::Begin => self.active.push(event.x_range.clone()),
+                OcclusionEventKind::End => {
+                    let index = self.active
+                        .iter()
+                        .position(|range| *range == event.x_range)
+                        .unwrap();
+                    self.active.remove(index);
+                }
+            }
+
+            prev_y = event.y;
+        }
+
+        total
+    }
+}
diff --git a/gfx/wr/webrender/src/compositor/mod.rs b/gfx/wr/webrender/src/compositor/mod.rs
new file mode 100644
index 0000000000..e517f22719
--- /dev/null
+++ b/gfx/wr/webrender/src/compositor/mod.rs
@@ -0,0 +1,6 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#[cfg(feature = "sw_compositor")]
+pub mod sw_compositor;
diff --git a/gfx/wr/webrender/src/compositor/sw_compositor.rs b/gfx/wr/webrender/src/compositor/sw_compositor.rs
new file mode 100644
index 0000000000..0a5037e6d0
--- /dev/null
+++ b/gfx/wr/webrender/src/compositor/sw_compositor.rs
@@ -0,0 +1,1533 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use gleam::{gl, gl::Gl};
+use std::cell::{Cell, UnsafeCell};
+use std::collections::{hash_map::HashMap, VecDeque};
+use std::ops::{Deref, DerefMut, Range};
+use std::ptr;
+use std::sync::atomic::{AtomicBool, AtomicI8, AtomicPtr, AtomicU32, AtomicU8, Ordering};
+use std::sync::{Arc, Condvar, Mutex, MutexGuard};
+use std::thread;
+use crate::{
+    api::units::*, api::ColorDepth, api::ColorF, api::ExternalImageId, api::ImageRendering, api::YuvRangedColorSpace,
+    Compositor, CompositorCapabilities, CompositorSurfaceTransform, NativeSurfaceId, NativeSurfaceInfo, NativeTileId,
+    profiler, MappableCompositor, SWGLCompositeSurfaceInfo, WindowVisibility,
+};
+
+/// A single tile of an `SwSurface`.
+pub struct SwTile {
+    /// Horizontal tile coordinate, in units of the surface's tile width.
+    x: i32,
+    /// Vertical tile coordinate, in units of the surface's tile height.
+    y: i32,
+    // NOTE(review): presumably the GL framebuffer object backing this tile;
+    // initialized to 0 - confirm where it is assigned.
+    fbo_id: u32,
+    // NOTE(review): presumably the GL color texture attached to the
+    // framebuffer; initialized to 0 - confirm where it is assigned.
+    color_id: u32,
+    /// Valid rect of the tile, relative to the tile's origin.
+    valid_rect: DeviceIntRect,
+    /// Composition of tiles must be ordered such that any tiles that may overlap
+    /// an invalidated tile in an earlier surface only get drawn after that tile
+    /// is actually updated. We store a count of the number of overlapping invalid
+    /// here, that gets decremented when the invalid tiles are finally updated so
+    /// that we know when it is finally safe to draw. Must use a Cell as we might
+    /// be analyzing multiple tiles and surfaces
+    overlaps: Cell<u32>,
+    /// Whether the tile's contents has been invalidated
+    invalid: Cell<bool>,
+    /// Graph node for job dependencies of this tile
+    graph_node: SwCompositeGraphNodeRef,
+}
+
+impl SwTile {
+    /// Create a tile at tile coordinates (`x`, `y`) with zeroed ids, an
+    /// empty valid rect, no overlaps, not invalidated, and a fresh graph node.
+    fn new(x: i32, y: i32) -> Self {
+        Self {
+            x,
+            y,
+            fbo_id: 0,
+            color_id: 0,
+            valid_rect: DeviceIntRect::zero(),
+            overlaps: Cell::new(0),
+            invalid: Cell::new(false),
+            graph_node: SwCompositeGraphNode::new(),
+        }
+    }
+
+    /// The offset of the tile in the local space of the surface before any
+    /// transform is applied.
+    fn origin(&self, surface: &SwSurface) -> DeviceIntPoint {
+        let tile_size = &surface.tile_size;
+        DeviceIntPoint::new(self.x * tile_size.width, self.y * tile_size.height)
+    }
+
+    /// The offset valid rect positioned within the local space of the surface
+    /// before any transform is applied.
+    fn local_bounds(&self, surface: &SwSurface) -> DeviceIntRect {
+        let offset = self.origin(surface).to_vector();
+        self.valid_rect.translate(offset)
+    }
+
+    /// Bounds used for determining overlap dependencies. This may either be the
+    /// full tile bounds or the actual valid rect, depending on whether the tile
+    /// is invalidated this frame. These bounds are more conservative as such and
+    /// may differ from the precise bounds used to actually composite the tile.
+    fn overlap_rect(
+        &self,
+        surface: &SwSurface,
+        transform: &CompositorSurfaceTransform,
+        clip_rect: &DeviceIntRect,
+    ) -> Option<DeviceIntRect> {
+        let local = self.local_bounds(surface).to_f32();
+        // Transform into device space, rounding outwards to stay conservative.
+        let device = transform.outer_transformed_box2d(&local)?.round_out();
+        let clipped = device.intersection(&clip_rect.to_f32())?;
+        Some(clipped.to_i32())
+    }
+
+    /// Determine if the tile's bounds may overlap the dependency rect if it were
+    /// to be composited at the given position.
+    fn may_overlap(
+        &self,
+        surface: &SwSurface,
+        transform: &CompositorSurfaceTransform,
+        clip_rect: &DeviceIntRect,
+        dep_rect: &DeviceIntRect,
+    ) -> bool {
+        match self.overlap_rect(surface, transform, clip_rect) {
+            Some(rect) => rect.intersects(dep_rect),
+            None => false,
+        }
+    }
+
+    /// Get valid source and destination rectangles for composition of the tile
+    /// within a surface, bounded by the clipping rectangle. May return None if
+    /// it falls outside of the clip rect. The two trailing flags report whether
+    /// the transform has a negative x scale (`m11`) and a negative y scale
+    /// (`m22`) respectively.
+    fn composite_rects(
+        &self,
+        surface: &SwSurface,
+        transform: &CompositorSurfaceTransform,
+        clip_rect: &DeviceIntRect,
+    ) -> Option<(DeviceIntRect, DeviceIntRect, bool, bool)> {
+        // Offset the valid rect to the appropriate surface origin.
+        let valid = self.local_bounds(surface);
+        // The destination rect is the transformed valid rect. Nothing needs to
+        // be composited if it lies entirely outside of the clip rect.
+        let dest_rect = transform.outer_transformed_box2d(&valid.to_f32())?.round_out();
+        if !dest_rect.intersects(&clip_rect.to_f32()) {
+            return None;
+        }
+        // To get a source rect, inverse transform the destination rect back into
+        // source space, then subtract off the origin of the offset valid rect so
+        // that the result is relative to that origin rather than absolute.
+        let inv_transform = transform.inverse()?;
+        let valid_origin = valid.min.to_vector().to_f32();
+        let src_rect = inv_transform
+            .outer_transformed_box2d(&dest_rect)?
+            .round()
+            .translate(-valid_origin);
+        // Ensure source and dest rects when transformed from Box2D to Rect formats will still fit in an i32.
+        // If p0=i32::MIN and p1=i32::MAX, then evaluating the size with p1-p0 will overflow an i32 and not
+        // be representable.
+        let sizes_fit = src_rect.size().try_cast::<i32>().is_some()
+            && dest_rect.size().try_cast::<i32>().is_some();
+        if !sizes_fit {
+            return None;
+        }
+        let flip_x = transform.m11 < 0.0;
+        let flip_y = transform.m22 < 0.0;
+        Some((src_rect.try_cast()?, dest_rect.try_cast()?, flip_x, flip_y))
+    }
+}
+
+/// A surface made up of tiles, optionally backed by an external image.
+pub struct SwSurface {
+    /// Size of each tile; used to turn tile coordinates into local offsets.
+    tile_size: DeviceIntSize,
+    /// Opacity flag supplied when the surface was constructed.
+    is_opaque: bool,
+    /// The tiles that belong to this surface.
+    tiles: Vec<SwTile>,
+    /// An attached external image for this surface.
+    external_image: Option<ExternalImageId>,
+    /// Descriptor for the external image if successfully locked for composite.
+    composite_surface: Option<SWGLCompositeSurfaceInfo>,
+}
+
+impl SwSurface {
+    /// Create a surface with the given tile size and opacity, no tiles and
+    /// no external image.
+    fn new(tile_size: DeviceIntSize, is_opaque: bool) -> Self {
+        Self {
+            tile_size,
+            is_opaque,
+            tiles: Vec::new(),
+            external_image: None,
+            composite_surface: None,
+        }
+    }
+
+    /// Conservative approximation of local bounds of the surface by combining
+    /// the local bounds of all enclosed tiles.
+    fn local_bounds(&self) -> DeviceIntRect {
+        self.tiles
+            .iter()
+            .fold(DeviceIntRect::zero(), |bounds, tile| bounds.union(&tile.local_bounds(self)))
+    }
+
+    /// The transformed and clipped conservative device-space bounds of the
+    /// surface.
+    fn device_bounds(
+        &self,
+        transform: &CompositorSurfaceTransform,
+        clip_rect: &DeviceIntRect,
+    ) -> Option<DeviceIntRect> {
+        let local = self.local_bounds().to_f32();
+        // Transform into device space, rounding outwards to stay conservative.
+        let device = transform.outer_transformed_box2d(&local)?.round_out();
+        let clipped = device.intersection(&clip_rect.to_f32())?;
+        Some(clipped.to_i32())
+    }
+}
+
+/// Map an `ImageRendering` mode to the GL texture filter used when
+/// compositing: nearest sampling for pixelated content, linear otherwise.
+fn image_rendering_to_gl_filter(filter: ImageRendering) -> gl::GLenum {
+    match filter {
+        ImageRendering::Auto => gl::LINEAR,
+        ImageRendering::CrispEdges => gl::LINEAR,
+        ImageRendering::Pixelated => gl::NEAREST,
+    }
+}
+
+/// A source for a composite job which can either be a single BGRA locked SWGL
+/// resource or a collection of SWGL resources representing a YUV surface.
+#[derive(Clone)]
+enum SwCompositeSource {
+    /// A single locked BGRA resource.
+    BGRA(swgl::LockedResource),
+    /// Three locked resources (passed to SWGL as the y, u and v planes, in
+    /// that order), followed by the color space and color depth of the
+    /// surface.
+    YUV(
+        swgl::LockedResource,
+        swgl::LockedResource,
+        swgl::LockedResource,
+        YuvRangedColorSpace,
+        ColorDepth,
+    ),
+}
+
+/// Mark ExternalImage's renderer field as safe to send to SwComposite thread.
+// NOTE(review): soundness relies on the contained swgl::LockedResource values
+// being safe to move to another thread - confirm against the swgl crate.
+unsafe impl Send for SwCompositeSource {}
+
+/// A tile composition job to be processed by the SwComposite thread.
+/// Stores relevant details about the tile and where to composite it.
+#[derive(Clone)]
+struct SwCompositeJob {
+    /// Locked texture that will be unlocked immediately following the job
+    locked_src: SwCompositeSource,
+    /// Locked framebuffer that may be shared among many jobs
+    locked_dst: swgl::LockedResource,
+    /// Source rectangle passed to the SWGL composite call.
+    src_rect: DeviceIntRect,
+    /// Destination rectangle passed to the SWGL composite call.
+    dst_rect: DeviceIntRect,
+    /// Clipped destination rectangle; this is what gets split into bands.
+    clipped_dst: DeviceIntRect,
+    /// Opacity flag forwarded to the BGRA composite call.
+    opaque: bool,
+    /// Flip flag for the x-axis, forwarded to the composite call.
+    flip_x: bool,
+    /// Flip flag for the y-axis, forwarded to the composite call.
+    flip_y: bool,
+    /// Image rendering mode, converted to a GL filter for BGRA composites.
+    filter: ImageRendering,
+    /// The total number of bands for this job
+    num_bands: u8,
+}
+
+impl SwCompositeJob {
+    /// Process a composite job: composite the band selected by `band_index`
+    /// from the locked source into the locked destination.
+    fn process(&self, band_index: i32) {
+        let num_bands = self.num_bands as i32;
+        // Bands are allocated in reverse order, but we want to process them in increasing order.
+        let band = num_bands - 1 - band_index;
+        // Work out the Y extents of this band: it starts at the offset of the
+        // current index and spans up to the offset of the following index.
+        let clipped_height = self.clipped_dst.height();
+        let band_top = (clipped_height * band) / num_bands;
+        let band_bottom = (clipped_height * (band + 1)) / num_bands;
+        // The intersection of the band with the clipped dest
+        let band_origin = DeviceIntPoint::new(self.clipped_dst.min.x, self.clipped_dst.min.y + band_top);
+        let band_size = DeviceIntSize::new(self.clipped_dst.width(), band_bottom - band_top);
+        let band_clip = DeviceIntRect::from_origin_and_size(band_origin, band_size);
+
+        let src = &self.src_rect;
+        let dst = &self.dst_rect;
+        match self.locked_src {
+            SwCompositeSource::BGRA(ref resource) => {
+                self.locked_dst.composite(
+                    resource,
+                    src.min.x,
+                    src.min.y,
+                    src.width(),
+                    src.height(),
+                    dst.min.x,
+                    dst.min.y,
+                    dst.width(),
+                    dst.height(),
+                    self.opaque,
+                    self.flip_x,
+                    self.flip_y,
+                    image_rendering_to_gl_filter(self.filter),
+                    band_clip.min.x,
+                    band_clip.min.y,
+                    band_clip.width(),
+                    band_clip.height(),
+                );
+            }
+            SwCompositeSource::YUV(ref y, ref u, ref v, color_space, color_depth) => {
+                // Translate the WR color space into the matching SWGL one.
+                let swgl_color_space = match color_space {
+                    YuvRangedColorSpace::Rec601Narrow => swgl::YuvRangedColorSpace::Rec601Narrow,
+                    YuvRangedColorSpace::Rec601Full => swgl::YuvRangedColorSpace::Rec601Full,
+                    YuvRangedColorSpace::Rec709Narrow => swgl::YuvRangedColorSpace::Rec709Narrow,
+                    YuvRangedColorSpace::Rec709Full => swgl::YuvRangedColorSpace::Rec709Full,
+                    YuvRangedColorSpace::Rec2020Narrow => swgl::YuvRangedColorSpace::Rec2020Narrow,
+                    YuvRangedColorSpace::Rec2020Full => swgl::YuvRangedColorSpace::Rec2020Full,
+                    YuvRangedColorSpace::GbrIdentity => swgl::YuvRangedColorSpace::GbrIdentity,
+                };
+                self.locked_dst.composite_yuv(
+                    y,
+                    u,
+                    v,
+                    swgl_color_space,
+                    color_depth.bit_depth(),
+                    src.min.x,
+                    src.min.y,
+                    src.width(),
+                    src.height(),
+                    dst.min.x,
+                    dst.min.y,
+                    dst.width(),
+                    dst.height(),
+                    self.flip_x,
+                    self.flip_y,
+                    band_clip.min.x,
+                    band_clip.min.y,
+                    band_clip.width(),
+                    band_clip.height(),
+                );
+            }
+        }
+    }
+}
+
+/// A reference to a SwCompositeGraph node that can be passed from the render
+/// thread to the SwComposite thread. Consistency of mutation is ensured in
+/// SwCompositeGraphNode via use of Atomic operations that prevent more than
+/// one thread from mutating SwCompositeGraphNode at once. This avoids using
+/// messy and not-thread-safe RefCells or expensive Mutexes inside the graph
+/// node and at least signals to the compiler that potentially unsafe coercions
+/// are occurring.
+///
+/// Cloning the reference clones the inner `Arc`, so all clones refer to the
+/// same node.
+#[derive(Clone)]
+struct SwCompositeGraphNodeRef(Arc<UnsafeCell<SwCompositeGraphNode>>);
+
+impl SwCompositeGraphNodeRef {
+    /// Wrap a graph node in a new shared reference.
+    fn new(graph_node: SwCompositeGraphNode) -> Self {
+        SwCompositeGraphNodeRef(Arc::new(UnsafeCell::new(graph_node)))
+    }
+
+    /// Borrow the node immutably.
+    fn get(&self) -> &SwCompositeGraphNode {
+        // The Arc keeps the allocation alive for as long as `self` exists.
+        // NOTE(review): the compiler cannot check aliasing here; callers are
+        // expected to follow the node's atomic hand-off protocol so that no
+        // mutable borrow is live on another thread at the same time.
+        unsafe { &*self.0.get() }
+    }
+
+    /// Borrow the node mutably through a shared reference.
+    fn get_mut(&self) -> &mut SwCompositeGraphNode {
+        // NOTE(review): hands out `&mut` from `&self`; exclusivity is not
+        // enforced by the type system and must be upheld by the caller.
+        unsafe { &mut *self.0.get() }
+    }
+
+    /// Get a raw mutable pointer to the node without creating a reference.
+    fn get_ptr_mut(&self) -> *mut SwCompositeGraphNode {
+        self.0.get()
+    }
+}
+
+// Allows the reference to be moved to another thread.
+// NOTE(review): soundness depends on the access discipline described on
+// SwCompositeGraphNodeRef and SwCompositeGraphNode - not compiler checked.
+unsafe impl Send for SwCompositeGraphNodeRef {}
+
+impl Deref for SwCompositeGraphNodeRef {
+    type Target = SwCompositeGraphNode;
+
+    /// Forwards to `get`.
+    fn deref(&self) -> &Self::Target {
+        self.get()
+    }
+}
+
+impl DerefMut for SwCompositeGraphNodeRef {
+    /// Forwards to `get_mut`.
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        self.get_mut()
+    }
+}
+
+/// Dependency graph of composite jobs to be completed. Keeps a list of child jobs that are dependent on the completion of this job.
+/// Also keeps track of the number of parent jobs that this job is dependent upon before it can be processed. Once there are no more
+/// in-flight parent jobs that it depends on, the graph node is finally added to the job queue for processing.
+struct SwCompositeGraphNode {
+    /// Job to be queued for this graph node once ready.
+    job: Option<SwCompositeJob>,
+    /// The number of remaining bands associated with this job. When this is
+    /// non-zero and the node has no more parents left, then the node is being
+    /// actively used by the composite thread to process jobs. Once it hits
+    /// zero, the owning thread (which brought it to zero) can safely retire
+    /// the node as no other thread is using it.
+    remaining_bands: AtomicU8,
+    /// The number of bands that are available for processing.
+    available_bands: AtomicI8,
+    /// Count of parents this graph node depends on. While this is non-zero the
+    /// node must ensure that it is only being actively mutated by the render
+    /// thread and otherwise never being accessed by the render thread.
+    // NOTE(review): the second "render thread" above reads like it was meant
+    // to be the composite thread - confirm with the author.
+    parents: AtomicU32,
+    /// Graph nodes of child jobs that are dependent on this job
+    children: Vec<SwCompositeGraphNodeRef>,
+}
+
+// Allows `&SwCompositeGraphNode` to be shared between threads.
+// NOTE(review): `job` and `children` are plain (non-atomic) fields, so this
+// relies on the access discipline described in the field docs above.
+unsafe impl Sync for SwCompositeGraphNode {}
+
+impl SwCompositeGraphNode {
+    /// Create a node with no job, zeroed counters and no children, wrapped
+    /// in a shareable reference.
+    fn new() -> SwCompositeGraphNodeRef {
+        let node = SwCompositeGraphNode {
+            job: None,
+            remaining_bands: AtomicU8::new(0),
+            available_bands: AtomicI8::new(0),
+            parents: AtomicU32::new(0),
+            children: Vec::new(),
+        };
+        SwCompositeGraphNodeRef::new(node)
+    }
+
+    /// Reset the node's state for a new frame: drops any installed job,
+    /// zeroes both band counters and clears the child list.
+    fn reset(&mut self) {
+        self.job = None;
+        self.remaining_bands.store(0, Ordering::SeqCst);
+        self.available_bands.store(0, Ordering::SeqCst);
+        // Initialize parents to 1 as sentinel dependency for uninitialized job
+        // to avoid queuing uninitialized job as unblocked child dependency.
+        // The sentinel is subtracted again by set_job.
+        self.parents.store(1, Ordering::SeqCst);
+        self.children.clear();
+    }
+
+    /// Register `child` as dependent on this node: bump the child's parent count, then record it in `children`.
+    fn add_child(&mut self, child: SwCompositeGraphNodeRef) {
+        child.parents.fetch_add(1, Ordering::SeqCst);
+        self.children.push(child);
+    }
+
+    /// Install a job for this node, split into `num_bands` bands. Returns true when no unprocessed
+    /// parents remain, i.e. nothing blocks immediate composition and the caller may queue the node.
+    fn set_job(&mut self, job: SwCompositeJob, num_bands: u8) -> bool {
+        self.job = Some(job);
+        self.remaining_bands.store(num_bands, Ordering::SeqCst);
+        self.available_bands.store(num_bands as _, Ordering::SeqCst);
+        // Subtract off the sentinel parent dependency (stored by `reset`) now that the job is
+        // initialized; a previous count of 1 or less means no real parent dependencies remain.
+        self.parents.fetch_sub(1, Ordering::SeqCst) <= 1
+    }
+
+    /// Claim the next available band, if any. Returns the claimed band index (the previous count
+    /// minus one) and whether no bands are left afterwards, so the caller may properly clean up.
+    fn take_band(&self) -> (Option<i32>, bool) {
+        let available = self.available_bands.fetch_sub(1, Ordering::SeqCst); // Decrements even when already <= 0.
+        if available > 0 {
+            (Some(available as i32 - 1), available == 1)
+        } else {
+            (None, true)
+        }
+    }
+
+    /// Process the band `band_index` of the job installed on this node; does nothing if no job is set.
+    fn process_job(&self, band_index: i32) {
+        if let Some(ref job) = self.job {
+            job.process(band_index);
+        }
+    }
+
+    /// Called after a band has been processed. Decrements `remaining_bands`; only the call that finishes the
+    /// last band goes on to drop the job, release each child's dependency on this node, and queue unblocked children.
+    fn unblock_children(&mut self, thread: &SwCompositeThread) {
+        if self.remaining_bands.fetch_sub(1, Ordering::SeqCst) > 1 {
+            return;
+        }
+        // Clear the job to release any locked resources.
+        self.job = None;
+        let mut lock = None; // Job-queue lock, acquired lazily for the first unblocked child and then reused.
+        for child in self.children.drain(..) {
+            // Remove the child's parent dependency on this node. If there are no more
+            // parent dependencies left, send the child job bands for composition.
+            if child.parents.fetch_sub(1, Ordering::SeqCst) <= 1 {
+                if lock.is_none() {
+                    lock = Some(thread.lock());
+                }
+                thread.send_job(lock.as_mut().unwrap(), child);
+            }
+        }
+    }
+}
+
+/// The SwComposite thread processes a queue of composite jobs, also signaling
+/// via a condition when all available jobs have been processed, as tracked by
+/// the `jobs_completed` flag.
+struct SwCompositeThread {
+    /// Queue of available composite jobs
+    jobs: Mutex<SwCompositeJobQueue>,
+    /// Cache of the current job being processed. This maintains a pointer to
+    /// the contents of the SwCompositeGraphNodeRef, which is safe due to the
+    /// fact that SwCompositor maintains a strong reference to the contents
+    /// in an SwTile to keep it alive while this is in use.
+    current_job: AtomicPtr<SwCompositeGraphNode>,
+    /// Condition signaled when either there are jobs available to process or
+    /// there are no more jobs left to process. Otherwise stated, this signals
+    /// when the job queue transitions from an empty to non-empty state or from
+    /// a non-empty to empty state. `deinit` also signals it to wake the thread.
+    jobs_available: Condvar,
+    /// Whether all available jobs have been processed.
+    jobs_completed: AtomicBool,
+    /// Whether the main thread is waiting for job completion.
+    waiting_for_jobs: AtomicBool,
+    /// Whether the SwCompositor is shutting down
+    shutting_down: AtomicBool,
+}
+
+/// The SwCompositeThread struct is shared between the SwComposite thread and the rendering thread so that both ends can
+/// access the job queue. NOTE(review): all fields are std sync primitives; the manual impl is presumably needed for the queue's element type - confirm.
+unsafe impl Sync for SwCompositeThread {}
+
+/// A FIFO queue of graph nodes whose jobs are ready to be processed (pushed at the back, popped from the front).
+type SwCompositeJobQueue = VecDeque<SwCompositeGraphNodeRef>;
+
+/// Locked access to the composite job queue, i.e. the guard returned by locking `SwCompositeThread::jobs`.
+type SwCompositeThreadLock<'a> = MutexGuard<'a, SwCompositeJobQueue>;
+
+impl SwCompositeThread {
+    /// Create the shared SwCompositeThread state and spawn the "SwComposite" worker
+    /// thread that drains its job queue. Returns the handle shared with the caller.
+    fn new() -> Arc<SwCompositeThread> {
+        let info = Arc::new(SwCompositeThread {
+            jobs: Mutex::new(SwCompositeJobQueue::new()),
+            current_job: AtomicPtr::new(ptr::null_mut()),
+            jobs_available: Condvar::new(),
+            jobs_completed: AtomicBool::new(true),
+            waiting_for_jobs: AtomicBool::new(false),
+            shutting_down: AtomicBool::new(false),
+        });
+        let result = info.clone();
+        let thread_name = "SwComposite";
+        thread::Builder::new()
+            .name(thread_name.into())
+            // The composite thread only calls into SWGL to composite, and we
+            // have potentially many composite threads for different windows,
+            // so using the default stack size is excessive. A reasonably small
+            // stack size should be more than enough for SWGL and reduce memory
+            // overhead.
+            // Bug 1731569 - Need at least 36K to avoid problems with ASAN.
+            .stack_size(40 * 1024)
+            .spawn(move || {
+                profiler::register_thread(thread_name);
+                // Process any available jobs. In this blocking mode take_job
+                // only returns None once the queue is empty and shutdown has
+                // been requested via deinit, which lets the thread exit.
+                while let Some((job, band)) = info.take_job(true) {
+                    info.process_job(job, band);
+                }
+                profiler::unregister_thread();
+            })
+            .expect("Failed creating SwComposite thread");
+        result
+    }
+
+    fn deinit(&self) {
+        // Flag shutdown so that the worker's blocking take_job can return None and the thread can exit.
+        self.shutting_down.store(true, Ordering::SeqCst);
+        // Wake up the thread in case it is blocked waiting for new jobs
+        self.jobs_available.notify_all();
+    }
+
+    /// Process one band of the job contained in a dependency graph node received from the job queue.
+    /// Afterwards the node's remaining band count is decremented and, once its last band is done,
+    /// any child dependencies are unblocked and queued as appropriate.
+    fn process_job(&self, graph_node: &mut SwCompositeGraphNode, band: i32) {
+        // Do the actual processing of the job contained in this node.
+        graph_node.process_job(band);
+        // Unblock any child dependencies once every band of this job has been processed.
+        graph_node.unblock_children(self);
+    }
+
+    /// Queue a tile for composition: build its job, install it on `graph_node`, and queue the node if no parent blocks it.
+    fn queue_composite(
+        &self,
+        locked_src: SwCompositeSource,
+        locked_dst: swgl::LockedResource,
+        src_rect: DeviceIntRect,
+        dst_rect: DeviceIntRect,
+        clip_rect: DeviceIntRect,
+        opaque: bool,
+        flip_x: bool,
+        flip_y: bool,
+        filter: ImageRendering,
+        mut graph_node: SwCompositeGraphNodeRef,
+        job_queue: &mut SwCompositeJobQueue,
+    ) {
+        // Clip the destination first; if the clip leaves nothing, there is no job to queue. Sufficiently large
+        // clipped rects are then split into multiple horizontal bands so that multiple threads can process them.
+        let clipped_dst = match dst_rect.intersection(&clip_rect) {
+            Some(clipped_dst) => clipped_dst,
+            None => return,
+        };
+
+        let num_bands = if clipped_dst.width() >= 64 && clipped_dst.height() >= 64 { // One band per 64 rows, at most 4.
+            (clipped_dst.height() / 64).min(4) as u8
+        } else {
+            1
+        };
+        let job = SwCompositeJob {
+            locked_src,
+            locked_dst,
+            src_rect,
+            dst_rect,
+            clipped_dst,
+            opaque,
+            flip_x,
+            flip_y,
+            filter,
+            num_bands,
+        };
+        if graph_node.set_job(job, num_bands) { // True when no parent dependencies remain.
+            self.send_job(job_queue, graph_node);
+        }
+    }
+
+    fn prepare_for_composites(&self) {
+        // Initially, the job queue is empty. Trivially, this means we consider all
+        // jobs queued so far as completed; `send_job` clears the flag again on the first push.
+        self.jobs_completed.store(true, Ordering::SeqCst);
+    }
+
+    /// Lock the thread for access to the job queue. Panics (via `unwrap`) if the mutex is poisoned.
+    fn lock(&self) -> SwCompositeThreadLock {
+        self.jobs.lock().unwrap()
+    }
+
+    /// Send a job to the composite thread by adding it to the back of the job queue.
+    /// If the queue was empty, first clear `jobs_completed` and notify all waiters in
+    /// case the SwComposite thread is waiting for jobs.
+    fn send_job(&self, queue: &mut SwCompositeJobQueue, job: SwCompositeGraphNodeRef) {
+        if queue.is_empty() {
+            self.jobs_completed.store(false, Ordering::SeqCst);
+            self.jobs_available.notify_all();
+        }
+        queue.push_back(job);
+    }
+
+    /// Try to claim a band of work from the currently cached job, if there is one.
+    /// If the job has no available bands left, null out the cached pointer (only if it
+    /// still refers to this job) so that other threads do not bother checking the job.
+    fn try_take_job(&self) -> Option<(&mut SwCompositeGraphNode, i32)> {
+        let current_job_ptr = self.current_job.load(Ordering::SeqCst);
+        // SAFETY: relies on the invariant documented on `current_job` (an SwTile keeps the node alive while cached).
+        if let Some(current_job) = unsafe { current_job_ptr.as_mut() } {
+            let (band, done) = current_job.take_band();
+            if done {
+                // Ignore failure: another thread may already have replaced or cleared the cached pointer.
+                let _ = self.current_job.compare_exchange(current_job_ptr, ptr::null_mut(), Ordering::SeqCst, Ordering::SeqCst);
+            }
+            // `band` is None when every band had already been claimed; fall through to
+            // the final None in that case so the caller looks for another job.
+            // Otherwise hand the claimed band back together with its node.
+            if let Some(band) = band {
+                return Some((current_job, band));
+            }
+        }
+        None
+    }
+
+    /// Take a band of work from the cached job or the queue, waiting on the condvar while none is available.
+    /// `wait` is true on the SwComposite thread and false when the main thread steals jobs; see below for when None is returned.
+    fn take_job(&self, wait: bool) -> Option<(&mut SwCompositeGraphNode, i32)> {
+        // First try checking the cached job outside the scope of the mutex.
+        // For jobs that have multiple bands, this allows us to avoid having
+        // to lock the mutex multiple times to check the job for each band.
+        if let Some((job, band)) = self.try_take_job() {
+            return Some((job, band));
+        }
+        // Lock the job queue while checking for available jobs. The lock
+        // won't be held while the job is processed later outside of this
+        // function so that other threads can pull from the queue meanwhile.
+        let mut jobs = self.lock();
+        loop {
+            // While inside the mutex, check the cached job again to see if it
+            // has been updated.
+            if let Some((job, band)) = self.try_take_job() {
+                return Some((job, band));
+            }
+            // If no cached job was available, try to take a job from the queue
+            // and install it as the current job, then loop to claim a band.
+            if let Some(job) = jobs.pop_front() {
+                self.current_job.store(job.get_ptr_mut(), Ordering::SeqCst);
+                continue;
+            }
+            // Otherwise, the job queue is currently empty. Depending on the
+            // job status, we may either wait for jobs to become available or exit.
+            if wait {
+                // For the SwComposite thread, if we arrive here, the job queue
+                // is empty. Signal that all available jobs have been completed.
+                self.jobs_completed.store(true, Ordering::SeqCst);
+                if self.waiting_for_jobs.load(Ordering::SeqCst) {
+                    // Wake the main thread if it is waiting for a change in job status.
+                    self.jobs_available.notify_all();
+                } else if self.shutting_down.load(Ordering::SeqCst) {
+                    // If SwComposite thread needs to shut down, then exit and stop
+                    // waiting for jobs.
+                    return None;
+                }
+            } else {
+                // If all available jobs have been completed by the SwComposite
+                // thread, then the main thread no longer needs to wait for any
+                // new jobs to appear in the queue and should exit.
+                if self.jobs_completed.load(Ordering::SeqCst) {
+                    return None;
+                }
+                // Otherwise, signal that the main thread is waiting for jobs.
+                self.waiting_for_jobs.store(true, Ordering::SeqCst);
+            }
+            // Wait until signaled (jobs added, jobs completed, or deinit) before checking the job queue again.
+            jobs = self.jobs_available.wait(jobs).unwrap();
+            if !wait {
+                // The main thread is done waiting for jobs.
+                self.waiting_for_jobs.store(false, Ordering::SeqCst);
+            }
+        }
+    }
+
+    /// Wait for all queued composition jobs to be processed.
+    /// Instead of blocking on the SwComposite thread to complete all jobs,
+    /// this may steal some jobs and attempt to process them while waiting.
+    /// This may optionally process jobs synchronously (`sync == true`). When
+    /// normally doing asynchronous processing, the graph dependencies are relied
+    /// upon to properly order the jobs, which makes it safe for the render thread
+    /// to steal jobs from the composite thread without violating those
+    /// dependencies. Synchronous processing just disables this job stealing
+    /// so that the composite thread always handles the jobs in the order
+    /// they were queued without having to rely upon possibly unavailable
+    /// graph dependencies.
+    fn wait_for_composites(&self, sync: bool) {
+        // If processing asynchronously, try to steal jobs from the composite
+        // thread if it is busy.
+        if !sync {
+            while let Some((job, band)) = self.take_job(false) {
+                self.process_job(job, band);
+            }
+            // Once there are no more jobs, just fall through to waiting
+            // synchronously for the composite thread to finish processing.
+        }
+        // In both modes, finish by waiting for the composite thread to
+        // complete processing any in-flight jobs, then bail.
+        let mut jobs = self.lock();
+        // Signal that the main thread may wait for job completion so that the
+        // SwComposite thread can wake it up if necessary.
+        self.waiting_for_jobs.store(true, Ordering::SeqCst);
+        // Wait for job completion to ensure there are no more in-flight jobs.
+        while !self.jobs_completed.load(Ordering::SeqCst) {
+            jobs = self.jobs_available.wait(jobs).unwrap();
+        }
+        // Done waiting for job completion.
+        self.waiting_for_jobs.store(false, Ordering::SeqCst);
+    }
+}
+
+/// Parameters describing how to composite a surface within a frame: (surface id, transform, clip rect, filter).
+type FrameSurface = (
+    NativeSurfaceId,
+    CompositorSurfaceTransform,
+    DeviceIntRect,
+    ImageRendering,
+);
+
+/// Adapter for RenderCompositors to work with SWGL that shuttles between
+/// WebRender and the RenderCompositor via the Compositor API.
+pub struct SwCompositor {
+    gl: swgl::Context,
+    compositor: Box<dyn MappableCompositor>,
+    use_native_compositor: bool,
+    surfaces: HashMap<NativeSurfaceId, SwSurface>,
+    frame_surfaces: Vec<FrameSurface>,
+    /// Any surface added after we're already compositing (i.e. debug overlay)
+    /// needs to be processed after those frame surfaces. For simplicity we
+    /// store them in a separate queue that gets processed later.
+    late_surfaces: Vec<FrameSurface>,
+    cur_tile: NativeTileId,
+    /// The maximum tile size required for any of the allocated surfaces.
+    max_tile_size: DeviceIntSize,
+    /// Reuse the same depth texture amongst all tiles in all surfaces.
+    /// This depth texture must be big enough to accommodate the largest used
+    /// tile size for any surface. The maximum requested tile size is tracked
+    /// to ensure that this depth texture is at least that big.
+    /// This is initialized when the first surface is created and freed when
+    /// the last surface is destroyed, to ensure compositors with no surfaces
+    /// are not holding on to extra memory.
+    depth_id: Option<u32>,
+    /// Instance of the SwComposite thread, only created if we are not relying
+    /// on a native RenderCompositor (see `SwCompositor::new`).
+    composite_thread: Option<Arc<SwCompositeThread>>,
+    /// SWGL locked resource for sharing framebuffer with SwComposite thread
+    locked_framebuffer: Option<swgl::LockedResource>,
+    /// Whether we are currently in the middle of compositing
+    is_compositing: bool,
+}
+
+impl SwCompositor {
+    pub fn new(
+        gl: swgl::Context,
+        compositor: Box<dyn MappableCompositor>,
+        use_native_compositor: bool,
+    ) -> Self {
+        // Only create the SwComposite thread if we're not using a native render
+        // compositor. In that case we are compositing into the main software framebuffer,
+        // which benefits from compositing asynchronously while updating tiles.
+        let composite_thread = if !use_native_compositor {
+            Some(SwCompositeThread::new())
+        } else {
+            None
+        };
+        SwCompositor {
+            gl,
+            compositor,
+            use_native_compositor,
+            surfaces: HashMap::new(),
+            frame_surfaces: Vec::new(),
+            late_surfaces: Vec::new(),
+            cur_tile: NativeTileId { // Placeholder tile id: surface 0 at tile (0, 0).
+                surface_id: NativeSurfaceId(0),
+                x: 0,
+                y: 0,
+            },
+            max_tile_size: DeviceIntSize::zero(),
+            depth_id: None,
+            composite_thread,
+            locked_framebuffer: None,
+            is_compositing: false,
+        }
+    }
+
+    fn deinit_tile(&self, tile: &SwTile) { // Deletes the tile's framebuffer and color texture from the SWGL context.
+        self.gl.delete_framebuffers(&[tile.fbo_id]);
+        self.gl.delete_textures(&[tile.color_id]);
+    }
+
+    fn deinit_surface(&self, surface: &SwSurface) { // Releases the GL resources of every tile in the surface.
+        for tile in &surface.tiles {
+            self.deinit_tile(tile);
+        }
+    }
+
+    /// Attempt to occlude queued frame surfaces, using the clip rect of each
+    /// opaque, non-empty frame surface as an occluder. If an earlier surface is
+    /// occluded, we attempt to restrict its clip rect so long as it can remain a
+    /// single clip rect. Earlier frame surfaces that are opaque will be fused if
+    /// possible with the occluder to further try and restrict any underlying surfaces.
+    fn occlude_surfaces(&mut self) {
+        // Check if inner range is fully included in outer range
+        fn includes(outer: &Range<i32>, inner: &Range<i32>) -> bool {
+            outer.start <= inner.start && outer.end >= inner.end
+        }
+
+        // Check if outer range overlaps either the start or end of the inner
+        // range. If there is overlap, return the portion of the inner range
+        // remaining after the overlap has been removed (possibly empty).
+        fn overlaps(outer: &Range<i32>, inner: &Range<i32>) -> Option<Range<i32>> {
+            if outer.start <= inner.start && outer.end >= inner.start {
+                Some(outer.end..inner.end.max(outer.end))
+            } else if outer.start <= inner.end && outer.end >= inner.end {
+                Some(inner.start..outer.start.max(inner.start))
+            } else {
+                None
+            }
+        }
+
+        fn set_x_range(rect: &mut DeviceIntRect, range: &Range<i32>) { // Overwrite the rect's horizontal extent.
+            rect.min.x = range.start;
+            rect.max.x = range.end;
+        }
+
+        fn set_y_range(rect: &mut DeviceIntRect, range: &Range<i32>) { // Overwrite the rect's vertical extent.
+            rect.min.y = range.start;
+            rect.max.y = range.end;
+        }
+
+        fn union(base: Range<i32>, extra: Range<i32>) -> Range<i32> { // Smallest range covering both inputs.
+            base.start.min(extra.start)..base.end.max(extra.end)
+        }
+
+        // Before we can try to occlude any surfaces, we need to fix their clip rects to tightly
+        // bound the valid region. The clip rect might otherwise enclose an invalid area that
+        // can't fully occlude anything even if the surface is opaque.
+        for &mut (ref id, ref transform, ref mut clip_rect, _) in &mut self.frame_surfaces {
+            if let Some(surface) = self.surfaces.get(id) {
+                // Restrict the clip rect to fall within the valid region of the surface (default rect if there is none).
+                *clip_rect = surface.device_bounds(transform, clip_rect).unwrap_or_default();
+            }
+        }
+
+        // For each frame surface, treat it as an occluder if it is non-empty and opaque. Look
+        // through the preceding surfaces to see if any can be occluded.
+        for occlude_index in 0..self.frame_surfaces.len() {
+            let (ref occlude_id, _, ref occlude_rect, _) = self.frame_surfaces[occlude_index];
+            match self.surfaces.get(occlude_id) {
+                Some(occluder) if occluder.is_opaque && !occlude_rect.is_empty() => {}
+                _ => continue,
+            }
+
+            // Traverse the queued surfaces for this frame in the reverse order of
+            // how they are composited, or rather, in order of visibility. For each
+            // surface, check if the occluder can restrict the clip rect such that
+            // the clip rect can remain a single rect. If the clip rect overlaps
+            // the occluder on one axis interval while remaining fully included in
+            // the occluder's other axis interval, then we can chop down the edge
+            // of the clip rect on the overlapped axis. Further, if the surface is
+            // opaque and its clip rect exactly matches the occluder rect on one
+            // axis interval while overlapping on the other, fuse it with the
+            // occluder rect before considering any underlying surfaces.
+            let (mut occlude_x, mut occlude_y) = (occlude_rect.x_range(), occlude_rect.y_range());
+            for &mut (ref id, _, ref mut clip_rect, _) in self.frame_surfaces[..occlude_index].iter_mut().rev() {
+                if let Some(surface) = self.surfaces.get(id) {
+                    let (clip_x, clip_y) = (clip_rect.x_range(), clip_rect.y_range());
+                    if includes(&occlude_x, &clip_x) {
+                        if let Some(visible) = overlaps(&occlude_y, &clip_y) {
+                            set_y_range(clip_rect, &visible);
+                            if surface.is_opaque && occlude_x == clip_x {
+                                occlude_y = union(occlude_y, visible);
+                            }
+                        }
+                    } else if includes(&occlude_y, &clip_y) {
+                        if let Some(visible) = overlaps(&occlude_x, &clip_x) {
+                            set_x_range(clip_rect, &visible);
+                            if surface.is_opaque && occlude_y == clip_y {
+                                occlude_x = union(occlude_x, visible);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    /// Reset tile dependency state for a new frame: zero overlaps, clear the invalid flag, reset each graph node.
+    fn reset_overlaps(&mut self) {
+        for surface in self.surfaces.values_mut() {
+            for tile in &mut surface.tiles {
+                tile.overlaps.set(0);
+                tile.invalid.set(false);
+                tile.graph_node.reset();
+            }
+        }
+    }
+
+    /// Computes an overlap count for a tile that falls within the given composite
+    /// destination rectangle. This requires checking all surfaces currently queued for
+    /// composition so far in this frame and seeing if they have any invalidated tiles
+    /// whose destination rectangles would also overlap the supplied tile. If so, then
+    /// increment the overlap count to account for all such dependencies on invalid tiles.
+    /// Tiles with the same overlap count will still be drawn with a stable ordering in
+    /// the order the surfaces were queued, so it is safe to ignore other possible sources
+    /// of composition ordering dependencies, as the later queued tile will still be drawn
+    /// later than the blocking tiles within that stable order. We assume that the tile's
+    /// surface hasn't yet been added to the current frame list of surfaces to composite
+    /// so that we only process potential blockers from surfaces that would come earlier
+    /// in composition (the scan also stops early if it does find the tile's own surface).
+    fn init_overlaps(
+        &self,
+        overlap_id: &NativeSurfaceId,
+        overlap_surface: &SwSurface,
+        overlap_tile: &SwTile,
+        overlap_transform: &CompositorSurfaceTransform,
+        overlap_clip_rect: &DeviceIntRect,
+    ) {
+        // Record an extra overlap for an invalid tile to track the tile's dependency
+        // on its own future update.
+        let mut overlaps = if overlap_tile.invalid.get() { 1 } else { 0 };
+
+        let overlap_rect = match overlap_tile.overlap_rect(overlap_surface, overlap_transform, overlap_clip_rect) {
+            Some(overlap_rect) => overlap_rect,
+            None => { // No overlap rect: only the tile's own pending update (if any) is recorded.
+                overlap_tile.overlaps.set(overlaps);
+                return;
+            }
+        };
+
+        for &(ref id, ref transform, ref clip_rect, _) in &self.frame_surfaces {
+            // We only want to consider surfaces that were added before the current one we're
+            // checking for overlaps. If we find that surface, then we're done.
+            if id == overlap_id {
+                break;
+            }
+            // If the surface's clip rect doesn't overlap the tile's rect,
+            // then there is no need to check any tiles within the surface.
+            if !overlap_rect.intersects(clip_rect) {
+                continue;
+            }
+            if let Some(surface) = self.surfaces.get(id) {
+                for tile in &surface.tiles {
+                    // If there is a deferred tile that might overlap the destination rectangle,
+                    // record the overlap.
+                    if tile.may_overlap(surface, transform, clip_rect, &overlap_rect) {
+                        if tile.overlaps.get() > 0 {
+                            overlaps += 1;
+                        }
+                        // Regardless of whether this tile is deferred, since it may overlap,
+                        // record it as a potential dependency parent of the tile being checked.
+                        tile.graph_node.get_mut().add_child(overlap_tile.graph_node.clone());
+                    }
+                }
+            }
+        }
+        if overlaps > 0 {
+            // Has a dependency on some invalid tiles, so need to defer composition.
+            overlap_tile.overlaps.set(overlaps);
+        }
+    }
+
+    /// Helper function that locks the tile's source texture(s) and queues a composite job to the current locked framebuffer
+    fn queue_composite(
+        &self,
+        surface: &SwSurface,
+        transform: &CompositorSurfaceTransform,
+        clip_rect: &DeviceIntRect,
+        filter: ImageRendering,
+        tile: &SwTile,
+        job_queue: &mut SwCompositeJobQueue,
+    ) {
+        if let Some(ref composite_thread) = self.composite_thread {
+            if let Some((src_rect, dst_rect, flip_x, flip_y)) = tile.composite_rects(surface, transform, clip_rect) {
+                let source = if surface.external_image.is_some() {
+                    // If the surface has an attached external image, lock any textures supplied in the descriptor.
+                    match surface.composite_surface {
+                        Some(ref info) => match info.yuv_planes {
+                            0 => match self.gl.lock_texture(info.textures[0]) { // Zero YUV planes: a single BGRA texture.
+                                Some(texture) => SwCompositeSource::BGRA(texture),
+                                None => return,
+                            },
+                            3 => match ( // Three planes: separate Y, U and V textures, all of which must lock.
+                                self.gl.lock_texture(info.textures[0]),
+                                self.gl.lock_texture(info.textures[1]),
+                                self.gl.lock_texture(info.textures[2]),
+                            ) {
+                                (Some(y_texture), Some(u_texture), Some(v_texture)) => SwCompositeSource::YUV(
+                                    y_texture,
+                                    u_texture,
+                                    v_texture,
+                                    info.color_space,
+                                    info.color_depth,
+                                ),
+                                _ => return,
+                            },
+                            _ => panic!("unsupported number of YUV planes: {}", info.yuv_planes),
+                        },
+                        None => return, // No composite surface info is set for this external image; skip.
+                    }
+                } else if let Some(texture) = self.gl.lock_texture(tile.color_id) {
+                    // Lock the texture representing the picture cache tile.
+                    SwCompositeSource::BGRA(texture)
+                } else {
+                    return;
+                };
+                if let Some(ref framebuffer) = self.locked_framebuffer { // Nothing is queued without a locked framebuffer.
+                    composite_thread.queue_composite(
+                        source,
+                        framebuffer.clone(),
+                        src_rect,
+                        dst_rect,
+                        *clip_rect,
+                        surface.is_opaque,
+                        flip_x,
+                        flip_y,
+                        filter,
+                        tile.graph_node.clone(),
+                        job_queue,
+                    );
+                }
+            }
+        }
+    }
+
+    /// Lock a surface with an attached external image for compositing. On success the surface's first
+    /// tile takes the locked image's size as its valid rect; on failure the rect and info are cleared.
+    fn try_lock_composite_surface(&mut self, id: &NativeSurfaceId) {
+        if let Some(surface) = self.surfaces.get_mut(id) {
+            if let Some(external_image) = surface.external_image {
+                // Attempt to lock the attached external image for compositing. On success the compositor
+                // fills in a descriptor of textures and the data necessary for their interpretation.
+                let mut info = SWGLCompositeSurfaceInfo {
+                    yuv_planes: 0,
+                    textures: [0; 3],
+                    color_space: YuvRangedColorSpace::GbrIdentity,
+                    color_depth: ColorDepth::Color8,
+                    size: DeviceIntSize::zero(),
+                };
+                assert!(!surface.tiles.is_empty());
+                let tile = &mut surface.tiles[0];
+                if self.compositor.lock_composite_surface(self.gl.into(), external_image, &mut info) {
+                    tile.valid_rect = DeviceIntRect::from_size(info.size);
+                    surface.composite_surface = Some(info);
+                } else {
+                    tile.valid_rect = DeviceIntRect::zero();
+                    surface.composite_surface = None;
+                }
+            }
+        }
+    }
+
+    /// Unlock every attached external image that is still locked among this frame's frame and late surfaces.
+    fn unlock_composite_surfaces(&mut self) {
+        for &(ref id, _, _, _) in self.frame_surfaces.iter().chain(self.late_surfaces.iter()) {
+            if let Some(surface) = self.surfaces.get_mut(id) {
+                if let Some(external_image) = surface.external_image {
+                    if surface.composite_surface.is_some() { // Only surfaces whose lock succeeded carry composite info.
+                        self.compositor.unlock_composite_surface(self.gl.into(), external_image);
+                        surface.composite_surface = None;
+                    }
+                }
+            }
+        }
+    }
+
+    /// Issue composites for any tiles that are no longer blocked following a tile update.
+    /// Surfaces and tiles are visited in the order they appear in `frame_surfaces`.
+    fn flush_composites(&self, tile_id: &NativeTileId, surface: &SwSurface, tile: &SwTile) {
+        let composite_thread = match &self.composite_thread {
+            Some(composite_thread) => composite_thread,
+            None => return, // no composite thread: nothing to queue
+        };
+
+        // Skip ahead to the updated tile's surface; the iterator then continues with later surfaces.
+        let mut frame_surfaces = self
+            .frame_surfaces
+            .iter()
+            .skip_while(|&(ref id, _, _, _)| *id != tile_id.surface_id);
+        let (overlap_rect, mut lock) = match frame_surfaces.next() {
+            Some(&(_, ref transform, ref clip_rect, filter)) => {
+                // Remove invalid tile's update dependency.
+                if tile.invalid.get() {
+                    tile.overlaps.set(tile.overlaps.get() - 1);
+                }
+                // If the tile still has overlaps, keep deferring it till later.
+                if tile.overlaps.get() > 0 {
+                    return;
+                }
+                // Otherwise, the tile's dependencies are all resolved, so composite it.
+                let mut lock = composite_thread.lock();
+                self.queue_composite(surface, transform, clip_rect, filter, tile, &mut lock);
+                // Finally, get the tile's overlap rect used for tracking dependencies
+                match tile.overlap_rect(surface, transform, clip_rect) {
+                    Some(overlap_rect) => (overlap_rect, lock),
+                    None => return,
+                }
+            }
+            None => return, // the surface is not in this frame's list
+        };
+
+        // Accumulate rects whose dependencies have been satisfied from this update.
+        // Store the union of all these bounds to quickly reject unaffected tiles.
+        let mut flushed_bounds = overlap_rect;
+        let mut flushed_rects = vec![overlap_rect];
+
+        // Check surfaces following the update in the frame list and see if they would overlap it.
+        for &(ref id, ref transform, ref clip_rect, filter) in frame_surfaces {
+            // If the clip rect doesn't overlap the conservative bounds, we can skip the whole surface.
+            if !flushed_bounds.intersects(clip_rect) {
+                continue;
+            }
+            if let Some(surface) = self.surfaces.get(id) {
+                // Search through the surface's tiles for any blocked on this update and queue jobs for them.
+                for tile in &surface.tiles {
+                    let mut overlaps = tile.overlaps.get();
+                    // Only check tiles that have existing unresolved dependencies
+                    if overlaps == 0 {
+                        continue;
+                    }
+                    // Get this tile's overlap rect for tracking dependencies
+                    let overlap_rect = match tile.overlap_rect(surface, transform, clip_rect) {
+                        Some(overlap_rect) => overlap_rect,
+                        None => continue,
+                    };
+                    // Do a quick check to see if the tile overlaps the conservative bounds.
+                    if !overlap_rect.intersects(&flushed_bounds) {
+                        continue;
+                    }
+                    // Decrement the overlap count once for every flushed rect this tile intersects.
+                    for flushed_rect in &flushed_rects {
+                        if overlap_rect.intersects(flushed_rect) {
+                            overlaps -= 1;
+                        }
+                    }
+                    if overlaps != tile.overlaps.get() {
+                        // If the overlap count changed, this tile had a dependency on some flush rects.
+                        // If the count hit zero, it is ready to composite.
+                        tile.overlaps.set(overlaps);
+                        if overlaps == 0 {
+                            self.queue_composite(surface, transform, clip_rect, filter, tile, &mut lock);
+                            // Record that the tile got flushed to update any downwind dependencies.
+                            flushed_bounds = flushed_bounds.union(&overlap_rect);
+                            flushed_rects.push(overlap_rect);
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+impl Compositor for SwCompositor {
+    fn create_surface(
+        &mut self,
+        id: NativeSurfaceId,
+        virtual_offset: DeviceIntPoint,
+        tile_size: DeviceIntSize,
+        is_opaque: bool,
+    ) {
+        if self.use_native_compositor { // mirror the call to the wrapped compositor when it is in use
+            self.compositor.create_surface(id, virtual_offset, tile_size, is_opaque);
+        }
+        self.max_tile_size = DeviceIntSize::new( // per-axis maximum of all tile sizes seen so far
+            self.max_tile_size.width.max(tile_size.width),
+            self.max_tile_size.height.max(tile_size.height),
+        );
+        if self.depth_id.is_none() { // lazily create the depth texture that create_tile() and bind() expect
+            self.depth_id = Some(self.gl.gen_textures(1)[0]);
+        }
+        self.surfaces.insert(id, SwSurface::new(tile_size, is_opaque)); // track the surface locally
+    }
+
+    fn create_external_surface(&mut self, id: NativeSurfaceId, is_opaque: bool) { // register a surface meant for an external image
+        if self.use_native_compositor {
+            self.compositor.create_external_surface(id, is_opaque);
+        }
+        self.surfaces // zero tile size: attach_external_image asserts that the tile size is empty
+            .insert(id, SwSurface::new(DeviceIntSize::zero(), is_opaque));
+    }
+
+    fn create_backdrop_surface(&mut self, _id: NativeSurfaceId, _color: ColorF) { // both arguments are ignored
+        unreachable!("Not implemented.") // backdrop surfaces are not implemented here; any call panics
+    }
+
+    fn destroy_surface(&mut self, id: NativeSurfaceId) { // drop a surface and release what it holds
+        if let Some(surface) = self.surfaces.remove(&id) {
+            self.deinit_surface(&surface);
+        }
+        if self.use_native_compositor {
+            self.compositor.destroy_surface(id);
+        }
+        if self.surfaces.is_empty() { // last surface gone: also delete the depth texture
+            if let Some(depth_id) = self.depth_id.take() {
+                self.gl.delete_textures(&[depth_id]);
+            }
+        }
+    }
+
+    fn deinit(&mut self) { // tear down: composite thread, then surfaces, depth texture, native compositor
+        if let Some(ref composite_thread) = self.composite_thread {
+            composite_thread.deinit();
+        }
+
+        for surface in self.surfaces.values() { // the map itself is not cleared here
+            self.deinit_surface(surface);
+        }
+
+        if let Some(depth_id) = self.depth_id.take() { // take() leaves None so it is not deleted twice
+            self.gl.delete_textures(&[depth_id]);
+        }
+
+        if self.use_native_compositor {
+            self.compositor.deinit();
+        }
+    }
+
+    fn create_tile(&mut self, id: NativeTileId) { // create a tile with its own color texture and FBO
+        if self.use_native_compositor {
+            self.compositor.create_tile(id);
+        }
+        if let Some(surface) = self.surfaces.get_mut(&id.surface_id) { // unknown surface ids are ignored
+            let mut tile = SwTile::new(id.x, id.y);
+            tile.color_id = self.gl.gen_textures(1)[0];
+            tile.fbo_id = self.gl.gen_framebuffers(1)[0];
+            let mut prev_fbo = [0]; // receives the current draw framebuffer binding, restored below
+            unsafe {
+                self.gl.get_integer_v(gl::DRAW_FRAMEBUFFER_BINDING, &mut prev_fbo);
+            }
+            self.gl.bind_framebuffer(gl::DRAW_FRAMEBUFFER, tile.fbo_id);
+            self.gl.framebuffer_texture_2d( // attach the tile's own color texture
+                gl::DRAW_FRAMEBUFFER,
+                gl::COLOR_ATTACHMENT0,
+                gl::TEXTURE_2D,
+                tile.color_id,
+                0,
+            );
+            self.gl.framebuffer_texture_2d( // attach the depth texture created in create_surface()
+                gl::DRAW_FRAMEBUFFER,
+                gl::DEPTH_ATTACHMENT,
+                gl::TEXTURE_2D,
+                self.depth_id.expect("depth texture should be initialized"),
+                0,
+            );
+            self.gl.bind_framebuffer(gl::DRAW_FRAMEBUFFER, prev_fbo[0] as gl::GLuint); // restore previous binding
+
+            surface.tiles.push(tile);
+        }
+    }
+
+    fn destroy_tile(&mut self, id: NativeTileId) { // remove the tile at (id.x, id.y) and release it
+        if let Some(surface) = self.surfaces.get_mut(&id.surface_id) {
+            if let Some(idx) = surface.tiles.iter().position(|t| t.x == id.x && t.y == id.y) {
+                let tile = surface.tiles.remove(idx); // take ownership before deinitializing
+                self.deinit_tile(&tile);
+            }
+        }
+        if self.use_native_compositor {
+            self.compositor.destroy_tile(id);
+        }
+    }
+
+    fn attach_external_image(&mut self, id: NativeSurfaceId, external_image: ExternalImageId) {
+        if self.use_native_compositor {
+            self.compositor.attach_external_image(id, external_image);
+        }
+        if let Some(surface) = self.surfaces.get_mut(&id) {
+            // A surface with an attached external image uses one tile at (0, 0) for the whole
+            // surface, so it must have been created with an empty tile size.
+            assert!(surface.tile_size.is_empty());
+            surface.external_image = Some(external_image);
+            if surface.tiles.is_empty() { // create the single tile only on first attachment
+                surface.tiles.push(SwTile::new(0, 0));
+            }
+        }
+    }
+
+    fn invalidate_tile(&mut self, id: NativeTileId, valid_rect: DeviceIntRect) { // flag tile invalid, store new valid rect
+        if self.use_native_compositor {
+            self.compositor.invalidate_tile(id, valid_rect);
+        }
+        let sw_surface = self.surfaces.get_mut(&id.surface_id);
+        let sw_tile = sw_surface.and_then(|s| s.tiles.iter_mut().find(|t| t.x == id.x && t.y == id.y));
+        if let Some(sw_tile) = sw_tile { // unknown surfaces or tiles are silently ignored
+            sw_tile.invalid.set(true);
+            sw_tile.valid_rect = valid_rect;
+        }
+    }
+
+    fn bind(&mut self, id: NativeTileId, dirty_rect: DeviceIntRect, valid_rect: DeviceIntRect) -> NativeSurfaceInfo {
+        let mut surface_info = NativeSurfaceInfo { // default result if the tile is missing or has an empty valid rect
+            origin: DeviceIntPoint::zero(),
+            fbo_id: 0,
+        };
+
+        self.cur_tile = id; // remembered so unbind() knows which tile was bound
+
+        if let Some(surface) = self.surfaces.get_mut(&id.surface_id) {
+            if let Some(tile) = surface.tiles.iter_mut().find(|t| t.x == id.x && t.y == id.y) {
+                assert_eq!(tile.valid_rect, valid_rect); // must match what was recorded, e.g. by invalidate_tile()
+                if valid_rect.is_empty() {
+                    return surface_info;
+                }
+
+                let mut stride = 0;
+                let mut buf = ptr::null_mut(); // stays null (with stride 0) unless the native compositor maps the tile
+                if self.use_native_compositor {
+                    if let Some(tile_info) = self.compositor.map_tile(id, dirty_rect, valid_rect) {
+                        stride = tile_info.stride;
+                        buf = tile_info.data;
+                    }
+                }
+                self.gl.set_texture_buffer( // color buffer: sized to the valid rect, within the surface tile size
+                    tile.color_id,
+                    gl::RGBA8,
+                    valid_rect.width(),
+                    valid_rect.height(),
+                    stride,
+                    buf,
+                    surface.tile_size.width,
+                    surface.tile_size.height,
+                );
+                // Reallocate the shared depth buffer to fit the valid rect, but within
+                // a buffer sized to actually fit at least the maximum possible tile size.
+                // The maximum tile size is supplied to avoid reallocation by ensuring the
+                // allocated buffer is actually big enough to accommodate the largest tile
+                // size requested by any used surface, even though supplied valid rect may
+                // actually be much smaller than this. This will only force a texture
+                // reallocation inside SWGL if the maximum tile size has grown since the
+                // last time it was supplied, instead simply reusing the buffer if the max
+                // tile size is not bigger than what was previously allocated.
+                self.gl.set_texture_buffer(
+                    self.depth_id.expect("depth texture should be initialized"),
+                    gl::DEPTH_COMPONENT,
+                    valid_rect.width(),
+                    valid_rect.height(),
+                    0,
+                    ptr::null_mut(),
+                    self.max_tile_size.width,
+                    self.max_tile_size.height,
+                );
+                surface_info.fbo_id = tile.fbo_id;
+                surface_info.origin -= valid_rect.min.to_vector(); // origin becomes the negated valid-rect minimum
+            }
+        }
+
+        surface_info
+    }
+
+    fn unbind(&mut self) { // finish work on the tile recorded by the last bind()
+        let id = self.cur_tile;
+        if let Some(surface) = self.surfaces.get(&id.surface_id) {
+            if let Some(tile) = surface.tiles.iter().find(|t| t.x == id.x && t.y == id.y) {
+                if tile.valid_rect.is_empty() {
+                    // Nothing was rendered (bind() returned early for an empty valid rect), so
+                    // only queue any dependencies.
+                    self.flush_composites(&id, surface, tile);
+                    return;
+                }
+
+                // Force any delayed clears to be resolved.
+                self.gl.resolve_framebuffer(tile.fbo_id);
+
+                if self.use_native_compositor {
+                    self.compositor.unmap_tile(); // pairs with the map_tile() call made in bind()
+                } else {
+                    // If we're not relying on a native compositor, then composite
+                    // any tiles that are dependent on this tile being updated but
+                    // are otherwise ready to composite.
+                    self.flush_composites(&id, surface, tile);
+                }
+            }
+        }
+    }
+
+    fn begin_frame(&mut self) { // start a frame with freshly reset overlap tracking
+        self.reset_overlaps();
+
+        if self.use_native_compositor {
+            self.compositor.begin_frame();
+        }
+    }
+
+    fn add_surface(
+        &mut self,
+        id: NativeSurfaceId,
+        transform: CompositorSurfaceTransform,
+        clip_rect: DeviceIntRect,
+        filter: ImageRendering,
+    ) {
+        if self.use_native_compositor {
+            self.compositor.add_surface(id, transform, clip_rect, filter);
+        }
+
+        if self.composite_thread.is_some() { // locking and late-surface handling only apply with a composite thread
+            // If the surface has an attached external image, try to lock that now.
+            self.try_lock_composite_surface(&id);
+
+            // If we're already busy compositing, then add to the queue of late
+            // surfaces instead of trying to sort into the main frame queue.
+            // These late surfaces will not have any overlap tracking done for
+            // them and must be processed synchronously at the end of the frame.
+            if self.is_compositing {
+                self.late_surfaces.push((id, transform, clip_rect, filter));
+                return;
+            }
+        }
+
+        self.frame_surfaces.push((id, transform, clip_rect, filter)); // normal case: append in submission order
+    }
+
+    /// Now that all the dependency graph nodes have been built, start queuing
+    /// composition jobs. Any surfaces that get added after this point in the
+    /// frame will not have overlap dependencies assigned and so must instead
+    /// be added to the late_surfaces queue to be processed at the end of the
+    /// frame.
+    fn start_compositing(&mut self, clear_color: ColorF, dirty_rects: &[DeviceIntRect], _opaque_rects: &[DeviceIntRect]) {
+        self.is_compositing = true; // from here on add_surface() routes surfaces to late_surfaces
+
+        // Opaque rects are currently only computed here, not by WR itself, so we
+        // ignore the passed parameter and forward our own version onto the native
+        // compositor.
+        let mut opaque_rects: Vec<DeviceIntRect> = Vec::new();
+        for &(ref id, ref transform, ref clip_rect, _filter) in &self.frame_surfaces {
+            if let Some(surface) = self.surfaces.get(id) {
+                if !surface.is_opaque {
+                    continue;
+                }
+
+                for tile in &surface.tiles {
+                    if let Some(rect) = tile.overlap_rect(surface, transform, clip_rect) {
+                        opaque_rects.push(rect);
+                    }
+                }
+            }
+        }
+
+        self.compositor.start_compositing(clear_color, dirty_rects, &opaque_rects);
+
+        if let Some(dirty_rect) = dirty_rects // union of all dirty rects; skipped entirely when that union is empty
+            .iter()
+            .fold(DeviceIntRect::zero(), |acc, dirty_rect| acc.union(dirty_rect))
+            .to_non_empty()
+        {
+            // Factor dirty rect into surface clip rects
+            for &mut (_, _, ref mut clip_rect, _) in &mut self.frame_surfaces {
+                *clip_rect = clip_rect.intersection(&dirty_rect).unwrap_or_default();
+            }
+        }
+
+        self.occlude_surfaces();
+
+        // Discard surfaces that are entirely clipped out
+        self.frame_surfaces
+            .retain(|&(_, _, clip_rect, _)| !clip_rect.is_empty());
+
+        if let Some(ref composite_thread) = self.composite_thread {
+            // Compute overlap dependencies for surfaces.
+            for &(ref id, ref transform, ref clip_rect, _filter) in &self.frame_surfaces {
+                if let Some(surface) = self.surfaces.get(id) {
+                    for tile in &surface.tiles {
+                        self.init_overlaps(id, surface, tile, transform, clip_rect);
+                    }
+                }
+            }
+
+            self.locked_framebuffer = self.gl.lock_framebuffer(0); // reset to None again in end_frame()
+
+            composite_thread.prepare_for_composites();
+
+            // Issue any initial composite jobs for the SwComposite thread.
+            let mut lock = composite_thread.lock();
+            for &(ref id, ref transform, ref clip_rect, filter) in &self.frame_surfaces {
+                if let Some(surface) = self.surfaces.get(id) {
+                    for tile in &surface.tiles {
+                        if tile.overlaps.get() == 0 {
+                            // Not dependent on any tiles, so go ahead and composite now.
+                            self.queue_composite(surface, transform, clip_rect, filter, tile, &mut lock);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    fn end_frame(&mut self) { // finish the frame: drain composites, unlock images, reset per-frame state
+        self.is_compositing = false;
+
+        if self.use_native_compositor {
+            self.compositor.end_frame();
+        } else if let Some(ref composite_thread) = self.composite_thread {
+            // Need to wait for the SwComposite thread to finish any queued jobs.
+            composite_thread.wait_for_composites(false);
+
+            if !self.late_surfaces.is_empty() {
+                // All of the main frame surfaces have been processed by now. But if there
+                // are any late surfaces, we need to kick off a new synchronous composite
+                // phase. These late surfaces don't have any overlap/dependency tracking,
+                // so we just queue them directly and wait synchronously for the composite
+                // thread to process them in order.
+                composite_thread.prepare_for_composites();
+                {
+                    let mut lock = composite_thread.lock(); // scoped so the lock is dropped before waiting below
+                    for &(ref id, ref transform, ref clip_rect, filter) in &self.late_surfaces {
+                        if let Some(surface) = self.surfaces.get(id) {
+                            for tile in &surface.tiles {
+                                self.queue_composite(surface, transform, clip_rect, filter, tile, &mut lock);
+                            }
+                        }
+                    }
+                }
+                composite_thread.wait_for_composites(true);
+            }
+
+            self.locked_framebuffer = None; // drop the value stored by start_compositing()
+
+            self.unlock_composite_surfaces();
+        }
+
+        self.frame_surfaces.clear();
+        self.late_surfaces.clear();
+
+        self.reset_overlaps();
+    }
+
+    fn enable_native_compositor(&mut self, enable: bool) {
+        // TODO: The SwComposite thread is not properly instantiated if this is
+        // ever actually toggled, so the assert below rejects any real change of mode.
+        assert_eq!(self.use_native_compositor, enable);
+        self.compositor.enable_native_compositor(enable);
+        self.use_native_compositor = enable; // a no-op given the assert above
+    }
+
+    fn get_capabilities(&self) -> CompositorCapabilities { // answered by the wrapped compositor
+        self.compositor.get_capabilities()
+    }
+
+    fn get_window_visibility(&self) -> WindowVisibility { // answered by the wrapped compositor
+        self.compositor.get_window_visibility()
+    }
+}
diff --git a/gfx/wr/webrender/src/debug_colors.rs b/gfx/wr/webrender/src/debug_colors.rs
new file mode 100644
index 0000000000..4ce8887126
--- /dev/null
+++ b/gfx/wr/webrender/src/debug_colors.rs
@@ -0,0 +1,159 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#![allow(dead_code)]
+#![cfg_attr(feature = "cargo-clippy", allow(clippy::excessive_precision))]
+
+use api::ColorF;
+
+// A subset of the standard CSS named colors, all fully opaque (a = 1.0); useful for defining GPU tag colors etc.
+
+pub const INDIGO: ColorF = ColorF { r: 0.294117647059, g: 0.0, b: 0.509803921569, a: 1.0 };
+pub const GOLD: ColorF = ColorF { r: 1.0, g: 0.843137254902, b: 0.0, a: 1.0 };
+pub const FIREBRICK: ColorF = ColorF { r: 0.698039215686, g: 0.133333333333, b: 0.133333333333, a: 1.0 };
+pub const INDIANRED: ColorF = ColorF { r: 0.803921568627, g: 0.360784313725, b: 0.360784313725, a: 1.0 };
+pub const YELLOW: ColorF = ColorF { r: 1.0, g: 1.0, b: 0.0, a: 1.0 };
+pub const DARKOLIVEGREEN: ColorF = ColorF { r: 0.333333333333, g: 0.419607843137, b: 0.18431372549, a: 1.0 };
+pub const DARKSEAGREEN: ColorF = ColorF { r: 0.560784313725, g: 0.737254901961, b: 0.560784313725, a: 1.0 };
+pub const SLATEGREY: ColorF = ColorF { r: 0.439215686275, g: 0.501960784314, b: 0.564705882353, a: 1.0 };
+pub const DARKSLATEGREY: ColorF = ColorF { r: 0.18431372549, g: 0.309803921569, b: 0.309803921569, a: 1.0 };
+pub const MEDIUMVIOLETRED: ColorF = ColorF { r: 0.780392156863, g: 0.0823529411765, b: 0.521568627451, a: 1.0 };
+pub const MEDIUMORCHID: ColorF = ColorF { r: 0.729411764706, g: 0.333333333333, b: 0.827450980392, a: 1.0 };
+pub const CHARTREUSE: ColorF = ColorF { r: 0.498039215686, g: 1.0, b: 0.0, a: 1.0 };
+pub const MEDIUMSLATEBLUE: ColorF = ColorF { r: 0.482352941176, g: 0.407843137255, b: 0.933333333333, a: 1.0 };
+pub const BLACK: ColorF = ColorF { r: 0.0, g: 0.0, b: 0.0, a: 1.0 };
+pub const SPRINGGREEN: ColorF = ColorF { r: 0.0, g: 1.0, b: 0.498039215686, a: 1.0 };
+pub const CRIMSON: ColorF = ColorF { r: 0.862745098039, g: 0.078431372549, b: 0.235294117647, a: 1.0 };
+pub const LIGHTSALMON: ColorF = ColorF { r: 1.0, g: 0.627450980392, b: 0.478431372549, a: 1.0 };
+pub const BROWN: ColorF = ColorF { r: 0.647058823529, g: 0.164705882353, b: 0.164705882353, a: 1.0 };
+pub const TURQUOISE: ColorF = ColorF { r: 0.250980392157, g: 0.878431372549, b: 0.81568627451, a: 1.0 };
+pub const OLIVEDRAB: ColorF = ColorF { r: 0.419607843137, g: 0.556862745098, b: 0.137254901961, a: 1.0 };
+pub const CYAN: ColorF = ColorF { r: 0.0, g: 1.0, b: 1.0, a: 1.0 };
+pub const SILVER: ColorF = ColorF { r: 0.752941176471, g: 0.752941176471, b: 0.752941176471, a: 1.0 };
+pub const SKYBLUE: ColorF = ColorF { r: 0.529411764706, g: 0.807843137255, b: 0.921568627451, a: 1.0 };
+pub const GRAY: ColorF = ColorF { r: 0.501960784314, g: 0.501960784314, b: 0.501960784314, a: 1.0 };
+pub const DARKTURQUOISE: ColorF = ColorF { r: 0.0, g: 0.807843137255, b: 0.819607843137, a: 1.0 };
+pub const GOLDENROD: ColorF = ColorF { r: 0.854901960784, g: 0.647058823529, b: 0.125490196078, a: 1.0 };
+pub const DARKGREEN: ColorF = ColorF { r: 0.0, g: 0.392156862745, b: 0.0, a: 1.0 };
+pub const DARKVIOLET: ColorF = ColorF { r: 0.580392156863, g: 0.0, b: 0.827450980392, a: 1.0 };
+pub const DARKGRAY: ColorF = ColorF { r: 0.662745098039, g: 0.662745098039, b: 0.662745098039, a: 1.0 };
+pub const LIGHTPINK: ColorF = ColorF { r: 1.0, g: 0.713725490196, b: 0.756862745098, a: 1.0 };
+pub const TEAL: ColorF = ColorF { r: 0.0, g: 0.501960784314, b: 0.501960784314, a: 1.0 };
+pub const DARKMAGENTA: ColorF = ColorF { r: 0.545098039216, g: 0.0, b: 0.545098039216, a: 1.0 };
+pub const LIGHTGOLDENRODYELLOW: ColorF = ColorF { r: 0.980392156863, g: 0.980392156863, b: 0.823529411765, a: 1.0 };
+pub const LAVENDER: ColorF = ColorF { r: 0.901960784314, g: 0.901960784314, b: 0.980392156863, a: 1.0 };
+pub const YELLOWGREEN: ColorF = ColorF { r: 0.603921568627, g: 0.803921568627, b: 0.196078431373, a: 1.0 };
+pub const THISTLE: ColorF = ColorF { r: 0.847058823529, g: 0.749019607843, b: 0.847058823529, a: 1.0 };
+pub const VIOLET: ColorF = ColorF { r: 0.933333333333, g: 0.509803921569, b: 0.933333333333, a: 1.0 };
+pub const NAVY: ColorF = ColorF { r: 0.0, g: 0.0, b: 0.501960784314, a: 1.0 };
+pub const DIMGREY: ColorF = ColorF { r: 0.411764705882, g: 0.411764705882, b: 0.411764705882, a: 1.0 };
+pub const ORCHID: ColorF = ColorF { r: 0.854901960784, g: 0.439215686275, b: 0.839215686275, a: 1.0 };
+pub const BLUE: ColorF = ColorF { r: 0.0, g: 0.0, b: 1.0, a: 1.0 };
+pub const GHOSTWHITE: ColorF = ColorF { r: 0.972549019608, g: 0.972549019608, b: 1.0, a: 1.0 };
+pub const HONEYDEW: ColorF = ColorF { r: 0.941176470588, g: 1.0, b: 0.941176470588, a: 1.0 };
+pub const CORNFLOWERBLUE: ColorF = ColorF { r: 0.392156862745, g: 0.58431372549, b: 0.929411764706, a: 1.0 };
+pub const DARKBLUE: ColorF = ColorF { r: 0.0, g: 0.0, b: 0.545098039216, a: 1.0 };
+pub const DARKKHAKI: ColorF = ColorF { r: 0.741176470588, g: 0.717647058824, b: 0.419607843137, a: 1.0 };
+pub const MEDIUMPURPLE: ColorF = ColorF { r: 0.576470588235, g: 0.439215686275, b: 0.858823529412, a: 1.0 };
+pub const CORNSILK: ColorF = ColorF { r: 1.0, g: 0.972549019608, b: 0.862745098039, a: 1.0 };
+pub const RED: ColorF = ColorF { r: 1.0, g: 0.0, b: 0.0, a: 1.0 };
+pub const BISQUE: ColorF = ColorF { r: 1.0, g: 0.894117647059, b: 0.76862745098, a: 1.0 };
+pub const SLATEGRAY: ColorF = ColorF { r: 0.439215686275, g: 0.501960784314, b: 0.564705882353, a: 1.0 };
+pub const DARKCYAN: ColorF = ColorF { r: 0.0, g: 0.545098039216, b: 0.545098039216, a: 1.0 };
+pub const KHAKI: ColorF = ColorF { r: 0.941176470588, g: 0.901960784314, b: 0.549019607843, a: 1.0 };
+pub const WHEAT: ColorF = ColorF { r: 0.960784313725, g: 0.870588235294, b: 0.701960784314, a: 1.0 };
+pub const DEEPSKYBLUE: ColorF = ColorF { r: 0.0, g: 0.749019607843, b: 1.0, a: 1.0 };
+pub const REBECCAPURPLE: ColorF = ColorF { r: 0.4, g: 0.2, b: 0.6, a: 1.0 };
+pub const DARKRED: ColorF = ColorF { r: 0.545098039216, g: 0.0, b: 0.0, a: 1.0 };
+pub const STEELBLUE: ColorF = ColorF { r: 0.274509803922, g: 0.509803921569, b: 0.705882352941, a: 1.0 };
+pub const ALICEBLUE: ColorF = ColorF { r: 0.941176470588, g: 0.972549019608, b: 1.0, a: 1.0 };
+pub const LIGHTSLATEGREY: ColorF = ColorF { r: 0.466666666667, g: 0.533333333333, b: 0.6, a: 1.0 };
+pub const GAINSBORO: ColorF = ColorF { r: 0.862745098039, g: 0.862745098039, b: 0.862745098039, a: 1.0 };
+pub const MEDIUMTURQUOISE: ColorF = ColorF { r: 0.282352941176, g: 0.819607843137, b: 0.8, a: 1.0 };
+pub const FLORALWHITE: ColorF = ColorF { r: 1.0, g: 0.980392156863, b: 0.941176470588, a: 1.0 };
+pub const CORAL: ColorF = ColorF { r: 1.0, g: 0.498039215686, b: 0.313725490196, a: 1.0 };
+pub const PURPLE: ColorF = ColorF { r: 0.501960784314, g: 0.0, b: 0.501960784314, a: 1.0 };
+pub const LIGHTGREY: ColorF = ColorF { r: 0.827450980392, g: 0.827450980392, b: 0.827450980392, a: 1.0 };
+pub const LIGHTCYAN: ColorF = ColorF { r: 0.878431372549, g: 1.0, b: 1.0, a: 1.0 };
+pub const DARKSALMON: ColorF = ColorF { r: 0.913725490196, g: 0.588235294118, b: 0.478431372549, a: 1.0 };
+pub const BEIGE: ColorF = ColorF { r: 0.960784313725, g: 0.960784313725, b: 0.862745098039, a: 1.0 };
+pub const AZURE: ColorF = ColorF { r: 0.941176470588, g: 1.0, b: 1.0, a: 1.0 };
+pub const LIGHTSTEELBLUE: ColorF = ColorF { r: 0.690196078431, g: 0.76862745098, b: 0.870588235294, a: 1.0 };
+pub const OLDLACE: ColorF = ColorF { r: 0.992156862745, g: 0.960784313725, b: 0.901960784314, a: 1.0 };
+pub const GREENYELLOW: ColorF = ColorF { r: 0.678431372549, g: 1.0, b: 0.18431372549, a: 1.0 };
+pub const ROYALBLUE: ColorF = ColorF { r: 0.254901960784, g: 0.411764705882, b: 0.882352941176, a: 1.0 };
+pub const LIGHTSEAGREEN: ColorF = ColorF { r: 0.125490196078, g: 0.698039215686, b: 0.666666666667, a: 1.0 };
+pub const MISTYROSE: ColorF = ColorF { r: 1.0, g: 0.894117647059, b: 0.882352941176, a: 1.0 };
+pub const SIENNA: ColorF = ColorF { r: 0.627450980392, g: 0.321568627451, b: 0.176470588235, a: 1.0 };
+pub const LIGHTCORAL: ColorF = ColorF { r: 0.941176470588, g: 0.501960784314, b: 0.501960784314, a: 1.0 };
+pub const ORANGERED: ColorF = ColorF { r: 1.0, g: 0.270588235294, b: 0.0, a: 1.0 };
+pub const NAVAJOWHITE: ColorF = ColorF { r: 1.0, g: 0.870588235294, b: 0.678431372549, a: 1.0 };
+pub const LIME: ColorF = ColorF { r: 0.0, g: 1.0, b: 0.0, a: 1.0 };
+pub const PALEGREEN: ColorF = ColorF { r: 0.596078431373, g: 0.98431372549, b: 0.596078431373, a: 1.0 };
+pub const BURLYWOOD: ColorF = ColorF { r: 0.870588235294, g: 0.721568627451, b: 0.529411764706, a: 1.0 };
+pub const SEASHELL: ColorF = ColorF { r: 1.0, g: 0.960784313725, b: 0.933333333333, a: 1.0 };
+pub const MEDIUMSPRINGGREEN: ColorF = ColorF { r: 0.0, g: 0.980392156863, b: 0.603921568627, a: 1.0 };
+pub const FUCHSIA: ColorF = ColorF { r: 1.0, g: 0.0, b: 1.0, a: 1.0 };
+pub const PAPAYAWHIP: ColorF = ColorF { r: 1.0, g: 0.937254901961, b: 0.835294117647, a: 1.0 };
+pub const BLANCHEDALMOND: ColorF = ColorF { r: 1.0, g: 0.921568627451, b: 0.803921568627, a: 1.0 };
+pub const PERU: ColorF = ColorF { r: 0.803921568627, g: 0.521568627451, b: 0.247058823529, a: 1.0 };
+pub const AQUAMARINE: ColorF = ColorF { r: 0.498039215686, g: 1.0, b: 0.83137254902, a: 1.0 };
+pub const WHITE: ColorF = ColorF { r: 1.0, g: 1.0, b: 1.0, a: 1.0 };
+pub const DARKSLATEGRAY: ColorF = ColorF { r: 0.18431372549, g: 0.309803921569, b: 0.309803921569, a: 1.0 };
+pub const TOMATO: ColorF = ColorF { r: 1.0, g: 0.388235294118, b: 0.278431372549, a: 1.0 };
+pub const IVORY: ColorF = ColorF { r: 1.0, g: 1.0, b: 0.941176470588, a: 1.0 };
+pub const DODGERBLUE: ColorF = ColorF { r: 0.117647058824, g: 0.564705882353, b: 1.0, a: 1.0 };
+pub const LEMONCHIFFON: ColorF = ColorF { r: 1.0, g: 0.980392156863, b: 0.803921568627, a: 1.0 };
+pub const CHOCOLATE: ColorF = ColorF { r: 0.823529411765, g: 0.411764705882, b: 0.117647058824, a: 1.0 };
+pub const ORANGE: ColorF = ColorF { r: 1.0, g: 0.647058823529, b: 0.0, a: 1.0 };
+pub const FORESTGREEN: ColorF = ColorF { r: 0.133333333333, g: 0.545098039216, b: 0.133333333333, a: 1.0 };
+pub const DARKGREY: ColorF = ColorF { r: 0.662745098039, g: 0.662745098039, b: 0.662745098039, a: 1.0 };
+pub const OLIVE: ColorF = ColorF { r: 0.501960784314, g: 0.501960784314, b: 0.0, a: 1.0 };
+pub const MINTCREAM: ColorF = ColorF { r: 0.960784313725, g: 1.0, b: 0.980392156863, a: 1.0 };
+pub const ANTIQUEWHITE: ColorF = ColorF { r: 0.980392156863, g: 0.921568627451, b: 0.843137254902, a: 1.0 };
+pub const DARKORANGE: ColorF = ColorF { r: 1.0, g: 0.549019607843, b: 0.0, a: 1.0 };
+pub const CADETBLUE: ColorF = ColorF { r: 0.372549019608, g: 0.619607843137, b: 0.627450980392, a: 1.0 };
+pub const MOCCASIN: ColorF = ColorF { r: 1.0, g: 0.894117647059, b: 0.709803921569, a: 1.0 };
+pub const LIMEGREEN: ColorF = ColorF { r: 0.196078431373, g: 0.803921568627, b: 0.196078431373, a: 1.0 };
+pub const SADDLEBROWN: ColorF = ColorF { r: 0.545098039216, g: 0.270588235294, b: 0.0745098039216, a: 1.0 };
+pub const GREY: ColorF = ColorF { r: 0.501960784314, g: 0.501960784314, b: 0.501960784314, a: 1.0 };
+pub const DARKSLATEBLUE: ColorF = ColorF { r: 0.282352941176, g: 0.239215686275, b: 0.545098039216, a: 1.0 };
+pub const LIGHTSKYBLUE: ColorF = ColorF { r: 0.529411764706, g: 0.807843137255, b: 0.980392156863, a: 1.0 };
+pub const DEEPPINK: ColorF = ColorF { r: 1.0, g: 0.078431372549, b: 0.576470588235, a: 1.0 };
+pub const PLUM: ColorF = ColorF { r: 0.866666666667, g: 0.627450980392, b: 0.866666666667, a: 1.0 };
+pub const AQUA: ColorF = ColorF { r: 0.0, g: 1.0, b: 1.0, a: 1.0 };
+pub const DARKGOLDENROD: ColorF = ColorF { r: 0.721568627451, g: 0.525490196078, b: 0.043137254902, a: 1.0 };
+pub const MAROON: ColorF = ColorF { r: 0.501960784314, g: 0.0, b: 0.0, a: 1.0 };
+pub const SANDYBROWN: ColorF = ColorF { r: 0.956862745098, g: 0.643137254902, b: 0.376470588235, a: 1.0 };
+pub const MAGENTA: ColorF = ColorF { r: 1.0, g: 0.0, b: 1.0, a: 1.0 };
+pub const TAN: ColorF = ColorF { r: 0.823529411765, g: 0.705882352941, b: 0.549019607843, a: 1.0 };
+pub const ROSYBROWN: ColorF = ColorF { r: 0.737254901961, g: 0.560784313725, b: 0.560784313725, a: 1.0 };
+pub const PINK: ColorF = ColorF { r: 1.0, g: 0.752941176471, b: 0.796078431373, a: 1.0 };
+pub const LIGHTBLUE: ColorF = ColorF { r: 0.678431372549, g: 0.847058823529, b: 0.901960784314, a: 1.0 };
+pub const PALEVIOLETRED: ColorF = ColorF { r: 0.858823529412, g: 0.439215686275, b: 0.576470588235, a: 1.0 };
+pub const MEDIUMSEAGREEN: ColorF = ColorF { r: 0.235294117647, g: 0.701960784314, b: 0.443137254902, a: 1.0 };
+pub const SLATEBLUE: ColorF = ColorF { r: 0.41568627451, g: 0.352941176471, b: 0.803921568627, a: 1.0 };
+pub const DIMGRAY: ColorF = ColorF { r: 0.411764705882, g: 0.411764705882, b: 0.411764705882, a: 1.0 };
+pub const POWDERBLUE: ColorF = ColorF { r: 0.690196078431, g: 0.878431372549, b: 0.901960784314, a: 1.0 };
+pub const SEAGREEN: ColorF = ColorF { r: 0.180392156863, g: 0.545098039216, b: 0.341176470588, a: 1.0 };
+pub const SNOW: ColorF = ColorF { r: 1.0, g: 0.980392156863, b: 0.980392156863, a: 1.0 };
+pub const MEDIUMBLUE: ColorF = ColorF { r: 0.0, g: 0.0, b: 0.803921568627, a: 1.0 };
+pub const MIDNIGHTBLUE: ColorF = ColorF { r: 0.0980392156863, g: 0.0980392156863, b: 0.439215686275, a: 1.0 };
+pub const PALETURQUOISE: ColorF = ColorF { r: 0.686274509804, g: 0.933333333333, b: 0.933333333333, a: 1.0 };
+pub const PALEGOLDENROD: ColorF = ColorF { r: 0.933333333333, g: 0.909803921569, b: 0.666666666667, a: 1.0 };
+pub const WHITESMOKE: ColorF = ColorF { r: 0.960784313725, g: 0.960784313725, b: 0.960784313725, a: 1.0 };
+pub const DARKORCHID: ColorF = ColorF { r: 0.6, g: 0.196078431373, b: 0.8, a: 1.0 };
+pub const SALMON: ColorF = ColorF { r: 0.980392156863, g: 0.501960784314, b: 0.447058823529, a: 1.0 };
+pub const LIGHTSLATEGRAY: ColorF = ColorF { r: 0.466666666667, g: 0.533333333333, b: 0.6, a: 1.0 };
+pub const LAWNGREEN: ColorF = ColorF { r: 0.486274509804, g: 0.988235294118, b: 0.0, a: 1.0 };
+pub const LIGHTGREEN: ColorF = ColorF { r: 0.564705882353, g: 0.933333333333, b: 0.564705882353, a: 1.0 };
+pub const LIGHTGRAY: ColorF = ColorF { r: 0.827450980392, g: 0.827450980392, b: 0.827450980392, a: 1.0 };
+pub const HOTPINK: ColorF = ColorF { r: 1.0, g: 0.411764705882, b: 0.705882352941, a: 1.0 };
+pub const LIGHTYELLOW: ColorF = ColorF { r: 1.0, g: 1.0, b: 0.878431372549, a: 1.0 };
+pub const LAVENDERBLUSH: ColorF = ColorF { r: 1.0, g: 0.941176470588, b: 0.960784313725, a: 1.0 };
+pub const LINEN: ColorF = ColorF { r: 0.980392156863, g: 0.941176470588, b: 0.901960784314, a: 1.0 };
+pub const MEDIUMAQUAMARINE: ColorF = ColorF { r: 0.4, g: 0.803921568627, b: 0.666666666667, a: 1.0 };
+pub const GREEN: ColorF = ColorF { r: 0.0, g: 0.501960784314, b: 0.0, a: 1.0 };
+pub const BLUEVIOLET: ColorF = ColorF { r: 0.541176470588, g: 0.16862745098, b: 0.886274509804, a: 1.0 };
+pub const PEACHPUFF: ColorF = ColorF { r: 1.0, g: 0.854901960784, b: 0.725490196078, a: 1.0 };
diff --git a/gfx/wr/webrender/src/debug_font_data.rs b/gfx/wr/webrender/src/debug_font_data.rs
new file mode 100644
index 0000000000..a891bf0d38
--- /dev/null
+++ b/gfx/wr/webrender/src/debug_font_data.rs
@@ -0,0 +1,1914 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#[derive(Debug)]
+pub struct BakedGlyph { // Element type of the GLYPHS table: four integer coordinates plus three float metrics.
+    pub x0: u32, // NOTE(review): (x0, y0) looks like one corner of the glyph's rectangle in FONT_BITMAP — confirm against the text renderer.
+    pub y0: u32, // In GLYPHS, y0 is constant along a run of entries while x0 increases, i.e. glyphs appear packed in rows.
+    pub x1: u32, // NOTE(review): (x1, y1) looks like the opposite corner; every x1 in GLYPHS is below BMP_WIDTH — inclusive/exclusive not shown here.
+    pub y1: u32, // Every y1 in GLYPHS is below BMP_HEIGHT.
+    pub xo: f32, // presumably the horizontal drawing offset from the pen position — TODO confirm.
+    pub yo: f32, // presumably the vertical drawing offset (almost always negative in GLYPHS, suggesting "above the baseline") — TODO confirm.
+    pub xa: f32, // presumably the horizontal advance to the next glyph — TODO confirm.
+}
+
+pub const FIRST_GLYPH_INDEX: i32 = 32; // presumably the character code that GLYPHS[0] describes (32 is ASCII space; GLYPHS[0] is an empty rectangle) — confirm against the lookup code.
+pub const BMP_WIDTH: i32 = 128; // BMP_WIDTH * BMP_HEIGHT == 16384, which is the declared length of FONT_BITMAP.
+pub const BMP_HEIGHT: i32 = 128; // NOTE(review): row-major layout of FONT_BITMAP is assumed, not shown in this file.
+pub const FONT_SIZE: i32 = 19; // NOTE(review): presumably the size the font was rasterized at; units are not stated here.
+
+pub const GLYPHS: [BakedGlyph; 96] = [ // 96 entries; presumably one per character code starting at FIRST_GLYPH_INDEX (32..=127) — confirm against the code that indexes this table.
+    BakedGlyph {
+        x0: 1,
+        y0: 1,
+        x1: 1,
+        y1: 1,
+        xo: 0.000000,
+        yo: 0.000000,
+        xa: 3.864407,
+    },
+    BakedGlyph {
+        x0: 2,
+        y0: 1,
+        x1: 5,
+        y1: 14,
+        xo: 1.000000,
+        yo: -12.000000,
+        xa: 4.644068,
+    },
+    BakedGlyph {
+        x0: 6,
+        y0: 1,
+        x1: 11,
+        y1: 6,
+        xo: 1.000000,
+        yo: -13.000000,
+        xa: 6.644068,
+    },
+    BakedGlyph {
+        x0: 12,
+        y0: 1,
+        x1: 23,
+        y1: 13,
+        xo: 0.000000,
+        yo: -12.000000,
+        xa: 11.067797,
+    },
+    BakedGlyph {
+        x0: 24,
+        y0: 1,
+        x1: 32,
+        y1: 17,
+        xo: 1.000000,
+        yo: -14.000000,
+        xa: 9.559322,
+    },
+    BakedGlyph {
+        x0: 33,
+        y0: 1,
+        x1: 46,
+        y1: 14,
+        xo: 1.000000,
+        yo: -12.000000,
+        xa: 14.084745,
+    },
+    BakedGlyph {
+        x0: 47,
+        y0: 1,
+        x1: 58,
+        y1: 14,
+        xo: 0.000000,
+        yo: -12.000000,
+        xa: 10.983051,
+    },
+    BakedGlyph {
+        x0: 59,
+        y0: 1,
+        x1: 61,
+        y1: 6,
+        xo: 1.000000,
+        yo: -13.000000,
+        xa: 4.067797,
+    },
+    BakedGlyph {
+        x0: 62,
+        y0: 1,
+        x1: 67,
+        y1: 19,
+        xo: 1.000000,
+        yo: -14.000000,
+        xa: 5.254237,
+    },
+    BakedGlyph {
+        x0: 68,
+        y0: 1,
+        x1: 72,
+        y1: 19,
+        xo: 0.000000,
+        yo: -14.000000,
+        xa: 5.254237,
+    },
+    BakedGlyph {
+        x0: 73,
+        y0: 1,
+        x1: 81,
+        y1: 8,
+        xo: 0.000000,
+        yo: -12.000000,
+        xa: 8.000000,
+    },
+    BakedGlyph {
+        x0: 82,
+        y0: 1,
+        x1: 91,
+        y1: 11,
+        xo: 0.000000,
+        yo: -10.000000,
+        xa: 9.559322,
+    },
+    BakedGlyph {
+        x0: 92,
+        y0: 1,
+        x1: 95,
+        y1: 6,
+        xo: 0.000000,
+        yo: -2.000000,
+        xa: 4.169492,
+    },
+    BakedGlyph {
+        x0: 96,
+        y0: 1,
+        x1: 101,
+        y1: 3,
+        xo: 0.000000,
+        yo: -6.000000,
+        xa: 4.779661,
+    },
+    BakedGlyph {
+        x0: 102,
+        y0: 1,
+        x1: 105,
+        y1: 4,
+        xo: 1.000000,
+        yo: -2.000000,
+        xa: 4.169492,
+    },
+    BakedGlyph {
+        x0: 106,
+        y0: 1,
+        x1: 114,
+        y1: 19,
+        xo: -1.000000,
+        yo: -14.000000,
+        xa: 6.084746,
+    },
+    BakedGlyph {
+        x0: 115,
+        y0: 1,
+        x1: 123,
+        y1: 14,
+        xo: 1.000000,
+        yo: -12.000000,
+        xa: 9.559322,
+    },
+    BakedGlyph {
+        x0: 1,
+        y0: 20,
+        x1: 6,
+        y1: 32,
+        xo: 1.000000,
+        yo: -12.000000,
+        xa: 9.559322,
+    },
+    BakedGlyph {
+        x0: 7,
+        y0: 20,
+        x1: 15,
+        y1: 32,
+        xo: 1.000000,
+        yo: -12.000000,
+        xa: 9.559322,
+    },
+    BakedGlyph {
+        x0: 16,
+        y0: 20,
+        x1: 24,
+        y1: 33,
+        xo: 1.000000,
+        yo: -12.000000,
+        xa: 9.559322,
+    },
+    BakedGlyph {
+        x0: 25,
+        y0: 20,
+        x1: 34,
+        y1: 32,
+        xo: 0.000000,
+        yo: -12.000000,
+        xa: 9.559322,
+    },
+    BakedGlyph {
+        x0: 35,
+        y0: 20,
+        x1: 43,
+        y1: 33,
+        xo: 1.000000,
+        yo: -12.000000,
+        xa: 9.559322,
+    },
+    BakedGlyph {
+        x0: 44,
+        y0: 20,
+        x1: 52,
+        y1: 33,
+        xo: 1.000000,
+        yo: -12.000000,
+        xa: 9.559322,
+    },
+    BakedGlyph {
+        x0: 53,
+        y0: 20,
+        x1: 61,
+        y1: 32,
+        xo: 1.000000,
+        yo: -12.000000,
+        xa: 9.559322,
+    },
+    BakedGlyph {
+        x0: 62,
+        y0: 20,
+        x1: 70,
+        y1: 33,
+        xo: 1.000000,
+        yo: -12.000000,
+        xa: 9.559322,
+    },
+    BakedGlyph {
+        x0: 71,
+        y0: 20,
+        x1: 79,
+        y1: 33,
+        xo: 1.000000,
+        yo: -12.000000,
+        xa: 9.559322,
+    },
+    BakedGlyph {
+        x0: 80,
+        y0: 20,
+        x1: 83,
+        y1: 30,
+        xo: 1.000000,
+        yo: -9.000000,
+        xa: 4.169492,
+    },
+    BakedGlyph {
+        x0: 84,
+        y0: 20,
+        x1: 88,
+        y1: 32,
+        xo: 0.000000,
+        yo: -9.000000,
+        xa: 4.169492,
+    },
+    BakedGlyph {
+        x0: 89,
+        y0: 20,
+        x1: 98,
+        y1: 28,
+        xo: 0.000000,
+        yo: -9.000000,
+        xa: 9.559322,
+    },
+    BakedGlyph {
+        x0: 99,
+        y0: 20,
+        x1: 108,
+        y1: 26,
+        xo: 0.000000,
+        yo: -8.000000,
+        xa: 9.559322,
+    },
+    BakedGlyph {
+        x0: 109,
+        y0: 20,
+        x1: 118,
+        y1: 28,
+        xo: 0.000000,
+        yo: -9.000000,
+        xa: 9.559322,
+    },
+    BakedGlyph {
+        x0: 119,
+        y0: 20,
+        x1: 125,
+        y1: 33,
+        xo: 0.000000,
+        yo: -12.000000,
+        xa: 6.440678,
+    },
+    BakedGlyph {
+        x0: 1,
+        y0: 34,
+        x1: 15,
+        y1: 49,
+        xo: 1.000000,
+        yo: -12.000000,
+        xa: 15.932203,
+    },
+    BakedGlyph {
+        x0: 16,
+        y0: 34,
+        x1: 27,
+        y1: 46,
+        xo: 0.000000,
+        yo: -12.000000,
+        xa: 10.864407,
+    },
+    BakedGlyph {
+        x0: 28,
+        y0: 34,
+        x1: 37,
+        y1: 47,
+        xo: 1.000000,
+        yo: -12.000000,
+        xa: 10.677966,
+    },
+    BakedGlyph {
+        x0: 38,
+        y0: 34,
+        x1: 47,
+        y1: 47,
+        xo: 1.000000,
+        yo: -12.000000,
+        xa: 10.322034,
+    },
+    BakedGlyph {
+        x0: 48,
+        y0: 34,
+        x1: 58,
+        y1: 47,
+        xo: 1.000000,
+        yo: -12.000000,
+        xa: 11.898305,
+    },
+    BakedGlyph {
+        x0: 59,
+        y0: 34,
+        x1: 67,
+        y1: 46,
+        xo: 1.000000,
+        yo: -12.000000,
+        xa: 9.406779,
+    },
+    BakedGlyph {
+        x0: 68,
+        y0: 34,
+        x1: 76,
+        y1: 46,
+        xo: 1.000000,
+        yo: -12.000000,
+        xa: 8.813560,
+    },
+    BakedGlyph {
+        x0: 77,
+        y0: 34,
+        x1: 86,
+        y1: 47,
+        xo: 1.000000,
+        yo: -12.000000,
+        xa: 11.152542,
+    },
+    BakedGlyph {
+        x0: 87,
+        y0: 34,
+        x1: 97,
+        y1: 46,
+        xo: 1.000000,
+        yo: -12.000000,
+        xa: 11.728813,
+    },
+    BakedGlyph {
+        x0: 98,
+        y0: 34,
+        x1: 100,
+        y1: 46,
+        xo: 1.000000,
+        yo: -12.000000,
+        xa: 4.203390,
+    },
+    BakedGlyph {
+        x0: 101,
+        y0: 34,
+        x1: 108,
+        y1: 47,
+        xo: 0.000000,
+        yo: -12.000000,
+        xa: 8.254237,
+    },
+    BakedGlyph {
+        x0: 109,
+        y0: 34,
+        x1: 118,
+        y1: 46,
+        xo: 1.000000,
+        yo: -12.000000,
+        xa: 10.152542,
+    },
+    BakedGlyph {
+        x0: 1,
+        y0: 50,
+        x1: 9,
+        y1: 62,
+        xo: 1.000000,
+        yo: -12.000000,
+        xa: 8.508474,
+    },
+    BakedGlyph {
+        x0: 10,
+        y0: 50,
+        x1: 23,
+        y1: 62,
+        xo: 1.000000,
+        yo: -12.000000,
+        xa: 14.661017,
+    },
+    BakedGlyph {
+        x0: 24,
+        y0: 50,
+        x1: 34,
+        y1: 62,
+        xo: 1.000000,
+        yo: -12.000000,
+        xa: 12.016949,
+    },
+    BakedGlyph {
+        x0: 35,
+        y0: 50,
+        x1: 47,
+        y1: 63,
+        xo: 1.000000,
+        yo: -12.000000,
+        xa: 13.118644,
+    },
+    BakedGlyph {
+        x0: 48,
+        y0: 50,
+        x1: 57,
+        y1: 62,
+        xo: 1.000000,
+        yo: -12.000000,
+        xa: 10.033898,
+    },
+    BakedGlyph {
+        x0: 58,
+        y0: 50,
+        x1: 70,
+        y1: 66,
+        xo: 1.000000,
+        yo: -12.000000,
+        xa: 13.118644,
+    },
+    BakedGlyph {
+        x0: 71,
+        y0: 50,
+        x1: 81,
+        y1: 62,
+        xo: 1.000000,
+        yo: -12.000000,
+        xa: 10.474576,
+    },
+    BakedGlyph {
+        x0: 82,
+        y0: 50,
+        x1: 91,
+        y1: 63,
+        xo: 0.000000,
+        yo: -12.000000,
+        xa: 8.762712,
+    },
+    BakedGlyph {
+        x0: 92,
+        y0: 50,
+        x1: 101,
+        y1: 62,
+        xo: 0.000000,
+        yo: -12.000000,
+        xa: 9.288136,
+    },
+    BakedGlyph {
+        x0: 102,
+        y0: 50,
+        x1: 112,
+        y1: 63,
+        xo: 1.000000,
+        yo: -12.000000,
+        xa: 11.525424,
+    },
+    BakedGlyph {
+        x0: 113,
+        y0: 50,
+        x1: 124,
+        y1: 62,
+        xo: 0.000000,
+        yo: -12.000000,
+        xa: 10.576271,
+    },
+    BakedGlyph {
+        x0: 1,
+        y0: 67,
+        x1: 16,
+        y1: 79,
+        xo: 0.000000,
+        yo: -12.000000,
+        xa: 15.610169,
+    },
+    BakedGlyph {
+        x0: 17,
+        y0: 67,
+        x1: 27,
+        y1: 79,
+        xo: 0.000000,
+        yo: -12.000000,
+        xa: 10.305085,
+    },
+    BakedGlyph {
+        x0: 28,
+        y0: 67,
+        x1: 38,
+        y1: 79,
+        xo: 0.000000,
+        yo: -12.000000,
+        xa: 9.644068,
+    },
+    BakedGlyph {
+        x0: 39,
+        y0: 67,
+        x1: 48,
+        y1: 79,
+        xo: 0.000000,
+        yo: -12.000000,
+        xa: 9.491526,
+    },
+    BakedGlyph {
+        x0: 49,
+        y0: 67,
+        x1: 54,
+        y1: 85,
+        xo: 1.000000,
+        yo: -14.000000,
+        xa: 5.254237,
+    },
+    BakedGlyph {
+        x0: 55,
+        y0: 67,
+        x1: 63,
+        y1: 85,
+        xo: -1.000000,
+        yo: -14.000000,
+        xa: 6.084746,
+    },
+    BakedGlyph {
+        x0: 64,
+        y0: 67,
+        x1: 68,
+        y1: 85,
+        xo: 0.000000,
+        yo: -14.000000,
+        xa: 5.254237,
+    },
+    BakedGlyph {
+        x0: 69,
+        y0: 67,
+        x1: 77,
+        y1: 74,
+        xo: 1.000000,
+        yo: -12.000000,
+        xa: 9.559322,
+    },
+    BakedGlyph {
+        x0: 78,
+        y0: 67,
+        x1: 88,
+        y1: 69,
+        xo: -1.000000,
+        yo: 2.000000,
+        xa: 8.305085,
+    },
+    BakedGlyph {
+        x0: 89,
+        y0: 67,
+        x1: 93,
+        y1: 72,
+        xo: 1.000000,
+        yo: -14.000000,
+        xa: 6.372881,
+    },
+    BakedGlyph {
+        x0: 94,
+        y0: 67,
+        x1: 102,
+        y1: 77,
+        xo: 0.000000,
+        yo: -9.000000,
+        xa: 8.627119,
+    },
+    BakedGlyph {
+        x0: 103,
+        y0: 67,
+        x1: 111,
+        y1: 82,
+        xo: 1.000000,
+        yo: -14.000000,
+        xa: 9.881356,
+    },
+    BakedGlyph {
+        x0: 112,
+        y0: 67,
+        x1: 120,
+        y1: 77,
+        xo: 0.000000,
+        yo: -9.000000,
+        xa: 7.796610,
+    },
+    BakedGlyph {
+        x0: 1,
+        y0: 86,
+        x1: 10,
+        y1: 101,
+        xo: 0.000000,
+        yo: -14.000000,
+        xa: 9.881356,
+    },
+    BakedGlyph {
+        x0: 11,
+        y0: 86,
+        x1: 20,
+        y1: 96,
+        xo: 0.000000,
+        yo: -9.000000,
+        xa: 9.288136,
+    },
+    BakedGlyph {
+        x0: 21,
+        y0: 86,
+        x1: 27,
+        y1: 100,
+        xo: 1.000000,
+        yo: -14.000000,
+        xa: 6.372881,
+    },
+    BakedGlyph {
+        x0: 28,
+        y0: 86,
+        x1: 37,
+        y1: 99,
+        xo: 0.000000,
+        yo: -9.000000,
+        xa: 9.711864,
+    },
+    BakedGlyph {
+        x0: 38,
+        y0: 86,
+        x1: 46,
+        y1: 100,
+        xo: 1.000000,
+        yo: -14.000000,
+        xa: 9.644068,
+    },
+    BakedGlyph {
+        x0: 47,
+        y0: 86,
+        x1: 49,
+        y1: 99,
+        xo: 1.000000,
+        yo: -13.000000,
+        xa: 4.016949,
+    },
+    BakedGlyph {
+        x0: 50,
+        y0: 86,
+        x1: 55,
+        y1: 103,
+        xo: -2.000000,
+        yo: -13.000000,
+        xa: 4.016949,
+    },
+    BakedGlyph {
+        x0: 56,
+        y0: 86,
+        x1: 64,
+        y1: 100,
+        xo: 1.000000,
+        yo: -14.000000,
+        xa: 8.389831,
+    },
+    BakedGlyph {
+        x0: 65,
+        y0: 86,
+        x1: 68,
+        y1: 101,
+        xo: 1.000000,
+        yo: -14.000000,
+        xa: 4.322034,
+    },
+    BakedGlyph {
+        x0: 69,
+        y0: 86,
+        x1: 82,
+        y1: 95,
+        xo: 1.000000,
+        yo: -9.000000,
+        xa: 14.627119,
+    },
+    BakedGlyph {
+        x0: 83,
+        y0: 86,
+        x1: 91,
+        y1: 95,
+        xo: 1.000000,
+        yo: -9.000000,
+        xa: 9.644068,
+    },
+    BakedGlyph {
+        x0: 92,
+        y0: 86,
+        x1: 101,
+        y1: 96,
+        xo: 0.000000,
+        yo: -9.000000,
+        xa: 9.864407,
+    },
+    BakedGlyph {
+        x0: 102,
+        y0: 86,
+        x1: 110,
+        y1: 99,
+        xo: 1.000000,
+        yo: -9.000000,
+        xa: 9.881356,
+    },
+    BakedGlyph {
+        x0: 111,
+        y0: 86,
+        x1: 120,
+        y1: 99,
+        xo: 0.000000,
+        yo: -9.000000,
+        xa: 9.881356,
+    },
+    BakedGlyph {
+        x0: 1,
+        y0: 104,
+        x1: 7,
+        y1: 113,
+        xo: 1.000000,
+        yo: -9.000000,
+        xa: 6.338983,
+    },
+    BakedGlyph {
+        x0: 8,
+        y0: 104,
+        x1: 15,
+        y1: 114,
+        xo: 0.000000,
+        yo: -9.000000,
+        xa: 7.254237,
+    },
+    BakedGlyph {
+        x0: 16,
+        y0: 104,
+        x1: 22,
+        y1: 117,
+        xo: 1.000000,
+        yo: -12.000000,
+        xa: 6.559322,
+    },
+    BakedGlyph {
+        x0: 23,
+        y0: 104,
+        x1: 31,
+        y1: 114,
+        xo: 1.000000,
+        yo: -9.000000,
+        xa: 9.644068,
+    },
+    BakedGlyph {
+        x0: 32,
+        y0: 104,
+        x1: 40,
+        y1: 113,
+        xo: 0.000000,
+        yo: -9.000000,
+        xa: 8.135593,
+    },
+    BakedGlyph {
+        x0: 41,
+        y0: 104,
+        x1: 54,
+        y1: 113,
+        xo: 0.000000,
+        yo: -9.000000,
+        xa: 13.135593,
+    },
+    BakedGlyph {
+        x0: 55,
+        y0: 104,
+        x1: 63,
+        y1: 113,
+        xo: 0.000000,
+        yo: -9.000000,
+        xa: 8.457627,
+    },
+    BakedGlyph {
+        x0: 64,
+        y0: 104,
+        x1: 72,
+        y1: 117,
+        xo: 0.000000,
+        yo: -9.000000,
+        xa: 8.033898,
+    },
+    BakedGlyph {
+        x0: 73,
+        y0: 104,
+        x1: 81,
+        y1: 113,
+        xo: 0.000000,
+        yo: -9.000000,
+        xa: 7.711864,
+    },
+    BakedGlyph {
+        x0: 82,
+        y0: 104,
+        x1: 88,
+        y1: 122,
+        xo: 0.000000,
+        yo: -14.000000,
+        xa: 5.406780,
+    },
+    BakedGlyph {
+        x0: 89,
+        y0: 104,
+        x1: 91,
+        y1: 122,
+        xo: 1.000000,
+        yo: -14.000000,
+        xa: 4.440678,
+    },
+    BakedGlyph {
+        x0: 92,
+        y0: 104,
+        x1: 97,
+        y1: 122,
+        xo: 0.000000,
+        yo: -14.000000,
+        xa: 5.406780,
+    },
+    BakedGlyph {
+        x0: 98,
+        y0: 104,
+        x1: 107,
+        y1: 108,
+        xo: 0.000000,
+        yo: -7.000000,
+        xa: 9.559322,
+    },
+    BakedGlyph {
+        x0: 108,
+        y0: 104,
+        x1: 116,
+        y1: 117,
+        xo: 0.000000,
+        yo: -13.000000,
+        xa: 8.474576,
+    },
+];
+
+pub const FONT_BITMAP: [u8; 16384] = [
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x34, 0xae, 0x00, 0x00, 0x81, 0x90, 0x00, 0xe0, 0x31, 0x00, 0x00, 0x00, 0x00, 0x06,
+    0xba, 0x05, 0x00, 0x00, 0xa2, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1a, 0x0d, 0x00, 0x00, 0x00,
+    0x00, 0x16, 0xb2, 0xec, 0xbc, 0x1f, 0x00, 0x00, 0x00, 0x49, 0x8c, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x0d, 0x9d, 0xeb, 0xe5, 0x89, 0x03, 0x00, 0x00, 0x00, 0x00, 0x81, 0x90, 0x00, 0x00, 0x00,
+    0x00, 0x13, 0x00, 0x00, 0x11, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6c, 0x6b, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4a, 0x76, 0x00,
+    0x3b, 0x70, 0x70, 0x70, 0x22, 0x00, 0x56, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x2d, 0x1e, 0x00, 0x00, 0x18, 0xac, 0xea, 0xd2, 0x6d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x45, 0xe9, 0x00, 0x00, 0x8a, 0x99, 0x00, 0xf6, 0x2d, 0x00, 0x00, 0x00, 0x00, 0x2f,
+    0xd8, 0x00, 0x00, 0x08, 0xf7, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0xad, 0x52, 0x00, 0x00, 0x00,
+    0x00, 0xa5, 0x8f, 0x0c, 0x7d, 0xba, 0x00, 0x00, 0x06, 0xda, 0x3d, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x92, 0xb9, 0x20, 0x28, 0xd7, 0x64, 0x00, 0x00, 0x00, 0x00, 0x8a, 0x99, 0x00, 0x00, 0x00,
+    0x28, 0xdd, 0x03, 0x00, 0xa5, 0x63, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x83, 0x7f, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb6, 0x45, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xaa, 0x00,
+    0x54, 0xa0, 0xa0, 0xa0, 0x31, 0x00, 0xbb, 0xe4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
+    0xe0, 0x34, 0x00, 0x0b, 0xd7, 0x95, 0x1d, 0x40, 0xe1, 0x6d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x45, 0xe9, 0x00, 0x00, 0x7a, 0x88, 0x00, 0xe6, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x60,
+    0xa8, 0x00, 0x00, 0x34, 0xd3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb2, 0x6b, 0x0b, 0x00, 0x00,
+    0x00, 0xea, 0x22, 0x00, 0x10, 0xf7, 0x05, 0x00, 0x73, 0xab, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0xd6, 0x50, 0x00, 0x00, 0x81, 0x98, 0x00, 0x00, 0x00, 0x00, 0x7a, 0x88, 0x00, 0x00, 0x00,
+    0xb4, 0x6a, 0x00, 0x00, 0x2e, 0xe4, 0x0d, 0x00, 0x00, 0x24, 0xcd, 0x61, 0x65, 0x62, 0x61, 0xcd,
+    0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb6, 0x45, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9b, 0x83, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c,
+    0xda, 0x00, 0x00, 0x5e, 0xd0, 0x03, 0x00, 0x00, 0x49, 0xe6, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x44, 0xe9, 0x00, 0x00, 0x5a, 0x67, 0x00, 0xbf, 0x02, 0x00, 0x1b, 0x86, 0x86, 0xc4,
+    0xc5, 0x86, 0x86, 0xb0, 0xd9, 0x86, 0x24, 0x00, 0x00, 0x57, 0xc9, 0xea, 0xd3, 0xef, 0x85, 0x00,
+    0x00, 0xed, 0x1e, 0x00, 0x0d, 0xf7, 0x07, 0x15, 0xe8, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0xbf, 0x65, 0x00, 0x00, 0xc6, 0x6a, 0x00, 0x00, 0x00, 0x00, 0x5a, 0x67, 0x00, 0x00, 0x47,
+    0xd7, 0x04, 0x00, 0x00, 0x00, 0x9d, 0x85, 0x00, 0x00, 0x0d, 0x55, 0x8d, 0xcc, 0xcc, 0x8c, 0x54,
+    0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb6, 0x45, 0x00, 0x00, 0x00, 0x00, 0x01, 0xe0, 0x2e, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x96,
+    0x81, 0x00, 0x00, 0xb7, 0x80, 0x00, 0x00, 0x00, 0x04, 0xf1, 0x45, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x3d, 0xe3, 0x00, 0x00, 0x0f, 0x12, 0x00, 0x21, 0x00, 0x00, 0x13, 0x5f, 0x5f, 0xe0,
+    0x86, 0x5f, 0x5f, 0xc2, 0x9f, 0x5f, 0x1a, 0x00, 0x16, 0xf9, 0x3d, 0x01, 0x00, 0x03, 0x13, 0x00,
+    0x00, 0xae, 0x80, 0x04, 0x6c, 0xc4, 0x00, 0x96, 0x87, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x56, 0xe2, 0x1f, 0x8e, 0xc2, 0x06, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x12, 0x00, 0x00, 0xc7,
+    0x4f, 0x00, 0x00, 0x00, 0x00, 0x1a, 0xee, 0x0d, 0x00, 0x00, 0x00, 0x3b, 0xb4, 0xb5, 0x37, 0x00,
+    0x00, 0x00, 0x05, 0x5f, 0x5f, 0x5f, 0xd1, 0x8a, 0x5f, 0x5f, 0x3a, 0x00, 0x0b, 0x76, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0xea,
+    0x27, 0x00, 0x00, 0xd8, 0x4f, 0x00, 0x00, 0x00, 0x00, 0xc2, 0x67, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x35, 0xd8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xf2,
+    0x16, 0x00, 0x00, 0xc6, 0x3f, 0x00, 0x00, 0x00, 0x65, 0xc3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x1f, 0xc7, 0xef, 0xd1, 0x29, 0x2c, 0xe4, 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x0d, 0xd1, 0xfa, 0x8f, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0xf6,
+    0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0xca, 0x3b, 0x00, 0x00, 0x23, 0xec, 0x3c, 0x3d, 0xea, 0x21,
+    0x00, 0x00, 0x07, 0x93, 0x93, 0x93, 0xe0, 0xb0, 0x93, 0x93, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4a, 0xcd,
+    0x00, 0x00, 0x00, 0xf0, 0x3e, 0x00, 0x00, 0x00, 0x00, 0xb0, 0x7f, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x25, 0xc5, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x25, 0xe4,
+    0x00, 0x00, 0x03, 0xf4, 0x0f, 0x00, 0x00, 0x00, 0x4d, 0xec, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0xba, 0x63, 0x07, 0x82, 0xc2, 0x8d, 0x0c, 0x00, 0x00, 0x00,
+    0x24, 0xda, 0x7b, 0x97, 0xc2, 0x0b, 0x00, 0x01, 0xa2, 0x03, 0x00, 0x00, 0x00, 0x00, 0x2a, 0xe1,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa3, 0x69, 0x00, 0x00, 0x0a, 0x5b, 0x00, 0x00, 0x5d, 0x0a,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb6, 0x45, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa4, 0x74,
+    0x00, 0x00, 0x00, 0xf6, 0x3a, 0x00, 0x00, 0x00, 0x00, 0xab, 0x85, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x09, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x1e, 0x67, 0xbe,
+    0x1e, 0x1e, 0x40, 0xe4, 0x1e, 0x1e, 0x08, 0x00, 0x01, 0x96, 0xf2, 0x91, 0x2b, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x4c, 0xce, 0x02, 0x8c, 0xb1, 0x37, 0xa3, 0xa2, 0x00, 0x00, 0x00,
+    0xc6, 0x75, 0x00, 0x01, 0x9c, 0xbe, 0x0a, 0x28, 0xd1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x57, 0xbc,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7d, 0x96, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb6, 0x45, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0b, 0xf1, 0x1c,
+    0x00, 0x00, 0x00, 0xde, 0x49, 0x00, 0x00, 0x00, 0x00, 0xbc, 0x6d, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x29, 0xcb, 0xea, 0xe2,
+    0xcb, 0xcb, 0xe1, 0xeb, 0xcb, 0xcb, 0x37, 0x00, 0x00, 0x00, 0x2d, 0x93, 0xee, 0xb4, 0x1f, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x05, 0xd8, 0x40, 0x00, 0xe2, 0x2e, 0x00, 0x19, 0xf5, 0x02, 0x00, 0x13,
+    0xff, 0x1d, 0x00, 0x00, 0x02, 0xa1, 0xbb, 0x94, 0x7b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x84, 0x96,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x56, 0xc4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb6, 0x45, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x59, 0xc0, 0x00,
+    0x00, 0x00, 0x00, 0xc4, 0x6e, 0x00, 0x00, 0x00, 0x00, 0xe2, 0x53, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb6, 0x52,
+    0x00, 0x00, 0x8b, 0x7d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0xa7, 0xd7, 0x04,
+    0x00, 0x00, 0x00, 0x00, 0x70, 0xad, 0x00, 0x00, 0xf2, 0x15, 0x00, 0x06, 0xf7, 0x0a, 0x00, 0x10,
+    0xff, 0x23, 0x00, 0x00, 0x00, 0x03, 0xb2, 0xfb, 0x13, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x9b,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0xbe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x31, 0x13, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb3, 0x67, 0x00,
+    0x00, 0x00, 0x00, 0x77, 0xb5, 0x00, 0x00, 0x00, 0x2b, 0xf5, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x33, 0x8c, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe8, 0x20,
+    0x00, 0x00, 0xbd, 0x4b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0xfc, 0x2c,
+    0x00, 0x00, 0x00, 0x13, 0xe7, 0x23, 0x00, 0x00, 0xc1, 0x5c, 0x00, 0x47, 0xd7, 0x00, 0x00, 0x00,
+    0xb5, 0xaf, 0x0d, 0x00, 0x0a, 0x60, 0xe1, 0xbd, 0xa5, 0x01, 0x00, 0x00, 0x00, 0x00, 0x51, 0xc1,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x82, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0xf4, 0x13, 0x00,
+    0x00, 0x00, 0x00, 0x19, 0xf3, 0x57, 0x00, 0x0b, 0xc0, 0x98, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x7f, 0xfe, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1b, 0xec, 0x00,
+    0x00, 0x01, 0xee, 0x19, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x15, 0xef, 0x0c,
+    0x00, 0x00, 0x00, 0x94, 0x8a, 0x00, 0x00, 0x00, 0x35, 0xe8, 0xc4, 0xed, 0x44, 0x00, 0x00, 0x00,
+    0x15, 0xa9, 0xf1, 0xdc, 0xea, 0xaf, 0x1e, 0x07, 0xc7, 0x73, 0x00, 0x00, 0x00, 0x00, 0x24, 0xe6,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa8, 0x62, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x67, 0xb3, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x3c, 0xea, 0xdb, 0xf2, 0xae, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x02, 0x19, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x85, 0x9b, 0x61, 0x52, 0x6c, 0xd1, 0x89, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x34, 0x10, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x03, 0x1b, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xf4,
+    0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0xcf, 0x35, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc1, 0x5a, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x2c, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x15, 0x69, 0x8f, 0xe2, 0xa5, 0x28, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xba,
+    0x5e, 0x00, 0x00, 0x00, 0x00, 0x26, 0xe9, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0xf2, 0x0b, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xad, 0x52, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x35,
+    0xe1, 0x0a, 0x00, 0x00, 0x00, 0xaf, 0x71, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xa6, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x93, 0x46, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0xa1, 0x7d, 0x00, 0x00, 0x3e, 0xd9, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xcf, 0x4d, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x1a, 0xd2, 0x03, 0x00, 0xa0, 0x4f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a, 0xed, 0x06, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x05, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x1a, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x28, 0xb6, 0x00, 0x14, 0x93, 0xe1, 0xe8, 0x99, 0x42, 0x00, 0x00, 0x00,
+    0x2b, 0xaa, 0xea, 0xf3, 0xca, 0x4e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0xbd,
+    0x30, 0x00, 0x00, 0x00, 0x63, 0xbe, 0xbe, 0xbe, 0xbe, 0x94, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d,
+    0x5a, 0x8c, 0x92, 0x00, 0x00, 0x9e, 0xbe, 0xbe, 0xbe, 0xbe, 0xbe, 0xbe, 0x3a, 0x00, 0x00, 0x39,
+    0xbe, 0xed, 0xde, 0x94, 0x0f, 0x00, 0x00, 0x00, 0x47, 0xc5, 0xf0, 0xce, 0x62, 0x00, 0x00, 0x00,
+    0x6a, 0x88, 0x00, 0x00, 0x00, 0x6a, 0x88, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x2a, 0x29, 0x00, 0x02, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x18, 0x00, 0x00, 0x4a, 0x0a,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x55, 0xd0, 0xf3, 0xe5, 0xa3, 0x17, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x36, 0xe8, 0xfb, 0x00, 0x98, 0x8a, 0x21, 0x16, 0x68, 0xfb, 0x1d, 0x00, 0x00,
+    0x3a, 0x6a, 0x22, 0x11, 0x62, 0xf8, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1a, 0xd7, 0xed,
+    0x41, 0x00, 0x00, 0x00, 0x8e, 0x8e, 0x38, 0x38, 0x38, 0x2c, 0x00, 0x00, 0x00, 0x00, 0x42, 0xdd,
+    0xa9, 0x5c, 0x2b, 0x00, 0x00, 0x39, 0x45, 0x45, 0x45, 0x45, 0x5a, 0xf1, 0x24, 0x00, 0x25, 0xef,
+    0x54, 0x05, 0x17, 0xad, 0xb3, 0x00, 0x00, 0x3e, 0xe9, 0x46, 0x08, 0x39, 0xe1, 0x65, 0x00, 0x00,
+    0xaf, 0xd8, 0x00, 0x00, 0x00, 0xaf, 0xd8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x50, 0xbc,
+    0xe2, 0x4f, 0x00, 0x0a, 0xcb, 0xcb, 0xcb, 0xcb, 0xcb, 0xcb, 0xcb, 0x7c, 0x00, 0x02, 0xaa, 0xe9,
+    0x8b, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x32, 0x49, 0x0e, 0x16, 0xae, 0xb1, 0x00, 0x00, 0x00,
+    0x00, 0x0d, 0xad, 0xdf, 0x61, 0xfb, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0xba, 0x7a, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0xac, 0x86, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0xc6, 0x50, 0xd4,
+    0x41, 0x00, 0x00, 0x00, 0x9b, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x43, 0xf1, 0x36,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xae, 0x71, 0x00, 0x00, 0x7d, 0xa3,
+    0x00, 0x00, 0x00, 0x18, 0xff, 0x13, 0x00, 0xb6, 0x78, 0x00, 0x00, 0x00, 0x4b, 0xe0, 0x04, 0x00,
+    0x02, 0x05, 0x00, 0x00, 0x00, 0x02, 0x05, 0x00, 0x00, 0x00, 0x00, 0x13, 0x76, 0xdf, 0xbe, 0x53,
+    0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d,
+    0x84, 0xe6, 0xb1, 0x45, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0xeb, 0x00, 0x00, 0x00,
+    0x00, 0x01, 0x58, 0x08, 0x1e, 0xfb, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x84, 0xa8, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x87, 0x8a, 0x00, 0xd4,
+    0x41, 0x00, 0x00, 0x00, 0xad, 0x4e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0xd0, 0x54, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0xd4, 0x02, 0x00, 0x00, 0x8b, 0x93,
+    0x00, 0x00, 0x00, 0x1e, 0xff, 0x18, 0x00, 0xe2, 0x4c, 0x00, 0x00, 0x00, 0x06, 0xfb, 0x24, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x9d, 0xe9, 0x94, 0x2a, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x06, 0x5b, 0xc5, 0xd6, 0x4e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x75, 0xbd, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x1e, 0xfb, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc8, 0x69, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x3e, 0xe4, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x39, 0xd0, 0x07, 0x00, 0xd4,
+    0x41, 0x00, 0x00, 0x00, 0xc5, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4a, 0xeb, 0x04, 0x01,
+    0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0xcc, 0x58, 0x00, 0x00, 0x00, 0x45, 0xe5,
+    0x1a, 0x00, 0x00, 0x74, 0xc5, 0x00, 0x00, 0xd2, 0x80, 0x00, 0x00, 0x00, 0x00, 0xe9, 0x41, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0b, 0xd8, 0xcb, 0x58, 0x06, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x08, 0x97, 0x97, 0x97, 0x97, 0x97, 0x97, 0x97, 0x5c, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x21, 0x89, 0xef, 0x73, 0x00, 0x00, 0x00, 0x00, 0x2b, 0xe6, 0x32, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x1e, 0xfb, 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0xd1, 0x06, 0x00, 0x00,
+    0x00, 0x00, 0x82, 0xd2, 0xed, 0x2e, 0x00, 0x00, 0x00, 0x00, 0x03, 0xcd, 0x3d, 0x00, 0x00, 0xd4,
+    0x41, 0x00, 0x00, 0x00, 0xc5, 0xf6, 0xe8, 0xb5, 0x3e, 0x00, 0x00, 0x00, 0x71, 0xdc, 0xc6, 0xef,
+    0xf3, 0xc1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0xd8, 0x02, 0x00, 0x00, 0x00, 0x00, 0x5c,
+    0xe9, 0x8e, 0x7d, 0xd8, 0x20, 0x00, 0x00, 0x82, 0xd4, 0x1c, 0x00, 0x00, 0x16, 0xed, 0x39, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x46, 0xb3, 0xe4, 0x81, 0x1b,
+    0x00, 0x00, 0x00, 0x05, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x37, 0x00, 0x00, 0x00, 0x01,
+    0x47, 0xb2, 0xe5, 0x81, 0x1a, 0x00, 0x00, 0x00, 0x00, 0x1d, 0xdf, 0x49, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x1e, 0xfb, 0x00, 0x00, 0x00, 0x00, 0x67, 0xe2, 0x1f, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x25, 0x4b, 0x95, 0xea, 0x3f, 0x00, 0x00, 0x00, 0x63, 0xac, 0x00, 0x00, 0x00, 0xd4,
+    0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x6c, 0xee, 0x5c, 0x00, 0x00, 0x94, 0xb6, 0x29, 0x02,
+    0x1d, 0x8a, 0xef, 0x11, 0x00, 0x00, 0x00, 0x00, 0xaf, 0x78, 0x00, 0x00, 0x00, 0x00, 0x03, 0x9e,
+    0xc5, 0x6f, 0xc8, 0xc2, 0x19, 0x00, 0x00, 0x0c, 0xab, 0xfd, 0xc6, 0xcd, 0xdb, 0xff, 0x17, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x22, 0x8d, 0xea,
+    0xab, 0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x71, 0xda,
+    0xc7, 0x5b, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x1e, 0xfb, 0x00, 0x00, 0x00, 0x6f, 0xe1, 0x24, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x6a, 0xe0, 0x01, 0x00, 0x02, 0xd9, 0x45, 0x16, 0x16, 0x16, 0xd7,
+    0x51, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x51, 0xed, 0x01, 0x00, 0xa1, 0x81, 0x00, 0x00,
+    0x00, 0x06, 0xf1, 0x65, 0x00, 0x00, 0x00, 0x0d, 0xf6, 0x20, 0x00, 0x00, 0x00, 0x00, 0x76, 0xc8,
+    0x07, 0x00, 0x00, 0x63, 0xe6, 0x0a, 0x00, 0x00, 0x00, 0x16, 0x3b, 0x26, 0x3e, 0xf2, 0x00, 0x00,
+    0x56, 0x70, 0x00, 0x00, 0x00, 0x4a, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0b,
+    0x66, 0x47, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x83, 0x35,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xea, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x1e, 0xfb, 0x00, 0x00, 0x59, 0xe6, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0xff, 0x1c, 0x00, 0x1f, 0xcf, 0xcf, 0xcf, 0xcf, 0xcf, 0xf7,
+    0xdc, 0x94, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0xff, 0x20, 0x00, 0x64, 0x92, 0x00, 0x00,
+    0x00, 0x00, 0xb4, 0x81, 0x00, 0x00, 0x00, 0x5a, 0xc7, 0x00, 0x00, 0x00, 0x00, 0x00, 0xca, 0x72,
+    0x00, 0x00, 0x00, 0x00, 0xdc, 0x4d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8d, 0x9b, 0x00, 0x00,
+    0xbb, 0xe4, 0x00, 0x00, 0x00, 0x80, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x1e, 0xfb, 0x00, 0x13, 0xec, 0x3a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x4c, 0xfc, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd4,
+    0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x22, 0xfe, 0x0d, 0x00, 0x21, 0xcc, 0x00, 0x00,
+    0x00, 0x00, 0xca, 0x63, 0x00, 0x00, 0x00, 0x9b, 0x8e, 0x00, 0x00, 0x00, 0x00, 0x00, 0xca, 0x5e,
+    0x00, 0x00, 0x00, 0x00, 0xce, 0x54, 0x00, 0x00, 0x00, 0x00, 0x00, 0x41, 0xed, 0x18, 0x00, 0x00,
+    0x09, 0x11, 0x00, 0x00, 0x00, 0x9b, 0x83, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x1e, 0xfb, 0x00, 0x6d, 0xb6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x25, 0x04, 0x00, 0x01, 0x32, 0xc1, 0xa8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd4,
+    0x41, 0x00, 0x00, 0x22, 0x06, 0x00, 0x00, 0x10, 0xb0, 0xac, 0x00, 0x00, 0x00, 0xdd, 0x5b, 0x00,
+    0x00, 0x4b, 0xec, 0x10, 0x00, 0x00, 0x00, 0xc6, 0x63, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7a, 0xcb,
+    0x11, 0x00, 0x00, 0x52, 0xeb, 0x14, 0x00, 0x00, 0x03, 0x22, 0x88, 0xda, 0x4b, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x01, 0xe0, 0x2e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x8e, 0x2b, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x1e, 0xfb, 0x00, 0x9b, 0xfb, 0xf6, 0xf6, 0xf6, 0xf6, 0xf6, 0x21, 0x00,
+    0xa4, 0xf0, 0xcc, 0xe7, 0xfd, 0x9d, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd4,
+    0x41, 0x00, 0x00, 0x8f, 0xf2, 0xce, 0xd2, 0xf4, 0xa2, 0x13, 0x00, 0x00, 0x00, 0x33, 0xe0, 0xd8,
+    0xcd, 0xe0, 0x3a, 0x00, 0x00, 0x00, 0x01, 0xf1, 0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x91,
+    0xea, 0xbe, 0xcf, 0xdc, 0x3f, 0x00, 0x00, 0x52, 0xe1, 0xbf, 0x89, 0x10, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x0b, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x31, 0xfe, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x17, 0x39, 0x2e, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x12, 0x37, 0x30, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x24,
+    0x28, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x0e, 0x34, 0x24, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1a, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x0e, 0x7d, 0xb2, 0xdc, 0xe5, 0xb8, 0x84, 0x14, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x30, 0xbd, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x45, 0xb2, 0xce, 0xd4,
+    0xc2, 0x9a, 0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x90, 0xce, 0xf7, 0xea, 0xb6, 0x4c, 0x00,
+    0x45, 0xb1, 0xcb, 0xd0, 0xb0, 0x8b, 0x34, 0x00, 0x00, 0x00, 0x00, 0x5a, 0xbe, 0xbe, 0xbe, 0xbe,
+    0xbe, 0xbe, 0x3a, 0x00, 0x5a, 0xbe, 0xbe, 0xbe, 0xbe, 0xbe, 0xbe, 0x2a, 0x00, 0x00, 0x00, 0x11,
+    0x8e, 0xcb, 0xf4, 0xdf, 0xbb, 0x5f, 0x00, 0x5a, 0x81, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb4,
+    0x27, 0x00, 0x5a, 0x81, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x94, 0x00, 0x5a, 0x81, 0x00,
+    0x00, 0x00, 0x00, 0x21, 0xb7, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x54, 0xe4, 0x9c, 0x4a, 0x1d, 0x13, 0x45, 0x8c, 0xe7, 0x5c, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0xa6, 0xec, 0x7f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x79, 0xc0, 0x2c, 0x27,
+    0x39, 0x74, 0xeb, 0x79, 0x00, 0x00, 0x00, 0x37, 0xe5, 0x9f, 0x43, 0x16, 0x29, 0x63, 0x62, 0x00,
+    0x79, 0xc1, 0x31, 0x2d, 0x4f, 0x7b, 0xe7, 0xa6, 0x0f, 0x00, 0x00, 0x79, 0xc2, 0x41, 0x41, 0x41,
+    0x41, 0x41, 0x14, 0x00, 0x79, 0xc2, 0x41, 0x41, 0x41, 0x41, 0x41, 0x0e, 0x00, 0x00, 0x37, 0xe4,
+    0x9e, 0x3b, 0x13, 0x28, 0x5b, 0x83, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf2,
+    0x34, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5f, 0xc7, 0x00, 0x79, 0xad, 0x00,
+    0x00, 0x00, 0x1b, 0xda, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x39, 0xeb, 0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0xe5, 0x44, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x1f, 0xed, 0x45, 0xea, 0x09, 0x00, 0x00, 0x00, 0x00, 0x79, 0xad, 0x00, 0x00,
+    0x00, 0x00, 0x42, 0xf4, 0x06, 0x00, 0x08, 0xd9, 0x73, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x10, 0xb6, 0xa5, 0x00, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0xda, 0x73,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf2,
+    0x34, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5f, 0xc7, 0x00, 0x79, 0xad, 0x00,
+    0x00, 0x1b, 0xd9, 0x6b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x07, 0xd6, 0x67, 0x00, 0x00, 0x19, 0x63, 0x86, 0x70, 0x36, 0x00, 0x53, 0xe2, 0x0e, 0x00,
+    0x00, 0x00, 0x00, 0x89, 0x93, 0x00, 0xcb, 0x61, 0x00, 0x00, 0x00, 0x00, 0x79, 0xad, 0x00, 0x00,
+    0x00, 0x00, 0x1b, 0xfd, 0x13, 0x00, 0x72, 0xd4, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x19, 0xf0, 0x4d, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x72, 0xd1, 0x03,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf2,
+    0x34, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5f, 0xc7, 0x00, 0x79, 0xad, 0x00,
+    0x1d, 0xd9, 0x67, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x4f, 0xd4, 0x02, 0x00, 0x3f, 0xe7, 0x94, 0x64, 0x8d, 0xe1, 0x00, 0x00, 0xc8, 0x50, 0x00,
+    0x00, 0x00, 0x08, 0xea, 0x29, 0x00, 0x60, 0xc9, 0x00, 0x00, 0x00, 0x00, 0x79, 0xad, 0x00, 0x00,
+    0x00, 0x00, 0x80, 0xc2, 0x00, 0x00, 0xac, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa0, 0x8d, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xac, 0x81, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf2,
+    0x34, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5f, 0xc7, 0x00, 0x79, 0xad, 0x22,
+    0xda, 0x5f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x7b, 0x99, 0x00, 0x05, 0xdc, 0x5a, 0x00, 0x00, 0x34, 0xe1, 0x00, 0x00, 0x91, 0x82, 0x00,
+    0x00, 0x00, 0x5d, 0xc5, 0x00, 0x00, 0x0b, 0xf1, 0x32, 0x00, 0x00, 0x00, 0x79, 0xdf, 0x9c, 0x9c,
+    0xa6, 0xd5, 0xcc, 0x12, 0x00, 0x00, 0xd7, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0xbb, 0x00, 0x79, 0xe3, 0xa9, 0xa9, 0xa9,
+    0xa9, 0x75, 0x00, 0x00, 0x79, 0xdf, 0x9c, 0x9c, 0x9c, 0x9c, 0x5c, 0x00, 0x00, 0xd7, 0x5c, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x79, 0xe2, 0xa4, 0xa4, 0xa4, 0xa4, 0xa4, 0xa4, 0xfa,
+    0x34, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5f, 0xc7, 0x00, 0x79, 0xcf, 0xe6,
+    0x5b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0xa7, 0x70, 0x00, 0x2b, 0xf3, 0x01, 0x00, 0x00, 0x34, 0xe1, 0x00, 0x00, 0x6c, 0xab, 0x00,
+    0x00, 0x00, 0xc1, 0x64, 0x00, 0x00, 0x00, 0x99, 0x97, 0x00, 0x00, 0x00, 0x79, 0xcc, 0x5f, 0x5f,
+    0x66, 0x80, 0xd4, 0xc1, 0x0f, 0x00, 0xe2, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6f, 0xc6, 0x00, 0x79, 0xc7, 0x52, 0x52, 0x52,
+    0x52, 0x39, 0x00, 0x00, 0x79, 0xcd, 0x63, 0x63, 0x63, 0x63, 0x3b, 0x00, 0x00, 0xe2, 0x52, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x16, 0x8f, 0x00, 0x79, 0xca, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0x5b, 0xf7,
+    0x34, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5f, 0xc7, 0x00, 0x79, 0xc5, 0xba,
+    0xd2, 0x25, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0xb9, 0x60, 0x00, 0x3d, 0xe4, 0x00, 0x00, 0x00, 0x34, 0xe1, 0x00, 0x00, 0x79, 0x98, 0x00,
+    0x00, 0x24, 0xfb, 0x41, 0x34, 0x34, 0x34, 0x68, 0xf0, 0x0a, 0x00, 0x00, 0x79, 0xad, 0x00, 0x00,
+    0x00, 0x00, 0x08, 0xce, 0x80, 0x00, 0xbb, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x95, 0x98, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xbb, 0x72, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x27, 0xff, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf2,
+    0x34, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5f, 0xc7, 0x00, 0x79, 0xad, 0x01,
+    0x74, 0xea, 0x3b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x8f, 0x87, 0x00, 0x16, 0xfd, 0x15, 0x00, 0x00, 0x34, 0xe1, 0x00, 0x00, 0xb4, 0x6d, 0x00,
+    0x00, 0x85, 0xe3, 0xc7, 0xc7, 0xc7, 0xc7, 0xc7, 0xee, 0x5e, 0x00, 0x00, 0x79, 0xad, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x89, 0xb2, 0x00, 0x90, 0xae, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0xdb, 0x66, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x90, 0xaf, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x27, 0xff, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf2,
+    0x34, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xc0, 0x00, 0x79, 0xad, 0x00,
+    0x00, 0x50, 0xec, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x64, 0xb2, 0x00, 0x00, 0xa8, 0xae, 0x0e, 0x00, 0x3d, 0xfb, 0x16, 0x4b, 0xec, 0x0f, 0x00,
+    0x02, 0xe2, 0x46, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xbf, 0x00, 0x00, 0x79, 0xad, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0xb4, 0x90, 0x00, 0x21, 0xf5, 0x34, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0xce, 0x06, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x22, 0xf6, 0x32,
+    0x00, 0x00, 0x00, 0x00, 0x27, 0xff, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf2,
+    0x34, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb8, 0xa5, 0x00, 0x79, 0xad, 0x00,
+    0x00, 0x00, 0x52, 0xee, 0x31, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x2a, 0xf3, 0x24, 0x00, 0x0f, 0x96, 0xe4, 0xe3, 0xc4, 0xa4, 0xf0, 0xcd, 0x33, 0x00, 0x00,
+    0x45, 0xe5, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0xfd, 0x22, 0x00, 0x79, 0xad, 0x00, 0x00,
+    0x00, 0x21, 0x8d, 0xed, 0x2a, 0x00, 0x00, 0x74, 0xec, 0x50, 0x04, 0x00, 0x00, 0x11, 0x33, 0x00,
+    0x79, 0xad, 0x00, 0x00, 0x0d, 0x37, 0xb5, 0xdf, 0x27, 0x00, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0xec,
+    0x53, 0x05, 0x00, 0x00, 0x28, 0xff, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf2,
+    0x34, 0x00, 0x79, 0xad, 0x00, 0x4f, 0x28, 0x00, 0x04, 0x48, 0xf5, 0x51, 0x00, 0x79, 0xad, 0x00,
+    0x00, 0x00, 0x00, 0x6e, 0xe1, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x8a, 0xbf, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0xa4, 0x8d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2, 0x81, 0x00, 0x65, 0xf3, 0xeb, 0xe4,
+    0xef, 0xe5, 0x97, 0x29, 0x00, 0x00, 0x00, 0x00, 0x4c, 0xdd, 0xf5, 0xd1, 0xe2, 0xf7, 0x95, 0x00,
+    0x65, 0xf3, 0xef, 0xeb, 0xf0, 0xcc, 0x74, 0x07, 0x00, 0x00, 0x00, 0x79, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xbe, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4b,
+    0xd9, 0xf7, 0xd1, 0xd4, 0xf2, 0xd1, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf2,
+    0x34, 0x00, 0x79, 0xad, 0x00, 0x75, 0xf0, 0xd8, 0xe9, 0xfd, 0x86, 0x02, 0x00, 0x79, 0xad, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0xad, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x08, 0xcf, 0xbd, 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x15,
+    0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x15, 0x3a, 0x30, 0x0a, 0x00, 0x00,
+    0x00, 0x02, 0x10, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x10, 0x34, 0x24, 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x30, 0x33, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x07, 0x78, 0xeb, 0xba, 0x91, 0x72, 0x7c, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x41, 0x68, 0x84, 0x76, 0x44, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x5a, 0x81, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb7, 0x37, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x6e, 0x77, 0x00, 0x00, 0x5a, 0x8e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x7a, 0x5d, 0x00, 0x00, 0x00, 0x13, 0x93, 0xcd, 0xf3, 0xd3, 0x9e, 0x1e, 0x00, 0x00, 0x00, 0x00,
+    0x44, 0xaf, 0xca, 0xd3, 0xbc, 0x9b, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x93, 0xcd, 0xf3,
+    0xd3, 0x9e, 0x1e, 0x00, 0x00, 0x00, 0x00, 0x44, 0xaf, 0xca, 0xd3, 0xbc, 0x99, 0x36, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x09, 0x63, 0xab, 0xee, 0xed, 0xb8, 0x4b, 0x00, 0x00, 0x84, 0xbe, 0xbe, 0xbe,
+    0xbe, 0xbe, 0xbe, 0xbe, 0xbb, 0x00, 0x64, 0x77, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0xbe, 0x0a,
+    0x00, 0x85, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xb1, 0x34, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0b, 0xff, 0xc4, 0x01, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x1a, 0xf4, 0xb2, 0x00, 0x00, 0x79, 0xff, 0x78, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0xa4, 0x7d, 0x00, 0x00, 0x3c, 0xe8, 0x9c, 0x3f, 0x16, 0x39, 0x8c, 0xee, 0x4f, 0x00, 0x00, 0x00,
+    0x79, 0xc2, 0x34, 0x2b, 0x3d, 0x7d, 0xed, 0x83, 0x00, 0x00, 0x00, 0x3c, 0xe8, 0x9c, 0x3f, 0x16,
+    0x39, 0x8c, 0xee, 0x4f, 0x00, 0x00, 0x00, 0x79, 0xc2, 0x35, 0x2b, 0x3c, 0x7b, 0xed, 0x78, 0x00,
+    0x00, 0x00, 0x00, 0x6a, 0xc2, 0x30, 0x08, 0x1e, 0x5e, 0x58, 0x00, 0x00, 0x2d, 0x41, 0x41, 0x41,
+    0xf2, 0x6b, 0x41, 0x41, 0x40, 0x00, 0x86, 0xa0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1a, 0xff, 0x0d,
+    0x00, 0x63, 0xe1, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3d, 0xed, 0x07, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0xef, 0xdd, 0x4e, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x98, 0xcd, 0xc8, 0x00, 0x00, 0x79, 0xc4, 0xe6, 0x4b, 0x00, 0x00, 0x00, 0x00,
+    0xa4, 0x7d, 0x00, 0x09, 0xdd, 0x71, 0x00, 0x00, 0x00, 0x00, 0x00, 0x53, 0xed, 0x16, 0x00, 0x00,
+    0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x32, 0xfe, 0x27, 0x00, 0x09, 0xdd, 0x71, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x53, 0xed, 0x16, 0x00, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x3b, 0xfd, 0x1a,
+    0x00, 0x00, 0x02, 0xd8, 0x2b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0xee, 0x38, 0x00, 0x00, 0x00, 0x00, 0x86, 0xa0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1a, 0xff, 0x0d,
+    0x00, 0x0f, 0xf7, 0x3a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x95, 0x98, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x37, 0xd8, 0x61, 0xcb, 0x01, 0x00,
+    0x00, 0x00, 0x1f, 0xea, 0x4a, 0xdd, 0x00, 0x00, 0x79, 0xa9, 0x3d, 0xe9, 0x1f, 0x00, 0x00, 0x00,
+    0xa4, 0x7d, 0x00, 0x76, 0xd4, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb9, 0x94, 0x00, 0x00,
+    0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x5b, 0x00, 0x76, 0xd4, 0x03, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0xb9, 0x94, 0x00, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0xeb, 0x49,
+    0x00, 0x00, 0x09, 0xfc, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0xee, 0x38, 0x00, 0x00, 0x00, 0x00, 0x86, 0xa0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1a, 0xff, 0x0d,
+    0x00, 0x00, 0xac, 0x93, 0x00, 0x00, 0x00, 0x00, 0x04, 0xe9, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4e, 0xc4, 0x05, 0xdf, 0x4a, 0x00,
+    0x00, 0x00, 0x97, 0x84, 0x27, 0xf3, 0x00, 0x00, 0x79, 0xa9, 0x00, 0x79, 0xc8, 0x06, 0x00, 0x00,
+    0xa4, 0x7d, 0x00, 0xad, 0x81, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x62, 0xcb, 0x00, 0x00,
+    0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x01, 0xed, 0x47, 0x00, 0xad, 0x81, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x62, 0xcb, 0x00, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x09, 0xf9, 0x1f,
+    0x00, 0x00, 0x00, 0xaf, 0xc5, 0x1d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0xee, 0x38, 0x00, 0x00, 0x00, 0x00, 0x86, 0xa0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1a, 0xff, 0x0d,
+    0x00, 0x00, 0x4f, 0xea, 0x05, 0x00, 0x00, 0x00, 0x49, 0xdc, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x64, 0xb1, 0x00, 0x6a, 0xc2, 0x00,
+    0x00, 0x18, 0xed, 0x15, 0x14, 0xff, 0x09, 0x00, 0x79, 0xa9, 0x00, 0x02, 0xbc, 0x88, 0x00, 0x00,
+    0xa4, 0x7d, 0x00, 0xd8, 0x5c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0xf4, 0x02, 0x00,
+    0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x75, 0xeb, 0x10, 0x00, 0xd8, 0x5c, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x3e, 0xf4, 0x02, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x08, 0x95, 0xc4, 0x00,
+    0x00, 0x00, 0x00, 0x0e, 0x9f, 0xf5, 0x9e, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0xee, 0x38, 0x00, 0x00, 0x00, 0x00, 0x86, 0xa0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1a, 0xff, 0x0d,
+    0x00, 0x00, 0x05, 0xe8, 0x4a, 0x00, 0x00, 0x00, 0xa4, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x75, 0x9d, 0x00, 0x09, 0xe8, 0x3a,
+    0x00, 0x88, 0x92, 0x00, 0x03, 0xfd, 0x1b, 0x00, 0x79, 0xa9, 0x00, 0x00, 0x1b, 0xea, 0x3c, 0x00,
+    0xa4, 0x7d, 0x00, 0xe1, 0x54, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x35, 0xfa, 0x05, 0x00,
+    0x79, 0xd9, 0x8a, 0x90, 0xa1, 0xdb, 0xda, 0x38, 0x00, 0x00, 0xe2, 0x54, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x35, 0xfc, 0x06, 0x00, 0x79, 0xe9, 0xba, 0xbd, 0xc9, 0xf2, 0x90, 0x0f, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x25, 0x92, 0xf2, 0xa9, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0xee, 0x38, 0x00, 0x00, 0x00, 0x00, 0x86, 0xa0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1a, 0xff, 0x0d,
+    0x00, 0x00, 0x00, 0x8c, 0xa9, 0x00, 0x00, 0x0f, 0xf3, 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x83, 0x90, 0x00, 0x00, 0x79, 0xb0,
+    0x0d, 0xea, 0x1f, 0x00, 0x00, 0xf2, 0x29, 0x00, 0x79, 0xa9, 0x00, 0x00, 0x00, 0x57, 0xdc, 0x0b,
+    0xa4, 0x7d, 0x00, 0xb8, 0x77, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x59, 0xd6, 0x00, 0x00,
+    0x79, 0xd2, 0x75, 0x6f, 0x5a, 0x37, 0x01, 0x00, 0x00, 0x00, 0xb9, 0x77, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x59, 0xdc, 0x00, 0x00, 0x79, 0xc2, 0x41, 0x40, 0x4a, 0xf0, 0x40, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0xc5, 0xb4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0xee, 0x38, 0x00, 0x00, 0x00, 0x00, 0x84, 0xa4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0xff, 0x0b,
+    0x00, 0x00, 0x00, 0x29, 0xf7, 0x12, 0x00, 0x66, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x91, 0x85, 0x00, 0x00, 0x10, 0xf0,
+    0x96, 0xa0, 0x00, 0x00, 0x00, 0xe7, 0x38, 0x00, 0x79, 0xa9, 0x00, 0x00, 0x00, 0x00, 0xab, 0x8a,
+    0xa4, 0x7d, 0x00, 0x8b, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9a, 0xa9, 0x00, 0x00,
+    0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8b, 0xb8, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x9a, 0xab, 0x00, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x62, 0xe3, 0x12, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0xfd, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0xee, 0x38, 0x00, 0x00, 0x00, 0x00, 0x6b, 0xc2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0xef, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0xbe, 0x6e, 0x00, 0xca, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9f, 0x7a, 0x00, 0x00, 0x00, 0x8e,
+    0xff, 0x2d, 0x00, 0x00, 0x00, 0xdb, 0x47, 0x00, 0x79, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x16, 0xe8,
+    0xcf, 0x7d, 0x00, 0x1d, 0xf3, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x29, 0xf7, 0x31, 0x00, 0x00,
+    0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0xf4, 0x43, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x29, 0xf9, 0x32, 0x00, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0xb1, 0xa4, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3a, 0xe9, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0xee, 0x38, 0x00, 0x00, 0x00, 0x00, 0x3e, 0xf3, 0x16, 0x00, 0x00, 0x00, 0x00, 0x86, 0xc0, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x55, 0xd7, 0x35, 0xe4, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xad, 0x6f, 0x00, 0x00, 0x00, 0x15,
+    0x46, 0x00, 0x00, 0x00, 0x00, 0xd0, 0x56, 0x00, 0x79, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x61,
+    0xff, 0x7d, 0x00, 0x00, 0x6b, 0xf1, 0x5c, 0x05, 0x00, 0x03, 0x4c, 0xe9, 0x84, 0x00, 0x00, 0x00,
+    0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6d, 0xf1, 0x5c, 0x05, 0x00,
+    0x03, 0x4c, 0xe9, 0x84, 0x00, 0x00, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x1b, 0xf0, 0x44,
+    0x00, 0x00, 0x15, 0x47, 0x02, 0x00, 0x00, 0x15, 0xb7, 0x7b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0xee, 0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc4, 0xb7, 0x14, 0x00, 0x00, 0x4f, 0xf5, 0x4a, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x04, 0xe1, 0xd3, 0x7b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x79, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x34, 0x00, 0xbb, 0x64, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0xc4, 0x65, 0x00, 0x79, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
+    0xc4, 0x7d, 0x00, 0x00, 0x00, 0x41, 0xd4, 0xf7, 0xd4, 0xf4, 0xde, 0x52, 0x00, 0x00, 0x00, 0x00,
+    0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x43, 0xd4, 0xf7, 0xd4,
+    0xf4, 0xd5, 0x5f, 0x00, 0x00, 0x00, 0x00, 0x79, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x75, 0xcc,
+    0x01, 0x00, 0x33, 0xda, 0xee, 0xc9, 0xd9, 0xdc, 0x99, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0xee, 0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0xaf, 0xf6, 0xd5, 0xe7, 0xeb, 0x58, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x75, 0xf7, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x35, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x94,
+    0xbe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x1b, 0x38, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x32, 0x1f, 0x01, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12,
+    0xd6, 0xa9, 0x37, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x0a, 0x75, 0xce, 0xf0, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x3e, 0xa4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x28, 0xb1,
+    0x00, 0x18, 0xba, 0x29, 0x00, 0x00, 0x00, 0x00, 0x07, 0xac, 0x40, 0x00, 0x77, 0x85, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x07, 0xb2, 0x32, 0x00, 0x06, 0xbe, 0xbe, 0xbe, 0xbe, 0xbe, 0xbe, 0xbe, 0x7a,
+    0x00, 0x15, 0x45, 0x45, 0x45, 0x08, 0x00, 0x18, 0x33, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x3c, 0x45, 0x45, 0x27, 0x00, 0x00, 0x00, 0x07, 0xb1, 0x65, 0x00, 0x00, 0x00, 0x00, 0x12, 0xcf,
+    0xcf, 0xcf, 0xcf, 0xcf, 0xcf, 0xcf, 0xcf, 0x51, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c,
+    0xc9, 0xea, 0xe5, 0xad, 0x20, 0x00, 0x00, 0x02, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x1e, 0xa5, 0xde, 0xea, 0xc5, 0x1a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x2c, 0xfc, 0x09, 0x00, 0x00, 0x00, 0x00, 0x39, 0x1d, 0x00, 0x00, 0x00, 0x00, 0x62, 0xc5,
+    0x00, 0x00, 0x81, 0xc9, 0x04, 0x00, 0x00, 0x00, 0x87, 0xbb, 0x01, 0x00, 0x28, 0xf8, 0x29, 0x00,
+    0x00, 0x00, 0x00, 0x6f, 0xc4, 0x01, 0x00, 0x02, 0x41, 0x41, 0x41, 0x41, 0x41, 0x49, 0xe8, 0x72,
+    0x00, 0x4e, 0xe4, 0x93, 0x93, 0x11, 0x00, 0x20, 0xee, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x7f, 0x93, 0xc8, 0x8f, 0x00, 0x00, 0x00, 0x75, 0xae, 0xe3, 0x14, 0x00, 0x00, 0x00, 0x03, 0x23,
+    0x23, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, 0x0d, 0x00, 0xae, 0x6d, 0x00, 0x00, 0x00, 0x00, 0x09,
+    0x35, 0x12, 0x1b, 0x8f, 0xd4, 0x03, 0x00, 0x89, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x1f, 0xeb, 0xb4, 0x47, 0x13, 0x3e, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x05, 0xf9, 0x33, 0x00, 0x00, 0x00, 0x05, 0xec, 0x96, 0x00, 0x00, 0x00, 0x00, 0x94, 0x96,
+    0x00, 0x00, 0x05, 0xcd, 0x78, 0x00, 0x00, 0x37, 0xec, 0x1d, 0x00, 0x00, 0x00, 0x95, 0xa9, 0x00,
+    0x00, 0x00, 0x0c, 0xe7, 0x35, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa4, 0xaa, 0x01,
+    0x00, 0x4e, 0xbe, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc4, 0x51, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x7d, 0x8f, 0x00, 0x00, 0x14, 0xe3, 0x19, 0x7d, 0x93, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x32, 0xe0, 0x76, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x05, 0xef, 0x2b, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0xa3, 0xb3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0xd2, 0x62, 0x00, 0x00, 0x00, 0x47, 0xee, 0xe7, 0x02, 0x00, 0x00, 0x00, 0xc7, 0x66,
+    0x00, 0x00, 0x00, 0x2c, 0xf1, 0x2b, 0x08, 0xd6, 0x5d, 0x00, 0x00, 0x00, 0x00, 0x15, 0xee, 0x35,
+    0x00, 0x00, 0x81, 0xa2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0xd9, 0x0d, 0x00,
+    0x00, 0x4e, 0xbe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6b, 0xab, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x7d, 0x8f, 0x00, 0x00, 0x92, 0x7e, 0x00, 0x0a, 0xdf, 0x27, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16, 0xc2, 0x3d, 0x00, 0x00, 0x00,
+    0x03, 0x27, 0x3b, 0x24, 0xd7, 0x44, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0xe6, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x9f, 0x96, 0x00, 0x00, 0x00, 0x9c, 0x7a, 0xe1, 0x3e, 0x00, 0x00, 0x04, 0xf5, 0x31,
+    0x00, 0x00, 0x00, 0x00, 0x75, 0xcb, 0x8e, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x69, 0xcd,
+    0x03, 0x23, 0xe7, 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0xee, 0x2f, 0x00, 0x00,
+    0x00, 0x4e, 0xbe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16, 0xf2, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x7d, 0x8f, 0x00, 0x26, 0xe0, 0x0b, 0x00, 0x00, 0x61, 0xb0, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x45,
+    0xde, 0xd0, 0xb7, 0xce, 0xfc, 0x45, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x08, 0xfe, 0x2b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x6a, 0xca, 0x00, 0x00, 0x05, 0xec, 0x22, 0x8d, 0x92, 0x00, 0x00, 0x2d, 0xf5, 0x05,
+    0x00, 0x00, 0x00, 0x00, 0x03, 0xcb, 0xf2, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xc7,
+    0x6e, 0xb5, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0xd0, 0x6d, 0x00, 0x00, 0x00,
+    0x00, 0x4e, 0xbe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb7, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x7d, 0x8f, 0x00, 0xaf, 0x62, 0x00, 0x00, 0x00, 0x02, 0xd0, 0x3f, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0xf0,
+    0x54, 0x00, 0x00, 0x00, 0xd4, 0x45, 0x00, 0x8a, 0xb6, 0xa9, 0xdd, 0xd7, 0x9c, 0x15, 0x00, 0x00,
+    0x00, 0xf0, 0x34, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x34, 0xf9, 0x09, 0x00, 0x48, 0xc6, 0x00, 0x33, 0xe4, 0x03, 0x00, 0x66, 0xc3, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x1e, 0xeb, 0xe9, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e,
+    0xf4, 0xc9, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x87, 0xb9, 0x01, 0x00, 0x00, 0x00,
+    0x00, 0x4e, 0xbe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5e, 0xba, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x7d, 0x8f, 0x00, 0x31, 0x03, 0x00, 0x00, 0x00, 0x00, 0x26, 0x0d, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0xfc,
+    0x04, 0x00, 0x00, 0x00, 0xd4, 0x45, 0x00, 0x8a, 0xdb, 0x56, 0x19, 0x2e, 0x95, 0xe3, 0x11, 0x00,
+    0x00, 0xbf, 0x79, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x04, 0xf0, 0x41, 0x00, 0xa7, 0x6a, 0x00, 0x00, 0xd6, 0x4a, 0x00, 0xa5, 0x84, 0x00,
+    0x00, 0x00, 0x00, 0x02, 0xbd, 0x79, 0x49, 0xe9, 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0xc0, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x35, 0xec, 0x1c, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x4e, 0xbe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0xf3, 0x18, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x7d, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0xf1,
+    0x58, 0x00, 0x00, 0x00, 0xd4, 0x45, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0xb4, 0x83, 0x00,
+    0x00, 0x42, 0xef, 0x4c, 0x01, 0x00, 0x0b, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0xb2, 0x7f, 0x12, 0xef, 0x12, 0x00, 0x00, 0x76, 0xaf, 0x00, 0xe4, 0x41, 0x00,
+    0x00, 0x00, 0x00, 0x6f, 0xc7, 0x03, 0x00, 0x9b, 0xb0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0xbe, 0x68, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0xcf, 0x66, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x4e, 0xbe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0x6e, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x7d, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4b,
+    0xe4, 0xdf, 0xce, 0xdb, 0xf0, 0x39, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x5b, 0xc7, 0x00,
+    0x00, 0x00, 0x50, 0xe1, 0xf1, 0xda, 0xee, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x6d, 0xc7, 0x6c, 0xa2, 0x00, 0x00, 0x00, 0x17, 0xf6, 0x3e, 0xf4, 0x08, 0x00,
+    0x00, 0x00, 0x1c, 0xed, 0x29, 0x00, 0x00, 0x0f, 0xe4, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0xbe, 0x68, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0xc3, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x4e, 0xbe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x51, 0xc8, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x7d, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x03, 0x1f, 0x2c, 0x1a, 0x01, 0x00, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x44, 0xe8, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x15, 0x23, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x1c, 0xfb, 0xd9, 0x3a, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xd7, 0xad, 0x00, 0x00,
+    0x00, 0x00, 0xa8, 0x82, 0x00, 0x00, 0x00, 0x00, 0x53, 0xdf, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0xbe, 0x68, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0xec, 0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x4e, 0xbe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0xef, 0x23, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x7d, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x80, 0xcd, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0xc6, 0xd0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0xff, 0x5c, 0x00, 0x00,
+    0x00, 0x40, 0xe4, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0xbd, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0xbe, 0x68, 0x00, 0x00, 0x00, 0x00, 0x00, 0x59, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xcf,
+    0x00, 0x4e, 0xbe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9e, 0x7d, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x7d, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0xd3, 0x96, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x4e, 0xbe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0xd6, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x7d, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x26, 0x8e, 0xf0, 0x19, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x4e, 0xbe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0xe7, 0x31, 0x00, 0x00,
+    0x00, 0x00, 0x7d, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6e, 0xf4, 0xdc, 0xd5, 0xfd, 0xc8, 0x2d, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x4e, 0xbe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x91, 0x8b, 0x00, 0x00,
+    0x00, 0x00, 0x7d, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x1d, 0x2d, 0x0f, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x4e, 0xbe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x37, 0xe3, 0x02, 0x00,
+    0x00, 0x00, 0x7d, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x4e, 0xec, 0xb6, 0xb6, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0xdc, 0x40, 0x00,
+    0x9d, 0xb6, 0xda, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x0b, 0x23, 0x23, 0x23, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x10, 0x00,
+    0x1e, 0x23, 0x23, 0x13, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x0c, 0x00, 0x00, 0x00, 0x24, 0xae, 0xe4,
+    0xa9, 0x40, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, 0x11, 0x00, 0x00, 0x00, 0x00, 0x17, 0x98,
+    0xd1, 0xe4, 0xce, 0xa5, 0x20, 0x00, 0x02, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14,
+    0x14, 0x00, 0x00, 0x00, 0x00, 0x14, 0x14, 0x00, 0x02, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x02, 0x0e, 0x00, 0x00, 0x48, 0xb3, 0xd3, 0xe5, 0xa4, 0x3e, 0x2f, 0xa4, 0xdd, 0xdb, 0xa5,
+    0x17, 0x00, 0x00, 0x48, 0xb1, 0xd2, 0xe6, 0xcf, 0x8e, 0x09, 0x00, 0x00, 0x00, 0x00, 0x1b, 0xa3,
+    0xdf, 0xd8, 0x94, 0x0e, 0x00, 0x00, 0x49, 0xb7, 0xd7, 0xe7, 0xc5, 0x80, 0x08, 0x00, 0x00, 0x00,
+    0x00, 0x11, 0x8f, 0xcb, 0xe7, 0xd4, 0xb1, 0x37, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xab, 0x6c, 0x00, 0x00, 0x1f, 0xed, 0x72, 0x18,
+    0x55, 0xd2, 0x8a, 0x00, 0x00, 0x00, 0x56, 0xbc, 0xe7, 0xec, 0x4e, 0x00, 0x00, 0x1d, 0xe9, 0x93,
+    0x30, 0x18, 0x2f, 0xe3, 0x41, 0x00, 0x89, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb6,
+    0xb6, 0x00, 0x00, 0x00, 0x00, 0xb6, 0xb6, 0x00, 0x89, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x8d, 0x8a, 0x00, 0x00, 0x8a, 0xab, 0x24, 0x18, 0x43, 0xe0, 0xe3, 0x5f, 0x20, 0x29, 0xa4,
+    0xc8, 0x00, 0x00, 0x8a, 0xac, 0x25, 0x17, 0x35, 0xbd, 0xab, 0x00, 0x00, 0x00, 0x1b, 0xea, 0xb6,
+    0x3e, 0x4d, 0xc7, 0xd6, 0x09, 0x00, 0x8a, 0xac, 0x25, 0x1c, 0x6c, 0xd3, 0xcf, 0x07, 0x00, 0x00,
+    0x15, 0xe3, 0xc8, 0x60, 0x19, 0x29, 0xc3, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xad, 0x6c, 0x00, 0x00, 0xa5, 0x8c, 0x00, 0x00,
+    0x00, 0x46, 0xf7, 0x0d, 0x00, 0x0a, 0xf1, 0x45, 0x03, 0x01, 0x04, 0x00, 0x00, 0xa4, 0xa1, 0x00,
+    0x00, 0x00, 0x00, 0xd8, 0x41, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x33,
+    0x33, 0x00, 0x00, 0x00, 0x00, 0x33, 0x33, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x8f, 0x8a, 0x00, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x68, 0xbf, 0x00, 0x00, 0x00, 0x11,
+    0xfb, 0x1f, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x1d, 0xfa, 0x18, 0x00, 0x00, 0x9f, 0xb8, 0x00,
+    0x00, 0x00, 0x00, 0xdb, 0x78, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x03, 0xe8, 0x78, 0x00, 0x00,
+    0x9b, 0xcd, 0x00, 0x00, 0x00, 0x00, 0xad, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xad, 0x6c, 0x00, 0x00, 0xe3, 0x4b, 0x16, 0x16,
+    0x16, 0x1c, 0xf5, 0x2f, 0x00, 0x4d, 0xc5, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe9, 0x3e, 0x00,
+    0x00, 0x00, 0x00, 0xd8, 0x41, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x8f, 0x8a, 0x00, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x3d, 0xe0, 0x00, 0x00, 0x00, 0x00,
+    0xe2, 0x3a, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0xe4, 0x3a, 0x00, 0x00, 0xe4, 0x6e, 0x00,
+    0x00, 0x00, 0x00, 0x90, 0xbf, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x98, 0xc1, 0x00, 0x00,
+    0xe2, 0x79, 0x00, 0x00, 0x00, 0x00, 0xad, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xad, 0x6c, 0x00, 0x08, 0xfe, 0xdd, 0xd8, 0xd8,
+    0xd8, 0xd8, 0xd8, 0x3a, 0x00, 0x87, 0x92, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0xfe, 0x24, 0x00,
+    0x00, 0x00, 0x00, 0xd8, 0x41, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x69,
+    0x6d, 0x00, 0x00, 0x00, 0x00, 0x69, 0x6d, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x8f, 0x8a, 0x00, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x31, 0xe9, 0x00, 0x00, 0x00, 0x00,
+    0xd5, 0x45, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0xd2, 0x49, 0x00, 0x08, 0xfe, 0x2e, 0x00,
+    0x00, 0x00, 0x00, 0x50, 0xe3, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x4b, 0xe7, 0x00, 0x08,
+    0xfe, 0x2d, 0x00, 0x00, 0x00, 0x00, 0xad, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x22, 0xa8, 0xdd, 0xd8, 0x9d, 0xc7, 0x6c, 0x00, 0x00, 0xf3, 0x4f, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x8a, 0xe4, 0xc2, 0xc2, 0xc2, 0x10, 0x00, 0x00, 0xe3, 0x45, 0x00,
+    0x00, 0x00, 0x00, 0xd8, 0x41, 0x00, 0x8a, 0xcf, 0xc2, 0xe4, 0xce, 0x8e, 0x0a, 0x00, 0x00, 0x8a,
+    0x8f, 0x00, 0x00, 0x00, 0x00, 0x8a, 0x8f, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x0d, 0xad, 0x43, 0x00,
+    0x00, 0x8f, 0x8a, 0x00, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x30, 0xe9, 0x00, 0x00, 0x00, 0x00,
+    0xd4, 0x45, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0xcf, 0x49, 0x00, 0x00, 0xee, 0x5b, 0x00,
+    0x00, 0x00, 0x00, 0x7e, 0xca, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x50, 0xd2, 0x00, 0x00,
+    0xf2, 0x32, 0x00, 0x00, 0x00, 0x00, 0xad, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x24, 0xed, 0x7f, 0x28, 0x1d, 0x62, 0xe8, 0x6c, 0x00, 0x00, 0xc3, 0xa2, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x8a, 0xa2, 0x2b, 0x2b, 0x2b, 0x04, 0x00, 0x00, 0xa3, 0xaf, 0x00,
+    0x00, 0x00, 0x00, 0xd9, 0x41, 0x00, 0x8a, 0xba, 0x37, 0x18, 0x35, 0xbd, 0xae, 0x00, 0x00, 0x8a,
+    0x8f, 0x00, 0x00, 0x00, 0x00, 0x8a, 0x8f, 0x00, 0x8a, 0x8f, 0x00, 0x09, 0xbd, 0x7e, 0x00, 0x00,
+    0x00, 0x8f, 0x8a, 0x00, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x30, 0xe9, 0x00, 0x00, 0x00, 0x00,
+    0xd4, 0x45, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0xcf, 0x49, 0x00, 0x00, 0xba, 0xa5, 0x00,
+    0x00, 0x00, 0x00, 0xc8, 0x94, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x93, 0xa0, 0x00, 0x00,
+    0xc2, 0x74, 0x00, 0x00, 0x00, 0x00, 0xad, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0xa5, 0x95, 0x00, 0x00, 0x00, 0x00, 0xad, 0x6c, 0x00, 0x00, 0x46, 0xf1, 0x75, 0x1b,
+    0x00, 0x03, 0x18, 0x00, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0xe1, 0xb6,
+    0x62, 0x6e, 0xb6, 0xff, 0x41, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x1d, 0xfb, 0x18, 0x00, 0x8a,
+    0x8f, 0x00, 0x00, 0x00, 0x00, 0x8a, 0x8f, 0x00, 0x8a, 0x8f, 0x08, 0xb8, 0x85, 0x00, 0x00, 0x00,
+    0x00, 0x8f, 0x8a, 0x00, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x30, 0xe9, 0x00, 0x00, 0x00, 0x00,
+    0xd4, 0x45, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0xcf, 0x49, 0x00, 0x00, 0x36, 0xf2, 0x77,
+    0x0f, 0x18, 0x87, 0xf1, 0x1a, 0x00, 0x8a, 0xbf, 0x18, 0x00, 0x00, 0x4f, 0xf2, 0x28, 0x00, 0x00,
+    0x44, 0xeb, 0x39, 0x00, 0x00, 0x22, 0xd4, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0xe8, 0x3d, 0x00, 0x00, 0x00, 0x00, 0xad, 0x6c, 0x00, 0x00, 0x00, 0x4d, 0xdd, 0xfc,
+    0xdb, 0xf2, 0x7a, 0x00, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0b, 0x6f,
+    0x9e, 0x9c, 0x55, 0xde, 0x3d, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0xe4, 0x3a, 0x00, 0x8a,
+    0x8f, 0x00, 0x00, 0x00, 0x00, 0x8a, 0x8f, 0x00, 0x8a, 0x97, 0xb6, 0x88, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x8f, 0x8a, 0x00, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x30, 0xe9, 0x00, 0x00, 0x00, 0x00,
+    0xd4, 0x45, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0xcf, 0x49, 0x00, 0x00, 0x00, 0x48, 0xe3,
+    0xf0, 0xf7, 0xd4, 0x32, 0x00, 0x00, 0x8a, 0xd3, 0xe7, 0xd5, 0xea, 0xdd, 0x40, 0x00, 0x00, 0x00,
+    0x00, 0x56, 0xe8, 0xe4, 0xd9, 0xdf, 0xdc, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x08, 0xff, 0x25, 0x00, 0x00, 0x00, 0x00, 0xad, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10,
+    0x25, 0x09, 0x00, 0x00, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x27, 0xff, 0x1c, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0xd2, 0x49, 0x00, 0x8a,
+    0x8f, 0x00, 0x00, 0x00, 0x00, 0x8a, 0x8f, 0x00, 0x8a, 0xdf, 0xf4, 0x38, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x8f, 0x8a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x1e, 0x17, 0x00, 0x00, 0x00, 0x00, 0x8a, 0x8f, 0x02, 0x20, 0x19, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x1f, 0x1b, 0x01, 0xad, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0xed, 0x62, 0x00, 0x00, 0x00, 0x00, 0xad, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x09, 0x00,
+    0x00, 0x1f, 0x9e, 0xcc, 0x01, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0xcf, 0x49, 0x00, 0x8a,
+    0x8f, 0x00, 0x00, 0x00, 0x00, 0x8a, 0x8f, 0x00, 0x8a, 0x8f, 0x4f, 0xed, 0x48, 0x00, 0x00, 0x00,
+    0x00, 0x8f, 0x8a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xad, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0xb8, 0xb5, 0x00, 0x00, 0x00, 0x00, 0xad, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x63, 0xf2, 0xd8,
+    0xdc, 0xd3, 0x8e, 0x22, 0x00, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0xcf, 0x49, 0x00, 0x8a,
+    0x8f, 0x00, 0x00, 0x00, 0x00, 0x8a, 0x8f, 0x00, 0x8a, 0x8f, 0x00, 0x3e, 0xed, 0x3b, 0x00, 0x00,
+    0x00, 0x8e, 0x8b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xad, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x31, 0xf6, 0x7f, 0x1c, 0x00, 0x00, 0xae, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x22,
+    0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0xcf, 0x49, 0x00, 0x8a,
+    0x8f, 0x00, 0x00, 0x00, 0x00, 0x8a, 0x8e, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x52, 0xe7, 0x1d, 0x00,
+    0x00, 0x7b, 0xab, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x13, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x3f, 0xd8, 0xfc, 0xd2, 0xe0, 0xf1, 0x55, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0xcf, 0x49, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0xa6, 0x69, 0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x92, 0xbc, 0x01,
+    0x00, 0x1f, 0xdf, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x2d, 0x1a, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x1c, 0xe0, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x03, 0x13, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x03, 0xd1, 0xe1, 0x86, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x1e, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x3d, 0xac, 0xd3, 0xe8, 0xd1, 0x0b, 0x00, 0x00, 0x2a, 0x91, 0xdc, 0xe5, 0xbd, 0x0a, 0x00,
+    0x3e, 0x3e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7d, 0x59, 0x00, 0x00, 0x00, 0x00, 0xb2, 0x24, 0x00,
+    0x6b, 0x76, 0x00, 0x00, 0x00, 0x00, 0x4f, 0x86, 0x00, 0x62, 0x7e, 0x00, 0x00, 0x00, 0x01, 0xb6,
+    0x24, 0x00, 0x00, 0x00, 0x54, 0x7c, 0x00, 0x17, 0xbc, 0x26, 0x00, 0x00, 0x00, 0x92, 0x57, 0x00,
+    0x6b, 0x77, 0x00, 0x00, 0x00, 0x00, 0x6a, 0x6a, 0x00, 0x14, 0xc2, 0xc2, 0xc2, 0xc2, 0xc2, 0xaf,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x07, 0x00, 0x15, 0x34, 0x00, 0x0d, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x15, 0x1a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x1c, 0xb6, 0xb6, 0xb6,
+    0xb6, 0xb6, 0xb6, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x8a, 0xab, 0x1f, 0x13, 0x2a, 0x00, 0x00, 0x00, 0xc2, 0x66, 0x0a, 0x16, 0x44, 0x00, 0x00,
+    0x9c, 0x7d, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa4, 0x75, 0x00, 0x00, 0x00, 0x00, 0xe9, 0x30, 0x00,
+    0x4d, 0xd4, 0x00, 0x00, 0x00, 0x00, 0xa3, 0x6f, 0x00, 0x41, 0xda, 0x00, 0x00, 0x00, 0x27, 0xff,
+    0x5f, 0x00, 0x00, 0x00, 0xa5, 0x63, 0x00, 0x00, 0x7b, 0xc6, 0x03, 0x00, 0x5c, 0xd0, 0x06, 0x00,
+    0x4d, 0xd8, 0x00, 0x00, 0x00, 0x00, 0xc1, 0x51, 0x00, 0x04, 0x2b, 0x2b, 0x2b, 0x2d, 0xcd, 0x96,
+    0x00, 0x00, 0x00, 0x00, 0x11, 0xba, 0xd8, 0x35, 0x00, 0x4e, 0xbe, 0x00, 0xb0, 0xdc, 0x48, 0x00,
+    0x00, 0x00, 0x00, 0x71, 0xdc, 0xde, 0xb6, 0x21, 0x00, 0x60, 0x8a, 0x00, 0x27, 0xbc, 0x23, 0x23,
+    0x23, 0x23, 0x53, 0xa0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0xfa, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x9c, 0x7d, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa4, 0x75, 0x00, 0x00, 0x00, 0x00, 0xe9, 0x30, 0x00,
+    0x0b, 0xf6, 0x1c, 0x00, 0x00, 0x02, 0xe8, 0x25, 0x00, 0x06, 0xf2, 0x1d, 0x00, 0x00, 0x6a, 0xe9,
+    0x9d, 0x00, 0x00, 0x01, 0xe5, 0x1b, 0x00, 0x00, 0x03, 0xc6, 0x76, 0x1c, 0xe7, 0x2d, 0x00, 0x00,
+    0x0b, 0xf5, 0x26, 0x00, 0x00, 0x07, 0xf4, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xc1, 0x05,
+    0x00, 0x00, 0x00, 0x00, 0x78, 0xaa, 0x00, 0x00, 0x00, 0x4e, 0xbe, 0x00, 0x00, 0x46, 0xdb, 0x00,
+    0x00, 0x00, 0x0a, 0xdb, 0x0c, 0x02, 0x69, 0xe5, 0xc0, 0xdb, 0x21, 0x00, 0x27, 0xb1, 0x00, 0x00,
+    0x00, 0x00, 0x38, 0xa0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xd4, 0xa4, 0x1b, 0x00, 0x00, 0x00, 0x00,
+    0x9c, 0xe0, 0xc2, 0xc2, 0xbf, 0x00, 0x00, 0xa4, 0x75, 0x00, 0x00, 0x00, 0x00, 0xe9, 0x30, 0x00,
+    0x00, 0xad, 0x6b, 0x00, 0x00, 0x3c, 0xce, 0x00, 0x00, 0x00, 0xb1, 0x67, 0x00, 0x00, 0xb1, 0x67,
+    0xe4, 0x01, 0x00, 0x31, 0xd1, 0x00, 0x00, 0x00, 0x00, 0x25, 0xec, 0xc9, 0x75, 0x00, 0x00, 0x00,
+    0x00, 0xa8, 0x7b, 0x00, 0x00, 0x3e, 0xcc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x41, 0xe4, 0x19, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x9e, 0x70, 0x00, 0x00, 0x00, 0x4e, 0xbe, 0x00, 0x00, 0x08, 0xfe, 0x06,
+    0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x08, 0x2e, 0x0a, 0x00, 0x00, 0x27, 0xb1, 0x00, 0x00,
+    0x00, 0x00, 0x38, 0xa0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16, 0x99, 0xf2, 0xa7, 0x26, 0x00, 0x00,
+    0x9c, 0x93, 0x2b, 0x2b, 0x2a, 0x00, 0x00, 0xa4, 0x75, 0x00, 0x00, 0x00, 0x00, 0xe9, 0x30, 0x00,
+    0x00, 0x56, 0xc1, 0x00, 0x00, 0x91, 0x75, 0x00, 0x00, 0x00, 0x6a, 0xb5, 0x00, 0x0a, 0xe7, 0x08,
+    0xd7, 0x32, 0x00, 0x7d, 0x88, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb2, 0xfc, 0x24, 0x00, 0x00, 0x00,
+    0x00, 0x4d, 0xd9, 0x01, 0x00, 0x81, 0x8a, 0x00, 0x00, 0x00, 0x00, 0x13, 0xe4, 0x44, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0xa4, 0x69, 0x00, 0x00, 0x00, 0x4e, 0xbe, 0x00, 0x00, 0x01, 0xff, 0x0d,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x27, 0xb1, 0x00, 0x00,
+    0x00, 0x00, 0x38, 0xa0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x15, 0x84, 0xf1, 0x2b, 0x00,
+    0x9c, 0x7d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9b, 0x84, 0x00, 0x00, 0x00, 0x00, 0xe9, 0x30, 0x00,
+    0x00, 0x08, 0xec, 0x22, 0x05, 0xe4, 0x19, 0x00, 0x00, 0x00, 0x17, 0xf4, 0x11, 0x51, 0xa7, 0x00,
+    0x84, 0x84, 0x00, 0xd1, 0x31, 0x00, 0x00, 0x00, 0x00, 0x50, 0xd4, 0x80, 0xc8, 0x04, 0x00, 0x00,
+    0x00, 0x04, 0xe1, 0x40, 0x00, 0xc9, 0x3c, 0x00, 0x00, 0x00, 0x00, 0xab, 0x8e, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0xa4, 0x68, 0x00, 0x00, 0x00, 0x4e, 0xbe, 0x00, 0x00, 0x00, 0xff, 0x0d,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x27, 0xb1, 0x00, 0x00,
+    0x00, 0x00, 0x38, 0xa0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa6, 0x7f, 0x00,
+    0x9c, 0x7d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0xab, 0x00, 0x00, 0x00, 0x00, 0xe9, 0x30, 0x00,
+    0x00, 0x00, 0x8f, 0x86, 0x52, 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0xad, 0x66, 0xa8, 0x50, 0x00,
+    0x2d, 0xdc, 0x2c, 0xcd, 0x00, 0x00, 0x00, 0x00, 0x16, 0xe4, 0x2f, 0x02, 0xc3, 0x7d, 0x00, 0x00,
+    0x00, 0x00, 0x77, 0xad, 0x1a, 0xdd, 0x01, 0x00, 0x00, 0x00, 0x57, 0xda, 0x08, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0xa4, 0x64, 0x00, 0x00, 0x00, 0x4e, 0xbe, 0x00, 0x00, 0x00, 0xfc, 0x0d,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x27, 0xb1, 0x00, 0x00,
+    0x00, 0x00, 0x38, 0xa0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x14, 0x00, 0x00, 0x10, 0xe4, 0x39, 0x00,
+    0x9c, 0x7d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x27, 0xf7, 0x3f, 0x00, 0x00, 0x02, 0xeb, 0x30, 0x00,
+    0x00, 0x00, 0x25, 0xea, 0xc8, 0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x49, 0xd3, 0xe2, 0x06, 0x00,
+    0x00, 0xce, 0xc6, 0x6a, 0x00, 0x00, 0x00, 0x00, 0x9f, 0x81, 0x00, 0x00, 0x24, 0xef, 0x27, 0x00,
+    0x00, 0x00, 0x10, 0xed, 0x92, 0x86, 0x00, 0x00, 0x00, 0x0c, 0xe3, 0x45, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0xc0, 0x49, 0x00, 0x00, 0x00, 0x4e, 0xbe, 0x00, 0x00, 0x00, 0xe3, 0x26,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x27, 0xb1, 0x00, 0x00,
+    0x00, 0x00, 0x38, 0xa0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x8a, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2d, 0xf2, 0xd7, 0xd5, 0xdf, 0x8c, 0x00, 0x00,
+    0x9b, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0xeb, 0xe6, 0xdc, 0xf1, 0xda, 0x22, 0x00,
+    0x00, 0x00, 0x00, 0xae, 0xcf, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0xe1, 0x8d, 0x00, 0x00,
+    0x00, 0x68, 0xf6, 0x10, 0x00, 0x00, 0x00, 0x3b, 0xdc, 0x08, 0x00, 0x00, 0x00, 0x7e, 0xb3, 0x00,
+    0x00, 0x00, 0x00, 0x7f, 0xff, 0x2c, 0x00, 0x00, 0x00, 0x58, 0xfe, 0xee, 0xee, 0xee, 0xee, 0xee,
+    0x0c, 0x00, 0x17, 0x8c, 0xd4, 0x09, 0x00, 0x00, 0x00, 0x4e, 0xbe, 0x00, 0x00, 0x00, 0x76, 0xc9,
+    0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x27, 0xb1, 0x00, 0x00,
+    0x00, 0x00, 0x38, 0xa0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x20, 0x21, 0x00, 0x00, 0x00, 0x00,
+    0x8a, 0x8e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x1d, 0x08, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x53, 0xc7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x26, 0xc3, 0xaf, 0x02, 0x00, 0x00, 0x00, 0x4e, 0xbe, 0x00, 0x00, 0x00, 0x4e, 0xe4,
+    0x67, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x27, 0xb1, 0x00, 0x00,
+    0x00, 0x00, 0x38, 0xa0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x5c, 0xd5, 0x0b, 0x00, 0x0f, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x17, 0xd9, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x02, 0xd3, 0x3d, 0x00, 0x00, 0x00, 0x4e, 0xbe, 0x00, 0x00, 0x00, 0xd7, 0x3b,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x27, 0xb1, 0x00, 0x00,
+    0x00, 0x00, 0x38, 0xa0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x08, 0xb5, 0xef, 0xd9, 0xec, 0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0xc2, 0xc7, 0xe5, 0x65, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0xa5, 0x60, 0x00, 0x00, 0x00, 0x4e, 0xbe, 0x00, 0x00, 0x00, 0xf9, 0x0e,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x27, 0xb1, 0x00, 0x00,
+    0x00, 0x00, 0x38, 0xa0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x14, 0x24, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x14, 0x25, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0xa4, 0x68, 0x00, 0x00, 0x00, 0x4e, 0xbe, 0x00, 0x00, 0x00, 0xff, 0x0d,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x27, 0xf3, 0xd8, 0xd8,
+    0xd8, 0xd8, 0xe1, 0xa0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0xa4, 0x68, 0x00, 0x00, 0x00, 0x4e, 0xbe, 0x00, 0x00, 0x00, 0xff, 0x0d,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0xa1, 0x6d, 0x00, 0x00, 0x00, 0x4e, 0xbe, 0x00, 0x00, 0x05, 0xff, 0x09,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x87, 0x8f, 0x00, 0x00, 0x00, 0x4e, 0xbe, 0x00, 0x00, 0x2a, 0xeb, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x24, 0xe1, 0xa3, 0x26, 0x00, 0x4e, 0xbe, 0x00, 0x80, 0xd9, 0x71, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x3c, 0x16, 0x00, 0x0b, 0x1a, 0x00, 0x3e, 0x20, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+];
+
diff --git a/gfx/wr/webrender/src/debug_item.rs b/gfx/wr/webrender/src/debug_item.rs
new file mode 100644
index 0000000000..1efea167ff
--- /dev/null
+++ b/gfx/wr/webrender/src/debug_item.rs
@@ -0,0 +1,26 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{units::*, ColorF};
+
+#[cfg_attr(feature = "capture", derive(Serialize))] // Serialize only with the "capture" feature.
+#[cfg_attr(feature = "replay", derive(Deserialize))] // Deserialize only with the "replay" feature.
+pub enum DebugItem { // One debug item: a text message or a two-colour rectangle.
+    Text { // A string with a colour and a position.
+        msg: String, // The text itself.
+        color: ColorF, // Colour for the text.
+        position: DevicePoint, // A `DevicePoint`; which anchor it denotes is not shown here.
+    },
+    Rect { // A `DeviceRect` carrying two colours.
+        outer_color: ColorF, // NOTE(review): presumably the outline colour -- confirm at draw site.
+        inner_color: ColorF, // NOTE(review): presumably the fill colour -- confirm at draw site.
+        rect: DeviceRect, // Extent of the rectangle.
+    },
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))] // Serialize-only; no Deserialize derive here.
+pub struct DebugMessage { // A message string paired with a timestamp.
+    pub msg: String, // Message text.
+    pub timestamp: u64, // NOTE(review): unit and epoch are not visible here -- confirm at producer.
+}
diff --git a/gfx/wr/webrender/src/device/gl.rs b/gfx/wr/webrender/src/device/gl.rs
new file mode 100644
index 0000000000..0295cde496
--- /dev/null
+++ b/gfx/wr/webrender/src/device/gl.rs
@@ -0,0 +1,4796 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use super::super::shader_source::{OPTIMIZED_SHADERS, UNOPTIMIZED_SHADERS};
+use api::{ImageDescriptor, ImageFormat, Parameter, BoolParameter, IntParameter, ImageRendering};
+use api::{MixBlendMode, ImageBufferKind, VoidPtrToSizeFn};
+use api::{CrashAnnotator, CrashAnnotation, CrashAnnotatorGuard};
+use api::units::*;
+use euclid::default::Transform3D;
+use gleam::gl;
+use crate::render_api::MemoryReport;
+use crate::internal_types::{FastHashMap, RenderTargetInfo, Swizzle, SwizzleSettings};
+use crate::util::round_up_to_multiple;
+use crate::profiler;
+use log::Level;
+use smallvec::SmallVec;
+use std::{
+    borrow::Cow,
+    cell::{Cell, RefCell},
+    cmp,
+    collections::hash_map::Entry,
+    marker::PhantomData,
+    mem,
+    num::NonZeroUsize,
+    os::raw::c_void,
+    ops::Add,
+    path::PathBuf,
+    ptr,
+    rc::Rc,
+    slice,
+    sync::Arc,
+    thread,
+    time::Duration,
+};
+use webrender_build::shader::{
+    ProgramSourceDigest, ShaderKind, ShaderVersion, build_shader_main_string,
+    build_shader_prefix_string, do_build_shader_string, shader_source_from_file,
+};
+use malloc_size_of::MallocSizeOfOps;
+
+/// Sequence number for frames, as tracked by the device layer.
+#[derive(Debug, Copy, Clone, PartialEq, Ord, Eq, PartialOrd)] // Compares like the inner usize.
+#[cfg_attr(feature = "capture", derive(Serialize))] // Serialize only with "capture".
+#[cfg_attr(feature = "replay", derive(Deserialize))] // Deserialize only with "replay".
+pub struct GpuFrameId(usize); // Private field: construct through `GpuFrameId::new`.
+
+impl GpuFrameId { // Constructor for the private-field newtype.
+    pub fn new(value: usize) -> Self { // Wraps `value` unchanged.
+        GpuFrameId(value)
+    }
+}
+
+impl Add<usize> for GpuFrameId { // Enables `frame_id + n` with a plain usize offset.
+    type Output = GpuFrameId; // The sum is again a frame id.
+
+    fn add(self, other: usize) -> GpuFrameId { // Returns the id advanced by `other`.
+        GpuFrameId(self.0 + other) // Plain `+`: overflow follows the build's overflow-check setting.
+    }
+}
+
+pub struct TextureSlot(pub usize); // Texture slot index; public usize newtype with no derives.
+
+// Slot used where a texture must be bound temporarily and any slot will do.
+const DEFAULT_TEXTURE: TextureSlot = TextureSlot(0); // Slot 0.
+
+#[repr(u32)] // Stored as u32, so a variant can be cast straight to its GL constant.
+pub enum DepthFunction { // Depth comparison modes; each discriminant is the GL constant itself.
+    Always = gl::ALWAYS,
+    Less = gl::LESS,
+    LessEqual = gl::LEQUAL, // "Less or equal" maps to GL's LEQUAL.
+}
+
+#[repr(u32)] // No explicit discriminants, so the variants are numbered 0, 1, 2 in order.
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))] // Serialize only with "capture".
+#[cfg_attr(feature = "replay", derive(Deserialize))] // Deserialize only with "replay".
+pub enum TextureFilter { // Texture sampling filter choices.
+    Nearest,
+    Linear,
+    Trilinear, // NOTE(review): presumably linear filtering across mip levels -- confirm at use.
+}
+
+/// A structure defining a particular workflow of texture transfers.
+#[derive(Clone, Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))] // Serialize only with "capture".
+#[cfg_attr(feature = "replay", derive(Deserialize))] // Deserialize only with "replay".
+pub struct TextureFormatPair<T> { // `T` is the format type; both fields share it.
+    /// Format the GPU natively stores texels in.
+    pub internal: T,
+    /// Format we expect the users to provide the texels in.
+    pub external: T,
+}
+
+impl<T: Copy> From<T> for TextureFormatPair<T> { // `Copy` is needed: one value fills both fields.
+    fn from(value: T) -> Self { // Builds a pair whose internal and external formats are equal.
+        TextureFormatPair {
+            internal: value,
+            external: value,
+        }
+    }
+}
+
+#[derive(Debug)]
+pub enum VertexAttributeKind { // Component type of an attribute; sizes come from `size_in_bytes`.
+    F32, // Bound as GL FLOAT, not normalized.
+    U8Norm, // Bound as GL UNSIGNED_BYTE with normalization on.
+    U16Norm, // Bound as GL UNSIGNED_SHORT with normalization on.
+    I32, // Bound as GL INT through the integer attribute pointer call.
+    U16, // Bound as GL UNSIGNED_SHORT through the integer attribute pointer call.
+}
+
+#[derive(Debug)]
+pub struct VertexAttribute { // One attribute: name, component count and component type.
+    pub name: &'static str, // NOTE(review): presumably the shader-side attribute name -- confirm.
+    pub count: u32, // Number of components; passed as the GL size argument when binding.
+    pub kind: VertexAttributeKind, // Type of each component.
+}
+
+#[derive(Debug)]
+pub struct VertexDescriptor { // Two static attribute lists: per-vertex and per-instance.
+    pub vertex_attributes: &'static [VertexAttribute],
+    pub instance_attributes: &'static [VertexAttribute], // Sizes are summed by `instance_stride`.
+}
+
+enum FBOTarget { // Private enum naming a framebuffer target: read or draw.
+    Read, // NOTE(review): presumably GL_READ_FRAMEBUFFER -- confirm at the bind site.
+    Draw, // NOTE(review): presumably GL_DRAW_FRAMEBUFFER -- confirm at the bind site.
+}
+
+/// Method of uploading texel data from CPU to GPU.
+#[derive(Debug, Clone)]
+pub enum UploadMethod {
+    /// Call `glTexSubImage` directly with the CPU data pointer.
+    Immediate,
+    /// Accumulate the changes in a pixel buffer object (PBO) first, then transfer to a texture.
+    PixelBuffer(VertexUsageHint), // NOTE(review): presumably the usage hint for the PBO -- confirm.
+}
+
+/// Plain old data that can be used to initialize a texture.
+pub unsafe trait Texel: Copy {} // Marker trait (no methods); `unsafe` puts the promise on implementors.
+unsafe impl Texel for u8 {} // Bytes qualify.
+unsafe impl Texel for f32 {} // 32-bit floats qualify.
+
+/// Returns the size in bytes of a depth target with the given dimensions.
+fn depth_target_size_in_bytes(dimensions: &DeviceIntSize) -> usize {
+    // DEPTH24 textures generally reserve 3 bytes for depth and 1 byte
+    // for stencil, so we measure them as 32 bits.
+    let pixels = dimensions.width * dimensions.height; // Product is taken before the usize cast.
+    (pixels as usize) * 4 // 4 bytes per pixel, per the note above.
+}
+
+pub fn get_gl_target(target: ImageBufferKind) -> gl::GLuint { // Buffer kind -> GL texture target.
+    match target { // No wildcard arm: every `ImageBufferKind` variant is listed.
+        ImageBufferKind::Texture2D => gl::TEXTURE_2D,
+        ImageBufferKind::TextureRect => gl::TEXTURE_RECTANGLE,
+        ImageBufferKind::TextureExternal => gl::TEXTURE_EXTERNAL_OES,
+    }
+}
+
+pub fn from_gl_target(target: gl::GLuint) -> ImageBufferKind { // Inverse of `get_gl_target`.
+    match target {
+        gl::TEXTURE_2D => ImageBufferKind::Texture2D,
+        gl::TEXTURE_RECTANGLE => ImageBufferKind::TextureRect,
+        gl::TEXTURE_EXTERNAL_OES => ImageBufferKind::TextureExternal,
+        _ => panic!("Unexpected target {:?}", target), // Any other GL value panics.
+    }
+}
+
+fn supports_extension(extensions: &[String], extension: &str) -> bool { // True if listed.
+    extensions.iter().any(|s| s == extension) // Exact, case-sensitive whole-string comparison.
+}
+
+fn get_shader_version(gl: &dyn gl::Gl) -> ShaderVersion { // GL API type -> matching ShaderVersion.
+    match gl.get_type() { // Both `GlType` variants are handled; no wildcard arm.
+        gl::GlType::Gl => ShaderVersion::Gl,
+        gl::GlType::Gles => ShaderVersion::Gles,
+    }
+}
+
+// Get an unoptimized shader string by name. With `base_path`, the source is read from
+// `<base_path>/<shader_name>.glsl`; otherwise it comes from the built-in `UNOPTIMIZED_SHADERS`.
+pub fn get_unoptimized_shader_source(shader_name: &str, base_path: Option<&PathBuf>) -> Cow<'static, str> {
+    if let Some(ref base) = base_path {
+        let shader_path = base.join(&format!("{}.glsl", shader_name));
+        Cow::Owned(shader_source_from_file(&shader_path)) // Loaded text is owned by the Cow.
+    } else {
+        Cow::Borrowed(
+            UNOPTIMIZED_SHADERS
+            .get(shader_name)
+            .expect("Shader not found") // Panics when no built-in shader has this name.
+            .source
+        )
+    }
+}
+
+pub trait FileWatcherHandler: Send { // Implementors must be `Send`.
+    fn file_changed(&self, path: PathBuf); // Receives the path by value; the caller is not shown here.
+}
+
+impl VertexAttributeKind {
+    fn size_in_bytes(&self) -> u32 { // Size in bytes of a single component of this kind.
+        match *self { // No wildcard arm: a new variant must be given a size here.
+            VertexAttributeKind::F32 => 4,
+            VertexAttributeKind::U8Norm => 1,
+            VertexAttributeKind::U16Norm => 2,
+            VertexAttributeKind::I32 => 4,
+            VertexAttributeKind::U16 => 2,
+        }
+    }
+}
+
+impl VertexAttribute {
+    fn size_in_bytes(&self) -> u32 { // Total bytes for this attribute: components x component size.
+        self.count * self.kind.size_in_bytes()
+    }
+
+    fn bind_to_vao( // Enables attribute `attr_index` and describes its layout to GL.
+        &self,
+        attr_index: gl::GLuint, // Attribute location to configure.
+        divisor: gl::GLuint, // Forwarded unchanged to `vertex_attrib_divisor`.
+        stride: gl::GLint, // Byte stride between consecutive elements, as given by the caller.
+        offset: gl::GLuint, // Byte offset of this attribute, as given by the caller.
+        gl: &dyn gl::Gl,
+    ) {
+        gl.enable_vertex_attrib_array(attr_index);
+        gl.vertex_attrib_divisor(attr_index, divisor);
+
+        match self.kind { // Float-style kinds use `vertex_attrib_pointer`; integer kinds the `_i_` form.
+            VertexAttributeKind::F32 => {
+                gl.vertex_attrib_pointer(
+                    attr_index,
+                    self.count as gl::GLint,
+                    gl::FLOAT,
+                    false, // Not normalized.
+                    stride,
+                    offset,
+                );
+            }
+            VertexAttributeKind::U8Norm => {
+                gl.vertex_attrib_pointer(
+                    attr_index,
+                    self.count as gl::GLint,
+                    gl::UNSIGNED_BYTE,
+                    true, // Normalized.
+                    stride,
+                    offset,
+                );
+            }
+            VertexAttributeKind::U16Norm => {
+                gl.vertex_attrib_pointer(
+                    attr_index,
+                    self.count as gl::GLint,
+                    gl::UNSIGNED_SHORT,
+                    true, // Normalized.
+                    stride,
+                    offset,
+                );
+            }
+            VertexAttributeKind::I32 => {
+                gl.vertex_attrib_i_pointer( // Integer form: takes no normalization flag.
+                    attr_index,
+                    self.count as gl::GLint,
+                    gl::INT,
+                    stride,
+                    offset,
+                );
+            }
+            VertexAttributeKind::U16 => {
+                gl.vertex_attrib_i_pointer( // Integer form: takes no normalization flag.
+                    attr_index,
+                    self.count as gl::GLint,
+                    gl::UNSIGNED_SHORT,
+                    stride,
+                    offset,
+                );
+            }
+        }
+    }
+}
+
+impl VertexDescriptor {
+    /// Sum of the byte sizes of all instance attributes.
+    fn instance_stride(&self) -> u32 {
+        self.instance_attributes
+            .iter()
+            .map(VertexAttribute::size_in_bytes)
+            .sum()
+    }
+
+    /// Binds `vbo` and then binds each of `attributes` to consecutive
+    /// attribute indices starting at `start_index`, tightly packed: the
+    /// stride is the sum of all attribute sizes and each attribute's offset
+    /// is the running total of the sizes preceding it.
+    fn bind_attributes(
+        attributes: &[VertexAttribute],
+        start_index: usize,
+        divisor: u32,
+        gl: &dyn gl::Gl,
+        vbo: VBOId,
+    ) {
+        vbo.bind(gl);
+
+        let packed_stride: u32 = attributes
+            .iter()
+            .map(VertexAttribute::size_in_bytes)
+            .sum();
+
+        let mut byte_offset = 0;
+        for (slot, attribute) in attributes.iter().enumerate() {
+            let attr_index = (start_index + slot) as gl::GLuint;
+            attribute.bind_to_vao(attr_index, divisor, packed_stride as gl::GLint, byte_offset, gl);
+            byte_offset += attribute.size_in_bytes();
+        }
+    }
+
+    /// Binds the per-vertex attributes against `main` (divisor 0) and, if
+    /// there are any, the per-instance attributes against `instance` using
+    /// `instance_divisor`. Instance attribute indices follow directly after
+    /// the vertex attribute indices.
+    fn bind(&self, gl: &dyn gl::Gl, main: VBOId, instance: VBOId, instance_divisor: u32) {
+        Self::bind_attributes(self.vertex_attributes, 0, 0, gl, main);
+
+        if self.instance_attributes.is_empty() {
+            return;
+        }
+
+        let first_instance_index = self.vertex_attributes.len();
+        Self::bind_attributes(
+            self.instance_attributes,
+            first_instance_index,
+            instance_divisor,
+            gl,
+            instance,
+        );
+    }
+}
+
+impl VBOId {
+    /// Binds this buffer to `GL_ARRAY_BUFFER`.
+    fn bind(&self, gl: &dyn gl::Gl) {
+        let VBOId(buffer) = *self;
+        gl.bind_buffer(gl::ARRAY_BUFFER, buffer);
+    }
+}
+
+impl IBOId {
+    /// Binds this buffer to `GL_ELEMENT_ARRAY_BUFFER`.
+    fn bind(&self, gl: &dyn gl::Gl) {
+        let IBOId(buffer) = *self;
+        gl.bind_buffer(gl::ELEMENT_ARRAY_BUFFER, buffer);
+    }
+}
+
+impl FBOId {
+    /// Binds this framebuffer to the read or draw framebuffer binding point,
+    /// as selected by `target`.
+    fn bind(&self, gl: &dyn gl::Gl, target: FBOTarget) {
+        let FBOId(framebuffer) = *self;
+        let binding_point = match target {
+            FBOTarget::Draw => gl::DRAW_FRAMEBUFFER,
+            FBOTarget::Read => gl::READ_FRAMEBUFFER,
+        };
+        gl.bind_framebuffer(binding_point, framebuffer);
+    }
+}
+
+/// A vertex buffer paired with the attribute layout used to interpret it.
+///
+/// Produced by `VBO::stream_with`.
+pub struct Stream<'a> {
+    /// Attribute layout of the buffer's elements.
+    attributes: &'a [VertexAttribute],
+    /// Id of the underlying buffer object.
+    vbo: VBOId,
+}
+
+/// A GL buffer object typed by the element type `V` it holds.
+///
+/// The `Drop` impl asserts (in debug builds) that `id` has been reset to 0
+/// before the value is dropped.
+pub struct VBO<V> {
+    /// GL buffer id; expected to be 0 by the time the value is dropped.
+    id: gl::GLuint,
+    /// GL enum recorded for this buffer.
+    // NOTE(review): presumably the buffer binding target; confirm at the creation site.
+    target: gl::GLenum,
+    /// Count reported by `allocated_count()`.
+    // NOTE(review): presumably in elements of `V`, not bytes; confirm.
+    allocated_count: usize,
+    /// Ties the buffer to its element type without storing a `V`.
+    marker: PhantomData<V>,
+}
+
+impl<V> VBO<V> {
+    /// Returns the allocated count recorded for this buffer.
+    pub fn allocated_count(&self) -> usize {
+        self.allocated_count
+    }
+
+    /// Pairs this buffer with `attributes`, producing a `Stream`.
+    ///
+    /// In debug builds this asserts that the summed byte size of `attributes`
+    /// equals `size_of::<V>()`.
+    pub fn stream_with<'a>(&self, attributes: &'a [VertexAttribute]) -> Stream<'a> {
+        debug_assert_eq!(
+            mem::size_of::<V>(),
+            attributes
+                .iter()
+                .map(|attribute| attribute.size_in_bytes() as usize)
+                .sum::<usize>()
+        );
+        let vbo = VBOId(self.id);
+        Stream { attributes, vbo }
+    }
+}
+
+/// Debug-only leak check: unless the thread is already panicking, the buffer
+/// id must have been reset to 0 before the `VBO` is dropped.
+impl<T> Drop for VBO<T> {
+    fn drop(&mut self) {
+        debug_assert!(thread::panicking() || self.id == 0);
+    }
+}
+
+/// A texture referenced by raw GL id, together with the UV rectangle and
+/// image-rendering mode to use with it.
+#[cfg_attr(feature = "replay", derive(Clone))]
+#[derive(Debug)]
+pub struct ExternalTexture {
+    /// Native GL texture id.
+    id: gl::GLuint,
+    /// Native GL texture target (the result of `get_gl_target` in `new`).
+    target: gl::GLuint,
+    /// UV rectangle returned by `get_uv_rect()`.
+    uv_rect: TexelRect,
+    /// Image-rendering mode associated with this texture.
+    image_rendering: ImageRendering,
+}
+
+impl ExternalTexture {
+    /// Creates an external texture from a raw GL texture `id`, converting
+    /// `target` to its GL target via `get_gl_target`.
+    pub fn new(
+        id: u32,
+        target: ImageBufferKind,
+        uv_rect: TexelRect,
+        image_rendering: ImageRendering,
+    ) -> Self {
+        let gl_target = get_gl_target(target);
+        Self {
+            id,
+            target: gl_target,
+            uv_rect,
+            image_rendering,
+        }
+    }
+
+    /// Returns the raw GL texture id.
+    #[cfg(feature = "replay")]
+    pub fn internal_id(&self) -> gl::GLuint {
+        self.id
+    }
+
+    /// Returns the UV rectangle supplied at construction.
+    pub fn get_uv_rect(&self) -> TexelRect {
+        self.uv_rect
+    }
+}
+
+// Bit flags attached to a `Texture`; readable and writable through
+// `Texture::flags()` and `Texture::flags_mut()`. Defaults to no flags set.
+bitflags! {
+    #[derive(Default)]
+    pub struct TextureFlags: u32 {
+        /// This texture corresponds to one of the shared texture caches.
+        const IS_SHARED_TEXTURE_CACHE = 1 << 0;
+    }
+}
+
+/// WebRender interface to an OpenGL texture.
+///
+/// Because freeing a texture requires various device handles that are not
+/// reachable from this struct, manual destruction via `Device` is required.
+/// Our `Drop` implementation asserts that this has happened.
+#[derive(Debug)]
+pub struct Texture {
+    /// Native GL texture id. Must be 0 by the time the texture is dropped.
+    id: gl::GLuint,
+    /// Native GL texture target.
+    target: gl::GLuint,
+    /// Pixel format of the texture.
+    format: ImageFormat,
+    /// Dimensions of the texture.
+    size: DeviceIntSize,
+    /// Filter returned by `get_filter()`.
+    filter: TextureFilter,
+    /// Flags describing how this texture is used.
+    flags: TextureFlags,
+    /// An internally mutable swizzling state that may change between batches.
+    active_swizzle: Cell<Swizzle>,
+    /// Framebuffer Object allowing this texture to be rendered to.
+    ///
+    /// Empty if this texture is not used as a render target or if a depth buffer is needed.
+    fbo: Option<FBOId>,
+    /// Same as the above, but with a depth buffer attached.
+    ///
+    /// FBOs are cheap to create but expensive to reconfigure (since doing so
+    /// invalidates framebuffer completeness caching). Moreover, rendering with
+    /// a depth buffer attached but the depth write+test disabled relies on the
+    /// driver to optimize it out of the rendering pass, which most drivers
+    /// probably do but, according to jgilbert, is best not to rely on.
+    ///
+    /// So we lazily create a second FBO with depth. It is `None` if this
+    /// texture is not used as a render target _or_ if it is, but the depth
+    /// buffer has never been requested.
+    ///
+    /// Note that we always fill fbo, and then lazily create fbo_with_depth
+    /// when needed. We could make both lazy (i.e. render targets would have one
+    /// or the other, but not both, unless they were actually used in both
+    /// configurations). But that would complicate a lot of logic in this module,
+    /// and FBOs are cheap enough to create.
+    fbo_with_depth: Option<FBOId>,
+    /// Id of the most recent GPU frame in which this texture was used.
+    last_frame_used: GpuFrameId,
+}
+
+impl Texture {
+    /// Returns the dimensions of the texture.
+    pub fn get_dimensions(&self) -> DeviceIntSize {
+        self.size
+    }
+
+    /// Returns the pixel format of the texture.
+    pub fn get_format(&self) -> ImageFormat {
+        self.format
+    }
+
+    /// Returns the filter recorded for the texture.
+    pub fn get_filter(&self) -> TextureFilter {
+        self.filter
+    }
+
+    /// Returns the texture's GL target converted back to an `ImageBufferKind`.
+    pub fn get_target(&self) -> ImageBufferKind {
+        let gl_target = self.target;
+        from_gl_target(gl_target)
+    }
+
+    /// Returns true if an FBO with a depth buffer has been created for this
+    /// texture.
+    pub fn supports_depth(&self) -> bool {
+        self.fbo_with_depth.is_some()
+    }
+
+    /// Returns the id of the last frame in which this texture was used.
+    pub fn last_frame_used(&self) -> GpuFrameId {
+        self.last_frame_used
+    }
+
+    /// Returns true if this texture was last used in exactly `frame_id`.
+    pub fn used_in_frame(&self, frame_id: GpuFrameId) -> bool {
+        self.last_frame_used == frame_id
+    }
+
+    /// Returns true if this texture has an FBO, i.e. can be rendered to.
+    pub fn is_render_target(&self) -> bool {
+        self.fbo.is_some()
+    }
+
+    /// Returns true if this texture was used within `threshold` frames of
+    /// the current frame.
+    pub fn used_recently(&self, current_frame_id: GpuFrameId, threshold: usize) -> bool {
+        let newest_covered_frame = self.last_frame_used + threshold;
+        newest_covered_frame >= current_frame_id
+    }
+
+    /// Returns the flags for this texture.
+    pub fn flags(&self) -> &TextureFlags {
+        &self.flags
+    }
+
+    /// Returns a mutable borrow of the flags for this texture.
+    pub fn flags_mut(&mut self) -> &mut TextureFlags {
+        &mut self.flags
+    }
+
+    /// Returns the number of bytes (generally in GPU memory) that this texture
+    /// consumes: bytes-per-pixel times width times height.
+    pub fn size_in_bytes(&self) -> usize {
+        let bytes_per_pixel = self.format.bytes_per_pixel() as usize;
+        let pixel_width = self.size.width as usize;
+        let pixel_height = self.size.height as usize;
+        bytes_per_pixel * pixel_width * pixel_height
+    }
+
+    /// Converts this texture into an `ExternalTexture` that takes over its
+    /// GL id, with a UV rect covering the whole texture and
+    /// `ImageRendering::Auto`.
+    #[cfg(feature = "replay")]
+    pub fn into_external(mut self) -> ExternalTexture {
+        // TODO(gw): Support custom UV rect for external textures during captures
+        let uv_rect = TexelRect::new(
+            0.0,
+            0.0,
+            self.size.width as f32,
+            self.size.height as f32,
+        );
+        let id = self.id;
+        // The id now belongs to the external texture; zero ours so the `Drop`
+        // assertion does not complain.
+        self.id = 0;
+        ExternalTexture {
+            id,
+            target: self.target,
+            uv_rect,
+            image_rendering: ImageRendering::Auto,
+        }
+    }
+}
+
+/// Debug-only leak check: unless the thread is already panicking, the texture
+/// id must have been reset to 0 before the `Texture` is dropped.
+impl Drop for Texture {
+    fn drop(&mut self) {
+        debug_assert!(thread::panicking() || self.id == 0);
+    }
+}
+
+/// A GL shader program plus the information needed to (re)build it.
+///
+/// The `Drop` impl asserts (in debug builds) that `id` has been reset to 0.
+pub struct Program {
+    /// GL program id; expected to be 0 by the time the value is dropped.
+    id: gl::GLuint,
+    // NOTE(review): the three `u_*` fields are presumably uniform locations
+    // for the correspondingly named shader uniforms; confirm where they are set.
+    u_transform: gl::GLint,
+    u_mode: gl::GLint,
+    u_texture_size: gl::GLint,
+    /// Name, features and digest describing the program's source.
+    source_info: ProgramSourceInfo,
+    /// Value reported by `is_initialized()`.
+    is_initialized: bool,
+}
+
+impl Program {
+    /// Returns the program's `is_initialized` flag.
+    pub fn is_initialized(&self) -> bool {
+        self.is_initialized
+    }
+}
+
+/// Debug-only leak check for programs.
+impl Drop for Program {
+    fn drop(&mut self) {
+        // A non-zero id at this point means the program was never released,
+        // unless we are unwinding from a panic anyway.
+        debug_assert!(
+            self.id == 0 || thread::panicking(),
+            "renderer::deinit not called"
+        );
+    }
+}
+
+/// A vertex array object tracked only by its GL id.
+///
+/// The `Drop` impl asserts (in debug builds) that `id` has been reset to 0.
+pub struct CustomVAO {
+    /// GL id; expected to be 0 by the time the value is dropped.
+    id: gl::GLuint,
+}
+
+/// Debug-only leak check for custom VAOs.
+impl Drop for CustomVAO {
+    fn drop(&mut self) {
+        // A non-zero id at this point means the VAO was never released,
+        // unless we are unwinding from a panic anyway.
+        debug_assert!(
+            self.id == 0 || thread::panicking(),
+            "renderer::deinit not called"
+        );
+    }
+}
+
+/// A vertex array object together with the buffers bound into it.
+///
+/// The `Drop` impl asserts (in debug builds) that `id` has been reset to 0.
+pub struct VAO {
+    /// GL id; expected to be 0 by the time the value is dropped.
+    id: gl::GLuint,
+    /// Index buffer used with this VAO.
+    ibo_id: IBOId,
+    /// Buffer holding the per-vertex attributes.
+    main_vbo_id: VBOId,
+    /// Buffer holding the per-instance attributes.
+    instance_vbo_id: VBOId,
+    /// Size in bytes of one instance's attributes.
+    instance_stride: usize,
+    /// Divisor applied to the instance attributes.
+    instance_divisor: u32,
+    // NOTE(review): presumably records whether this VAO is responsible for
+    // deleting the main VBO and IBO; confirm against the deletion code.
+    owns_vertices_and_indices: bool,
+}
+
+/// Debug-only leak check for VAOs.
+impl Drop for VAO {
+    fn drop(&mut self) {
+        // A non-zero id at this point means the VAO was never released,
+        // unless we are unwinding from a panic anyway.
+        debug_assert!(
+            self.id == 0 || thread::panicking(),
+            "renderer::deinit not called"
+        );
+    }
+}
+
+/// A GL buffer object with a recorded reserved size.
+///
+/// The `Drop` impl asserts (in debug builds) that `id` has been reset to 0.
+#[derive(Debug)]
+pub struct PBO {
+    /// GL buffer id; expected to be 0 by the time the value is dropped.
+    id: gl::GLuint,
+    /// Size reported by `get_reserved_size()`.
+    // NOTE(review): presumably in bytes; confirm at the allocation site.
+    reserved_size: usize,
+}
+
+impl PBO {
+    /// Returns the size reserved for this buffer.
+    pub fn get_reserved_size(&self) -> usize {
+        self.reserved_size
+    }
+}
+
+/// Debug-only leak check for PBOs.
+impl Drop for PBO {
+    fn drop(&mut self) {
+        // A non-zero id at this point means the PBO was neither released nor
+        // handed back to its pool, unless we are unwinding from a panic anyway.
+        debug_assert!(
+            self.id == 0 || thread::panicking(),
+            "renderer::deinit not called or PBO not returned to pool"
+        );
+    }
+}
+
+/// Guard exposing the bytes of a mapped buffer.
+///
+/// Dropping the guard unmaps `GL_PIXEL_PACK_BUFFER` and resets that binding
+/// to 0 on `device`.
+pub struct BoundPBO<'a> {
+    /// Device whose GL context performs the unmap/unbind on drop.
+    device: &'a mut Device,
+    /// The mapped bytes.
+    pub data: &'a [u8]
+}
+
+/// Releases the mapping when the guard goes out of scope.
+impl<'a> Drop for BoundPBO<'a> {
+    fn drop(&mut self) {
+        let gl = &self.device.gl;
+        // Unmap first, then clear the pixel-pack binding.
+        gl.unmap_buffer(gl::PIXEL_PACK_BUFFER);
+        gl.bind_buffer(gl::PIXEL_PACK_BUFFER, 0);
+    }
+}
+
+/// Typed wrapper around the GL id of a framebuffer object.
+#[derive(PartialEq, Eq, Hash, Debug, Copy, Clone)]
+pub struct FBOId(gl::GLuint);
+
+/// Typed wrapper around the GL id of a render buffer object.
+#[derive(PartialEq, Eq, Hash, Debug, Copy, Clone)]
+pub struct RBOId(gl::GLuint);
+
+/// Typed wrapper around the GL id of a buffer bound as `GL_ARRAY_BUFFER`.
+#[derive(PartialEq, Eq, Hash, Debug, Copy, Clone)]
+pub struct VBOId(gl::GLuint);
+
+/// Typed wrapper around the GL id of a buffer bound as
+/// `GL_ELEMENT_ARRAY_BUFFER`.
+#[derive(PartialEq, Eq, Hash, Debug, Copy, Clone)]
+struct IBOId(gl::GLuint);
+
+/// Which kind of shader source a program is built from.
+#[derive(Clone, Debug)]
+enum ProgramSourceType {
+    /// Source is assembled at run time from the unoptimized shader files.
+    Unoptimized,
+    /// Source is taken as-is from `OPTIMIZED_SHADERS` for the given version.
+    Optimized(ShaderVersion),
+}
+
+/// Identifies a shader program's source: its base name, feature list and a
+/// digest computed over everything that goes into the source.
+#[derive(Clone, Debug)]
+pub struct ProgramSourceInfo {
+    /// Shader name without any feature suffix.
+    base_filename: &'static str,
+    /// Features the shader is built with.
+    features: Vec<&'static str>,
+    /// `base_filename` joined with the features by `_`, as a C string.
+    full_name_cstr: Rc<std::ffi::CString>,
+    /// Whether the optimized or the unoptimized source is used.
+    source_type: ProgramSourceType,
+    /// Digest of the renderer name and the shader source.
+    digest: ProgramSourceDigest,
+}
+
+impl ProgramSourceInfo {
+    /// Builds the source info for shader `name` with the given `features`,
+    /// computing its digest up front.
+    ///
+    /// The digest covers the renderer name followed by either the optimized
+    /// shader's digest (when `device.use_optimized_shaders` is set and an
+    /// optimized source exists) or the unoptimized prefix string plus the
+    /// digest of the shader file contents. A missing optimized source logs a
+    /// warning and falls back to the unoptimized path.
+    ///
+    /// Panics with "Shader not found" if the unoptimized path is taken and
+    /// `name` is not present in `UNOPTIMIZED_SHADERS`.
+    fn new(
+        device: &Device,
+        name: &'static str,
+        features: &[&'static str],
+    ) -> Self {
+
+        // Compute the digest. Assuming the device has a `ProgramCache`, this
+        // will always be needed, whereas the source is rarely needed.
+
+        use std::collections::hash_map::DefaultHasher;
+        use std::hash::Hasher;
+
+        // Setup.
+        let mut hasher = DefaultHasher::new();
+        let gl_version = get_shader_version(&*device.gl());
+
+        // Hash the renderer name.
+        hasher.write(device.capabilities.renderer_name.as_bytes());
+
+        let full_name = Self::make_full_name(name, features);
+
+        // Look up the optimized source only when the device asks for it; a
+        // miss is reported and treated the same as "not requested".
+        let optimized_source = if device.use_optimized_shaders {
+            OPTIMIZED_SHADERS.get(&(gl_version, &full_name)).or_else(|| {
+                warn!("Missing optimized shader source for {}", &full_name);
+                None
+            })
+        } else {
+            None
+        };
+
+        let source_type = match optimized_source {
+            Some(source_and_digest) => {
+                // Optimized shader sources are used as-is, without any run-time processing.
+                // The vertex and fragment shaders are different, so must both be hashed.
+                // We use the hashes that were computed at build time, and verify it in debug builds.
+                if cfg!(debug_assertions) {
+                    let mut h = DefaultHasher::new();
+                    h.write(source_and_digest.vert_source.as_bytes());
+                    h.write(source_and_digest.frag_source.as_bytes());
+                    let d: ProgramSourceDigest = h.into();
+                    let digest = d.to_string();
+                    debug_assert_eq!(digest, source_and_digest.digest);
+                    hasher.write(digest.as_bytes());
+                } else {
+                    hasher.write(source_and_digest.digest.as_bytes());
+                }
+
+                ProgramSourceType::Optimized(gl_version)
+            }
+            None => {
+                // For non-optimized sources we compute the hash by walking the static strings
+                // in the same order as we would when concatenating the source, to avoid
+                // heap-allocating in the common case.
+                //
+                // Note that we cheat a bit to make the hashing more efficient. First, the only
+                // difference between the vertex and fragment shader is a single deterministic
+                // define, so we don't need to hash both. Second, we precompute the digest of the
+                // expanded source file at build time, and then just hash that digest here.
+                let override_path = device.resource_override_path.as_ref();
+                let source_and_digest = UNOPTIMIZED_SHADERS.get(&name).expect("Shader not found");
+
+                // Hash the prefix string.
+                build_shader_prefix_string(
+                    gl_version,
+                    &features,
+                    ShaderKind::Vertex,
+                    &name,
+                    &mut |s| hasher.write(s.as_bytes()),
+                );
+
+                // Hash the shader file contents. We use a precomputed digest, and
+                // verify it in debug builds.
+                // With an override path the on-disk source may differ from the
+                // built-in one, so its digest is always recomputed and the
+                // equality check is skipped.
+                if override_path.is_some() || cfg!(debug_assertions) {
+                    let mut h = DefaultHasher::new();
+                    build_shader_main_string(
+                        &name,
+                        &|f| get_unoptimized_shader_source(f, override_path),
+                        &mut |s| h.write(s.as_bytes())
+                    );
+                    let d: ProgramSourceDigest = h.into();
+                    let digest = format!("{}", d);
+                    debug_assert!(override_path.is_some() || digest == source_and_digest.digest);
+                    hasher.write(digest.as_bytes());
+                } else {
+                    hasher.write(source_and_digest.digest.as_bytes());
+                }
+
+                ProgramSourceType::Unoptimized
+            }
+        };
+
+        // Finish.
+        ProgramSourceInfo {
+            base_filename: name,
+            features: features.to_vec(),
+            full_name_cstr: Rc::new(std::ffi::CString::new(full_name).unwrap()),
+            source_type,
+            digest: hasher.into(),
+        }
+    }
+
+    /// Produces the full source text of the `kind` shader for this program.
+    ///
+    /// Optimized programs return the stored vertex or fragment source
+    /// verbatim, panicking if the entry is missing from `OPTIMIZED_SHADERS`.
+    /// Unoptimized programs have their source assembled by
+    /// `Device::build_shader_string`.
+    fn compute_source(&self, device: &Device, kind: ShaderKind) -> String {
+        let full_name = self.full_name();
+        match self.source_type {
+            ProgramSourceType::Unoptimized => {
+                let mut assembled = String::new();
+                device.build_shader_string(
+                    &self.features,
+                    kind,
+                    self.base_filename,
+                    |chunk| assembled.push_str(chunk),
+                );
+                assembled
+            }
+            ProgramSourceType::Optimized(gl_version) => {
+                let shader = match OPTIMIZED_SHADERS.get(&(gl_version, &full_name)) {
+                    Some(shader) => shader,
+                    None => panic!("Missing optimized shader source for {}", full_name),
+                };
+
+                match kind {
+                    ShaderKind::Fragment => shader.frag_source.to_string(),
+                    ShaderKind::Vertex => shader.vert_source.to_string(),
+                }
+            },
+        }
+    }
+
+    /// Builds the full shader name: `base_filename` alone when there are no
+    /// features, otherwise `base_filename` followed by `_` and the features
+    /// joined with `_`.
+    fn make_full_name(base_filename: &'static str, features: &[&'static str]) -> String {
+        let mut full_name = String::from(base_filename);
+        if !features.is_empty() {
+            full_name.push('_');
+            full_name.push_str(&features.join("_"));
+        }
+        full_name
+    }
+
+    /// Returns the full name (base name plus features) of this program.
+    fn full_name(&self) -> String {
+        let features = self.features.as_slice();
+        Self::make_full_name(self.base_filename, features)
+    }
+}
+
+/// A program binary together with the digest of the source it was built
+/// from.
+#[cfg_attr(feature = "serialize_program", derive(Deserialize, Serialize))]
+pub struct ProgramBinary {
+    /// The raw binary data.
+    bytes: Vec<u8>,
+    /// GL enum identifying the binary's format.
+    format: gl::GLenum,
+    /// Digest of the program source; used as the key in `ProgramCache`.
+    source_digest: ProgramSourceDigest,
+}
+
+impl ProgramBinary {
+    /// Bundles a binary blob with its format and source digest.
+    fn new(bytes: Vec<u8>,
+           format: gl::GLenum,
+           source_digest: ProgramSourceDigest) -> Self {
+        Self { bytes, format, source_digest }
+    }
+
+    /// Returns a reference to the source digest hash.
+    pub fn source_digest(&self) -> &ProgramSourceDigest {
+        let ProgramBinary { source_digest, .. } = self;
+        source_digest
+    }
+}
+
+/// The interface that an application can implement to handle `ProgramCache`
+/// updates.
+pub trait ProgramCacheObserver {
+    /// Called from `ProgramCache::update_disk_cache` with the binaries that
+    /// have not yet been saved.
+    fn save_shaders_to_disk(&self, entries: Vec<Arc<ProgramBinary>>);
+    /// Called from `ProgramCache::update_disk_cache`, once startup is
+    /// complete, with the binaries that were linked.
+    fn set_startup_shaders(&self, entries: Vec<Arc<ProgramBinary>>);
+    // NOTE(review): presumably asks the observer to load the binary matching
+    // `digest` into `program_cache`; the call site is not in this section.
+    fn try_load_shader_from_disk(&self, digest: &ProgramSourceDigest, program_cache: &Rc<ProgramCache>);
+    // NOTE(review): presumably reports that `program_binary` could not be
+    // used; the call site is not in this section.
+    fn notify_program_binary_failed(&self, program_binary: &Arc<ProgramBinary>);
+}
+
+/// One entry of the `ProgramCache`.
+struct ProgramCacheEntry {
+    /// The binary.
+    binary: Arc<ProgramBinary>,
+    /// True if the binary has been linked, i.e. used for rendering.
+    /// Entries added via `add_new_program_binary` start out linked; entries
+    /// added via `load_program_binary` do not.
+    linked: bool,
+}
+
+/// Cache of program binaries keyed by the digest of their source.
+pub struct ProgramCache {
+    /// All known binaries, keyed by source digest.
+    entries: RefCell<FastHashMap<ProgramSourceDigest, ProgramCacheEntry>>,
+
+    /// Optional trait object that allows the client
+    /// application to handle ProgramCache updating
+    program_cache_handler: Option<Box<dyn ProgramCacheObserver>>,
+
+    /// Programs that have not yet been cached to disk (by program_cache_handler)
+    pending_entries: RefCell<Vec<Arc<ProgramBinary>>>,
+}
+
+impl ProgramCache {
+    /// Creates an empty cache, optionally wired to an observer that is told
+    /// about cache updates.
+    pub fn new(program_cache_observer: Option<Box<dyn ProgramCacheObserver>>) -> Rc<Self> {
+        let cache = ProgramCache {
+            entries: RefCell::new(FastHashMap::default()),
+            program_cache_handler: program_cache_observer,
+            pending_entries: RefCell::new(Vec::default()),
+        };
+        Rc::new(cache)
+    }
+
+    /// Save any new program binaries to the disk cache, and if startup has
+    /// just completed then write the list of shaders to load on next startup.
+    ///
+    /// Does nothing when no observer was supplied.
+    fn update_disk_cache(&self, startup_complete: bool) {
+        let handler = match self.program_cache_handler {
+            Some(ref handler) => handler,
+            None => return,
+        };
+
+        if !self.pending_entries.borrow().is_empty() {
+            let unsaved = self.pending_entries.replace(Vec::default());
+            handler.save_shaders_to_disk(unsaved);
+        }
+
+        if startup_complete {
+            // The borrow of `entries` ends with this statement, before the
+            // observer is called.
+            let startup_shaders: Vec<Arc<ProgramBinary>> = self.entries
+                .borrow()
+                .values()
+                .filter_map(|entry| {
+                    if entry.linked {
+                        Some(entry.binary.clone())
+                    } else {
+                        None
+                    }
+                })
+                .collect();
+            handler.set_startup_shaders(startup_shaders);
+        }
+    }
+
+    /// Add a new ProgramBinary to the cache.
+    /// This function is typically used after compiling and linking a new program.
+    /// The binary will be saved to disk the next time update_disk_cache() is called.
+    fn add_new_program_binary(&self, program_binary: Arc<ProgramBinary>) {
+        let digest = program_binary.source_digest.clone();
+        self.pending_entries.borrow_mut().push(program_binary.clone());
+
+        self.entries.borrow_mut().insert(
+            digest,
+            ProgramCacheEntry {
+                binary: program_binary,
+                linked: true,
+            },
+        );
+    }
+
+    /// Load ProgramBinary to ProgramCache.
+    /// The function is typically used to load ProgramBinary from disk.
+    /// The entry is recorded as not yet linked.
+    #[cfg(feature = "serialize_program")]
+    pub fn load_program_binary(&self, program_binary: Arc<ProgramBinary>) {
+        let digest = program_binary.source_digest.clone();
+        self.entries.borrow_mut().insert(
+            digest,
+            ProgramCacheEntry {
+                binary: program_binary,
+                linked: false,
+            },
+        );
+    }
+
+    /// Returns the number of bytes allocated for shaders in the cache, as
+    /// measured by calling `op` on each binary's byte buffer.
+    pub fn report_memory(&self, op: VoidPtrToSizeFn) -> usize {
+        self.entries.borrow().values()
+            .map(|entry| {
+                let buffer = entry.binary.bytes.as_ptr() as *const c_void;
+                unsafe { op(buffer) }
+            })
+            .sum()
+    }
+}
+
+/// Buffer usage hint; each variant maps to the GL `*_DRAW` constant of the
+/// same name (see `to_gl`).
+#[derive(Debug, Copy, Clone)]
+pub enum VertexUsageHint {
+    /// Maps to `GL_STATIC_DRAW`.
+    Static,
+    /// Maps to `GL_DYNAMIC_DRAW`.
+    Dynamic,
+    /// Maps to `GL_STREAM_DRAW`.
+    Stream,
+}
+
+impl VertexUsageHint {
+    /// Converts the hint to the corresponding GL usage constant.
+    fn to_gl(&self) -> gl::GLuint {
+        let hint = *self;
+        match hint {
+            VertexUsageHint::Stream => gl::STREAM_DRAW,
+            VertexUsageHint::Dynamic => gl::DYNAMIC_DRAW,
+            VertexUsageHint::Static => gl::STATIC_DRAW,
+        }
+    }
+}
+
+/// Typed wrapper around a GL uniform location.
+#[derive(Copy, Clone, Debug)]
+pub struct UniformLocation(gl::GLint);
+
+impl UniformLocation {
+    /// Sentinel location holding the value -1.
+    pub const INVALID: Self = UniformLocation(-1);
+}
+
+/// Feature flags and driver quirks detected for the current GL device.
+#[derive(Debug)]
+pub struct Capabilities {
+    /// Whether multisampled render targets are supported.
+    pub supports_multisampling: bool,
+    /// Whether the function `glCopyImageSubData` is available.
+    pub supports_copy_image_sub_data: bool,
+    /// Whether the RGBAF32 textures can be bound to framebuffers.
+    pub supports_color_buffer_float: bool,
+    /// Whether the device supports persistently mapped buffers, via glBufferStorage.
+    pub supports_buffer_storage: bool,
+    /// Whether advanced blend equations are supported.
+    pub supports_advanced_blend_equation: bool,
+    /// Whether dual-source blending is supported.
+    pub supports_dual_source_blending: bool,
+    /// Whether KHR_debug is supported for getting debug messages from
+    /// the driver.
+    pub supports_khr_debug: bool,
+    /// Whether we can configure texture units to do swizzling on sampling.
+    pub supports_texture_swizzle: bool,
+    /// Whether the driver supports uploading to textures from a non-zero
+    /// offset within a PBO.
+    pub supports_nonzero_pbo_offsets: bool,
+    /// Whether the driver supports specifying the texture usage up front.
+    pub supports_texture_usage: bool,
+    /// Whether offscreen render targets can be partially updated.
+    pub supports_render_target_partial_update: bool,
+    /// Whether we can use SSBOs.
+    pub supports_shader_storage_object: bool,
+    /// Whether to enforce that texture uploads be batched regardless of what
+    /// the pref says.
+    pub requires_batched_texture_uploads: Option<bool>,
+    /// Whether we are able to use glClear to clear regions of an alpha render target.
+    /// If false, we must use a shader to clear instead.
+    pub supports_alpha_target_clears: bool,
+    /// Whether we must perform a full unscissored glClear on alpha targets
+    /// prior to rendering.
+    pub requires_alpha_target_full_clear: bool,
+    /// Whether clearing a render target (immediately after binding it) is faster using a scissor
+    /// rect to clear just the required area, or clearing the entire target without a scissor rect.
+    pub prefers_clear_scissor: bool,
+    /// Whether the driver can correctly invalidate render targets. This can be
+    /// a worthwhile optimization, but is buggy on some devices.
+    pub supports_render_target_invalidate: bool,
+    /// Whether the driver can reliably upload data to R8 format textures.
+    pub supports_r8_texture_upload: bool,
+    /// Whether clip-masking is supported natively by the GL implementation
+    /// rather than emulated in shaders.
+    pub uses_native_clip_mask: bool,
+    /// Whether anti-aliasing is supported natively by the GL implementation
+    /// rather than emulated in shaders.
+    pub uses_native_antialiasing: bool,
+    /// Whether the extension GL_OES_EGL_image_external_essl3 is supported. If true, external
+    /// textures can be used as normal. If false, external textures can only be rendered with
+    /// certain shaders, and must first be copied in to regular textures for others.
+    pub supports_image_external_essl3: bool,
+    /// Whether the VAO must be rebound after an attached VBO has been orphaned.
+    pub requires_vao_rebind_after_orphaning: bool,
+    /// The name of the renderer, as reported by GL
+    pub renderer_name: String,
+}
+
+/// Errors produced while building a shader program. Both variants carry the
+/// shader name followed by the error message.
+#[derive(Clone, Debug)]
+pub enum ShaderError {
+    /// A shader failed to compile.
+    Compilation(String, String), // name, error message
+    /// A program failed to link.
+    Link(String, String),        // name, error message
+}
+
+/// A refcounted depth target, which may be shared by multiple textures across
+/// the device.
+///
+/// In debug builds, dropping one with a non-zero `refcount` trips an
+/// assertion.
+struct SharedDepthTarget {
+    /// The Render Buffer Object representing the depth target.
+    rbo_id: RBOId,
+    /// Reference count. When this drops to zero, the RBO is deleted.
+    refcount: usize,
+}
+
+/// Debug-only check that no references remain when the shared depth target
+/// is dropped (skipped while the thread is panicking).
+#[cfg(debug_assertions)]
+impl Drop for SharedDepthTarget {
+    fn drop(&mut self) {
+        debug_assert!(thread::panicking() || self.refcount == 0);
+    }
+}
+
+/// Describes for which texture formats to use the glTexStorage*
+/// family of functions.
+#[derive(PartialEq, Debug)]
+enum TexStorageUsage {
+    /// Do not use glTexStorage* for any format.
+    Never,
+    // NOTE(review): presumably "use glTexStorage* for every format except
+    // BGRA8"; confirm where this value is consumed.
+    NonBGRA8,
+    /// Use glTexStorage* for every format.
+    Always,
+}
+
+/// Describes a required alignment for a stride,
+/// which can either be represented in bytes or pixels.
+#[derive(Copy, Clone, Debug)]
+pub enum StrideAlignment {
+    /// Alignment expressed directly in bytes.
+    Bytes(NonZeroUsize),
+    /// Alignment expressed in pixels; the byte value depends on the image
+    /// format (see `num_bytes`).
+    Pixels(NonZeroUsize),
+}
+
+impl StrideAlignment {
+    /// Returns the alignment in bytes for images of the given `format`.
+    ///
+    /// A byte alignment is returned unchanged; a pixel alignment is
+    /// multiplied by the format's bytes-per-pixel. Panics if a pixel
+    /// alignment is requested for a format with zero bytes per pixel.
+    pub fn num_bytes(&self, format: ImageFormat) -> NonZeroUsize {
+        match *self {
+            StrideAlignment::Pixels(pixels) => {
+                assert!(format.bytes_per_pixel() > 0);
+                let byte_count = pixels.get() * format.bytes_per_pixel() as usize;
+                NonZeroUsize::new(byte_count).unwrap()
+            }
+            StrideAlignment::Bytes(bytes) => bytes,
+        }
+    }
+}
+
+// We get 24 bits of Z value - use only 22 bits of it, which leaves
+// 2 bits of headroom to account for GPU issues. This seems to manifest on
+// some GPUs under certain perspectives due to z interpolation
+// precision problems.
+const RESERVE_DEPTH_BITS: i32 = 2;
+
+/// Wraps a GL context together with cached binding state, detected
+/// capabilities and resource bookkeeping.
+pub struct Device {
+    gl: Rc<dyn gl::Gl>,
+
+    /// If non-None, |gl| points to a profiling wrapper, and this points to the
+    /// underlying Gl instance.
+    base_gl: Option<Rc<dyn gl::Gl>>,
+
+    // device state
+    bound_textures: [gl::GLuint; 16],
+    bound_program: gl::GLuint,
+    bound_program_name: Rc<std::ffi::CString>,
+    bound_vao: gl::GLuint,
+    bound_read_fbo: (FBOId, DeviceIntPoint),
+    bound_draw_fbo: FBOId,
+    program_mode_id: UniformLocation,
+    default_read_fbo: FBOId,
+    default_draw_fbo: FBOId,
+
+    /// Track depth state for assertions. Note that the default FBO has depth,
+    /// so this defaults to true.
+    depth_available: bool,
+
+    upload_method: UploadMethod,
+    use_batched_texture_uploads: bool,
+    /// Whether to use draw calls instead of regular blitting commands.
+    ///
+    /// Note: this currently only applies to the batched texture uploads
+    /// path.
+    use_draw_calls_for_texture_copy: bool,
+    /// Number of pixels below which we prefer batched uploads.
+    batched_upload_threshold: i32,
+
+    // HW or API capabilities
+    capabilities: Capabilities,
+
+    color_formats: TextureFormatPair<ImageFormat>,
+    bgra_formats: TextureFormatPair<gl::GLuint>,
+    bgra_pixel_type: gl::GLuint,
+    swizzle_settings: SwizzleSettings,
+    depth_format: gl::GLuint,
+
+    /// Map from texture dimensions to shared depth buffers for render targets.
+    ///
+    /// Render targets often have the same width/height, so we can save memory
+    /// by sharing these across targets.
+    depth_targets: FastHashMap<DeviceIntSize, SharedDepthTarget>,
+
+    // debug
+    inside_frame: bool,
+    crash_annotator: Option<Box<dyn CrashAnnotator>>,
+    annotate_draw_call_crashes: bool,
+
+    // resources
+    resource_override_path: Option<PathBuf>,
+
+    /// Whether to use shaders that have been optimized at build time.
+    use_optimized_shaders: bool,
+
+    max_texture_size: i32,
+    cached_programs: Option<Rc<ProgramCache>>,
+
+    // Frame counter. This is used to map between CPU
+    // frames and GPU frames.
+    frame_id: GpuFrameId,
+
+    /// When to use glTexStorage*. We prefer this over glTexImage* because it
+    /// guarantees that mipmaps won't be generated (which they otherwise are on
+    /// some drivers, particularly ANGLE). However, it is not always supported
+    /// at all, or for BGRA8 format. If it's not supported for the required
+    /// format, we fall back to glTexImage*.
+    texture_storage_usage: TexStorageUsage,
+
+    /// Required stride alignment for pixel transfers. This may be required for
+    /// correctness reasons due to driver bugs, or for performance reasons to
+    /// ensure we remain on the fast-path for transfers.
+    required_pbo_stride: StrideAlignment,
+
+    /// Whether we must ensure the source strings passed to glShaderSource()
+    /// are null-terminated, to work around driver bugs.
+    requires_null_terminated_shader_source: bool,
+
+    /// Whether we must unbind any texture from GL_TEXTURE_EXTERNAL_OES before
+    /// binding to GL_TEXTURE_2D, to work around an android emulator bug.
+    requires_texture_external_unbind: bool,
+
+    /// NOTE(review): presumably whether this device backs software WebRender;
+    /// inferred from the field name only - confirm where it is set.
+    is_software_webrender: bool,
+
+    // GL extensions
+    extensions: Vec<String>,
+
+    /// Dumps the source of the shader with the given name
+    dump_shader_source: Option<String>,
+
+    surface_origin_is_top_left: bool,
+
+    /// A debug boolean for tracking if the shader program has been set after
+    /// a blend mode change.
+    ///
+    /// This is needed for compatibility with next-gen
+    /// GPU APIs that switch states using "pipeline object" that bundles
+    /// together the blending state with the shader.
+    ///
+    /// Having the constraint of always binding the shader last would allow
+    /// us to have the "pipeline object" bound at that time. Without this
+    /// constraint, we'd either have to eagerly bind the "pipeline object"
+    /// on changing either the shader or the blend mode, or lazily bind it
+    /// at draw call time, neither of which is desirable.
+    #[cfg(debug_assertions)]
+    shader_is_ready: bool,
+
+    // count created/deleted textures to report in the profiler.
+    pub textures_created: u32,
+    pub textures_deleted: u32,
+}
+
+/// Contains the parameters necessary to bind a draw target.
+#[derive(Clone, Copy, Debug)]
+pub enum DrawTarget {
+    /// Use the device's default draw target, with the provided dimensions,
+    /// which are used to set the viewport.
+    Default {
+        /// Target rectangle to draw.
+        rect: FramebufferIntRect,
+        /// Total size of the target.
+        total_size: FramebufferIntSize,
+        /// Whether the surface's origin is its top-left corner.
+        surface_origin_is_top_left: bool,
+    },
+    /// Use the provided texture.
+    Texture {
+        /// Size of the texture in pixels
+        dimensions: DeviceIntSize,
+        /// Whether to draw with the texture's associated depth target
+        with_depth: bool,
+        /// FBO that corresponds to the selected layer / depth mode
+        fbo_id: FBOId,
+        /// Native GL texture ID
+        id: gl::GLuint,
+        /// Native GL texture target
+        target: gl::GLuint,
+    },
+    /// Use an FBO attached to an external texture.
+    External {
+        /// The FBO to draw into.
+        fbo: FBOId,
+        /// Size of the target; reported by `dimensions()`.
+        size: FramebufferIntSize,
+    },
+    /// An OS compositor surface
+    NativeSurface {
+        // NOTE(review): presumably the offset within the surface at which
+        // drawing starts; confirm at the bind site.
+        offset: DeviceIntPoint,
+        /// Raw id of the externally provided FBO.
+        external_fbo_id: u32,
+        /// Size of the surface.
+        dimensions: DeviceIntSize,
+    },
+}
+
+impl DrawTarget {
+    /// Builds a `Default` draw target whose draw rect spans the whole of `size`.
+    pub fn new_default(size: DeviceIntSize, surface_origin_is_top_left: bool) -> Self {
+        let fb_size = device_size_as_framebuffer_size(size);
+        DrawTarget::Default {
+            rect: fb_size.into(),
+            total_size: fb_size,
+            surface_origin_is_top_left,
+        }
+    }
+
+    /// Returns true if this draw target corresponds to the default framebuffer.
+    pub fn is_default(&self) -> bool {
+        matches!(*self, DrawTarget::Default { .. })
+    }
+
+    /// Builds a `Texture` draw target from `texture`, selecting the FBO with
+    /// or without a depth attachment according to `with_depth`.
+    ///
+    /// Panics if the texture does not have the selected FBO.
+    pub fn from_texture(
+        texture: &Texture,
+        with_depth: bool,
+    ) -> Self {
+        let selected_fbo = if with_depth {
+            texture.fbo_with_depth
+        } else {
+            texture.fbo
+        };
+        let fbo_id = selected_fbo.unwrap();
+
+        DrawTarget::Texture {
+            dimensions: texture.get_dimensions(),
+            with_depth,
+            fbo_id,
+            id: texture.id,
+            target: texture.target,
+        }
+    }
+
+    /// Returns the dimensions of this draw-target.
+    pub fn dimensions(&self) -> DeviceIntSize {
+        match *self {
+            // These variants already store their size in device units.
+            DrawTarget::Texture { dimensions, .. } |
+            DrawTarget::NativeSurface { dimensions, .. } => dimensions,
+            // These store a framebuffer-space size, so only the unit is recast.
+            DrawTarget::Default { total_size: size, .. } |
+            DrawTarget::External { size, .. } => size.cast_unit(),
+        }
+    }
+
+    /// Returns the offset of this draw target: the stored offset for native
+    /// surfaces, zero for every other kind.
+    pub fn offset(&self) -> DeviceIntPoint {
+        match *self {
+            DrawTarget::NativeSurface { offset, .. } => offset,
+            DrawTarget::Default { .. } |
+            DrawTarget::Texture { .. } |
+            DrawTarget::External { .. } => DeviceIntPoint::zero(),
+        }
+    }
+
+    /// Converts `device_rect` to framebuffer space. For a default target whose
+    /// origin is not top-left the rect is offset by the target rect's `min.x`
+    /// and flipped vertically against its `max.y`; in all other cases only the
+    /// unit changes.
+    pub fn to_framebuffer_rect(&self, device_rect: DeviceIntRect) -> FramebufferIntRect {
+        let converted = device_rect_as_framebuffer_rect(&device_rect);
+        match *self {
+            DrawTarget::Default { ref rect, surface_origin_is_top_left: false, .. } => {
+                // perform a Y-flip here, keeping the width and height unchanged
+                let width = converted.width();
+                let height = converted.height();
+                let min_x = converted.min.x + rect.min.x;
+                let min_y = rect.max.y - converted.max.y;
+
+                let mut flipped = converted;
+                flipped.min.x = min_x;
+                flipped.min.y = min_y;
+                flipped.max.x = min_x + width;
+                flipped.max.y = min_y + height;
+                flipped
+            }
+            DrawTarget::Default { .. } |
+            DrawTarget::Texture { .. } |
+            DrawTarget::External { .. } |
+            DrawTarget::NativeSurface { .. } => converted,
+        }
+    }
+
+    /// Returns whether the origin of this target is its top-left corner. Only
+    /// the default target can report false; every other kind reports true.
+    pub fn surface_origin_is_top_left(&self) -> bool {
+        match *self {
+            DrawTarget::Texture { .. } |
+            DrawTarget::External { .. } |
+            DrawTarget::NativeSurface { .. } => true,
+            DrawTarget::Default { surface_origin_is_top_left, .. } => surface_origin_is_top_left,
+        }
+    }
+
+    /// Given a scissor rect, convert it to the right coordinate space
+    /// depending on the draw target kind. If no scissor rect was supplied,
+    /// returns a scissor rect that encloses the entire render target.
+    pub fn build_scissor_rect(
+        &self,
+        scissor_rect: Option<DeviceIntRect>,
+    ) -> FramebufferIntRect {
+        let requested = match scissor_rect {
+            Some(requested) => requested,
+            None => {
+                // No scissor requested: cover the whole target.
+                let full_size = device_size_as_framebuffer_size(self.dimensions());
+                return FramebufferIntRect::from_size(full_size);
+            }
+        };
+
+        match *self {
+            DrawTarget::Default { ref rect, .. } => {
+                // Clamp to the target rect; an empty overlap yields a zero rect.
+                let in_fb_space = self.to_framebuffer_rect(requested);
+                in_fb_space
+                    .intersection(rect)
+                    .unwrap_or_else(FramebufferIntRect::zero)
+            }
+            DrawTarget::NativeSurface { offset, .. } => {
+                let shifted = requested.translate(offset.to_vector());
+                device_rect_as_framebuffer_rect(&shifted)
+            }
+            DrawTarget::Texture { .. } | DrawTarget::External { .. } => {
+                device_rect_as_framebuffer_rect(&requested)
+            }
+        }
+    }
+}
+
+/// Contains the parameters necessary to bind a texture-backed read target.
+#[derive(Clone, Copy, Debug)]
+pub enum ReadTarget {
+    /// Use the device's default draw target.
+    Default,
+    /// Use the provided texture.
+    Texture {
+        /// ID of the FBO to read from.
+        fbo_id: FBOId,
+    },
+    /// Use an FBO attached to an external texture.
+    External {
+        /// The FBO to read from.
+        fbo: FBOId,
+    },
+    /// An FBO bound to a native (OS compositor) surface
+    NativeSurface {
+        /// ID of the FBO to read from.
+        fbo_id: FBOId,
+        /// Offset reported by `offset()` for this target.
+        offset: DeviceIntPoint,
+    },
+}
+
+impl ReadTarget {
+    /// Builds a `Texture` read target from the texture's FBO.
+    ///
+    /// Panics if the texture has no FBO.
+    pub fn from_texture(
+        texture: &Texture,
+    ) -> Self {
+        let fbo_id = texture.fbo.unwrap();
+        ReadTarget::Texture { fbo_id }
+    }
+
+    /// Returns the stored offset for native surfaces, and zero for every
+    /// other kind of read target.
+    fn offset(&self) -> DeviceIntPoint {
+        match *self {
+            ReadTarget::NativeSurface { offset, .. } => offset,
+            ReadTarget::Default |
+            ReadTarget::Texture { .. } |
+            ReadTarget::External { .. } => DeviceIntPoint::zero(),
+        }
+    }
+}
+
+/// Maps each draw target kind onto the read target kind that reads from the
+/// same framebuffer.
+impl From<DrawTarget> for ReadTarget {
+    fn from(t: DrawTarget) -> Self {
+        match t {
+            DrawTarget::Default { .. } => ReadTarget::Default,
+            DrawTarget::Texture { fbo_id, .. } => ReadTarget::Texture { fbo_id },
+            DrawTarget::External { fbo, .. } => ReadTarget::External { fbo },
+            DrawTarget::NativeSurface { external_fbo_id, offset, .. } => {
+                // The native surface stores a raw id; wrap it as an FBOId.
+                let fbo_id = FBOId(external_fbo_id);
+                ReadTarget::NativeSurface { fbo_id, offset }
+            }
+        }
+    }
+}
+
+/// Parses the major, release, and patch versions from a GL_VERSION string on
+/// Mali devices. For example, for the version string
+/// "OpenGL ES 3.2 v1.r36p0-01eac0.28ab3a577f105e026887e2b4c93552fb" this
+/// returns Some((1, 36, 0)). The "-<suffix>" following the patch number is
+/// optional, so "OpenGL ES 3.2 v1.r38p1" returns Some((1, 38, 1)).
+/// Returns None if the version cannot be parsed.
+fn parse_mali_version(version_string: &str) -> Option<(u32, u32, u32)> {
+    let (_prefix, rest) = version_string.split_once('v')?;
+    let (v_str, rest) = rest.split_once(".r")?;
+    let v = v_str.parse().ok()?;
+
+    let (r_str, rest) = rest.split_once('p')?;
+    let r = r_str.parse().ok()?;
+
+    // Not every version string carries a "-<suffix>" after the patch number.
+    // When it is absent, everything after the "p" is the patch number.
+    let p_str = rest.split_once('-').map_or(rest, |(patch, _suffix)| patch);
+    let p = p_str.parse().ok()?;
+
+    Some((v, r, p))
+}
+
+impl Device {
+    /// Creates a `Device` around the given GL context.
+    ///
+    /// Queries the driver for its renderer name, version string, maximum
+    /// texture size and extension list, then uses those (together with the
+    /// compile-time target OS / architecture) to choose texture formats and
+    /// to decide which capabilities and driver workarounds to enable.
+    ///
+    /// When `panic_on_gl_error` is set, or in builds with debug assertions,
+    /// the GL object is wrapped so that any GL error is logged and panics.
+    pub fn new(
+        mut gl: Rc<dyn gl::Gl>,
+        crash_annotator: Option<Box<dyn CrashAnnotator>>,
+        resource_override_path: Option<PathBuf>,
+        use_optimized_shaders: bool,
+        upload_method: UploadMethod,
+        batched_upload_threshold: i32,
+        cached_programs: Option<Rc<ProgramCache>>,
+        allow_texture_storage_support: bool,
+        allow_texture_swizzling: bool,
+        dump_shader_source: Option<String>,
+        surface_origin_is_top_left: bool,
+        panic_on_gl_error: bool,
+    ) -> Device {
+        let mut max_texture_size = [0];
+        unsafe {
+            gl.get_integer_v(gl::MAX_TEXTURE_SIZE, &mut max_texture_size);
+        }
+
+        // We cap the max texture size at 16384. Some hardware reports higher
+        // capabilities but gets very unstable with very large textures.
+        // Bug 1702494 tracks re-evaluating this cap.
+        let max_texture_size = max_texture_size[0].min(16384);
+
+        let renderer_name = gl.get_string(gl::RENDERER);
+        info!("Renderer: {}", renderer_name);
+        let version_string = gl.get_string(gl::VERSION);
+        info!("Version: {}", version_string);
+        info!("Max texture size: {}", max_texture_size);
+
+        // Collect the names of all extensions the driver reports.
+        let mut extension_count = [0];
+        unsafe {
+            gl.get_integer_v(gl::NUM_EXTENSIONS, &mut extension_count);
+        }
+        let extension_count = extension_count[0] as gl::GLuint;
+        let mut extensions = Vec::new();
+        for i in 0 .. extension_count {
+            extensions.push(gl.get_string_i(gl::EXTENSIONS, i));
+        }
+
+        // On debug builds, or when `panic_on_gl_error` is set, assert that each
+        // GL call is error-free. We don't do this by default on release builds
+        // because the synchronous call can stall the pipeline.
+        let supports_khr_debug = supports_extension(&extensions, "GL_KHR_debug");
+        if panic_on_gl_error || cfg!(debug_assertions) {
+            gl = gl::ErrorReactingGl::wrap(gl, move |gl, name, code| {
+                if supports_khr_debug {
+                    Self::log_driver_messages(gl);
+                }
+                error!("Caught GL error {:x} at {}", code, name);
+                panic!("Caught GL error {:x} at {}", code, name);
+            });
+        }
+
+        if supports_extension(&extensions, "GL_ANGLE_provoking_vertex") {
+            gl.provoking_vertex_angle(gl::FIRST_VERTEX_CONVENTION);
+        }
+
+        let supports_texture_usage = supports_extension(&extensions, "GL_ANGLE_texture_usage");
+
+        // Our common-case image data in Firefox is BGRA, so we make an effort
+        // to use BGRA as the internal texture storage format to avoid the need
+        // to swizzle during upload. Currently we only do this on GLES (and thus
+        // for Windows, via ANGLE).
+        //
+        // On Mac, Apple docs [1] claim that BGRA is a more efficient internal
+        // format, but they don't support it with glTextureStorage. As a workaround,
+        // we pretend that it's RGBA8 for the purposes of texture transfers,
+        // but swizzle R with B for the texture sampling.
+        //
+        // We also need our internal format types to be sized, since glTexStorage*
+        // will reject non-sized internal format types.
+        //
+        // Unfortunately, with GL_EXT_texture_format_BGRA8888, BGRA8 is not a
+        // valid internal format (for glTexImage* or glTexStorage*) unless
+        // GL_EXT_texture_storage is also available [2][3], which is usually
+        // not the case on GLES 3 as the latter's functionality has been
+        // included by default but the former has not been updated.
+        // The extension is available on ANGLE, but on Android this usually
+        // means we must fall back to using unsized BGRA and glTexImage*.
+        //
+        // Overall, we have the following factors in play when choosing the formats:
+        //   - with glTexStorage, the internal format needs to match the external format,
+        //     or the driver would have to do the conversion, which is slow
+        //   - on desktop GL, there is no BGRA internal format. However, initializing
+        //     the textures with glTexImage as RGBA appears to use BGRA internally,
+        //     preferring BGRA external data [4].
+        //   - when glTexStorage + BGRA internal format is not supported,
+        //     and the external data is BGRA, we have the following options:
+        //       1. use glTexImage with RGBA internal format, this costs us VRAM for mipmaps
+        //       2. use glTexStorage with RGBA internal format, this costs us the conversion by the driver
+        //       3. pretend we are uploading RGBA and set up the swizzling of the texture unit - this costs us batch breaks
+        //
+        // [1] https://developer.apple.com/library/archive/documentation/
+        //     GraphicsImaging/Conceptual/OpenGL-MacProgGuide/opengl_texturedata/
+        //     opengl_texturedata.html#//apple_ref/doc/uid/TP40001987-CH407-SW22
+        // [2] https://www.khronos.org/registry/OpenGL/extensions/EXT/EXT_texture_format_BGRA8888.txt
+        // [3] https://www.khronos.org/registry/OpenGL/extensions/EXT/EXT_texture_storage.txt
+        // [4] http://http.download.nvidia.com/developer/Papers/2005/Fast_Texture_Transfers/Fast_Texture_Transfers.pdf
+
+        // On the android emulator glTexImage fails to create textures larger than 3379.
+        // So we must use glTexStorage instead. See bug 1591436.
+        let is_emulator = renderer_name.starts_with("Android Emulator");
+        let avoid_tex_image = is_emulator;
+        // gl_version holds [major, minor]; it is compared lexicographically below.
+        let mut gl_version = [0; 2];
+        unsafe {
+            gl.get_integer_v(gl::MAJOR_VERSION, &mut gl_version[0..1]);
+            gl.get_integer_v(gl::MINOR_VERSION, &mut gl_version[1..2]);
+        }
+        info!("GL context {:?} {}.{}", gl.get_type(), gl_version[0], gl_version[1]);
+
+        // We block texture storage on mac because it doesn't support BGRA
+        let supports_texture_storage = allow_texture_storage_support && !cfg!(target_os = "macos") &&
+            match gl.get_type() {
+                gl::GlType::Gl => supports_extension(&extensions, "GL_ARB_texture_storage"),
+                gl::GlType::Gles => true,
+            };
+
+        // The GL_EXT_texture_format_BGRA8888 extension allows us to use BGRA as an internal format
+        // with glTexImage on GLES. However, we can only use BGRA8 as an internal format for
+        // glTexStorage when GL_EXT_texture_storage is also explicitly supported. This is because
+        // glTexStorage was added in GLES 3, but GL_EXT_texture_format_BGRA8888 was written against
+        // GLES 2 and GL_EXT_texture_storage.
+        // To complicate things even further, some Intel devices claim to support both extensions
+        // but in practice do not allow BGRA to be used with glTexStorage.
+        let supports_gles_bgra = supports_extension(&extensions, "GL_EXT_texture_format_BGRA8888");
+        let supports_texture_storage_with_gles_bgra = supports_gles_bgra
+            && supports_extension(&extensions, "GL_EXT_texture_storage")
+            && !renderer_name.starts_with("Intel(R) HD Graphics for BayTrail")
+            && !renderer_name.starts_with("Intel(R) HD Graphics for Atom(TM) x5/x7");
+
+        let supports_texture_swizzle = allow_texture_swizzling &&
+            match gl.get_type() {
+                // see https://www.g-truc.net/post-0734.html
+                gl::GlType::Gl => gl_version >= [3, 3] ||
+                    supports_extension(&extensions, "GL_ARB_texture_swizzle"),
+                gl::GlType::Gles => true,
+            };
+
+        // Pick the texture formats, the BGRA pixel type, the sampling swizzle and the
+        // glTexStorage policy. Arms are tried in order, so the first matching guard wins.
+        let (color_formats, bgra_formats, bgra_pixel_type, bgra8_sampling_swizzle, texture_storage_usage) = match gl.get_type() {
+            // There is `glTexStorage`, use it and expect RGBA on the input.
+            gl::GlType::Gl if supports_texture_storage && supports_texture_swizzle => (
+                TextureFormatPair::from(ImageFormat::RGBA8),
+                TextureFormatPair { internal: gl::RGBA8, external: gl::RGBA },
+                gl::UNSIGNED_BYTE,
+                Swizzle::Bgra, // pretend it's RGBA, rely on swizzling
+                TexStorageUsage::Always
+            ),
+            // There is no `glTexStorage`, upload as `glTexImage` with BGRA input.
+            gl::GlType::Gl => (
+                TextureFormatPair { internal: ImageFormat::BGRA8, external: ImageFormat::BGRA8 },
+                TextureFormatPair { internal: gl::RGBA, external: gl::BGRA },
+                gl::UNSIGNED_INT_8_8_8_8_REV,
+                Swizzle::Rgba, // converted on uploads by the driver, no swizzling needed
+                TexStorageUsage::Never
+            ),
+            // glTexStorage is always supported in GLES 3, but because the GL_EXT_texture_storage
+            // extension is supported we can use glTexStorage with BGRA8 as the internal format.
+            // Prefer BGRA textures over RGBA.
+            gl::GlType::Gles if supports_texture_storage_with_gles_bgra => (
+                TextureFormatPair::from(ImageFormat::BGRA8),
+                TextureFormatPair { internal: gl::BGRA8_EXT, external: gl::BGRA_EXT },
+                gl::UNSIGNED_BYTE,
+                Swizzle::Rgba, // no conversion needed
+                TexStorageUsage::Always,
+            ),
+            // BGRA is not supported as an internal format with glTexStorage, therefore we will
+            // use RGBA textures instead and pretend BGRA data is RGBA when uploading.
+            // The swizzling will happen at the texture unit.
+            gl::GlType::Gles if supports_texture_swizzle => (
+                TextureFormatPair::from(ImageFormat::RGBA8),
+                TextureFormatPair { internal: gl::RGBA8, external: gl::RGBA },
+                gl::UNSIGNED_BYTE,
+                Swizzle::Bgra, // pretend it's RGBA, rely on swizzling
+                TexStorageUsage::Always,
+            ),
+            // BGRA is not supported as an internal format with glTexStorage, and we cannot use
+            // swizzling either. Therefore prefer BGRA textures over RGBA, but use glTexImage
+            // to initialize BGRA textures. glTexStorage can still be used for other formats.
+            gl::GlType::Gles if supports_gles_bgra && !avoid_tex_image => (
+                TextureFormatPair::from(ImageFormat::BGRA8),
+                TextureFormatPair::from(gl::BGRA_EXT),
+                gl::UNSIGNED_BYTE,
+                Swizzle::Rgba, // no conversion needed
+                TexStorageUsage::NonBGRA8,
+            ),
+            // Neither BGRA nor swizzling is supported. GLES does not allow format conversion
+            // during upload so we must use RGBA textures and pretend BGRA data is RGBA when
+            // uploading. Images may be rendered incorrectly as a result.
+            gl::GlType::Gles => {
+                warn!("Neither BGRA or texture swizzling are supported. Images may be rendered incorrectly.");
+                (
+                    TextureFormatPair::from(ImageFormat::RGBA8),
+                    TextureFormatPair { internal: gl::RGBA8, external: gl::RGBA },
+                    gl::UNSIGNED_BYTE,
+                    Swizzle::Rgba,
+                    TexStorageUsage::Always,
+                )
+            }
+        };
+
+        let is_software_webrender = renderer_name.starts_with("Software WebRender");
+        let upload_method = if is_software_webrender {
+            // Uploads in SWGL generally reduce to simple memory copies.
+            UploadMethod::Immediate
+        } else {
+            upload_method
+        };
+        // Prefer 24-bit depth format. While 16-bit depth also works, it may exhaust depth ids easily.
+        let depth_format = gl::DEPTH_COMPONENT24;
+
+        info!("GL texture cache {:?}, bgra {:?} swizzle {:?}, texture storage {:?}, depth {:?}",
+            color_formats, bgra_formats, bgra8_sampling_swizzle, texture_storage_usage, depth_format);
+
+        // On Mali-T devices glCopyImageSubData appears to stall the pipeline until any pending
+        // renders to the source texture have completed. On Mali-G, it has been observed to
+        // indefinitely hang in some circumstances. Using an alternative such as glBlitFramebuffer
+        // is preferable on such devices, so pretend we don't support glCopyImageSubData.
+        // See bugs 1669494 and 1677757.
+        let supports_copy_image_sub_data = if renderer_name.starts_with("Mali") {
+            false
+        } else {
+            supports_extension(&extensions, "GL_EXT_copy_image") ||
+            supports_extension(&extensions, "GL_ARB_copy_image")
+        };
+
+        // We have seen crashes on x86 PowerVR Rogue G6430 devices during GPU cache
+        // updates using the scatter shader. It seems likely that GL_EXT_color_buffer_float
+        // is broken. See bug 1709408.
+        let is_x86_powervr_rogue_g6430 = renderer_name.starts_with("PowerVR Rogue G6430")
+            && cfg!(target_arch = "x86");
+        let supports_color_buffer_float = match gl.get_type() {
+            gl::GlType::Gl => true,
+            gl::GlType::Gles if is_x86_powervr_rogue_g6430 => false,
+            gl::GlType::Gles => supports_extension(&extensions, "GL_EXT_color_buffer_float"),
+        };
+
+        let is_adreno = renderer_name.starts_with("Adreno");
+
+        // There appears to be a driver bug on older versions of the Adreno
+        // driver which prevents usage of persistently mapped buffers.
+        // See bugs 1678585 and 1683936.
+        // TODO: only disable feature for affected driver versions.
+        let supports_buffer_storage = if is_adreno {
+            false
+        } else {
+            supports_extension(&extensions, "GL_EXT_buffer_storage") ||
+            supports_extension(&extensions, "GL_ARB_buffer_storage")
+        };
+
+        // KHR_blend_equation_advanced renders incorrectly on Adreno
+        // devices. This has only been confirmed up to Adreno 5xx, and has been
+        // fixed for Android 9, so this condition could be made more specific.
+        let supports_advanced_blend_equation =
+            supports_extension(&extensions, "GL_KHR_blend_equation_advanced") &&
+            !is_adreno;
+
+        let supports_dual_source_blending = match gl.get_type() {
+            gl::GlType::Gl => supports_extension(&extensions,"GL_ARB_blend_func_extended") &&
+                supports_extension(&extensions,"GL_ARB_explicit_attrib_location"),
+            gl::GlType::Gles => supports_extension(&extensions,"GL_EXT_blend_func_extended"),
+        };
+
+        // Software webrender relies on the unoptimized shader source.
+        let use_optimized_shaders = use_optimized_shaders && !is_software_webrender;
+
+        // On the android emulator, and possibly some Mali devices, glShaderSource
+        // can crash if the source strings are not null-terminated.
+        // See bug 1591945 and bug 1799722.
+        let requires_null_terminated_shader_source = is_emulator || renderer_name == "Mali-T628"
+            || renderer_name == "Mali-T720" || renderer_name == "Mali-T760";
+
+        // The android emulator gets confused if you don't explicitly unbind any texture
+        // from GL_TEXTURE_EXTERNAL_OES before binding another to GL_TEXTURE_2D. See bug 1636085.
+        let requires_texture_external_unbind = is_emulator;
+
+        let is_macos = cfg!(target_os = "macos");
+             //  && renderer_name.starts_with("AMD");
+             //  (XXX: we apply this restriction to all GPUs to handle switching)
+
+        let is_windows_angle = cfg!(target_os = "windows")
+            && renderer_name.starts_with("ANGLE");
+        let is_adreno_3xx = renderer_name.starts_with("Adreno (TM) 3");
+
+        // Some GPUs require the stride of the data during texture uploads to be
+        // aligned to certain requirements, either for correctness or performance
+        // reasons.
+        let required_pbo_stride = if is_adreno_3xx {
+            // On Adreno 3xx, alignments of < 128 bytes can result in corrupted
+            // glyphs. See bug 1696039.
+            StrideAlignment::Bytes(NonZeroUsize::new(128).unwrap())
+        } else if is_adreno {
+            // On later Adreno devices it must be a multiple of 64 *pixels* to
+            // hit the fast path, meaning value in bytes varies with the texture
+            // format. This is purely an optimization.
+            StrideAlignment::Pixels(NonZeroUsize::new(64).unwrap())
+        } else if is_macos {
+            // On AMD Mac, it must always be a multiple of 256 bytes.
+            // We apply this restriction to all GPUs to handle switching
+            StrideAlignment::Bytes(NonZeroUsize::new(256).unwrap())
+        } else if is_windows_angle {
+            // On ANGLE-on-D3D, PBO texture uploads get incorrectly truncated
+            // if the stride is greater than the width * bpp.
+            StrideAlignment::Bytes(NonZeroUsize::new(1).unwrap())
+        } else {
+            // Other platforms may have similar requirements and should be added
+            // here. The default value should be 4 bytes.
+            StrideAlignment::Bytes(NonZeroUsize::new(4).unwrap())
+        };
+
+        // On AMD Macs there is a driver bug which causes some texture uploads
+        // from a non-zero offset within a PBO to fail. See bug 1603783.
+        // As with the stride above, this is applied to every GPU on macOS.
+        let supports_nonzero_pbo_offsets = !is_macos;
+
+        // On Mali-Gxx and Txxx there is a driver bug when rendering partial updates to
+        // offscreen render targets, so we must ensure we render to the entire target.
+        // See bug 1663355.
+        let is_mali_g = renderer_name.starts_with("Mali-G");
+        let is_mali_t = renderer_name.starts_with("Mali-T");
+        let supports_render_target_partial_update = !is_mali_g && !is_mali_t;
+
+        let supports_shader_storage_object = match gl.get_type() {
+            // see https://www.g-truc.net/post-0734.html
+            gl::GlType::Gl => supports_extension(&extensions, "GL_ARB_shader_storage_buffer_object"),
+            gl::GlType::Gles => gl_version >= [3, 1],
+        };
+
+        // SWGL uses swgl_clipMask() instead of implementing clip-masking in shaders.
+        // This allows certain shaders to potentially bypass the more expensive alpha-
+        // pass variants if they know the alpha-pass was only required to deal with
+        // clip-masking.
+        let uses_native_clip_mask = is_software_webrender;
+
+        // SWGL uses swgl_antiAlias() instead of implementing anti-aliasing in shaders.
+        // As above, this allows bypassing certain alpha-pass variants.
+        let uses_native_antialiasing = is_software_webrender;
+
+        // If running on android with a mesa driver (eg intel chromebooks), parse the mesa version.
+        // Only the major version is extracted; it stays None if any parsing step fails.
+        let mut android_mesa_version = None;
+        if cfg!(target_os = "android") && renderer_name.starts_with("Mesa") {
+            if let Some((_, mesa_version)) = version_string.split_once("Mesa ") {
+                if let Some((major_str, _)) = mesa_version.split_once(".") {
+                    if let Ok(major) = major_str.parse::<i32>() {
+                        android_mesa_version = Some(major);
+                    }
+                }
+            }
+        }
+
+        // If the device supports OES_EGL_image_external_essl3 we can use it to render
+        // external images. If not, we must use the ESSL 1.0 OES_EGL_image_external
+        // extension instead.
+        // Mesa versions prior to 20.0 do not implement textureSize(samplerExternalOES),
+        // so we must use the fallback path.
+        let supports_image_external_essl3 = match android_mesa_version {
+            Some(major) if major < 20 => false,
+            _ => supports_extension(&extensions, "GL_OES_EGL_image_external_essl3"),
+        };
+
+        // None means the driver imposes no requirement either way; in that case
+        // `use_batched_texture_uploads` below starts out as false.
+        let mut requires_batched_texture_uploads = None;
+        if is_software_webrender {
+            // No benefit to batching texture uploads with swgl.
+            requires_batched_texture_uploads = Some(false);
+        } else if is_mali_g {
+            // On Mali-Gxx the driver really struggles with many small texture uploads,
+            // and handles fewer, larger uploads better.
+            requires_batched_texture_uploads = Some(true);
+        }
+
+        // On Mali-Txxx devices we have observed crashes during draw calls when rendering
+        // to an alpha target immediately after using glClear to clear regions of it.
+        // Using a shader to clear the regions avoids the crash. See bug 1638593.
+        let supports_alpha_target_clears = !is_mali_t;
+
+        // On Adreno 4xx devices with older drivers we have seen render tasks to alpha targets have
+        // no effect unless the target is fully cleared prior to rendering. See bug 1714227.
+        let is_adreno_4xx = renderer_name.starts_with("Adreno (TM) 4");
+        let requires_alpha_target_full_clear = is_adreno_4xx;
+
+        // Testing on Intel and nVidia GPUs showed large performance wins applying a scissor rect
+        // when clearing render targets. Assume this is the best default. On Mali, however, it is
+        // much more efficient to clear the entire render target (due to allowing it to skip reading
+        // the previous contents in to tile memory). This may be true for other GPUs too.
+        let prefers_clear_scissor = !renderer_name.starts_with("Mali");
+
+        let mut supports_render_target_invalidate = true;
+
+        // On PowerVR Rogue devices we have seen that invalidating render targets after we are done
+        // with them can incorrectly cause pending renders to be written to different targets
+        // instead. See bug 1719345.
+        let is_powervr_rogue = renderer_name.starts_with("PowerVR Rogue");
+        if is_powervr_rogue {
+            supports_render_target_invalidate = false;
+        }
+
+        // On Mali-G78 and Mali-G710 devices with a driver version of v1.r36p0 or later we have
+        // seen that invalidating render targets can result in image corruption, perhaps due to
+        // subsequent reuses of the render target not correctly reinitializing them to a valid
+        // state. See bug 1787520.
+        if renderer_name.starts_with("Mali-G78") || renderer_name.starts_with("Mali-G710") {
+            match parse_mali_version(&version_string) {
+                Some(version) if version >= (1, 36, 0) => supports_render_target_invalidate = false,
+                _ => {}
+            }
+        }
+
+        // On Linux we have seen uploads to R8 format textures result in
+        // corruption on some AMD cards.
+        // See https://bugzilla.mozilla.org/show_bug.cgi?id=1687554#c13
+        let supports_r8_texture_upload = if cfg!(target_os = "linux")
+            && renderer_name.starts_with("AMD Radeon RX")
+        {
+            false
+        } else {
+            true
+        };
+
+        // On some Adreno 3xx devices the vertex array object must be unbound and rebound after
+        // an attached buffer has been orphaned.
+        let requires_vao_rebind_after_orphaning = is_adreno_3xx;
+
+        Device {
+            gl,
+            base_gl: None,
+            crash_annotator,
+            annotate_draw_call_crashes: false,
+            resource_override_path,
+            use_optimized_shaders,
+            upload_method,
+            use_batched_texture_uploads: requires_batched_texture_uploads.unwrap_or(false),
+            use_draw_calls_for_texture_copy: false,
+            batched_upload_threshold,
+
+            inside_frame: false,
+
+            capabilities: Capabilities {
+                supports_multisampling: false, //TODO
+                supports_copy_image_sub_data,
+                supports_color_buffer_float,
+                supports_buffer_storage,
+                supports_advanced_blend_equation,
+                supports_dual_source_blending,
+                supports_khr_debug,
+                supports_texture_swizzle,
+                supports_nonzero_pbo_offsets,
+                supports_texture_usage,
+                supports_render_target_partial_update,
+                supports_shader_storage_object,
+                requires_batched_texture_uploads,
+                supports_alpha_target_clears,
+                requires_alpha_target_full_clear,
+                prefers_clear_scissor,
+                supports_render_target_invalidate,
+                supports_r8_texture_upload,
+                uses_native_clip_mask,
+                uses_native_antialiasing,
+                supports_image_external_essl3,
+                requires_vao_rebind_after_orphaning,
+                renderer_name,
+            },
+
+            color_formats,
+            bgra_formats,
+            bgra_pixel_type,
+            swizzle_settings: SwizzleSettings {
+                bgra8_sampling_swizzle,
+            },
+            depth_format,
+
+            depth_targets: FastHashMap::default(),
+
+            bound_textures: [0; 16],
+            bound_program: 0,
+            bound_program_name: Rc::new(std::ffi::CString::new("").unwrap()),
+            bound_vao: 0,
+            bound_read_fbo: (FBOId(0), DeviceIntPoint::zero()),
+            bound_draw_fbo: FBOId(0),
+            program_mode_id: UniformLocation::INVALID,
+            default_read_fbo: FBOId(0),
+            default_draw_fbo: FBOId(0),
+
+            depth_available: true,
+
+            max_texture_size,
+            cached_programs,
+            frame_id: GpuFrameId(0),
+            extensions,
+            texture_storage_usage,
+            requires_null_terminated_shader_source,
+            requires_texture_external_unbind,
+            is_software_webrender,
+            required_pbo_stride,
+            dump_shader_source,
+            surface_origin_is_top_left,
+
+            #[cfg(debug_assertions)]
+            shader_is_ready: false,
+
+            textures_created: 0,
+            textures_deleted: 0,
+        }
+    }
+
+    /// Borrows the GL interface that this device issues its calls through.
+    /// While profiling, `begin_frame` may have swapped in a wrapping context.
+    pub fn gl(&self) -> &dyn gl::Gl { &*self.gl }
+
+    /// Borrows the shared `Rc` handle to the GL interface, for callers that
+    /// need more than the plain trait-object reference returned by `gl()`.
+    pub fn rc_gl(&self) -> &Rc<dyn gl::Gl> { &self.gl }
+
+    /// Applies a runtime parameter. PBO uploads are left alone under software WebRender;
+    /// draw-call copies only change when the driver doesn't dictate batched uploads.
+    pub fn set_parameter(&mut self, param: &Parameter) {
+        match param {
+            Parameter::Bool(BoolParameter::PboUploads, enabled) if !self.is_software_webrender => {
+                self.upload_method = if *enabled {
+                    UploadMethod::PixelBuffer(crate::ONE_TIME_USAGE_HINT)
+                } else {
+                    UploadMethod::Immediate
+                };
+            }
+            Parameter::Bool(BoolParameter::BatchedUploads, enabled) => {
+                self.use_batched_texture_uploads = *enabled;
+            }
+            Parameter::Bool(BoolParameter::DrawCallsForTextureCopy, enabled)
+                if self.capabilities.requires_batched_texture_uploads.is_none() =>
+            {
+                self.use_draw_calls_for_texture_copy = *enabled;
+            }
+            Parameter::Int(IntParameter::BatchedUploadThreshold, threshold) => {
+                self.batched_upload_threshold = *threshold;
+            }
+            _ => {}
+        }
+    }
+
+    /// Ensures that the maximum texture size is less than or equal to the
+    /// provided value. If the provided value is greater than the limit already
+    /// in effect (e.g. the value supported by the driver), that limit is kept.
+    pub fn clamp_max_texture_size(&mut self, size: i32) {
+        self.max_texture_size = self.max_texture_size.min(size);
+    }
+
+    /// Returns the current limit on texture width/height, after any clamping.
+    pub fn max_texture_size(&self) -> i32 {
+        self.max_texture_size
+    }
+
+    /// Reports the `surface_origin_is_top_left` flag that was stored when the
+    /// device was constructed.
+    pub fn surface_origin_is_top_left(&self) -> bool { self.surface_origin_is_top_left }
+
+    /// Borrows the table of driver capabilities and workaround flags that was
+    /// filled in when the device was constructed.
+    pub fn get_capabilities(&self) -> &Capabilities { &self.capabilities }
+
+    /// Returns a copy of the colour format pair stored for this device when
+    /// it was constructed.
+    pub fn preferred_color_formats(&self) -> TextureFormatPair<ImageFormat> { self.color_formats.clone() }
+
+    /// Returns the swizzle settings chosen for this device, or `None` when
+    /// the driver lacks texture swizzle support.
+    pub fn swizzle_settings(&self) -> Option<SwizzleSettings> {
+        let supported = self.capabilities.supports_texture_swizzle;
+        // `filter` keeps the value only when swizzling is actually available.
+        Some(self.swizzle_settings).filter(|_| supported)
+    }
+
+    /// Bit width of the depth buffer format in use; panics on any other format.
+    pub fn depth_bits(&self) -> i32 {
+        let format = self.depth_format;
+        if format == gl::DEPTH_COMPONENT16 { return 16; }
+        if format == gl::DEPTH_COMPONENT24 { return 24; }
+        panic!("Unknown depth format {:?}", format)
+    }
+
+    /// Number of depth ids left once `RESERVE_DEPTH_BITS` are set aside. Must match
+    /// the document / items-per-document limits declared in gpu_types.rs.
+    pub fn max_depth_ids(&self) -> i32 {
+        1 << (self.depth_bits() - RESERVE_DEPTH_BITS)
+    }
+
+    /// Returns the near-plane value, which is `-max_depth_ids()` converted to
+    /// a float.
+    pub fn ortho_near_plane(&self) -> f32 { -self.max_depth_ids() as f32 }
+
+    /// Returns the far-plane value, which is `max_depth_ids() - 1` converted
+    /// to a float.
+    pub fn ortho_far_plane(&self) -> f32 { (self.max_depth_ids() - 1) as f32 }
+
+    /// Returns the `required_pbo_stride` alignment that was stored when the
+    /// device was constructed.
+    pub fn required_pbo_stride(&self) -> StrideAlignment { self.required_pbo_stride }
+
+    /// Borrows the texture upload method currently selected; it can be
+    /// switched at runtime through `set_parameter`.
+    pub fn upload_method(&self) -> &UploadMethod { &self.upload_method }
+
+    /// Whether batched texture uploads are currently enabled. Starts from the
+    /// driver requirement (off when there is none); see `set_parameter`.
+    pub fn use_batched_texture_uploads(&self) -> bool { self.use_batched_texture_uploads }
+
+    /// Whether texture copies should be performed with draw calls. Starts as
+    /// `false` and can be changed through `set_parameter`.
+    pub fn use_draw_calls_for_texture_copy(&self) -> bool { self.use_draw_calls_for_texture_copy }
+
+    /// Returns the current batched-upload threshold, as set at construction
+    /// or later through `set_parameter`.
+    pub fn batched_upload_threshold(&self) -> i32 { self.batched_upload_threshold }
+
+    /// Unbinds every cached texture slot and the VAO, and rebinds the default
+    /// read/draw framebuffers, keeping the cached binding state in sync.
+    pub fn reset_state(&mut self) {
+        for (unit, bound) in self.bound_textures.iter_mut().enumerate() {
+            *bound = 0;
+            self.gl.active_texture(gl::TEXTURE0 + unit as gl::GLuint);
+            self.gl.bind_texture(gl::TEXTURE_2D, 0);
+        }
+        self.gl.bind_vertex_array(0);
+        self.bound_vao = 0;
+        let (read_fbo, draw_fbo) = (self.default_read_fbo, self.default_draw_fbo);
+        self.gl.bind_framebuffer(gl::READ_FRAMEBUFFER, read_fbo.0);
+        self.gl.bind_framebuffer(gl::DRAW_FRAMEBUFFER, draw_fbo.0);
+        self.bound_read_fbo = (read_fbo, DeviceIntPoint::zero());
+        self.bound_draw_fbo = draw_fbo;
+    }
+
+    /// Debug helper: if `log` begins with a `0:<n>` or `0(<n>` line reference,
+    /// logs 1-based source line `n` (marked `>`) and the lines on either side.
+    #[cfg(debug_assertions)]
+    fn print_shader_errors(source: &str, log: &str) {
+        if !(log.starts_with("0:") || log.starts_with("0(")) {
+            return;
+        }
+        // Hacky extraction: parse the digits after the prefix; give up if nothing ends them.
+        let rest = &log[2..];
+        let line_number = rest.find(|c: char| !c.is_digit(10)).map(|end| rest[..end].parse::<usize>());
+        let first_line = match line_number {
+            Some(Ok(number)) if number >= 2 => number - 2,
+            _ => return,
+        };
+        for (line, prefix) in source.lines().skip(first_line).zip(&["|",">","|"]) {
+            error!("{}\t{}", prefix, line);
+        }
+    }
+
+    /// Compiles `source` as a shader of `shader_type` and returns the GL shader id.
+    /// On failure the info log is reported, the shader object is deleted, and `Err` is returned.
+    pub fn compile_shader(
+        &self,
+        name: &str,
+        shader_type: gl::GLenum,
+        source: &String,
+    ) -> Result<gl::GLuint, ShaderError> {
+        debug!("compile {}", name);
+        let id = self.gl.create_shader(shader_type);
+
+        let mut new_source = Cow::from(source.as_str());
+        // Buggy platforms need the strings passed to glShaderSource null-terminated.
+        if self.requires_null_terminated_shader_source {
+            new_source.to_mut().push('\0');
+        }
+
+        self.gl.shader_source(id, &[new_source.as_bytes()]);
+        self.gl.compile_shader(id);
+        let log = self.gl.get_shader_info_log(id);
+        let mut status = [0];
+        unsafe { self.gl.get_shader_iv(id, gl::COMPILE_STATUS, &mut status); }
+        if status[0] == 0 {
+            let type_str = match shader_type {
+                gl::VERTEX_SHADER => "vertex",
+                gl::FRAGMENT_SHADER => "fragment",
+                _ => panic!("Unexpected shader type {:x}", shader_type),
+            };
+            error!("Failed to compile {} shader: {}\n{}", type_str, name, log);
+            #[cfg(debug_assertions)]
+            Self::print_shader_errors(source, &log);
+            // The caller never sees this id, so release the shader object here.
+            self.gl.delete_shader(id);
+            return Err(ShaderError::Compilation(name.to_string(), log));
+        }
+        if !log.is_empty() {
+            warn!("Warnings detected on shader: {}\n{}", name, log);
+        }
+        Ok(id)
+    }
+
+    /// Marks the start of a frame and brings cached GL state to a known baseline.
+    ///
+    /// Resets the per-frame texture counters, installs or removes the profiling
+    /// GL wrapper as needed, records the framebuffers bound on entry as the
+    /// defaults, and returns the current frame id.
+    pub fn begin_frame(&mut self) -> GpuFrameId {
+        debug_assert!(!self.inside_frame);
+        self.inside_frame = true;
+        #[cfg(debug_assertions)]
+        {
+            self.shader_is_ready = false;
+        }
+
+        self.textures_created = 0;
+        self.textures_deleted = 0;
+
+        // If our profiler state has changed, apply or remove the profiling
+        // wrapper from our GL context.
+        let being_profiled = profiler::thread_is_being_profiled();
+        let using_wrapper = self.base_gl.is_some();
+
+        // We can usually unwind driver stacks on x86 so we don't need to manually instrument
+        // gl calls there. Timestamps can be pretty expensive on Windows (2us each and perhaps
+        // an opportunity to be descheduled?) which makes the profiles gathered with this
+        // turned on less useful so only profile on ARM.
+        let instrument_gl = cfg!(any(target_arch = "arm", target_arch = "aarch64"));
+        if instrument_gl && being_profiled && !using_wrapper {
+            fn note(name: &str, duration: Duration) {
+                profiler::add_text_marker("OpenGL Calls", name, duration);
+            }
+            let threshold = Duration::from_millis(1);
+            let wrapped = gl::ProfilingGl::wrap(self.gl.clone(), threshold, note);
+            self.base_gl = Some(mem::replace(&mut self.gl, wrapped));
+        } else if !being_profiled && using_wrapper {
+            // Profiling has stopped: put the unwrapped context back.
+            self.gl = self.base_gl.take().unwrap();
+        }
+
+        // Retrieve the currently set FBO.
+        let (mut read_binding, mut draw_binding) = ([0], [0]);
+        unsafe {
+            self.gl.get_integer_v(gl::READ_FRAMEBUFFER_BINDING, &mut read_binding);
+            self.gl.get_integer_v(gl::DRAW_FRAMEBUFFER_BINDING, &mut draw_binding);
+        }
+        self.default_read_fbo = FBOId(read_binding[0] as gl::GLuint);
+        self.default_draw_fbo = FBOId(draw_binding[0] as gl::GLuint);
+
+        // Shader state
+        self.bound_program = 0;
+        self.program_mode_id = UniformLocation::INVALID;
+        self.gl.use_program(0);
+
+        // Reset common state
+        self.reset_state();
+
+        // Pixel op state
+        self.gl.pixel_store_i(gl::UNPACK_ALIGNMENT, 1);
+        self.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, 0);
+
+        // Default is sampler 0, always
+        self.gl.active_texture(gl::TEXTURE0);
+
+        self.frame_id
+    }
+
+    /// Binds texture `id` to `slot`, optionally updating its swizzle and filtering.
+    /// Does nothing if `id` is already bound there and no parameter update is requested.
+    fn bind_texture_impl(
+        &mut self,
+        slot: TextureSlot,
+        id: gl::GLuint,
+        target: gl::GLenum,
+        set_swizzle: Option<Swizzle>,
+        image_rendering: Option<ImageRendering>,
+    ) {
+        debug_assert!(self.inside_frame);
+        if self.bound_textures[slot.0] != id || set_swizzle.is_some() || image_rendering.is_some() {
+            self.gl.active_texture(gl::TEXTURE0 + slot.0 as gl::GLuint);
+            // Android emulator workaround (bug 1636085): unbind GL_TEXTURE_EXTERNAL_OES before binding GL_TEXTURE_2D.
+            if target == gl::TEXTURE_2D && self.requires_texture_external_unbind {
+                self.gl.bind_texture(gl::TEXTURE_EXTERNAL_OES, 0);
+            }
+            self.gl.bind_texture(target, id);
+            match set_swizzle {
+                Some(swizzle) if self.capabilities.supports_texture_swizzle => {
+                    let (r_src, g_src, b_src, a_src) = match swizzle {
+                        Swizzle::Rgba => (gl::RED, gl::GREEN, gl::BLUE, gl::ALPHA),
+                        Swizzle::Bgra => (gl::BLUE, gl::GREEN, gl::RED, gl::ALPHA),
+                    };
+                    self.gl.tex_parameter_i(target, gl::TEXTURE_SWIZZLE_R, r_src as i32);
+                    self.gl.tex_parameter_i(target, gl::TEXTURE_SWIZZLE_G, g_src as i32);
+                    self.gl.tex_parameter_i(target, gl::TEXTURE_SWIZZLE_B, b_src as i32);
+                    self.gl.tex_parameter_i(target, gl::TEXTURE_SWIZZLE_A, a_src as i32);
+                }
+                Some(swizzle) => debug_assert_eq!(swizzle, Swizzle::default()),
+                None => {}
+            }
+            if let Some(rendering) = image_rendering {
+                let filter = match rendering {
+                    ImageRendering::Auto | ImageRendering::CrispEdges => gl::LINEAR,
+                    ImageRendering::Pixelated => gl::NEAREST,
+                };
+                self.gl.tex_parameter_i(target, gl::TEXTURE_MIN_FILTER, filter as i32);
+                self.gl.tex_parameter_i(target, gl::TEXTURE_MAG_FILTER, filter as i32);
+            }
+            self.gl.active_texture(gl::TEXTURE0);
+            self.bound_textures[slot.0] = id;
+        }
+    }
+
+    /// Binds `texture` to `slot`, recording `swizzle` as the texture's active
+    /// swizzle. The swizzle is only pushed to GL again when it differs from
+    /// the one previously recorded on the texture.
+    pub fn bind_texture<S>(&mut self, slot: S, texture: &Texture, swizzle: Swizzle)
+    where
+        S: Into<TextureSlot>,
+    {
+        let previous = texture.active_swizzle.replace(swizzle);
+        // `None` tells the implementation to leave the swizzle parameters alone.
+        let changed = Some(swizzle).filter(|&requested| previous != requested);
+        self.bind_texture_impl(slot.into(), texture.id, texture.target, changed, None);
+    }
+
+    /// Binds an external texture to `slot`. Because an image-rendering mode is
+    /// always passed along, the texture is rebound and its min/mag filters are
+    /// re-applied on every call.
+    pub fn bind_external_texture<S>(&mut self, slot: S, external_texture: &ExternalTexture)
+    where
+        S: Into<TextureSlot>,
+    {
+        let rendering = Some(external_texture.image_rendering);
+        let (id, target) = (external_texture.id, external_texture.target);
+        // No swizzle is requested for external textures.
+        self.bind_texture_impl(slot.into(), id, target, None, rendering);
+    }
+
+    /// Makes `fbo_id` the read framebuffer and caches it together with `offset`.
+    /// The GL bind is skipped when the cached (fbo, offset) pair is unchanged.
+    /// Must be called inside a frame (checked in debug builds).
+    pub fn bind_read_target_impl(&mut self, fbo_id: FBOId, offset: DeviceIntPoint) {
+        debug_assert!(self.inside_frame);
+
+        let requested = (fbo_id, offset);
+        // A change of offset alone also counts as a change of binding here.
+        if self.bound_read_fbo != requested {
+            fbo_id.bind(self.gl(), FBOTarget::Read);
+            self.bound_read_fbo = requested;
+        }
+    }
+
+    /// Binds the framebuffer behind `target` for reading, with the target's offset.
+    pub fn bind_read_target(&mut self, target: ReadTarget) {
+        let offset = target.offset();
+        let fbo_id = match target {
+            ReadTarget::Default => self.default_read_fbo,
+            ReadTarget::Texture { fbo_id } | ReadTarget::NativeSurface { fbo_id, .. } => fbo_id,
+            ReadTarget::External { fbo } => fbo,
+        };
+        self.bind_read_target_impl(fbo_id, offset)
+    }
+
+    /// Binds `fbo_id` as the draw framebuffer unless the cache says it already is.
+    fn bind_draw_target_impl(&mut self, fbo_id: FBOId) {
+        debug_assert!(self.inside_frame);
+        // Update the cache first; only touch GL when the binding really changed.
+        if mem::replace(&mut self.bound_draw_fbo, fbo_id) != fbo_id {
+            fbo_id.bind(self.gl(), FBOTarget::Draw);
+        }
+    }
+
+    /// Rebinds the default read framebuffer with a zero offset.
+    pub fn reset_read_target(&mut self) {
+        self.bind_read_target_impl(self.default_read_fbo, DeviceIntPoint::zero());
+    }
+
+
+    /// Rebinds the default draw framebuffer and marks depth as available.
+    pub fn reset_draw_target(&mut self) {
+        self.bind_draw_target_impl(self.default_draw_fbo);
+        self.depth_available = true;
+    }
+
+    /// Binds `target` for drawing and sets the viewport to cover the target's rect.
+    ///
+    /// Also records whether depth is available: never for the default and external
+    /// targets, always for native surfaces, and per `with_depth` for textures.
+    pub fn bind_draw_target(
+        &mut self,
+        target: DrawTarget,
+    ) {
+        let (fbo_id, rect, depth_available) = match target {
+            DrawTarget::Default { rect, .. } => {
+                (self.default_draw_fbo, rect, false)
+            }
+            DrawTarget::Texture { dimensions, fbo_id, with_depth, .. } => {
+                let size = device_size_as_framebuffer_size(dimensions);
+                (fbo_id, FramebufferIntRect::from_size(size), with_depth)
+            }
+            DrawTarget::External { fbo, size } => {
+                (fbo, size.into(), false)
+            }
+            DrawTarget::NativeSurface { external_fbo_id, offset, dimensions, .. } => {
+                let device_rect = DeviceIntRect::from_origin_and_size(offset, dimensions);
+                (FBOId(external_fbo_id), device_rect_as_framebuffer_rect(&device_rect), true)
+            }
+        };
+
+        self.depth_available = depth_available;
+        self.bind_draw_target_impl(fbo_id);
+        // The viewport is set on every call, even when the FBO bind was skipped.
+        self.gl.viewport(
+            rect.min.x,
+            rect.min.y,
+            rect.width(),
+            rect.height(),
+        );
+    }
+
+    /// Creates an unbound FBO; attachments must be added to make it complete.
+    pub fn create_fbo(&mut self) -> FBOId {
+        let ids = self.gl.gen_framebuffers(1);
+        FBOId(ids[0])
+    }
+
+    /// Creates an FBO with the given texture bound as the color attachment.
+    ///
+    /// The new FBO is bound as the draw framebuffer while it is set up; the
+    /// draw binding recorded in the cache is re-bound before returning.
+    pub fn create_fbo_for_external_texture(&mut self, texture_id: u32) -> FBOId {
+        let fbo = self.create_fbo();
+        fbo.bind(self.gl(), FBOTarget::Draw);
+        let (target, attachment) = (gl::DRAW_FRAMEBUFFER, gl::COLOR_ATTACHMENT0);
+        self.gl.framebuffer_texture_2d(target, attachment, gl::TEXTURE_2D, texture_id, 0);
+        debug_assert_eq!(
+            self.gl.check_frame_buffer_status(target),
+            gl::FRAMEBUFFER_COMPLETE,
+            "Incomplete framebuffer",
+        );
+
+        // Put back whatever the cache says was bound for drawing.
+        self.bound_draw_fbo.bind(self.gl(), FBOTarget::Draw);
+        fbo
+    }
+
+    /// Deletes the GL framebuffer object behind `fbo`. The device's cached
+    /// read/draw bindings are not updated by this call.
+    pub fn delete_fbo(&mut self, fbo: FBOId) { self.gl.delete_framebuffers(&[fbo.0]); }
+
+    /// Binds an externally supplied FBO as the draw target.
+    ///
+    /// Same caching rule as `bind_draw_target_impl`: GL is only touched when
+    /// `fbo_id` differs from the cached draw binding. Unlike `bind_draw_target`,
+    /// neither the viewport nor the depth-availability flag is changed.
+    pub fn bind_external_draw_target(&mut self, fbo_id: FBOId) {
+        self.bind_draw_target_impl(fbo_id);
+    }
+
+    /// Link a program, attaching the supplied vertex format.
+    ///
+    /// A cached program binary is used when one is available and loads
+    /// successfully; otherwise both shaders are compiled from source and
+    /// linked, and a `ShaderError` is returned if either step fails.
+    ///
+    /// If `create_program()` finds a binary shader on disk, it will kick off
+    /// linking immediately, which some drivers (notably ANGLE) run in parallel
+    /// on background threads, so this function should ideally be run sometime
+    /// later. The first run of the application therefore compiles from source
+    /// (blocking), but subsequent runs should load quickly.
+    pub fn link_program(
+        &mut self,
+        program: &mut Program,
+        descriptor: &VertexDescriptor,
+    ) -> Result<(), ShaderError> {
+        profile_scope!("compile shader");
+
+        let _guard = CrashAnnotatorGuard::new(
+            &self.crash_annotator,
+            CrashAnnotation::CompileShader,
+            &program.source_info.full_name_cstr
+        );
+
+        assert!(!program.is_initialized());
+        let mut build_program = true;
+        let info = &program.source_info;
+
+        // See if we hit the binary shader cache
+        if let Some(ref cached_programs) = self.cached_programs {
+            // If the shader is not in the cache, attempt to load it from disk
+            if cached_programs.entries.borrow().get(&program.source_info.digest).is_none() {
+                if let Some(ref handler) = cached_programs.program_cache_handler {
+                    handler.try_load_shader_from_disk(&program.source_info.digest, cached_programs);
+                    if let Some(entry) = cached_programs.entries.borrow().get(&program.source_info.digest) {
+                        self.gl.program_binary(program.id, entry.binary.format, &entry.binary.bytes);
+                    }
+                }
+            }
+
+            if let Some(entry) = cached_programs.entries.borrow_mut().get_mut(&info.digest) {
+                let mut link_status = [0];
+                unsafe {
+                    self.gl.get_program_iv(program.id, gl::LINK_STATUS, &mut link_status);
+                }
+                if link_status[0] == 0 {
+                    let error_log = self.gl.get_program_info_log(program.id);
+                    error!(
+                      "Failed to load a program object with a program binary: {} renderer {}\n{}",
+                      &info.base_filename,
+                      self.capabilities.renderer_name,
+                      error_log
+                    );
+                    if let Some(ref program_cache_handler) = cached_programs.program_cache_handler {
+                        program_cache_handler.notify_program_binary_failed(&entry.binary);
+                    }
+                } else {
+                    entry.linked = true;
+                    build_program = false;
+                }
+            }
+        }
+
+        // No usable cached binary, so do a normal compile + link pass.
+        if build_program {
+            // Compile the vertex shader
+            let vs_source = info.compute_source(self, ShaderKind::Vertex);
+            let vs_id = match self.compile_shader(&info.full_name(), gl::VERTEX_SHADER, &vs_source) {
+                    Ok(vs_id) => vs_id,
+                    Err(err) => return Err(err),
+                };
+
+            // Compile the fragment shader
+            let fs_source = info.compute_source(self, ShaderKind::Fragment);
+            let fs_id =
+                match self.compile_shader(&info.full_name(), gl::FRAGMENT_SHADER, &fs_source) {
+                    Ok(fs_id) => fs_id,
+                    Err(err) => {
+                        self.gl.delete_shader(vs_id);
+                        return Err(err);
+                    }
+                };
+
+            // Write both sources to disk if this shader was requested by name
+            if Some(info.base_filename) == self.dump_shader_source.as_ref().map(String::as_ref) {
+                let path = std::path::Path::new(info.base_filename);
+                std::fs::write(path.with_extension("vert"), vs_source).unwrap();
+                std::fs::write(path.with_extension("frag"), fs_source).unwrap();
+            }
+
+            // Attach shaders
+            self.gl.attach_shader(program.id, vs_id);
+            self.gl.attach_shader(program.id, fs_id);
+
+            // Bind vertex attributes
+            for (i, attr) in descriptor
+                .vertex_attributes
+                .iter()
+                .chain(descriptor.instance_attributes.iter())
+                .enumerate()
+            {
+                self.gl
+                    .bind_attrib_location(program.id, i as gl::GLuint, attr.name);
+            }
+
+            if self.cached_programs.is_some() {
+                self.gl.program_parameter_i(program.id, gl::PROGRAM_BINARY_RETRIEVABLE_HINT, gl::TRUE as gl::GLint);
+            }
+
+            // Link!
+            self.gl.link_program(program.id);
+
+            // GL recommends detaching and deleting shaders once the link
+            // is complete (whether successful or not). This allows the driver
+            // to free any memory associated with the parsing and compilation.
+            self.gl.detach_shader(program.id, vs_id);
+            self.gl.detach_shader(program.id, fs_id);
+            self.gl.delete_shader(vs_id);
+            self.gl.delete_shader(fs_id);
+
+            let mut link_status = [0];
+            unsafe {
+                self.gl.get_program_iv(program.id, gl::LINK_STATUS, &mut link_status);
+            }
+            if link_status[0] == 0 {
+                let error_log = self.gl.get_program_info_log(program.id);
+                error!(
+                    "Failed to link shader program: {}\n{}",
+                    &info.base_filename,
+                    error_log
+                );
+                self.gl.delete_program(program.id);
+                return Err(ShaderError::Link(info.base_filename.to_owned(), error_log));
+            }
+
+            if let Some(ref cached_programs) = self.cached_programs {
+                if !cached_programs.entries.borrow().contains_key(&info.digest) {
+                    let (buffer, format) = self.gl.get_program_binary(program.id);
+                    if buffer.len() > 0 {
+                        let binary = Arc::new(ProgramBinary::new(buffer, format, info.digest.clone()));
+                        cached_programs.add_new_program_binary(binary);
+                    }
+                }
+            }
+        }
+
+        // If we get here, the link succeeded, so get the uniforms.
+        program.is_initialized = true;
+        program.u_transform = self.gl.get_uniform_location(program.id, "uTransform");
+        program.u_mode = self.gl.get_uniform_location(program.id, "uMode");
+        program.u_texture_size = self.gl.get_uniform_location(program.id, "uTextureSize");
+
+        Ok(())
+    }
+
+    /// Makes `program` current if it isn't already, caching its name and mode uniform.
+    /// Returns `false` (after a debug assertion) if `program` is not initialized.
+    pub fn bind_program(&mut self, program: &Program) -> bool {
+        debug_assert!(self.inside_frame);
+        debug_assert!(program.is_initialized());
+        if !program.is_initialized() {
+            return false;
+        }
+        #[cfg(debug_assertions)]
+        { self.shader_is_ready = true; }
+        // Skip the GL call and cache update when this program is already current.
+        if self.bound_program != program.id {
+            self.gl.use_program(program.id);
+            self.bound_program = program.id;
+            self.program_mode_id = UniformLocation(program.u_mode);
+            self.bound_program_name = program.source_info.full_name_cstr.clone();
+        }
+        true
+    }
+
+    /// Creates a texture of the requested kind, format and size, and allocates
+    /// its GPU storage without uploading any pixel data.
+    ///
+    /// Dimensions above `max_texture_size` are clamped after logging an error.
+    /// When `render_target` is provided, FBOs are initialised via `init_fbos`
+    /// (a second time, with its flag set, when `has_depth` is true).
+    pub fn create_texture(
+        &mut self,
+        target: ImageBufferKind,
+        format: ImageFormat,
+        mut width: i32,
+        mut height: i32,
+        filter: TextureFilter,
+        render_target: Option<RenderTargetInfo>,
+    ) -> Texture {
+        debug_assert!(self.inside_frame);
+
+        let limit = self.max_texture_size;
+        if width > limit || height > limit {
+            error!("Attempting to allocate a texture of size {}x{} above the limit, trimming", width, height);
+            width = width.min(limit);
+            height = height.min(limit);
+        }
+
+        // Set up the texture book-keeping.
+        let mut texture = Texture {
+            id: self.gl.gen_textures(1)[0],
+            target: get_gl_target(target),
+            size: DeviceIntSize::new(width, height),
+            format,
+            filter,
+            active_swizzle: Cell::default(),
+            fbo: None,
+            fbo_with_depth: None,
+            last_frame_used: self.frame_id,
+            flags: TextureFlags::default(),
+        };
+        self.bind_texture(DEFAULT_TEXTURE, &texture, Swizzle::default());
+        self.set_texture_parameters(texture.target, filter);
+
+        if self.capabilities.supports_texture_usage && render_target.is_some() {
+            self.gl.tex_parameter_i(texture.target, gl::TEXTURE_USAGE_ANGLE, gl::FRAMEBUFFER_ATTACHMENT_ANGLE as gl::GLint);
+        }
+
+        // Allocate storage.
+        let desc = self.gl_describe_format(texture.format);
+
+        // Firefox doesn't use mipmaps, but Servo uses them for standalone image
+        // textures images larger than 512 pixels. This is the only case where
+        // we set the filter to trilinear.
+        let mipmap_levels = match texture.filter {
+            TextureFilter::Trilinear => (width.max(height) as f64).log2() as gl::GLint + 1,
+            TextureFilter::Nearest | TextureFilter::Linear => 1,
+        };
+
+        // We never want to upload texture data at the same time as allocating the texture.
+        self.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, 0);
+
+        // Use glTexStorage where available, since it avoids allocating
+        // unnecessary mipmap storage and generally improves performance with
+        // stronger invariants.
+        let use_texture_storage = match self.texture_storage_usage {
+            TexStorageUsage::Always => true,
+            TexStorageUsage::NonBGRA8 => texture.format != ImageFormat::BGRA8,
+            TexStorageUsage::Never => false,
+        };
+        let (gl_width, gl_height) = (texture.size.width as gl::GLint, texture.size.height as gl::GLint);
+        if use_texture_storage {
+            self.gl.tex_storage_2d(
+                texture.target,
+                mipmap_levels,
+                desc.internal,
+                gl_width,
+                gl_height,
+            );
+        } else {
+            self.gl.tex_image_2d(
+                texture.target, 0, desc.internal as gl::GLint,
+                gl_width, gl_height, 0,
+                desc.external, desc.pixel_type, None,
+            );
+        }
+
+        // Set up FBOs, if required.
+        if let Some(rt_info) = render_target {
+            self.init_fbos(&mut texture, false);
+            if rt_info.has_depth {
+                self.init_fbos(&mut texture, true);
+            }
+        }
+
+        self.textures_created += 1;
+
+        texture
+    }
+
+    /// Applies the min/mag filters implied by `filter`, plus clamp-to-edge
+    /// wrapping on both axes, to the texture currently bound to `target`.
+    /// The parameters are set in the order MAG, MIN, WRAP_S, WRAP_T.
+    fn set_texture_parameters(&mut self, target: gl::GLuint, filter: TextureFilter) {
+        // Trilinear only differs from linear in the minification filter.
+        let (mag_filter, min_filter) = match filter {
+            TextureFilter::Nearest => (gl::NEAREST, gl::NEAREST),
+            TextureFilter::Linear => (gl::LINEAR, gl::LINEAR),
+            TextureFilter::Trilinear => (gl::LINEAR, gl::LINEAR_MIPMAP_LINEAR),
+        };
+
+        let params = [
+            (gl::TEXTURE_MAG_FILTER, mag_filter),
+            (gl::TEXTURE_MIN_FILTER, min_filter),
+            (gl::TEXTURE_WRAP_S, gl::CLAMP_TO_EDGE),
+            (gl::TEXTURE_WRAP_T, gl::CLAMP_TO_EDGE),
+        ];
+        for &(name, value) in params.iter() {
+            self.gl.tex_parameter_i(target, name, value as gl::GLint);
+        }
+    }
+
+    /// Copies the whole of `src` into `dst`, starting at offset (0, 0) in both
+    /// textures. No scaling is performed.
+    ///
+    /// `dst` must be at least as large as `src` in each dimension (asserted in
+    /// debug builds). If `dst` is larger, the pixels outside the copied
+    /// region are not written to.
+    pub fn copy_entire_texture(
+        &mut self,
+        dst: &mut Texture,
+        src: &Texture,
+    ) {
+        debug_assert!(self.inside_frame);
+        debug_assert!(dst.size.width >= src.size.width);
+        debug_assert!(dst.size.height >= src.size.height);
+
+        // Both regions start at the origin; the extent is the whole of `src`.
+        let (width, height) = (src.size.width as usize, src.size.height as usize);
+        self.copy_texture_sub_region(
+            src, 0, 0,
+            dst, 0, 0,
+            width, height,
+        );
+    }
+
+    /// Copies the specified subregion from src_texture to dest_texture.
+    ///
+    /// Uses `glCopyImageSubData` when the driver supports it (in which case the
+    /// two textures must be distinct, or this panics), and a framebuffer blit
+    /// with nearest filtering otherwise.
+    pub fn copy_texture_sub_region(
+        &mut self,
+        src_texture: &Texture,
+        src_x: usize,
+        src_y: usize,
+        dest_texture: &Texture,
+        dest_x: usize,
+        dest_y: usize,
+        width: usize,
+        height: usize,
+    ) {
+        if self.capabilities.supports_copy_image_sub_data {
+            assert_ne!(
+                src_texture.id, dest_texture.id,
+                "glCopyImageSubData's behaviour is undefined if src and dst images are identical and the rectangles overlap."
+            );
+            // Mip level 0 and z offset 0 on both sides; the copy is one layer deep.
+            let (level, z, depth) = (0, 0, 1);
+            unsafe {
+                self.gl.copy_image_sub_data(
+                    src_texture.id, src_texture.target, level, src_x as _, src_y as _, z,
+                    dest_texture.id, dest_texture.target, level, dest_x as _, dest_y as _, z,
+                    width as _, height as _, depth,
+                );
+            }
+        } else {
+            let size = FramebufferIntSize::new(width as i32, height as i32);
+            let src_rect = FramebufferIntRect::from_origin_and_size(
+                FramebufferIntPoint::new(src_x as i32, src_y as i32),
+                size,
+            );
+            let dest_rect = FramebufferIntRect::from_origin_and_size(
+                FramebufferIntPoint::new(dest_x as i32, dest_y as i32),
+                size,
+            );
+
+            // In most cases the filter shouldn't matter, as there is no scaling involved
+            // in the blit. We were previously using Linear, but this caused issues when
+            // blitting RGBAF32 textures on Mali, so use Nearest to be safe.
+            self.blit_render_target(
+                ReadTarget::from_texture(src_texture),
+                src_rect,
+                DrawTarget::from_texture(dest_texture, false),
+                dest_rect,
+                TextureFilter::Nearest,
+            );
+        }
+    }
+
+    /// Hints to the driver that the contents of `texture`'s render target may be
+    /// discarded.
+    ///
+    /// Does nothing when the device capabilities do not report support for render
+    /// target invalidation, or when the texture has no FBO for its current depth
+    /// configuration. The previously bound draw FBO is restored afterwards.
+    pub fn invalidate_render_target(&mut self, texture: &Texture) {
+        if !self.capabilities.supports_render_target_invalidate {
+            return;
+        }
+
+        // Choose the FBO, and the attachments to discard, that match the texture's
+        // depth support.
+        let has_depth = texture.supports_depth();
+        let fbo = if has_depth { &texture.fbo_with_depth } else { &texture.fbo };
+        let attachments = if has_depth {
+            &[gl::COLOR_ATTACHMENT0, gl::DEPTH_ATTACHMENT] as &[gl::GLenum]
+        } else {
+            &[gl::COLOR_ATTACHMENT0] as &[gl::GLenum]
+        };
+
+        let fbo_id = match *fbo {
+            Some(id) => id,
+            None => return,
+        };
+
+        let previous_draw_fbo = self.bound_draw_fbo;
+        // Note: The invalidate extension may not be supported, in which
+        // case this is a no-op. That's ok though, because it's just a
+        // hint.
+        self.bind_external_draw_target(fbo_id);
+        self.gl.invalidate_framebuffer(gl::FRAMEBUFFER, attachments);
+        self.bind_external_draw_target(previous_draw_fbo);
+    }
+
+    /// Hints to the driver that the depth contents of the currently bound draw
+    /// framebuffer are no longer required.
+    ///
+    /// In contrast to `invalidate_render_target`, the colour attachment is left
+    /// untouched, so this may be used while the colour contents are still needed.
+    /// Calling it before unbinding the framebuffer at the end of a pass lets tiled
+    /// GPUs skip writing the depth contents back to memory.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `self.depth_available` is false.
+    pub fn invalidate_depth_target(&mut self) {
+        assert!(self.depth_available);
+
+        // The default framebuffer is addressed with DEPTH; any other FBO with
+        // DEPTH_ATTACHMENT.
+        let targets_default_fbo = self.bound_draw_fbo == self.default_draw_fbo;
+        let attachment = if targets_default_fbo {
+            gl::DEPTH
+        } else {
+            gl::DEPTH_ATTACHMENT
+        };
+        self.gl.invalidate_framebuffer(gl::DRAW_FRAMEBUFFER, &[attachment]);
+    }
+
+    /// Marks `texture` as used in the current frame ahead of being reused as a
+    /// render target.
+    ///
+    /// If `rt_info` asks for depth and the texture does not support it yet, an FBO
+    /// with a depth attachment is created for it.
+    pub fn reuse_render_target<T: Texel>(
+        &mut self,
+        texture: &mut Texture,
+        rt_info: RenderTargetInfo,
+    ) {
+        texture.last_frame_used = self.frame_id;
+
+        // Only create the depth-enabled FBO when it is requested and still missing.
+        let needs_depth_fbo = rt_info.has_depth && !texture.supports_depth();
+        if needs_depth_fbo {
+            self.init_fbos(texture, true);
+        }
+    }
+
+    /// Creates the framebuffer object for `texture`, with or without a depth
+    /// attachment, and stores its id in the matching `texture` field.
+    ///
+    /// The draw FBO that was bound on entry is re-bound before returning.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the requested kind of FBO already exists on the texture.
+    fn init_fbos(&mut self, texture: &mut Texture, with_depth: bool) {
+        // Acquire the (shared) depth renderbuffer first, if one is wanted.
+        let depth_rb = if with_depth {
+            Some(self.acquire_depth_target(texture.get_dimensions()))
+        } else {
+            None
+        };
+        let fbo_slot = if with_depth {
+            &mut texture.fbo_with_depth
+        } else {
+            &mut texture.fbo
+        };
+
+        // Generate the FBO and record it on the texture.
+        assert!(fbo_slot.is_none());
+        let fbo_id = FBOId(*self.gl.gen_framebuffers(1).first().unwrap());
+        *fbo_slot = Some(fbo_id);
+
+        // Bind the new FBO while its attachments are configured.
+        let previous_draw_fbo = self.bound_draw_fbo;
+        self.bind_external_draw_target(fbo_id);
+
+        self.gl.framebuffer_texture_2d(
+            gl::DRAW_FRAMEBUFFER,
+            gl::COLOR_ATTACHMENT0,
+            texture.target,
+            texture.id,
+            0,
+        );
+
+        if let Some(RBOId(renderbuffer)) = depth_rb {
+            self.gl.framebuffer_renderbuffer(
+                gl::DRAW_FRAMEBUFFER,
+                gl::DEPTH_ATTACHMENT,
+                gl::RENDERBUFFER,
+                renderbuffer,
+            );
+        }
+
+        debug_assert_eq!(
+            self.gl.check_frame_buffer_status(gl::DRAW_FRAMEBUFFER),
+            gl::FRAMEBUFFER_COMPLETE,
+            "Incomplete framebuffer",
+        );
+
+        self.bind_external_draw_target(previous_draw_fbo);
+    }
+
+    /// Returns the renderbuffer id of the depth target shared by all render
+    /// targets of size `dimensions`, creating it on first use, and increments its
+    /// reference count.
+    fn acquire_depth_target(&mut self, dimensions: DeviceIntSize) -> RBOId {
+        let shared = match self.depth_targets.entry(dimensions) {
+            Entry::Occupied(existing) => existing.into_mut(),
+            Entry::Vacant(slot) => {
+                // No depth target of this size yet: allocate renderbuffer storage for one.
+                let depth_rb = self.gl.gen_renderbuffers(1)[0];
+                self.gl.bind_renderbuffer(gl::RENDERBUFFER, depth_rb);
+                self.gl.renderbuffer_storage(
+                    gl::RENDERBUFFER,
+                    self.depth_format,
+                    dimensions.width as _,
+                    dimensions.height as _,
+                );
+                slot.insert(SharedDepthTarget {
+                    rbo_id: RBOId(depth_rb),
+                    refcount: 0,
+                })
+            }
+        };
+
+        shared.refcount += 1;
+        shared.rbo_id
+    }
+
+    /// Drops one reference to the shared depth target of size `dimensions`,
+    /// deleting its renderbuffer once the reference count reaches zero.
+    ///
+    /// # Panics
+    ///
+    /// Panics if no depth target is registered for `dimensions`.
+    fn release_depth_target(&mut self, dimensions: DeviceIntSize) {
+        let mut occupied = match self.depth_targets.entry(dimensions) {
+            Entry::Vacant(..) => panic!("Releasing unknown depth target"),
+            Entry::Occupied(occupied) => occupied,
+        };
+
+        let remaining = {
+            let shared = occupied.get_mut();
+            debug_assert!(shared.refcount != 0);
+            shared.refcount -= 1;
+            shared.refcount
+        };
+
+        // Last user gone: remove the bookkeeping entry and free the GL object.
+        if remaining == 0 {
+            let released = occupied.remove();
+            self.gl.delete_renderbuffers(&[released.rbo_id.0]);
+        }
+    }
+
+    /// Blits `src_rect` of the currently bound read FBO onto `dest_rect` of the
+    /// currently bound draw FBO (colour buffer only).
+    ///
+    /// The source rectangle is shifted by the offset stored alongside
+    /// `self.bound_read_fbo`. `Trilinear` filtering is mapped to `gl::LINEAR`.
+    fn blit_render_target_impl(
+        &mut self,
+        src_rect: FramebufferIntRect,
+        dest_rect: FramebufferIntRect,
+        filter: TextureFilter,
+    ) {
+        debug_assert!(self.inside_frame);
+
+        let gl_filter = match filter {
+            TextureFilter::Nearest => gl::NEAREST,
+            TextureFilter::Linear | TextureFilter::Trilinear => gl::LINEAR,
+        };
+
+        // Translate the source rect by the read target's offset.
+        let read_offset = &self.bound_read_fbo.1;
+        let src_x0 = src_rect.min.x + read_offset.x;
+        let src_y0 = src_rect.min.y + read_offset.y;
+        let src_x1 = src_x0 + src_rect.width();
+        let src_y1 = src_y0 + src_rect.height();
+
+        self.gl.blit_framebuffer(
+            src_x0,
+            src_y0,
+            src_x1,
+            src_y1,
+            dest_rect.min.x,
+            dest_rect.min.y,
+            dest_rect.max.x,
+            dest_rect.max.y,
+            gl::COLOR_BUFFER_BIT,
+            gl_filter,
+        );
+    }
+
+    /// Perform a blit between src_target and dest_target.
+    /// This will overwrite self.bound_read_fbo and self.bound_draw_fbo.
+    ///
+    /// `src_rect` and `dest_rect` select the regions in the source and destination
+    /// respectively; `filter` chooses the sampling mode used by the blit. Must be
+    /// called inside a frame (checked in debug builds).
+    pub fn blit_render_target(
+        &mut self,
+        src_target: ReadTarget,
+        src_rect: FramebufferIntRect,
+        dest_target: DrawTarget,
+        dest_rect: FramebufferIntRect,
+        filter: TextureFilter,
+    ) {
+        debug_assert!(self.inside_frame);
+
+        // Bind the source as the read target...
+        self.bind_read_target(src_target);
+
+        // ...and the destination as the draw target, then blit between the two bindings.
+        self.bind_draw_target(dest_target);
+
+        self.blit_render_target_impl(src_rect, dest_rect, filter);
+    }
+
+    /// Blits `src_rect` of `src_target` onto `dest_rect` of `dest_target`, flipped
+    /// vertically, using linear filtering.
+    ///
+    /// Useful for blitting textures (which use origin-bottom-left) to the main
+    /// framebuffer (which uses origin-top-left).
+    pub fn blit_render_target_invert_y(
+        &mut self,
+        src_target: ReadTarget,
+        src_rect: FramebufferIntRect,
+        dest_target: DrawTarget,
+        dest_rect: FramebufferIntRect,
+    ) {
+        debug_assert!(self.inside_frame);
+
+        // Swapping the destination's vertical bounds makes the blit flip the image.
+        let mut flipped_dest_rect = dest_rect;
+        mem::swap(&mut flipped_dest_rect.min.y, &mut flipped_dest_rect.max.y);
+
+        self.blit_render_target(
+            src_target,
+            src_rect,
+            dest_target,
+            flipped_dest_rect,
+            TextureFilter::Linear,
+        );
+    }
+
+    /// Destroys `texture`: deletes its FBOs, releases its reference to the shared
+    /// depth target (if it had depth support), deletes the GL texture and clears
+    /// any cached texture binding that still refers to it.
+    pub fn delete_texture(&mut self, mut texture: Texture) {
+        debug_assert!(self.inside_frame);
+
+        // Sampled before the FBO fields are cleared below.
+        // NOTE(review): presumably `supports_depth` inspects those fields — confirm.
+        let had_depth = texture.supports_depth();
+
+        if let Some(fbo) = texture.fbo.take() {
+            self.gl.delete_framebuffers(&[fbo.0]);
+        }
+        if let Some(fbo) = texture.fbo_with_depth.take() {
+            self.gl.delete_framebuffers(&[fbo.0]);
+        }
+
+        if had_depth {
+            self.release_depth_target(texture.get_dimensions());
+        }
+
+        let deleted_id = texture.id;
+        self.gl.delete_textures(&[deleted_id]);
+
+        // Forget cached bindings that point at the texture that was just deleted.
+        for slot in self.bound_textures.iter_mut().filter(|slot| **slot == deleted_id) {
+            *slot = 0;
+        }
+
+        self.textures_deleted += 1;
+
+        // Disarm the assert in Texture::drop().
+        texture.id = 0;
+    }
+
+    /// Deletes the GL texture wrapped by `external` and zeroes its id.
+    /// Only compiled when the `replay` feature is enabled.
+    #[cfg(feature = "replay")]
+    pub fn delete_external_texture(&mut self, mut external: ExternalTexture) {
+        self.gl.delete_textures(&[external.id]);
+        // Mark the wrapper as no longer owning a live GL object.
+        external.id = 0;
+    }
+
+    /// Deletes the GL program object owned by `program` and zeroes its id.
+    pub fn delete_program(&mut self, mut program: Program) {
+        self.gl.delete_program(program.id);
+        // Mark the wrapper as no longer owning a live GL object.
+        program.id = 0;
+    }
+
+    /// Creates a shader program via `create_program` and links it straight away
+    /// with `link_program`, using `descriptor` for the vertex layout.
+    ///
+    /// # Errors
+    ///
+    /// Returns the `ShaderError` from whichever of the two steps fails first.
+    pub fn create_program_linked(
+        &mut self,
+        base_filename: &'static str,
+        features: &[&'static str],
+        descriptor: &VertexDescriptor,
+    ) -> Result<Program, ShaderError> {
+        self.create_program(base_filename, features)
+            .and_then(|mut program| {
+                self.link_program(&mut program, descriptor)?;
+                Ok(program)
+            })
+    }
+
+    /// Creates a shader program without linking it.
+    ///
+    /// Only the minimum work needed to start loading a binary shader is done here.
+    /// When the program cache holds a binary for this source, `glProgramBinary` is
+    /// invoked, which, at least on ANGLE, loads and links the binary on a
+    /// background thread and can make the later `link_program()` call cheaper.
+    pub fn create_program(
+        &mut self,
+        base_filename: &'static str,
+        features: &[&'static str],
+    ) -> Result<Program, ShaderError> {
+        debug_assert!(self.inside_frame);
+
+        let source_info = ProgramSourceInfo::new(self, base_filename, features);
+        let program_id = self.gl.create_program();
+
+        // Kick off loading of a cached binary, if the cache has one for this digest.
+        if let Some(cache) = self.cached_programs.as_ref() {
+            let entries = cache.entries.borrow();
+            if let Some(cached) = entries.get(&source_info.digest) {
+                self.gl.program_binary(program_id, cached.binary.format, &cached.binary.bytes);
+            }
+        }
+
+        // Use 0 for the uniforms as they are initialized by link_program.
+        Ok(Program {
+            id: program_id,
+            u_transform: 0,
+            u_mode: 0,
+            u_texture_size: 0,
+            source_info,
+            is_initialized: false,
+        })
+    }
+
+    /// Assembles the source of the `kind` shader for `base_filename` with the given
+    /// `features`, handing the pieces to `output`.
+    ///
+    /// Delegates to `do_build_shader_string`, supplying the device's shader version
+    /// and a source loader that takes `self.resource_override_path` into account.
+    fn build_shader_string<F: FnMut(&str)>(
+        &self,
+        features: &[&'static str],
+        kind: ShaderKind,
+        base_filename: &str,
+        output: F,
+    ) {
+        let shader_version = get_shader_version(&*self.gl);
+        let override_path = self.resource_override_path.as_ref();
+
+        do_build_shader_string(
+            shader_version,
+            features,
+            kind,
+            base_filename,
+            &|f| get_unoptimized_shader_source(f, override_path),
+            output,
+        )
+    }
+
+    /// Assigns texture slots to the sampler uniforms of `program`.
+    ///
+    /// Each entry of `bindings` pairs a uniform name with the slot it should sample
+    /// from. Names with no uniform location in the program (location `-1`) are
+    /// skipped.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `program` is not the currently bound program.
+    pub fn bind_shader_samplers<S>(&mut self, program: &Program, bindings: &[(&'static str, S)])
+    where
+        S: Into<TextureSlot> + Copy,
+    {
+        // bind_program() must be called before calling bind_shader_samplers
+        assert_eq!(self.bound_program, program.id);
+
+        for &(uniform_name, slot) in bindings {
+            let location = self.gl.get_uniform_location(program.id, uniform_name);
+            if location == -1 {
+                continue;
+            }
+
+            self.bind_program(program);
+            let TextureSlot(unit) = slot.into();
+            self.gl.uniform_1i(location, unit as gl::GLint);
+        }
+    }
+
+    /// Looks up the location of the uniform called `name` in `program` and wraps
+    /// the value reported by GL in a `UniformLocation`.
+    pub fn get_uniform_location(&self, program: &Program, name: &str) -> UniformLocation {
+        UniformLocation(self.gl.get_uniform_location(program.id, name))
+    }
+
+    /// Uploads `transform` to the `u_transform` uniform location stored on `program`.
+    ///
+    /// Must be called inside a frame with a ready shader (both checked in debug
+    /// builds only).
+    pub fn set_uniforms(
+        &self,
+        program: &Program,
+        transform: &Transform3D<f32>,
+    ) {
+        debug_assert!(self.inside_frame);
+        #[cfg(debug_assertions)]
+        debug_assert!(self.shader_is_ready);
+
+        // The matrix is passed without transposition (`false`).
+        self.gl
+            .uniform_matrix_4fv(program.u_transform, false, &transform.to_array());
+    }
+
+    /// Writes `mode` to the uniform location cached in `self.program_mode_id`.
+    ///
+    /// Must be called inside a frame with a ready shader (both checked in debug
+    /// builds only).
+    pub fn switch_mode(&self, mode: i32) {
+        debug_assert!(self.inside_frame);
+        #[cfg(debug_assertions)]
+        debug_assert!(self.shader_is_ready);
+
+        self.gl.uniform_1i(self.program_mode_id.0, mode);
+    }
+
+    /// Uploads `texture_size` to the program's uTextureSize uniform.
+    ///
+    /// Most shaders use the GLSL `textureSize` function instead and never need
+    /// this. Nothing is uploaded when the stored location is `-1`.
+    pub fn set_shader_texture_size(
+        &self,
+        program: &Program,
+        texture_size: DeviceSize,
+    ) {
+        debug_assert!(self.inside_frame);
+        #[cfg(debug_assertions)]
+        debug_assert!(self.shader_is_ready);
+
+        let location = program.u_texture_size;
+        if location == -1 {
+            return;
+        }
+        self.gl.uniform_2f(location, texture_size.width, texture_size.height);
+    }
+
+    /// Generates a new GL buffer and wraps it in a `PBO` with no storage reserved.
+    pub fn create_pbo(&mut self) -> PBO {
+        let buffer_ids = self.gl.gen_buffers(1);
+        PBO {
+            id: buffer_ids[0],
+            reserved_size: 0,
+        }
+    }
+
+    /// Creates a PBO and allocates `size` bytes of uninitialized storage for it on
+    /// the `PIXEL_PACK_BUFFER` target, with a `STREAM_READ` usage hint.
+    ///
+    /// The pack alignment is set to 1, and the `PIXEL_PACK_BUFFER` binding is reset
+    /// to 0 before returning.
+    pub fn create_pbo_with_size(&mut self, size: usize) -> PBO {
+        let mut pbo = self.create_pbo();
+
+        self.gl.bind_buffer(gl::PIXEL_PACK_BUFFER, pbo.id);
+        self.gl.pixel_store_i(gl::PACK_ALIGNMENT, 1);
+        self.gl.buffer_data_untyped(
+            gl::PIXEL_PACK_BUFFER,
+            size as _,
+            ptr::null(),
+            gl::STREAM_READ,
+        );
+        // Unbind the target that was bound above. Unbinding PIXEL_UNPACK_BUFFER here
+        // would leave the new PBO bound as the pack buffer, so later readbacks that
+        // do not bind a pack buffer themselves would be redirected into it.
+        self.gl.bind_buffer(gl::PIXEL_PACK_BUFFER, 0);
+
+        pbo.reserved_size = size;
+        pbo
+    }
+
+    /// Reads the pixels of `rect` from `read_target` into `pbo`, in the GL read
+    /// format and pixel type that `gl_describe_format` reports for `format`.
+    ///
+    /// The pack buffer binding is reset to 0 afterwards.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the PBO's reserved storage is too small for the requested area.
+    pub fn read_pixels_into_pbo(
+        &mut self,
+        read_target: ReadTarget,
+        rect: DeviceIntRect,
+        format: ImageFormat,
+        pbo: &PBO,
+    ) {
+        let pixel_count = rect.area() as usize;
+        let byte_size = pixel_count * format.bytes_per_pixel() as usize;
+        assert!(byte_size <= pbo.reserved_size);
+
+        self.bind_read_target(read_target);
+
+        // With a pack buffer bound, the read below writes into the PBO.
+        self.gl.bind_buffer(gl::PIXEL_PACK_BUFFER, pbo.id);
+        self.gl.pixel_store_i(gl::PACK_ALIGNMENT, 1);
+
+        let format_desc = self.gl_describe_format(format);
+        let (x, y) = (rect.min.x, rect.min.y);
+        let (width, height) = (rect.width(), rect.height());
+
+        unsafe {
+            self.gl.read_pixels_into_pbo(
+                x as _,
+                y as _,
+                width as _,
+                height as _,
+                format_desc.read,
+                format_desc.pixel_type,
+            );
+        }
+
+        self.gl.bind_buffer(gl::PIXEL_PACK_BUFFER, 0);
+    }
+
+    /// Binds `pbo` as the pixel pack buffer and maps its reserved storage for
+    /// reading.
+    ///
+    /// Desktop GL maps the whole buffer with `map_buffer`; GLES maps
+    /// `pbo.reserved_size` bytes with `map_buffer_range`.
+    ///
+    /// Returns `None` if the mapping fails; in that case the pack buffer binding is
+    /// reset to 0. On success the returned `BoundPBO` exposes the mapped bytes.
+    /// NOTE(review): the buffer is still bound and mapped when `Some` is returned;
+    /// presumably `BoundPBO` unmaps/unbinds when dropped — confirm.
+    pub fn map_pbo_for_readback<'a>(&'a mut self, pbo: &'a PBO) -> Option<BoundPBO<'a>> {
+        self.gl.bind_buffer(gl::PIXEL_PACK_BUFFER, pbo.id);
+
+        let buf_ptr = match self.gl.get_type() {
+            gl::GlType::Gl => {
+                self.gl.map_buffer(gl::PIXEL_PACK_BUFFER, gl::READ_ONLY)
+            }
+
+            gl::GlType::Gles => {
+                self.gl.map_buffer_range(
+                    gl::PIXEL_PACK_BUFFER,
+                    0,
+                    pbo.reserved_size as _,
+                    gl::MAP_READ_BIT)
+            }
+        };
+
+        if buf_ptr.is_null() {
+            // No `BoundPBO` is handed out on failure, so nothing else would restore
+            // the binding; leaving the PBO bound would redirect later readbacks
+            // that expect no pack buffer.
+            self.gl.bind_buffer(gl::PIXEL_PACK_BUFFER, 0);
+            return None;
+        }
+
+        // SAFETY: `buf_ptr` is non-null and was returned by mapping this PBO for
+        // reading. The slice length relies on the buffer really holding
+        // `pbo.reserved_size` bytes, as recorded when its storage was allocated.
+        let buffer = unsafe { slice::from_raw_parts(buf_ptr as *const u8, pbo.reserved_size) };
+
+        Some(BoundPBO {
+            device: self,
+            data: buffer,
+        })
+    }
+
+    /// Deletes the GL buffer owned by `pbo` and resets the wrapper's id and
+    /// reserved size to zero.
+    pub fn delete_pbo(&mut self, mut pbo: PBO) {
+        self.gl.delete_buffers(&[pbo.id]);
+        // Mark the wrapper as no longer owning a live GL object.
+        pbo.id = 0;
+        pbo.reserved_size = 0
+    }
+
+    /// Computes `(size, stride)` in bytes for uploading a `size`-pixel area to a
+    /// texture of the given `format`.
+    ///
+    /// The stride is the row width in bytes rounded up to the multiple required by
+    /// `self.required_pbo_stride` for `format`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if either dimension of `size` is negative.
+    pub fn required_upload_size_and_stride(&self, size: DeviceIntSize, format: ImageFormat) -> (usize, usize) {
+        assert!(size.width >= 0);
+        assert!(size.height >= 0);
+
+        let row_bytes = size.width as usize * format.bytes_per_pixel() as usize;
+        let stride_multiple = self.required_pbo_stride.num_bytes(format);
+        let dst_stride = round_up_to_multiple(row_bytes, stride_multiple);
+
+        // The size of the chunk should only need to be (height - 1) * dst_stride + width_bytes,
+        // however, the android emulator will error unless it is height * dst_stride.
+        // See bug 1587047 for details.
+        // Using the full final row also ensures that the offset of the next chunk is
+        // optimally aligned.
+        (dst_stride * size.height as usize, dst_stride)
+    }
+
+    /// Returns a `TextureUploader` which can be used to upload texture data to `texture`.
+    /// Once uploads have been performed the uploader must be flushed with `TextureUploader::flush()`.
+    ///
+    /// `pbo_pool.begin_frame` is called before the uploader is created; the
+    /// uploader starts with no buffers and borrows the pool for its lifetime.
+    pub fn upload_texture<'a>(
+        &mut self,
+        pbo_pool: &'a mut UploadPBOPool,
+    ) -> TextureUploader<'a> {
+        debug_assert!(self.inside_frame);
+
+        pbo_pool.begin_frame(self);
+
+        TextureUploader {
+            buffers: Vec::new(),
+            pbo_pool,
+        }
+    }
+
+    /// Uploads `pixels` to the whole of `texture` right away, without going
+    /// through a PBO.
+    ///
+    /// The texture is bound to `DEFAULT_TEXTURE` and level 0 is overwritten from
+    /// the origin across the texture's full width and height.
+    pub fn upload_texture_immediate<T: Texel>(
+        &mut self,
+        texture: &Texture,
+        pixels: &[T]
+    ) {
+        self.bind_texture(DEFAULT_TEXTURE, texture, Swizzle::default());
+
+        let format_desc = self.gl_describe_format(texture.format);
+        let upload_width = texture.size.width as gl::GLint;
+        let upload_height = texture.size.height as gl::GLint;
+
+        self.gl.tex_sub_image_2d(
+            texture.target,
+            0,
+            0,
+            0,
+            upload_width,
+            upload_height,
+            format_desc.external,
+            format_desc.pixel_type,
+            texels_to_u8_slice(pixels),
+        );
+    }
+
+    /// Reads back a rectangle anchored at (0, 0) with the size given by `img_desc`,
+    /// using the GL read format and pixel type for `img_desc.format`, and returns
+    /// the bytes.
+    pub fn read_pixels(&mut self, img_desc: &ImageDescriptor) -> Vec<u8> {
+        let format_desc = self.gl_describe_format(img_desc.format);
+        let width = img_desc.size.width as i32;
+        let height = img_desc.size.height as i32;
+
+        self.gl.read_pixels(0, 0, width, height, format_desc.read, format_desc.pixel_type)
+    }
+
+    /// Reads the pixels of `rect` into `output`, in the GL read format and pixel
+    /// type for `format`. The GL command stream is flushed before reading.
+    ///
+    /// # Panics
+    ///
+    /// Panics unless `output.len()` equals the rectangle's area multiplied by the
+    /// format's bytes per pixel.
+    pub fn read_pixels_into(
+        &mut self,
+        rect: FramebufferIntRect,
+        format: ImageFormat,
+        output: &mut [u8],
+    ) {
+        let bytes_per_pixel = format.bytes_per_pixel();
+        let format_desc = self.gl_describe_format(format);
+        let expected_len = (bytes_per_pixel * rect.area()) as usize;
+        assert_eq!(output.len(), expected_len);
+
+        self.gl.flush();
+
+        let (x, y) = (rect.min.x, rect.min.y);
+        let (width, height) = (rect.width(), rect.height());
+        self.gl.read_pixels_into_buffer(
+            x as _,
+            y as _,
+            width as _,
+            height as _,
+            format_desc.read,
+            format_desc.pixel_type,
+            output,
+        );
+    }
+
+    /// Copies the texels of level 0 of `texture` into `output`, converted to the
+    /// external format and pixel type that correspond to `format`.
+    ///
+    /// The texture is bound to `DEFAULT_TEXTURE` as a side effect.
+    pub fn get_tex_image_into(
+        &mut self,
+        texture: &Texture,
+        format: ImageFormat,
+        output: &mut [u8],
+    ) {
+        self.bind_texture(DEFAULT_TEXTURE, texture, Swizzle::default());
+
+        let format_desc = self.gl_describe_format(format);
+        let mip_level = 0;
+        self.gl.get_tex_image_into_buffer(
+            texture.target,
+            mip_level,
+            format_desc.external,
+            format_desc.pixel_type,
+            output,
+        );
+    }
+
+    /// Attaches the provided texture to the current Read FBO binding.
+    ///
+    /// Level 0 of `texture_id` (with GL texture target `target`) becomes
+    /// `COLOR_ATTACHMENT0` of the framebuffer bound to `READ_FRAMEBUFFER`.
+    fn attach_read_texture_raw(&mut self, texture_id: gl::GLuint, target: gl::GLuint) {
+        self.gl.framebuffer_texture_2d(
+            gl::READ_FRAMEBUFFER,
+            gl::COLOR_ATTACHMENT0,
+            target,
+            texture_id,
+            0,
+        )
+    }
+
+    /// Attaches an externally owned texture, identified by its raw GL id, to the
+    /// current read FBO binding. `target` is translated to a GL texture target
+    /// with `get_gl_target`.
+    pub fn attach_read_texture_external(
+        &mut self, texture_id: gl::GLuint, target: ImageBufferKind
+    ) {
+        self.attach_read_texture_raw(texture_id, get_gl_target(target))
+    }
+
+    /// Attaches `texture` to the current read FBO binding, using the texture's
+    /// own id and target.
+    pub fn attach_read_texture(&mut self, texture: &Texture) {
+        self.attach_read_texture_raw(texture.id, texture.target)
+    }
+
+    /// Binds the vertex array object `id`, skipping the GL call when the cached
+    /// binding in `self.bound_vao` already matches.
+    fn bind_vao_impl(&mut self, id: gl::GLuint) {
+        debug_assert!(self.inside_frame);
+
+        if self.bound_vao == id {
+            // Already bound according to the cache; nothing to do.
+            return;
+        }
+
+        self.bound_vao = id;
+        self.gl.bind_vertex_array(id);
+    }
+
+    /// Binds `vao` as the current vertex array object.
+    pub fn bind_vao(&mut self, vao: &VAO) {
+        self.bind_vao_impl(vao.id)
+    }
+
+    /// Binds the custom `vao` as the current vertex array object.
+    pub fn bind_custom_vao(&mut self, vao: &CustomVAO) {
+        self.bind_vao_impl(vao.id)
+    }
+
+    /// Creates a VAO wired up to already existing vertex, instance and index
+    /// buffers.
+    ///
+    /// The new VAO is left bound. `owns_vertices_and_indices` is recorded on the
+    /// returned `VAO` and decides whether `delete_vao` also deletes the main
+    /// vertex and index buffers.
+    fn create_vao_with_vbos(
+        &mut self,
+        descriptor: &VertexDescriptor,
+        main_vbo_id: VBOId,
+        instance_vbo_id: VBOId,
+        instance_divisor: u32,
+        ibo_id: IBOId,
+        owns_vertices_and_indices: bool,
+    ) -> VAO {
+        let instance_stride = descriptor.instance_stride() as usize;
+
+        let id = self.gl.gen_vertex_arrays(1)[0];
+        self.bind_vao_impl(id);
+
+        // Attribute and index buffer bindings are made while the new VAO is bound.
+        descriptor.bind(self.gl(), main_vbo_id, instance_vbo_id, instance_divisor);
+        ibo_id.bind(self.gl()); // force it to be a part of VAO
+
+        VAO {
+            id,
+            ibo_id,
+            main_vbo_id,
+            instance_vbo_id,
+            instance_stride,
+            instance_divisor,
+            owns_vertices_and_indices,
+        }
+    }
+
+    /// Creates a VAO whose attributes are sourced from several vertex `streams`.
+    ///
+    /// Attribute indices are assigned consecutively across the streams, in order.
+    /// The new VAO is left bound.
+    pub fn create_custom_vao(
+        &mut self,
+        streams: &[Stream],
+    ) -> CustomVAO {
+        debug_assert!(self.inside_frame);
+
+        let id = self.gl.gen_vertex_arrays(1)[0];
+        self.bind_vao_impl(id);
+
+        // Index of the first attribute of the stream being bound.
+        let mut first_attrib = 0;
+        for stream in streams.iter() {
+            VertexDescriptor::bind_attributes(
+                stream.attributes,
+                first_attrib,
+                0,
+                self.gl(),
+                stream.vbo,
+            );
+            first_attrib += stream.attributes.len();
+        }
+
+        CustomVAO { id }
+    }
+
+    /// Deletes the GL vertex array owned by `vao` and zeroes its id.
+    pub fn delete_custom_vao(&mut self, mut vao: CustomVAO) {
+        self.gl.delete_vertex_arrays(&[vao.id]);
+        // Mark the wrapper as no longer owning a live GL object.
+        vao.id = 0;
+    }
+
+    /// Generates a new GL buffer and wraps it in an empty `VBO<T>` targeting
+    /// `ARRAY_BUFFER`. No storage is allocated yet (`allocated_count` is 0).
+    pub fn create_vbo<T>(&mut self) -> VBO<T> {
+        let id = self.gl.gen_buffers(1)[0];
+        VBO {
+            id,
+            target: gl::ARRAY_BUFFER,
+            allocated_count: 0,
+            marker: PhantomData,
+        }
+    }
+
+    /// Deletes the GL buffer owned by `vbo` and zeroes its id.
+    pub fn delete_vbo<T>(&mut self, mut vbo: VBO<T>) {
+        self.gl.delete_buffers(&[vbo.id]);
+        // Mark the wrapper as no longer owning a live GL object.
+        vbo.id = 0;
+    }
+
+    /// Creates a VAO together with freshly generated index, vertex and instance
+    /// buffers. The returned VAO owns its vertex and index buffers.
+    pub fn create_vao(&mut self, descriptor: &VertexDescriptor, instance_divisor: u32) -> VAO {
+        debug_assert!(self.inside_frame);
+
+        // One GL call yields all three buffers: [index, main vertex, instance].
+        let ids = self.gl.gen_buffers(3);
+        let (ibo_id, main_vbo_id, instance_vbo_id) =
+            (IBOId(ids[0]), VBOId(ids[1]), VBOId(ids[2]));
+
+        self.create_vao_with_vbos(
+            descriptor,
+            main_vbo_id,
+            instance_vbo_id,
+            instance_divisor,
+            ibo_id,
+            true,
+        )
+    }
+
+    /// Deletes `vao` and its instance buffer. The index and main vertex buffers
+    /// are deleted as well, but only if the VAO owns them.
+    pub fn delete_vao(&mut self, mut vao: VAO) {
+        let vao_id = vao.id;
+        self.gl.delete_vertex_arrays(&[vao_id]);
+        vao.id = 0;
+
+        // Shared index/vertex buffers belong to another VAO and must survive.
+        if vao.owns_vertices_and_indices {
+            let IBOId(index_buffer) = vao.ibo_id;
+            let VBOId(vertex_buffer) = vao.main_vbo_id;
+            self.gl.delete_buffers(&[index_buffer]);
+            self.gl.delete_buffers(&[vertex_buffer]);
+        }
+
+        let VBOId(instance_buffer) = vao.instance_vbo_id;
+        self.gl.delete_buffers(&[instance_buffer])
+    }
+
+    /// Allocates uninitialized storage for `count` elements of type `V` in `vbo`
+    /// and records the new element count on it. The buffer is left bound to its
+    /// target.
+    pub fn allocate_vbo<V>(
+        &mut self,
+        vbo: &mut VBO<V>,
+        count: usize,
+        usage_hint: VertexUsageHint,
+    ) {
+        debug_assert!(self.inside_frame);
+        vbo.allocated_count = count;
+
+        let byte_len = count * mem::size_of::<V>();
+        let target = vbo.target;
+
+        self.gl.bind_buffer(target, vbo.id);
+        // A null data pointer allocates the storage without uploading anything.
+        self.gl.buffer_data_untyped(
+            target,
+            byte_len as _,
+            ptr::null(),
+            usage_hint.to_gl(),
+        );
+    }
+
+    /// Uploads `data` into `vbo`, starting `offset` elements from the beginning of
+    /// the buffer. The buffer is left bound to its target.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the write would run past the buffer's allocated element count.
+    pub fn fill_vbo<V>(
+        &mut self,
+        vbo: &VBO<V>,
+        data: &[V],
+        offset: usize,
+    ) {
+        debug_assert!(self.inside_frame);
+        assert!(offset + data.len() <= vbo.allocated_count);
+
+        // Convert element counts into byte offsets/lengths.
+        let element_size = mem::size_of::<V>();
+        let byte_offset = offset * element_size;
+        let byte_len = data.len() * element_size;
+
+        self.gl.bind_buffer(vbo.target, vbo.id);
+        self.gl.buffer_sub_data_untyped(
+            vbo.target,
+            byte_offset as _,
+            byte_len as _,
+            data.as_ptr() as _,
+        );
+    }
+
+    /// Binds `vbo` and replaces the `ARRAY_BUFFER` data store with `vertices`,
+    /// using `usage_hint` as the GL usage.
+    fn update_vbo_data<V>(
+        &mut self,
+        vbo: VBOId,
+        vertices: &[V],
+        usage_hint: VertexUsageHint,
+    ) {
+        debug_assert!(self.inside_frame);
+
+        vbo.bind(self.gl());
+        gl::buffer_data(self.gl(), gl::ARRAY_BUFFER, vertices, usage_hint.to_gl());
+    }
+
+    /// Creates a VAO that reuses the main vertex buffer, index buffer and instance
+    /// divisor of `base_vao`, but gets an instance buffer of its own.
+    ///
+    /// The returned VAO does not own the shared vertex and index buffers.
+    pub fn create_vao_with_new_instances(
+        &mut self,
+        descriptor: &VertexDescriptor,
+        base_vao: &VAO,
+    ) -> VAO {
+        debug_assert!(self.inside_frame);
+
+        let instance_vbo_id = VBOId(self.gl.gen_buffers(1)[0]);
+        let owns_vertices_and_indices = false;
+
+        self.create_vao_with_vbos(
+            descriptor,
+            base_vao.main_vbo_id,
+            instance_vbo_id,
+            base_vao.instance_divisor,
+            base_vao.ibo_id,
+            owns_vertices_and_indices,
+        )
+    }
+
+    /// Replaces the contents of `vao`'s main vertex buffer with `vertices`.
+    ///
+    /// `vao` is expected to be the currently bound VAO (checked in debug builds).
+    pub fn update_vao_main_vertices<V>(
+        &mut self,
+        vao: &VAO,
+        vertices: &[V],
+        usage_hint: VertexUsageHint,
+    ) {
+        debug_assert_eq!(self.bound_vao, vao.id);
+        self.update_vbo_data(vao.main_vbo_id, vertices, usage_hint)
+    }
+
+    /// Replaces the contents of `vao`'s instance buffer with `instances`.
+    ///
+    /// With `repeat == None` the instances are uploaded as they are. With
+    /// `repeat == Some(count)` the buffer is re-allocated to hold
+    /// `instances.len() * count` elements and each instance is written `count`
+    /// times in a row, through a mapping of the buffer.
+    ///
+    /// `vao` is expected to be the currently bound VAO, and `V` to match the VAO's
+    /// instance stride (both checked in debug builds). When the device
+    /// capabilities require it, the VAO is unbound and re-bound after the buffer
+    /// has been replaced.
+    ///
+    /// # Panics
+    ///
+    /// Panics if mapping the instance buffer fails on the repeat path.
+    pub fn update_vao_instances<V: Clone>(
+        &mut self,
+        vao: &VAO,
+        instances: &[V],
+        usage_hint: VertexUsageHint,
+        // if `Some(count)`, each instance is repeated `count` times
+        repeat: Option<NonZeroUsize>,
+    ) {
+        debug_assert_eq!(self.bound_vao, vao.id);
+        debug_assert_eq!(vao.instance_stride as usize, mem::size_of::<V>());
+
+        match repeat {
+            Some(count) => {
+                let repeat_count = count.get();
+                let target = gl::ARRAY_BUFFER;
+                self.gl.bind_buffer(target, vao.instance_vbo_id.0);
+                let size = instances.len() * repeat_count * mem::size_of::<V>();
+                self.gl.buffer_data_untyped(
+                    target,
+                    size as _,
+                    ptr::null(),
+                    usage_hint.to_gl(),
+                );
+
+                // Mapping a zero-length range is a GL error, so there is nothing to
+                // map (or write) when the buffer is empty.
+                if size > 0 {
+                    let mapped = match self.gl.get_type() {
+                        gl::GlType::Gl => {
+                            self.gl.map_buffer(target, gl::WRITE_ONLY)
+                        }
+                        gl::GlType::Gles => {
+                            self.gl.map_buffer_range(target, 0, size as _, gl::MAP_WRITE_BIT)
+                        }
+                    };
+                    assert!(!mapped.is_null());
+
+                    let dst = mapped as *mut V;
+                    // Honour the requested repeat count rather than assuming groups
+                    // of four, so every element of the buffer is written and no
+                    // group is indexed out of bounds.
+                    for (index, instance) in instances.iter().enumerate() {
+                        let first = index * repeat_count;
+                        for copy in 0 .. repeat_count {
+                            // SAFETY: `first + copy` is below
+                            // `instances.len() * repeat_count`, the element count the
+                            // mapping was sized for. `ptr::write` is used because the
+                            // mapped memory is uninitialized and must not be dropped.
+                            // Assumes the mapping is suitably aligned for `V`.
+                            unsafe {
+                                ptr::write(dst.add(first + copy), instance.clone());
+                            }
+                        }
+                    }
+                    self.gl.unmap_buffer(target);
+                }
+            }
+            None => {
+                self.update_vbo_data(vao.instance_vbo_id, instances, usage_hint);
+            }
+        }
+
+        // On some devices the VAO must be manually unbound and rebound after an attached buffer has
+        // been orphaned. Failure to do so appeared to result in the orphaned buffer's contents
+        // being used for the subsequent draw call, rather than the new buffer's contents.
+        if self.capabilities.requires_vao_rebind_after_orphaning {
+            self.bind_vao_impl(0);
+            self.bind_vao_impl(vao.id);
+        }
+    }
+
+    /// Replaces the contents of `vao`'s index buffer with `indices`.
+    ///
+    /// `vao` is expected to be the currently bound VAO (checked in debug builds).
+    pub fn update_vao_indices<I>(&mut self, vao: &VAO, indices: &[I], usage_hint: VertexUsageHint) {
+        debug_assert!(self.inside_frame);
+        debug_assert_eq!(self.bound_vao, vao.id);
+
+        vao.ibo_id.bind(self.gl());
+
+        let usage = usage_hint.to_gl();
+        gl::buffer_data(self.gl(), gl::ELEMENT_ARRAY_BUFFER, indices, usage);
+    }
+
+    /// Draws `index_count` 16-bit indices as triangles, starting at index
+    /// `first_vertex` of the bound index buffer.
+    ///
+    /// Must be called inside a frame with a ready shader (checked in debug builds).
+    pub fn draw_triangles_u16(&mut self, first_vertex: i32, index_count: i32) {
+        debug_assert!(self.inside_frame);
+        #[cfg(debug_assertions)]
+        debug_assert!(self.shader_is_ready);
+
+        // Kept alive until the end of the function, i.e. across the draw call.
+        let _crash_guard = match self.annotate_draw_call_crashes {
+            true => Some(CrashAnnotatorGuard::new(
+                &self.crash_annotator,
+                CrashAnnotation::DrawShader,
+                &self.bound_program_name,
+            )),
+            false => None,
+        };
+
+        // Each u16 index is 2 bytes wide.
+        let offset = first_vertex as u32 * 2;
+        self.gl.draw_elements(gl::TRIANGLES, index_count, gl::UNSIGNED_SHORT, offset);
+    }
+
+    /// Draws `index_count` 32-bit indices as triangles, starting at index
+    /// `first_vertex` of the bound index buffer.
+    ///
+    /// Must be called inside a frame with a ready shader (checked in debug builds).
+    pub fn draw_triangles_u32(&mut self, first_vertex: i32, index_count: i32) {
+        debug_assert!(self.inside_frame);
+        #[cfg(debug_assertions)]
+        debug_assert!(self.shader_is_ready);
+
+        // Kept alive until the end of the function, i.e. across the draw call.
+        let _crash_guard = match self.annotate_draw_call_crashes {
+            true => Some(CrashAnnotatorGuard::new(
+                &self.crash_annotator,
+                CrashAnnotation::DrawShader,
+                &self.bound_program_name,
+            )),
+            false => None,
+        };
+
+        // Each u32 index is 4 bytes wide.
+        let offset = first_vertex as u32 * 4;
+        self.gl.draw_elements(gl::TRIANGLES, index_count, gl::UNSIGNED_INT, offset);
+    }
+
+    /// Draws `vertex_count` vertices as points, starting at `first_vertex`,
+    /// without using an index buffer.
+    ///
+    /// Must be called inside a frame with a ready shader (checked in debug builds).
+    pub fn draw_nonindexed_points(&mut self, first_vertex: i32, vertex_count: i32) {
+        debug_assert!(self.inside_frame);
+        #[cfg(debug_assertions)]
+        debug_assert!(self.shader_is_ready);
+
+        // Kept alive until the end of the function, i.e. across the draw call.
+        let _crash_guard = match self.annotate_draw_call_crashes {
+            true => Some(CrashAnnotatorGuard::new(
+                &self.crash_annotator,
+                CrashAnnotation::DrawShader,
+                &self.bound_program_name,
+            )),
+            false => None,
+        };
+
+        let primitive = gl::POINTS;
+        self.gl.draw_arrays(primitive, first_vertex, vertex_count);
+    }
+
+    /// Draws `vertex_count` vertices as lines, starting at `first_vertex`,
+    /// without using an index buffer.
+    ///
+    /// Must be called inside a frame with a ready shader (checked in debug builds).
+    pub fn draw_nonindexed_lines(&mut self, first_vertex: i32, vertex_count: i32) {
+        debug_assert!(self.inside_frame);
+        #[cfg(debug_assertions)]
+        debug_assert!(self.shader_is_ready);
+
+        // Kept alive until the end of the function, i.e. across the draw call.
+        let _crash_guard = match self.annotate_draw_call_crashes {
+            true => Some(CrashAnnotatorGuard::new(
+                &self.crash_annotator,
+                CrashAnnotation::DrawShader,
+                &self.bound_program_name,
+            )),
+            false => None,
+        };
+
+        let primitive = gl::LINES;
+        self.gl.draw_arrays(primitive, first_vertex, vertex_count);
+    }
+
+    /// Draws `index_count` 16-bit indices as triangles, starting at the beginning
+    /// of the bound index buffer.
+    ///
+    /// Must be called inside a frame with a ready shader (checked in debug builds).
+    pub fn draw_indexed_triangles(&mut self, index_count: i32) {
+        debug_assert!(self.inside_frame);
+        #[cfg(debug_assertions)]
+        debug_assert!(self.shader_is_ready);
+
+        // Kept alive until the end of the function, i.e. across the draw call.
+        let _crash_guard = match self.annotate_draw_call_crashes {
+            true => Some(CrashAnnotatorGuard::new(
+                &self.crash_annotator,
+                CrashAnnotation::DrawShader,
+                &self.bound_program_name,
+            )),
+            false => None,
+        };
+
+        let offset = 0;
+        self.gl.draw_elements(gl::TRIANGLES, index_count, gl::UNSIGNED_SHORT, offset);
+    }
+
+    /// Draws `instance_count` instances of `index_count` 16-bit indices as
+    /// triangles, starting at the beginning of the bound index buffer.
+    ///
+    /// Must be called inside a frame with a ready shader (checked in debug builds).
+    pub fn draw_indexed_triangles_instanced_u16(&mut self, index_count: i32, instance_count: i32) {
+        debug_assert!(self.inside_frame);
+        #[cfg(debug_assertions)]
+        debug_assert!(self.shader_is_ready);
+
+        // Kept alive until the end of the function, i.e. across the draw call.
+        let _crash_guard = match self.annotate_draw_call_crashes {
+            true => Some(CrashAnnotatorGuard::new(
+                &self.crash_annotator,
+                CrashAnnotation::DrawShader,
+                &self.bound_program_name,
+            )),
+            false => None,
+        };
+
+        let offset = 0;
+        self.gl.draw_elements_instanced(
+            gl::TRIANGLES,
+            index_count,
+            gl::UNSIGNED_SHORT,
+            offset,
+            instance_count,
+        );
+    }
+
+    /// Finishes the current frame.
+    ///
+    /// Resets the draw and read targets, unbinds the program and the 2D texture
+    /// on every tracked texture unit, advances the frame id and lets the program
+    /// cache (if any) update its on-disk state.
+    pub fn end_frame(&mut self) {
+        self.reset_draw_target();
+        self.reset_read_target();
+
+        debug_assert!(self.inside_frame);
+        self.inside_frame = false;
+
+        self.gl.bind_texture(gl::TEXTURE_2D, 0);
+        self.gl.use_program(0);
+
+        // Unbind the 2D texture on each tracked unit, then make unit 0 active again.
+        let unit_count = self.bound_textures.len();
+        for unit in 0..unit_count {
+            self.gl.active_texture(gl::TEXTURE0 + unit as gl::GLuint);
+            self.gl.bind_texture(gl::TEXTURE_2D, 0);
+        }
+        self.gl.active_texture(gl::TEXTURE0);
+
+        self.frame_id.0 += 1;
+
+        // Save any shaders compiled this frame to disk.
+        // The tenth frame is treated as the end of startup: the set of shaders in
+        // use at that point is the one to load on the next startup.
+        let startup_complete = self.frame_id.0 == 10;
+        if let Some(cache) = self.cached_programs.as_ref() {
+            cache.update_disk_cache(startup_complete);
+        }
+    }
+
+    /// Clears the color and/or depth buffer.
+    ///
+    /// * `color` - RGBA clear color; the color buffer is left alone when `None`.
+    /// * `depth` - depth clear value; the depth buffer is left alone when `None`.
+    /// * `rect` - when given, the clear is restricted to this rectangle using the
+    ///   scissor test. Note that the scissor test is enabled for the clear and
+    ///   then disabled again, whatever its state was before the call.
+    ///
+    /// Does nothing if both `color` and `depth` are `None`. When debug assertions
+    /// are enabled, clearing depth asserts that depth writes are currently enabled.
+    pub fn clear_target(
+        &self,
+        color: Option<[f32; 4]>,
+        depth: Option<f32>,
+        rect: Option<FramebufferIntRect>,
+    ) {
+        let mut buffers_to_clear = 0;
+
+        if let Some([r, g, b, a]) = color {
+            self.gl.clear_color(r, g, b, a);
+            buffers_to_clear |= gl::COLOR_BUFFER_BIT;
+        }
+
+        if let Some(depth_value) = depth {
+            if cfg!(debug_assertions) {
+                let mut depth_write_mask = [0];
+                unsafe {
+                    self.gl.get_boolean_v(gl::DEPTH_WRITEMASK, &mut depth_write_mask);
+                }
+                assert_ne!(depth_write_mask[0], 0);
+            }
+            self.gl.clear_depth(depth_value as f64);
+            buffers_to_clear |= gl::DEPTH_BUFFER_BIT;
+        }
+
+        if buffers_to_clear == 0 {
+            return;
+        }
+
+        if let Some(area) = rect {
+            self.gl.enable(gl::SCISSOR_TEST);
+            self.gl.scissor(
+                area.min.x,
+                area.min.y,
+                area.width(),
+                area.height(),
+            );
+            self.gl.clear(buffers_to_clear);
+            self.gl.disable(gl::SCISSOR_TEST);
+        } else {
+            self.gl.clear(buffers_to_clear);
+        }
+    }
+
+    /// Enables the depth test using `depth_func` as the comparison function.
+    ///
+    /// Panics if no depth target is available.
+    pub fn enable_depth(&self, depth_func: DepthFunction) {
+        assert!(self.depth_available, "Enabling depth test without depth target");
+        let gl_depth_func = depth_func as gl::GLuint;
+        self.gl.enable(gl::DEPTH_TEST);
+        self.gl.depth_func(gl_depth_func);
+    }
+
+    /// Disables the depth test.
+    pub fn disable_depth(&self) {
+        self.gl.disable(gl::DEPTH_TEST);
+    }
+
+    /// Enables writes to the depth buffer.
+    ///
+    /// Panics if no depth target is available.
+    pub fn enable_depth_write(&self) {
+        assert!(self.depth_available, "Enabling depth write without depth target");
+        self.gl.depth_mask(true);
+    }
+
+    /// Disables writes to the depth buffer.
+    pub fn disable_depth_write(&self) {
+        self.gl.depth_mask(false);
+    }
+
+    /// Disables the stencil test.
+    pub fn disable_stencil(&self) {
+        self.gl.disable(gl::STENCIL_TEST);
+    }
+
+    /// Sets the scissor box to `rect`. This does not enable the scissor test;
+    /// see `enable_scissor`.
+    pub fn set_scissor_rect(&self, rect: FramebufferIntRect) {
+        let (x, y) = (rect.min.x, rect.min.y);
+        let (width, height) = (rect.width(), rect.height());
+        self.gl.scissor(x, y, width, height);
+    }
+
+    /// Enables the scissor test.
+    pub fn enable_scissor(&self) {
+        self.gl.enable(gl::SCISSOR_TEST);
+    }
+
+    /// Disables the scissor test.
+    pub fn disable_scissor(&self) {
+        self.gl.disable(gl::SCISSOR_TEST);
+    }
+
+    /// Enables writes to all four color channels (red, green, blue, alpha).
+    pub fn enable_color_write(&self) {
+        self.gl.color_mask(true, true, true, true);
+    }
+
+    /// Disables writes to all four color channels (red, green, blue, alpha).
+    pub fn disable_color_write(&self) {
+        self.gl.color_mask(false, false, false, false);
+    }
+
+    /// Turns GL blending on or off.
+    ///
+    /// In debug builds this also clears the `shader_is_ready` flag.
+    pub fn set_blend(&mut self, enable: bool) {
+        match enable {
+            true => self.gl.enable(gl::BLEND),
+            false => self.gl.disable(gl::BLEND),
+        }
+        #[cfg(debug_assertions)]
+        {
+            self.shader_is_ready = false;
+        }
+    }
+
+    /// Selects the `FUNC_ADD` blend equation and installs the given
+    /// `(source, destination)` blend factors for the color and alpha channels.
+    ///
+    /// Uses the combined `blend_func` call when both pairs are equal and
+    /// `blend_func_separate` otherwise. In debug builds this also clears the
+    /// `shader_is_ready` flag.
+    fn set_blend_factors(
+        &mut self,
+        color: (gl::GLenum, gl::GLenum),
+        alpha: (gl::GLenum, gl::GLenum),
+    ) {
+        let (src_rgb, dst_rgb) = color;
+        let (src_alpha, dst_alpha) = alpha;
+
+        self.gl.blend_equation(gl::FUNC_ADD);
+        if (src_rgb, dst_rgb) != (src_alpha, dst_alpha) {
+            self.gl.blend_func_separate(src_rgb, dst_rgb, src_alpha, dst_alpha);
+        } else {
+            self.gl.blend_func(src_rgb, dst_rgb);
+        }
+        #[cfg(debug_assertions)]
+        {
+            self.shader_is_ready = false;
+        }
+    }
+
+    /// Blend factors `(SRC_ALPHA, ONE_MINUS_SRC_ALPHA)` for color and
+    /// `(ONE, ONE_MINUS_SRC_ALPHA)` for alpha.
+    pub fn set_blend_mode_alpha(&mut self) {
+        let color = (gl::SRC_ALPHA, gl::ONE_MINUS_SRC_ALPHA);
+        let alpha = (gl::ONE, gl::ONE_MINUS_SRC_ALPHA);
+        self.set_blend_factors(color, alpha);
+    }
+
+    /// Blend factors `(ONE, ONE_MINUS_SRC_ALPHA)` for both color and alpha.
+    pub fn set_blend_mode_premultiplied_alpha(&mut self) {
+        let factors = (gl::ONE, gl::ONE_MINUS_SRC_ALPHA);
+        self.set_blend_factors(factors, factors);
+    }
+
+    /// Blend factors `(ZERO, ONE_MINUS_SRC_ALPHA)` for both color and alpha.
+    pub fn set_blend_mode_premultiplied_dest_out(&mut self) {
+        let factors = (gl::ZERO, gl::ONE_MINUS_SRC_ALPHA);
+        self.set_blend_factors(factors, factors);
+    }
+
+    /// Blend factors `(ZERO, SRC_COLOR)` for color and `(ZERO, SRC_ALPHA)` for alpha.
+    pub fn set_blend_mode_multiply(&mut self) {
+        let color = (gl::ZERO, gl::SRC_COLOR);
+        let alpha = (gl::ZERO, gl::SRC_ALPHA);
+        self.set_blend_factors(color, alpha);
+    }
+    /// Blend factors `(ZERO, ONE_MINUS_SRC_COLOR)` for color and
+    /// `(ZERO, ONE_MINUS_SRC_ALPHA)` for alpha.
+    pub fn set_blend_mode_subpixel_pass0(&mut self) {
+        let color = (gl::ZERO, gl::ONE_MINUS_SRC_COLOR);
+        let alpha = (gl::ZERO, gl::ONE_MINUS_SRC_ALPHA);
+        self.set_blend_factors(color, alpha);
+    }
+    /// Blend factors `(ONE, ONE)` for both color and alpha.
+    pub fn set_blend_mode_subpixel_pass1(&mut self) {
+        let additive = (gl::ONE, gl::ONE);
+        self.set_blend_factors(additive, additive);
+    }
+    /// Blend factors `(ZERO, ONE_MINUS_SRC_COLOR)` for color and `(ZERO, ONE)` for alpha.
+    pub fn set_blend_mode_subpixel_with_bg_color_pass0(&mut self) {
+        let color = (gl::ZERO, gl::ONE_MINUS_SRC_COLOR);
+        let alpha = (gl::ZERO, gl::ONE);
+        self.set_blend_factors(color, alpha);
+    }
+    /// Blend factors `(ONE_MINUS_DST_ALPHA, ONE)` for color and `(ZERO, ONE)` for alpha.
+    pub fn set_blend_mode_subpixel_with_bg_color_pass1(&mut self) {
+        let color = (gl::ONE_MINUS_DST_ALPHA, gl::ONE);
+        let alpha = (gl::ZERO, gl::ONE);
+        self.set_blend_factors(color, alpha);
+    }
+    /// Blend factors `(ONE, ONE)` for color and `(ONE, ONE_MINUS_SRC_ALPHA)` for alpha.
+    pub fn set_blend_mode_subpixel_with_bg_color_pass2(&mut self) {
+        let color = (gl::ONE, gl::ONE);
+        let alpha = (gl::ONE, gl::ONE_MINUS_SRC_ALPHA);
+        self.set_blend_factors(color, alpha);
+    }
+    /// Blend factors `(ONE, ONE_MINUS_SRC1_COLOR)` for color and
+    /// `(ONE, ONE_MINUS_SRC1_ALPHA)` for alpha.
+    pub fn set_blend_mode_subpixel_dual_source(&mut self) {
+        let color = (gl::ONE, gl::ONE_MINUS_SRC1_COLOR);
+        let alpha = (gl::ONE, gl::ONE_MINUS_SRC1_ALPHA);
+        self.set_blend_factors(color, alpha);
+    }
+    /// Blend factors `(ONE_MINUS_DST_ALPHA, ONE_MINUS_SRC1_COLOR)` for color and
+    /// `(ONE, ONE_MINUS_SRC_ALPHA)` for alpha.
+    pub fn set_blend_mode_multiply_dual_source(&mut self) {
+        let color = (gl::ONE_MINUS_DST_ALPHA, gl::ONE_MINUS_SRC1_COLOR);
+        let alpha = (gl::ONE, gl::ONE_MINUS_SRC_ALPHA);
+        self.set_blend_factors(color, alpha);
+    }
+    /// Blend factors `(ONE, ONE_MINUS_SRC_COLOR)` for color and
+    /// `(ONE, ONE_MINUS_SRC_ALPHA)` for alpha.
+    pub fn set_blend_mode_screen(&mut self) {
+        let color = (gl::ONE, gl::ONE_MINUS_SRC_COLOR);
+        let alpha = (gl::ONE, gl::ONE_MINUS_SRC_ALPHA);
+        self.set_blend_factors(color, alpha);
+    }
+    /// Blend factors `(ONE, ONE)` for both color and alpha.
+    pub fn set_blend_mode_plus_lighter(&mut self) {
+        let additive = (gl::ONE, gl::ONE);
+        self.set_blend_factors(additive, additive);
+    }
+    /// Blend factors `(ONE_MINUS_DST_COLOR, ONE_MINUS_SRC_COLOR)` for color and
+    /// `(ONE, ONE_MINUS_SRC_ALPHA)` for alpha.
+    pub fn set_blend_mode_exclusion(&mut self) {
+        let color = (gl::ONE_MINUS_DST_COLOR, gl::ONE_MINUS_SRC_COLOR);
+        let alpha = (gl::ONE, gl::ONE_MINUS_SRC_ALPHA);
+        self.set_blend_factors(color, alpha);
+    }
+    /// Blend factors `(ONE, ONE_MINUS_SRC_ALPHA)` for both color and alpha
+    /// (the same factors as `set_blend_mode_premultiplied_alpha`).
+    pub fn set_blend_mode_show_overdraw(&mut self) {
+        let factors = (gl::ONE, gl::ONE_MINUS_SRC_ALPHA);
+        self.set_blend_factors(factors, factors);
+    }
+
+    /// Sets all blend factors to `ONE` and uses the `MAX` equation for color and
+    /// `FUNC_ADD` for alpha.
+    ///
+    /// In debug builds this also clears the `shader_is_ready` flag.
+    pub fn set_blend_mode_max(&mut self) {
+        let factor = gl::ONE;
+        self.gl.blend_func_separate(factor, factor, factor, factor);
+
+        let (color_equation, alpha_equation) = (gl::MAX, gl::FUNC_ADD);
+        self.gl.blend_equation_separate(color_equation, alpha_equation);
+        #[cfg(debug_assertions)]
+        {
+            self.shader_is_ready = false;
+        }
+    }
+    /// Sets all blend factors to `ONE` and uses the `MIN` equation for color and
+    /// `FUNC_ADD` for alpha.
+    ///
+    /// In debug builds this also clears the `shader_is_ready` flag.
+    pub fn set_blend_mode_min(&mut self) {
+        let factor = gl::ONE;
+        self.gl.blend_func_separate(factor, factor, factor, factor);
+
+        let (color_equation, alpha_equation) = (gl::MIN, gl::FUNC_ADD);
+        self.gl.blend_equation_separate(color_equation, alpha_equation);
+        #[cfg(debug_assertions)]
+        {
+            self.shader_is_ready = false;
+        }
+    }
+    /// Selects the blend equation corresponding to `mode`.
+    ///
+    /// * `Normal` installs explicit blend factors and uses `FUNC_ADD`.
+    /// * `PlusLighter` is delegated entirely to `set_blend_mode_plus_lighter`.
+    /// * Every other mode maps to its `*_KHR` advanced blend equation.
+    ///
+    /// In debug builds this also clears the `shader_is_ready` flag (for
+    /// `PlusLighter` that happens inside the delegated call).
+    pub fn set_blend_mode_advanced(&mut self, mode: MixBlendMode) {
+        let equation = match mode {
+            MixBlendMode::Normal => {
+                // Blend factors only make sense for the normal mode.
+                self.gl.blend_func_separate(gl::ZERO, gl::SRC_COLOR, gl::ZERO, gl::SRC_ALPHA);
+                gl::FUNC_ADD
+            },
+            MixBlendMode::PlusLighter => {
+                self.set_blend_mode_plus_lighter();
+                return;
+            },
+            MixBlendMode::Multiply => gl::MULTIPLY_KHR,
+            MixBlendMode::Screen => gl::SCREEN_KHR,
+            MixBlendMode::Overlay => gl::OVERLAY_KHR,
+            MixBlendMode::Darken => gl::DARKEN_KHR,
+            MixBlendMode::Lighten => gl::LIGHTEN_KHR,
+            MixBlendMode::ColorDodge => gl::COLORDODGE_KHR,
+            MixBlendMode::ColorBurn => gl::COLORBURN_KHR,
+            MixBlendMode::HardLight => gl::HARDLIGHT_KHR,
+            MixBlendMode::SoftLight => gl::SOFTLIGHT_KHR,
+            MixBlendMode::Difference => gl::DIFFERENCE_KHR,
+            MixBlendMode::Exclusion => gl::EXCLUSION_KHR,
+            MixBlendMode::Hue => gl::HSL_HUE_KHR,
+            MixBlendMode::Saturation => gl::HSL_SATURATION_KHR,
+            MixBlendMode::Color => gl::HSL_COLOR_KHR,
+            MixBlendMode::Luminosity => gl::HSL_LUMINOSITY_KHR,
+        };
+        self.gl.blend_equation(equation);
+        #[cfg(debug_assertions)]
+        {
+            self.shader_is_ready = false;
+        }
+    }
+
+    /// Returns whether `extension` is supported, as determined by the free
+    /// function `supports_extension` applied to this device's extension list.
+    pub fn supports_extension(&self, extension: &str) -> bool {
+        supports_extension(&self.extensions, extension)
+    }
+
+    /// Forwards pending driver debug messages to the log.
+    ///
+    /// Does nothing unless the device reports `supports_khr_debug`.
+    pub fn echo_driver_messages(&self) {
+        if !self.capabilities.supports_khr_debug {
+            return;
+        }
+        Device::log_driver_messages(self.gl());
+    }
+
+    /// Logs every debug message returned by `gl.get_debug_messages()`.
+    ///
+    /// The GL severity selects the log level (unknown severities map to `Trace`)
+    /// and the GL message type is rendered as a short label (`"?"` if unknown).
+    fn log_driver_messages(gl: &dyn gl::Gl) {
+        for message in gl.get_debug_messages() {
+            let kind = match message.ty {
+                gl::DEBUG_TYPE_ERROR => "error",
+                gl::DEBUG_TYPE_DEPRECATED_BEHAVIOR => "deprecated",
+                gl::DEBUG_TYPE_UNDEFINED_BEHAVIOR => "undefined",
+                gl::DEBUG_TYPE_PORTABILITY => "portability",
+                gl::DEBUG_TYPE_PERFORMANCE => "perf",
+                gl::DEBUG_TYPE_MARKER => "marker",
+                gl::DEBUG_TYPE_PUSH_GROUP => "group push",
+                gl::DEBUG_TYPE_POP_GROUP => "group pop",
+                gl::DEBUG_TYPE_OTHER => "other",
+                _ => "?",
+            };
+            let log_level = match message.severity {
+                gl::DEBUG_SEVERITY_HIGH => Level::Error,
+                gl::DEBUG_SEVERITY_MEDIUM => Level::Warn,
+                gl::DEBUG_SEVERITY_LOW => Level::Info,
+                gl::DEBUG_SEVERITY_NOTIFICATION => Level::Debug,
+                _ => Level::Trace,
+            };
+            log!(log_level, "({}) {}", kind, message.message);
+        }
+    }
+
+    /// Returns the GL internal format, external (upload) format, read format and
+    /// pixel type for `format`.
+    ///
+    /// All entries are fixed constants except `BGRA8`, whose internal/external
+    /// formats and pixel type come from this device's `bgra_formats` and
+    /// `bgra_pixel_type`.
+    pub fn gl_describe_format(&self, format: ImageFormat) -> FormatDesc {
+        // (internal, external, read, pixel_type)
+        let (internal, external, read, pixel_type) = match format {
+            ImageFormat::R8 => (gl::R8, gl::RED, gl::RED, gl::UNSIGNED_BYTE),
+            ImageFormat::R16 => (gl::R16, gl::RED, gl::RED, gl::UNSIGNED_SHORT),
+            ImageFormat::BGRA8 => (
+                self.bgra_formats.internal,
+                self.bgra_formats.external,
+                gl::BGRA,
+                self.bgra_pixel_type,
+            ),
+            ImageFormat::RGBA8 => (gl::RGBA8, gl::RGBA, gl::RGBA, gl::UNSIGNED_BYTE),
+            ImageFormat::RGBAF32 => (gl::RGBA32F, gl::RGBA, gl::RGBA, gl::FLOAT),
+            ImageFormat::RGBAI32 => (gl::RGBA32I, gl::RGBA_INTEGER, gl::RGBA_INTEGER, gl::INT),
+            ImageFormat::RG8 => (gl::RG8, gl::RG, gl::RG, gl::UNSIGNED_BYTE),
+            ImageFormat::RG16 => (gl::RG16, gl::RG, gl::RG, gl::UNSIGNED_SHORT),
+        };
+
+        FormatDesc {
+            internal,
+            external,
+            read,
+            pixel_type,
+        }
+    }
+
+    /// Generates a memory report for the resources managed by the device layer.
+    ///
+    /// The report always includes the depth target memory. When the
+    /// `sw_compositor` feature is enabled and `swgl` is non-null, the memory
+    /// reported by that SWGL context is included as well.
+    pub fn report_memory(&self, size_op_funs: &MallocSizeOfOps, swgl: *mut c_void) -> MemoryReport {
+        let mut report = MemoryReport::default();
+        report.depth_target_textures += self.depth_targets_memory();
+
+        #[cfg(feature = "sw_compositor")]
+        if !swgl.is_null() {
+            report.swgl += swgl::Context::from(swgl).report_memory(size_op_funs.size_of_op);
+        }
+        // Touch both parameters unconditionally; presumably this avoids
+        // unused-variable warnings when the `sw_compositor` feature is disabled.
+        let _ = size_op_funs;
+        let _ = swgl;
+        report
+    }
+
+    /// Returns the sum of `depth_target_size_in_bytes` over the dimensions of
+    /// every entry in `depth_targets`.
+    pub fn depth_targets_memory(&self) -> usize {
+        self.depth_targets
+            .keys()
+            .map(|dimensions| depth_target_size_in_bytes(dimensions))
+            .sum()
+    }
+}
+
+/// GL format description for an image format, as returned by
+/// `Device::gl_describe_format`.
+pub struct FormatDesc {
+    /// Format the texel data is internally stored in within a texture.
+    pub internal: gl::GLenum,
+    /// Format that we expect the data to be provided when filling the texture.
+    pub external: gl::GLuint,
+    /// Format to read the texels as, so that they can be uploaded as `external`
+    /// later on.
+    pub read: gl::GLuint,
+    /// Associated pixel type.
+    pub pixel_type: gl::GLuint,
+}
+
+/// A single pending texture upload, recorded against a `PixelBuffer` and
+/// consumed by `TextureUploader::update_impl` when the buffer is flushed.
+#[derive(Debug)]
+struct UploadChunk<'a> {
+    /// Destination rectangle within `texture`.
+    rect: DeviceIntRect,
+    /// Stride of the source data, if one was specified.
+    stride: Option<i32>,
+    /// Offset of the source data within the owning pixel buffer.
+    offset: usize,
+    /// Optional image format override for the upload.
+    format_override: Option<ImageFormat>,
+    /// Texture to upload into.
+    texture: &'a Texture,
+}
+
+/// A mapped upload PBO together with the uploads that have been staged in it.
+///
+/// All chunks must be flushed (see `flush_chunks`) before the buffer is dropped.
+#[derive(Debug)]
+struct PixelBuffer<'a> {
+    /// Number of bytes of the buffer already claimed by staged uploads; the next
+    /// upload is staged at this offset.
+    size_used: usize,
+    // small vector avoids heap allocation for a single chunk
+    /// Uploads staged in this buffer that have not been flushed yet.
+    chunks: SmallVec<[UploadChunk<'a>; 1]>,
+    /// The underlying PBO obtained from the pool.
+    inner: UploadPBO,
+    /// View of the PBO's mapped memory, `inner.pbo.reserved_size` bytes long.
+    mapping: &'a mut [mem::MaybeUninit<u8>],
+}
+
+impl<'a> PixelBuffer<'a> {
+    /// Wraps a mapped `UploadPBO`, exposing its mapping as a slice of
+    /// `reserved_size` possibly-uninitialized bytes.
+    ///
+    /// Panics (via `PBOMapping::get_ptr`) if the PBO is not mapped.
+    fn new(
+        pbo: UploadPBO,
+    ) -> Self {
+        let base = pbo.mapping.get_ptr().as_ptr();
+        let len = pbo.pbo.reserved_size;
+        // NOTE(review): soundness relies on the mapping pointer being valid for
+        // `reserved_size` bytes for as long as the slice is used; nothing here
+        // ties the slice lifetime `'a` to the mapping - confirm at call sites.
+        let mapping = unsafe { slice::from_raw_parts_mut(base, len) };
+
+        Self {
+            size_used: 0,
+            chunks: SmallVec::new(),
+            inner: pbo,
+            mapping,
+        }
+    }
+
+    /// Performs every staged upload through `TextureUploader::update_impl`,
+    /// leaving the chunk list empty.
+    fn flush_chunks(&mut self, device: &mut Device) {
+        self.chunks
+            .drain(..)
+            .for_each(|chunk| TextureUploader::update_impl(device, chunk));
+    }
+}
+
+impl<'a> Drop for PixelBuffer<'a> {
+    /// Asserts that every staged chunk was flushed before the buffer is dropped.
+    ///
+    /// The check is skipped while the thread is already panicking: a second
+    /// panic raised from a destructor during unwinding would abort the process
+    /// and hide the original failure. This mirrors `TextureUploader`'s `Drop`.
+    fn drop(&mut self) {
+        if thread::panicking() {
+            return;
+        }
+        assert_eq!(self.chunks.len(), 0, "PixelBuffer must be flushed before dropping.");
+    }
+}
+
+/// Mapping state of an upload PBO.
+#[derive(Debug)]
+enum PBOMapping {
+    /// The buffer is not currently mapped.
+    Unmapped,
+    /// The buffer is mapped for the current use only; it must be unmapped before
+    /// it is returned to the pool.
+    Transient(ptr::NonNull<mem::MaybeUninit<u8>>),
+    /// The buffer was mapped with `MAP_PERSISTENT_BIT`; it may be returned to
+    /// the pool and handed out again while still mapped.
+    Persistent(ptr::NonNull<mem::MaybeUninit<u8>>),
+}
+
+impl PBOMapping {
+    /// Returns the pointer to the mapped memory.
+    ///
+    /// Panics if the PBO is unmapped.
+    fn get_ptr(&self) -> ptr::NonNull<mem::MaybeUninit<u8>> {
+        match self {
+            PBOMapping::Transient(mapped) | PBOMapping::Persistent(mapped) => *mapped,
+            PBOMapping::Unmapped => unreachable!("Cannot get pointer to unmapped PBO."),
+        }
+    }
+}
+
+/// A PBO for uploading texture data, managed by UploadPBOPool.
+#[derive(Debug)]
+struct UploadPBO {
+    /// The underlying GL pixel buffer object.
+    pbo: PBO,
+    /// Current mapping state of `pbo`.
+    mapping: PBOMapping,
+    /// Whether the buffer is kept for reuse when returned to the pool. When
+    /// false, its storage is orphaned on return and only the ID is reused.
+    can_recycle: bool,
+}
+
+impl UploadPBO {
+    /// Creates a placeholder value: buffer ID 0, no reserved storage, unmapped
+    /// and not recyclable.
+    fn empty() -> Self {
+        let pbo = PBO {
+            id: 0,
+            reserved_size: 0,
+        };
+        Self {
+            pbo,
+            mapping: PBOMapping::Unmapped,
+            can_recycle: false,
+        }
+    }
+}
+
+/// Allocates and recycles PBOs used for uploading texture data.
+/// Tries to allocate and recycle PBOs of a fixed size, but will make exceptions when
+/// a larger buffer is required or to work around driver bugs.
+pub struct UploadPBOPool {
+    /// Usage hint to provide to the driver for optimizations.
+    usage_hint: VertexUsageHint,
+    /// The preferred size, in bytes, of the buffers to allocate.
+    default_size: usize,
+    /// List of allocated PBOs ready to be re-used.
+    available_buffers: Vec<UploadPBO>,
+    /// PBOs which have been returned during the current frame,
+    /// and do not yet have an associated sync object.
+    returned_buffers: Vec<UploadPBO>,
+    /// PBOs which are waiting until their sync object is signalled,
+    /// indicating they are ready to be re-used.
+    waiting_buffers: Vec<(gl::GLsync, Vec<UploadPBO>)>,
+    /// PBOs which have been orphaned.
+    /// We can recycle their IDs but must reallocate their storage.
+    orphaned_buffers: Vec<PBO>,
+}
+
+impl UploadPBOPool {
+    /// Creates an empty pool whose recyclable buffers are `default_size` bytes.
+    ///
+    /// The usage hint is taken from the device's pixel-buffer upload method;
+    /// with the immediate upload method it falls back to `Stream`.
+    pub fn new(device: &mut Device, default_size: usize) -> Self {
+        let usage_hint = if let UploadMethod::PixelBuffer(hint) = device.upload_method {
+            hint
+        } else {
+            VertexUsageHint::Stream
+        };
+
+        Self {
+            usage_hint,
+            default_size,
+            available_buffers: Vec::new(),
+            returned_buffers: Vec::new(),
+            waiting_buffers: Vec::new(),
+            orphaned_buffers: Vec::new(),
+        }
+    }
+
+    /// To be called at the beginning of a series of uploads.
+    /// Moves any buffers which are now ready to be used from the waiting list to the ready list.
+    pub fn begin_frame(&mut self, device: &mut Device) {
+        // Walk the waiting list in order and poll each fence without blocking.
+        // Buffers behind a signalled fence become available again; buffers behind
+        // a failed wait are deleted. Polling stops at the first fence that has
+        // not been signalled yet.
+        let mut processed_fences = 0;
+        for (sync, buffers) in self.waiting_buffers.iter_mut() {
+            let status = device.gl.client_wait_sync(*sync, 0, 0);
+            if status == gl::TIMEOUT_EXPIRED {
+                break;
+            }
+
+            if status == gl::ALREADY_SIGNALED || status == gl::CONDITION_SATISFIED {
+                self.available_buffers.append(buffers);
+            } else {
+                // gl::WAIT_FAILED or any unexpected status.
+                warn!("glClientWaitSync error in UploadPBOPool::begin_frame()");
+                for buffer in buffers.drain(..) {
+                    device.delete_pbo(buffer.pbo);
+                }
+            }
+            processed_fences += 1;
+        }
+
+        // Delete the processed fences, and remove their now-empty Vecs from waiting_buffers.
+        for (sync, _) in self.waiting_buffers.drain(..processed_fences) {
+            device.gl.delete_sync(sync);
+        }
+    }
+
+    /// To be called at the end of a series of uploads.
+    /// Creates a sync object, and adds the buffers returned during this frame to waiting_buffers.
+    /// If the sync object cannot be created, the returned buffers are deleted instead.
+    pub fn end_frame(&mut self, device: &mut Device) {
+        if self.returned_buffers.is_empty() {
+            return;
+        }
+
+        let sync = device.gl.fence_sync(gl::SYNC_GPU_COMMANDS_COMPLETE, 0);
+        if sync.is_null() {
+            warn!("glFenceSync error in UploadPBOPool::end_frame()");
+
+            for buffer in self.returned_buffers.drain(..) {
+                device.delete_pbo(buffer.pbo);
+            }
+            return;
+        }
+
+        let fenced_buffers = mem::take(&mut self.returned_buffers);
+        self.waiting_buffers.push((sync, fenced_buffers));
+    }
+
+    /// Obtain a PBO, either by reusing an existing PBO or allocating a new one.
+    /// min_size specifies the minimum required size of the PBO. The returned PBO
+    /// may be larger than required.
+    ///
+    /// On success the returned PBO is mapped (transiently or persistently).
+    ///
+    /// # Errors
+    ///
+    /// Returns a description of the failure if the buffer could not be mapped.
+    /// In that case the affected PBO is deleted rather than leaked.
+    fn get_pbo(&mut self, device: &mut Device, min_size: usize) -> Result<UploadPBO, String> {
+
+        // If min_size is smaller than our default size, then use the default size.
+        // The exception to this is when due to driver bugs we cannot upload from
+        // offsets other than zero within a PBO. In this case, there is no point in
+        // allocating buffers larger than required, as they cannot be shared.
+        let (can_recycle, size) = if min_size <= self.default_size && device.capabilities.supports_nonzero_pbo_offsets {
+            (true, self.default_size)
+        } else {
+            (false, min_size)
+        };
+
+        // Try to recycle an already allocated PBO.
+        if can_recycle {
+            if let Some(mut buffer) = self.available_buffers.pop() {
+                assert_eq!(buffer.pbo.reserved_size, size);
+                assert!(buffer.can_recycle);
+
+                device.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, buffer.pbo.id);
+
+                match buffer.mapping {
+                    PBOMapping::Unmapped => {
+                        // If buffer was unmapped then transiently map it.
+                        let ptr = device.gl.map_buffer_range(
+                            gl::PIXEL_UNPACK_BUFFER,
+                            0,
+                            buffer.pbo.reserved_size as _,
+                            gl::MAP_WRITE_BIT | gl::MAP_UNSYNCHRONIZED_BIT,
+                        ) as *mut mem::MaybeUninit<u8>;
+
+                        match ptr::NonNull::new(ptr) {
+                            Some(ptr) => buffer.mapping = PBOMapping::Transient(ptr),
+                            None => {
+                                // The buffer has already been removed from the pool;
+                                // delete it so the GL object is not leaked.
+                                let message = format!(
+                                    "Failed to transiently map PBO of size {} bytes",
+                                    buffer.pbo.reserved_size,
+                                );
+                                device.delete_pbo(buffer.pbo);
+                                return Err(message);
+                            }
+                        }
+                    }
+                    PBOMapping::Transient(_) => {
+                        unreachable!("Transiently mapped UploadPBO must be unmapped before returning to pool.");
+                    }
+                    PBOMapping::Persistent(_) => {
+                        // Persistently mapped buffers stay mapped while pooled.
+                    }
+                }
+
+                return Ok(buffer);
+            }
+        }
+
+        // Try to recycle a PBO ID (but not its allocation) from a previously allocated PBO.
+        // If there are none available, create a new PBO.
+        let mut pbo = match self.orphaned_buffers.pop() {
+            Some(pbo) => pbo,
+            None => device.create_pbo(),
+        };
+
+        assert_eq!(pbo.reserved_size, 0);
+        pbo.reserved_size = size;
+
+        device.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, pbo.id);
+        // Persistent mappings are only used for buffers that will be recycled.
+        let use_persistent_mapping = device.capabilities.supports_buffer_storage && can_recycle;
+        let ptr = if use_persistent_mapping {
+            device.gl.buffer_storage(
+                gl::PIXEL_UNPACK_BUFFER,
+                pbo.reserved_size as _,
+                ptr::null(),
+                gl::MAP_WRITE_BIT | gl::MAP_PERSISTENT_BIT,
+            );
+            device.gl.map_buffer_range(
+                gl::PIXEL_UNPACK_BUFFER,
+                0,
+                pbo.reserved_size as _,
+                // GL_MAP_COHERENT_BIT doesn't seem to work on Adreno, so use glFlushMappedBufferRange.
+                // kvark notes that coherent memory can be faster on some platforms, such as nvidia,
+                // so in the future we could choose which to use at run time.
+                gl::MAP_WRITE_BIT | gl::MAP_PERSISTENT_BIT | gl::MAP_FLUSH_EXPLICIT_BIT,
+            ) as *mut mem::MaybeUninit<u8>
+        } else {
+            device.gl.buffer_data_untyped(
+                gl::PIXEL_UNPACK_BUFFER,
+                pbo.reserved_size as _,
+                ptr::null(),
+                self.usage_hint.to_gl(),
+            );
+            device.gl.map_buffer_range(
+                gl::PIXEL_UNPACK_BUFFER,
+                0,
+                pbo.reserved_size as _,
+                // Unlike the recycling path above, where we re-map a buffer that was previously
+                // unmapped, this buffer has just been created, so there is no need for
+                // GL_MAP_UNSYNCHRONIZED_BIT.
+                gl::MAP_WRITE_BIT,
+            ) as *mut mem::MaybeUninit<u8>
+        };
+
+        let ptr = match ptr::NonNull::new(ptr) {
+            Some(ptr) => ptr,
+            None => {
+                let how = if use_persistent_mapping { "persistently" } else { "transiently" };
+                let message = format!("Failed to {} map PBO of size {} bytes", how, pbo.reserved_size);
+                // Delete the PBO so that neither its storage nor its ID is leaked.
+                device.delete_pbo(pbo);
+                return Err(message);
+            }
+        };
+
+        let mapping = if use_persistent_mapping {
+            PBOMapping::Persistent(ptr)
+        } else {
+            PBOMapping::Transient(ptr)
+        };
+
+        Ok(UploadPBO { pbo, mapping, can_recycle })
+    }
+
+    /// Returns a PBO to the pool. If the PBO is recyclable it is placed in the
+    /// list of buffers returned this frame. Otherwise we orphan the allocation
+    /// immediately, and will subsequently reuse just the ID.
+    ///
+    /// Panics if the buffer is still transiently mapped. The pixel unpack buffer
+    /// binding is reset to 0 before returning.
+    fn return_pbo(&mut self, device: &mut Device, mut buffer: UploadPBO) {
+        assert!(
+            !matches!(buffer.mapping, PBOMapping::Transient(_)),
+            "Transiently mapped UploadPBO must be unmapped before returning to pool.",
+        );
+
+        if !buffer.can_recycle {
+            // Re-specify the storage with size zero to orphan the allocation.
+            device.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, buffer.pbo.id);
+            device.gl.buffer_data_untyped(
+                gl::PIXEL_UNPACK_BUFFER,
+                0,
+                ptr::null(),
+                gl::STREAM_DRAW,
+            );
+            buffer.pbo.reserved_size = 0;
+            self.orphaned_buffers.push(buffer.pbo);
+        } else {
+            self.returned_buffers.push(buffer);
+        }
+
+        device.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, 0);
+    }
+
+    /// Frees all allocated buffers in response to a memory pressure event.
+    ///
+    /// Available, returned and waiting buffers are deleted, together with the
+    /// sync objects of the waiting buffers. Orphaned PBOs are kept.
+    pub fn on_memory_pressure(&mut self, device: &mut Device) {
+        let idle_buffers = self
+            .available_buffers
+            .drain(..)
+            .chain(self.returned_buffers.drain(..));
+        for buffer in idle_buffers {
+            device.delete_pbo(buffer.pbo);
+        }
+
+        for (sync, buffers) in self.waiting_buffers.drain(..) {
+            device.gl.delete_sync(sync);
+            buffers
+                .into_iter()
+                .for_each(|buffer| device.delete_pbo(buffer.pbo));
+        }
+        // There is no need to delete orphaned PBOs on memory pressure.
+    }
+
+    /// Generates a memory report.
+    ///
+    /// Counts the reserved size of every available, returned and waiting buffer
+    /// towards `texture_upload_pbos`. Orphaned PBOs are not counted.
+    pub fn report_memory(&self) -> MemoryReport {
+        let waiting = self
+            .waiting_buffers
+            .iter()
+            .flat_map(|(_, buffers)| buffers.iter());
+        let reserved_bytes: usize = self
+            .available_buffers
+            .iter()
+            .chain(self.returned_buffers.iter())
+            .chain(waiting)
+            .map(|buffer| buffer.pbo.reserved_size)
+            .sum();
+
+        let mut report = MemoryReport::default();
+        report.texture_upload_pbos += reserved_bytes;
+        report
+    }
+
+    /// Deletes every PBO owned by the pool (available, returned, waiting and
+    /// orphaned) along with the sync objects of the waiting buffers.
+    pub fn deinit(&mut self, device: &mut Device) {
+        let idle_buffers = self
+            .available_buffers
+            .drain(..)
+            .chain(self.returned_buffers.drain(..));
+        for buffer in idle_buffers {
+            device.delete_pbo(buffer.pbo);
+        }
+
+        for (sync, buffers) in self.waiting_buffers.drain(..) {
+            device.gl.delete_sync(sync);
+            buffers
+                .into_iter()
+                .for_each(|buffer| device.delete_pbo(buffer.pbo));
+        }
+
+        self.orphaned_buffers
+            .drain(..)
+            .for_each(|pbo| device.delete_pbo(pbo));
+    }
+}
+
+/// Used to perform a series of texture uploads.
+/// Create using `Device::upload_texture()`. Perform a series of uploads using either
+/// `upload()`, or `stage()` and `upload_staged()`, then call `flush()`.
+/// Dropping an uploader that still holds unflushed buffers panics.
+pub struct TextureUploader<'a> {
+    /// A list of buffers containing uploads that need to be flushed.
+    buffers: Vec<PixelBuffer<'a>>,
+    /// Pool used to obtain PBOs to fill with texture data.
+    pub pbo_pool: &'a mut UploadPBOPool,
+}
+
+impl<'a> Drop for TextureUploader<'a> {
+    /// Asserts that no unflushed buffers remain. The check is skipped while the
+    /// thread is already panicking.
+    fn drop(&mut self) {
+        if !thread::panicking() {
+            assert!(
+                self.buffers.is_empty(),
+                "TextureUploader must be flushed before it is dropped."
+            );
+        }
+    }
+}
+
+/// A buffer used to manually stage data to be uploaded to a texture.
+/// Created by calling `TextureUploader::stage()`, the data can then be written to via
+/// `get_mapping()` and submitted with `TextureUploader::upload_staged()`.
+#[derive(Debug)]
+pub struct UploadStagingBuffer<'a> {
+    /// The PixelBuffer containing this upload.
+    buffer: PixelBuffer<'a>,
+    /// The offset of this upload within the PixelBuffer, in bytes.
+    offset: usize,
+    /// The size of this upload, in bytes.
+    size: usize,
+    /// The stride of the data within the buffer.
+    stride: usize,
+}
+
+impl<'a> UploadStagingBuffer<'a> {
+    /// Returns the stride with which data must be written to the buffer.
+    pub fn get_stride(&self) -> usize {
+        self.stride
+    }
+
+    /// Returns the writable region of the underlying mapping that belongs to
+    /// this upload: `size` bytes starting at `offset`.
+    pub fn get_mapping(&mut self) -> &mut [mem::MaybeUninit<u8>] {
+        let start = self.offset;
+        let end = start + self.size;
+        &mut self.buffer.mapping[start..end]
+    }
+}
+
+impl<'a> TextureUploader<'a> {
+    /// Returns an UploadStagingBuffer which can be used to manually stage data to be uploaded.
+    /// Once the data has been staged, it can be uploaded with upload_staged().
+    ///
+    /// Reuses a pending pixel buffer with enough free space if there is one,
+    /// otherwise obtains a new PBO from the pool. Returns an error if a new PBO
+    /// could not be obtained.
+    ///
+    /// Panics if the device is not using the pixel buffer upload method.
+    pub fn stage(
+        &mut self,
+        device: &mut Device,
+        format: ImageFormat,
+        size: DeviceIntSize,
+    ) -> Result<UploadStagingBuffer<'a>, String> {
+        assert!(matches!(device.upload_method, UploadMethod::PixelBuffer(_)), "Texture uploads should only be staged when using pixel buffers.");
+
+        // for optimal PBO texture uploads the offset and stride of the data in
+        // the buffer may have to be a multiple of a certain value.
+        let (dst_size, dst_stride) = device.required_upload_size_and_stride(
+            size,
+            format,
+        );
+
+        // Find a pixel buffer with enough space remaining, creating a new one if required.
+        let reusable_index = self.buffers.iter().position(|candidate| {
+            candidate.size_used + dst_size <= candidate.inner.pbo.reserved_size
+        });
+        let buffer = if let Some(index) = reusable_index {
+            self.buffers.swap_remove(index)
+        } else {
+            let pbo = self.pbo_pool.get_pbo(device, dst_size)?;
+            PixelBuffer::new(pbo)
+        };
+
+        if !device.capabilities.supports_nonzero_pbo_offsets {
+            assert_eq!(buffer.size_used, 0, "PBO uploads from non-zero offset are not supported.");
+        }
+        assert!(buffer.size_used + dst_size <= buffer.inner.pbo.reserved_size, "PixelBuffer is too small");
+
+        // The new upload is placed directly after the data already staged.
+        let offset = buffer.size_used;
+
+        Ok(UploadStagingBuffer {
+            buffer,
+            offset,
+            size: dst_size,
+            stride: dst_stride,
+        })
+    }
+
+    /// Uploads manually staged texture data to the specified texture.
+    ///
+    /// Records the upload against the staging buffer's pixel buffer. A pixel
+    /// buffer that is now full is flushed immediately; otherwise it is kept by
+    /// the uploader so further uploads can share it. Returns the size of the
+    /// staged upload.
+    pub fn upload_staged(
+        &mut self,
+        device: &mut Device,
+        texture: &'a Texture,
+        rect: DeviceIntRect,
+        format_override: Option<ImageFormat>,
+        staging_buffer: UploadStagingBuffer<'a>,
+    ) -> usize {
+        let UploadStagingBuffer {
+            buffer: mut pixel_buffer,
+            offset,
+            size,
+            stride,
+        } = staging_buffer;
+
+        pixel_buffer.chunks.push(UploadChunk {
+            rect,
+            stride: Some(stride as i32),
+            offset,
+            format_override,
+            texture,
+        });
+        pixel_buffer.size_used += size;
+
+        // Flush the buffer if it is full, otherwise return it to the uploader for further use.
+        let is_full = pixel_buffer.size_used >= pixel_buffer.inner.pbo.reserved_size;
+        if is_full {
+            Self::flush_buffer(device, self.pbo_pool, pixel_buffer);
+        } else {
+            self.buffers.push(pixel_buffer);
+        }
+
+        size
+    }
+
+    /// Uploads `len` elements starting at `data` to `rect` of `texture`; returns the bytes submitted (0 if skipped).
+    pub fn upload<T>(
+        &mut self,
+        device: &mut Device,
+        texture: &'a Texture,
+        mut rect: DeviceIntRect,
+        stride: Option<i32>, // row pitch of the source in bytes; None means rows are tightly packed
+        format_override: Option<ImageFormat>,
+        data: *const T, // read as raw bytes below — callers must keep `len` elements valid; TODO confirm at call sites
+        len: usize,
+    ) -> usize {
+        // Texture dimensions may have been clamped by the hardware. Crop the
+        // upload region to match.
+        let cropped = rect.intersection(
+            &DeviceIntRect::from_size(texture.get_dimensions())
+        );
+        if cfg!(debug_assertions) && cropped.map_or(true, |r| r != rect) {
+            warn!("Cropping texture upload {:?} to {:?}", rect, cropped);
+        }
+        rect = match cropped {
+            None => return 0, // nothing of the region lies inside the texture
+            Some(r) => r,
+        };
+
+        let bytes_pp = texture.format.bytes_per_pixel() as usize;
+        let width_bytes = rect.width() as usize * bytes_pp;
+
+        let src_stride = stride.map_or(width_bytes, |stride| {
+            assert!(stride >= 0);
+            stride as usize
+        });
+        let src_size = (rect.height() as usize - 1) * src_stride + width_bytes; // bytes from the first row's start to the last row's end
+        assert!(src_size <= len * mem::size_of::<T>()); // the caller-declared length must cover the region
+
+        match device.upload_method {
+            UploadMethod::Immediate => {
+                if cfg!(debug_assertions) {
+                    let mut bound_buffer = [0];
+                    unsafe {
+                        device.gl.get_integer_v(gl::PIXEL_UNPACK_BUFFER_BINDING, &mut bound_buffer);
+                    }
+                    assert_eq!(bound_buffer[0], 0, "GL_PIXEL_UNPACK_BUFFER must not be bound for immediate uploads.");
+                }
+
+                Self::update_impl(device, UploadChunk {
+                    rect,
+                    stride: Some(src_stride as i32),
+                    offset: data as _, // no PBO is bound, so the source pointer itself is passed in the offset field
+                    format_override,
+                    texture,
+                });
+
+                width_bytes * rect.height() as usize // pixel bytes only, without stride padding
+            }
+            UploadMethod::PixelBuffer(_) => {
+                let mut staging_buffer = match self.stage(device, texture.format, rect.size()) {
+                    Ok(staging_buffer) => staging_buffer,
+                    Err(_) => return 0, // staging failed; the upload is dropped and 0 is reported
+                };
+                let dst_stride = staging_buffer.get_stride();
+
+                unsafe { // SAFETY: relies on `data` being readable for `src_size` bytes, checked against `len` above
+                    let src: &[mem::MaybeUninit<u8>] = slice::from_raw_parts(data as *const _, src_size);
+
+                    if src_stride == dst_stride {
+                        // Source and staging strides agree, so the whole region
+                        // (row padding included) is copied in one go.
+                        staging_buffer.get_mapping()[..src_size].copy_from_slice(src);
+                    } else {
+                        // Strides differ: copy row by row, placing each row at
+                        // the stride the staging buffer reports.
+                        for y in 0..rect.height() as usize {
+                            let src_start = y * src_stride;
+                            let src_end = src_start + width_bytes;
+                            let dst_start = y * staging_buffer.get_stride();
+                            let dst_end = dst_start + width_bytes;
+
+                            staging_buffer.get_mapping()[dst_start..dst_end].copy_from_slice(&src[src_start..src_end])
+                        }
+                    }
+                }
+
+                self.upload_staged(device, texture, rect, format_override, staging_buffer)
+            }
+        }
+    }
+
+    fn flush_buffer(device: &mut Device, pbo_pool: &mut UploadPBOPool, mut buffer: PixelBuffer) { // uploads every chunk queued in `buffer`, then returns its PBO to the pool
+        device.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, buffer.inner.pbo.id); // bind this buffer's PBO as the pixel unpack buffer
+        match buffer.inner.mapping {
+            PBOMapping::Unmapped => unreachable!("UploadPBO should be mapped at this stage."),
+            PBOMapping::Transient(_) => {
+                device.gl.unmap_buffer(gl::PIXEL_UNPACK_BUFFER); // a transient mapping is released before the chunks are flushed
+                buffer.inner.mapping = PBOMapping::Unmapped;
+            }
+            PBOMapping::Persistent(_) => {
+                device.gl.flush_mapped_buffer_range(gl::PIXEL_UNPACK_BUFFER, 0, buffer.size_used as _); // mapping is kept; only the used range is flushed
+            }
+        }
+        buffer.flush_chunks(device);
+        let pbo = mem::replace(&mut buffer.inner, UploadPBO::empty()); // take the PBO out, leaving an empty placeholder in `buffer`
+        pbo_pool.return_pbo(device, pbo);
+    }
+
+    /// Flushes every buffer still held by the uploader, then unbinds the pixel unpack
+    /// buffer. Must be called after all required upload() or upload_staged() calls.
+    pub fn flush(mut self, device: &mut Device) {
+        for buffer in self.buffers.drain(..) {
+            Self::flush_buffer(device, self.pbo_pool, buffer);
+        }
+
+        device.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, 0); // binding 0 leaves no unpack buffer bound
+    }
+
+    /// Performs the GL texture update for a single chunk. The chunk's offset is
+    /// handed to GL as the location of the pixel data.
+    fn update_impl(device: &mut Device, chunk: UploadChunk) {
+        device.bind_texture(DEFAULT_TEXTURE, chunk.texture, Swizzle::default());
+
+        // Map the (possibly overridden) image format to the GL source format,
+        // its bytes per pixel and the GL component type.
+        let (gl_format, bpp, data_type) = match chunk.format_override.unwrap_or(chunk.texture.format) {
+            ImageFormat::R8 => (gl::RED, 1, gl::UNSIGNED_BYTE),
+            ImageFormat::R16 => (gl::RED, 2, gl::UNSIGNED_SHORT),
+            ImageFormat::BGRA8 => (device.bgra_formats.external, 4, device.bgra_pixel_type),
+            ImageFormat::RGBA8 => (gl::RGBA, 4, gl::UNSIGNED_BYTE),
+            ImageFormat::RG8 => (gl::RG, 2, gl::UNSIGNED_BYTE),
+            ImageFormat::RG16 => (gl::RG, 4, gl::UNSIGNED_SHORT),
+            ImageFormat::RGBAF32 => (gl::RGBA, 16, gl::FLOAT),
+            ImageFormat::RGBAI32 => (gl::RGBA_INTEGER, 16, gl::INT),
+        };
+
+        // A chunk with an explicit stride tells GL its row length in pixels.
+        if let Some(stride) = chunk.stride {
+            device.gl.pixel_store_i(
+                gl::UNPACK_ROW_LENGTH,
+                (stride / bpp) as _,
+            );
+        }
+
+        let origin = chunk.rect.min;
+        let extent = chunk.rect.size();
+        let target = chunk.texture.target;
+
+        match target {
+            gl::TEXTURE_2D | gl::TEXTURE_RECTANGLE | gl::TEXTURE_EXTERNAL_OES => {
+                device.gl.tex_sub_image_2d_pbo(
+                    target,
+                    0,
+                    origin.x as _,
+                    origin.y as _,
+                    extent.width as _,
+                    extent.height as _,
+                    gl_format,
+                    data_type,
+                    chunk.offset,
+                );
+            }
+            _ => panic!("BUG: Unexpected texture target!"),
+        }
+
+        // Textures using tri-linear filtering get their mip-map chain rebuilt after the upload.
+        if chunk.texture.filter == TextureFilter::Trilinear {
+            device.gl.generate_mipmap(target);
+        }
+
+        // Put the row length back to 0 so that the stride does not apply to later uploads.
+        if chunk.stride.is_some() {
+            device.gl.pixel_store_i(gl::UNPACK_ROW_LENGTH, 0 as _);
+        }
+    }
+}
+
+fn texels_to_u8_slice<T: Texel>(texels: &[T]) -> &[u8] {
+    // View the same memory as bytes: identical start address, length scaled by the texel size.
+    let byte_len = texels.len() * mem::size_of::<T>();
+    unsafe { slice::from_raw_parts(texels.as_ptr() as *const u8, byte_len) }
+}
diff --git a/gfx/wr/webrender/src/device/mod.rs b/gfx/wr/webrender/src/device/mod.rs
new file mode 100644
index 0000000000..21684dea3e
--- /dev/null
+++ b/gfx/wr/webrender/src/device/mod.rs
@@ -0,0 +1,9 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+mod gl;
+pub mod query_gl;
+
+pub use self::gl::*;
+pub use self::query_gl as query;
diff --git a/gfx/wr/webrender/src/device/query_gl.rs b/gfx/wr/webrender/src/device/query_gl.rs
new file mode 100644
index 0000000000..c7fd9a9070
--- /dev/null
+++ b/gfx/wr/webrender/src/device/query_gl.rs
@@ -0,0 +1,318 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use gleam::gl;
+use std::mem;
+use std::rc::Rc;
+
+use crate::device::GpuFrameId;
+use crate::profiler::GpuProfileTag;
+
+#[derive(Copy, Clone, Debug)]
+pub enum GpuDebugMethod {
+    None, // no GL debug markers are emitted
+    MarkerEXT, // markers go through the *_marker_ext GL entry points
+    KHR, // markers go through the *_khr debug-group / debug-message GL entry points
+}
+
+#[derive(Debug, Clone)]
+pub struct GpuTimer {
+    pub tag: GpuProfileTag, // tag the timer was started with
+    pub time_ns: u64, // result of the TIME_ELAPSED query; 0 until samples are built
+}
+
+#[derive(Debug, Clone)]
+pub struct GpuSampler {
+    pub tag: GpuProfileTag, // tag the sampler was started with
+    pub count: u64, // result of the SAMPLES_PASSED query; 0 until samples are built
+}
+
+pub struct QuerySet<T> {
+    set: Vec<gl::GLuint>, // GL query object names available to this set
+    data: Vec<T>, // values recorded so far; data[i] is paired with set[i]
+    pending: gl::GLuint, // query returned by the latest add(), or 0 once it has been finished or reset
+}
+
+impl<T> QuerySet<T> {
+    /// Creates a set with no query objects, no recorded values and nothing pending.
+    fn new() -> Self {
+        QuerySet { set: Vec::new(), data: Vec::new(), pending: 0 }
+    }
+
+    /// Forgets the recorded values and the pending query; the query objects are kept.
+    fn reset(&mut self) {
+        self.pending = 0;
+        self.data.clear();
+    }
+
+    /// Records `value` against the next unused query object; `None` when all are in use.
+    fn add(&mut self, value: T) -> Option<gl::GLuint> {
+        assert_eq!(self.pending, 0);
+        let query_id = *self.set.get(self.data.len())?;
+        self.data.push(value);
+        self.pending = query_id;
+        Some(query_id)
+    }
+
+    /// Takes the recorded values out of the set, first letting `fun` update
+    /// each one with the query object it was paired with.
+    fn take<F: Fn(&mut T, gl::GLuint)>(&mut self, fun: F) -> Vec<T> {
+        let mut data = mem::take(&mut self.data);
+        for (value, &query) in data.iter_mut().zip(&self.set) {
+            fun(value, query);
+        }
+        data
+    }
+}
+
+pub struct GpuFrameProfile {
+    gl: Rc<dyn gl::Gl>, // GL interface used to issue the queries
+    timers: QuerySet<GpuTimer>, // TIME_ELAPSED queries of this frame
+    samplers: QuerySet<GpuSampler>, // SAMPLES_PASSED queries of this frame
+    frame_id: GpuFrameId, // id passed to the latest begin_frame()
+    inside_frame: bool, // true between begin_frame() and end_frame()
+    debug_method: GpuDebugMethod, // marker mechanism used for timer markers
+}
+
+impl GpuFrameProfile {
+    /// Creates a profile that is outside of any frame, has frame id 0 and
+    /// owns no query objects for timers or samplers yet.
+    fn new(gl: Rc<dyn gl::Gl>, debug_method: GpuDebugMethod) -> Self {
+        let frame_id = GpuFrameId::new(0);
+        let timers = QuerySet::new();
+        let samplers = QuerySet::new();
+        let inside_frame = false;
+
+        GpuFrameProfile { gl, timers, samplers, frame_id, inside_frame, debug_method }
+    }
+
+    fn enable_timers(&mut self, count: i32) {
+        self.timers.set = self.gl.gen_queries(count); // `count` newly generated query objects
+    }
+
+    fn disable_timers(&mut self) {
+        let queries = mem::take(&mut self.timers.set);
+        if !queries.is_empty() {
+            self.gl.delete_queries(&queries);
+        }
+    }
+
+    fn enable_samplers(&mut self, count: i32) {
+        self.samplers.set = self.gl.gen_queries(count); // `count` newly generated query objects
+    }
+
+    fn disable_samplers(&mut self) {
+        let queries = mem::take(&mut self.samplers.set);
+        if !queries.is_empty() {
+            self.gl.delete_queries(&queries);
+        }
+    }
+
+    fn begin_frame(&mut self, frame_id: GpuFrameId) {
+        self.timers.reset();
+        self.samplers.reset();
+        self.frame_id = frame_id;
+        self.inside_frame = true;
+    }
+
+    fn end_frame(&mut self) {
+        self.finish_timer(); // both finish calls debug-assert that the frame is still open
+        self.finish_sampler();
+        self.inside_frame = false;
+    }
+
+    /// Ends the timer query that is currently open, if there is one.
+    fn finish_timer(&mut self) {
+        debug_assert!(self.inside_frame);
+        if self.timers.pending == 0 { return; }
+        self.gl.end_query(gl::TIME_ELAPSED);
+        self.timers.pending = 0;
+    }
+
+    /// Ends the sample query that is currently open, if there is one.
+    fn finish_sampler(&mut self) {
+        debug_assert!(self.inside_frame);
+        if self.samplers.pending == 0 { return; }
+        self.gl.end_query(gl::SAMPLES_PASSED);
+        self.samplers.pending = 0;
+    }
+
+    /// Ends the previous timer, pushes a debug marker for `tag` and opens a
+    /// new timer query if a query object is still available.
+    fn start_timer(&mut self, tag: GpuProfileTag) -> GpuTimeQuery {
+        self.finish_timer();
+        let marker = GpuMarker::new(&self.gl, tag.label, self.debug_method);
+
+        let timer = GpuTimer { tag, time_ns: 0 };
+        if let Some(query) = self.timers.add(timer) {
+            self.gl.begin_query(gl::TIME_ELAPSED, query);
+        }
+        GpuTimeQuery(marker)
+    }
+
+    /// Ends the previous sampler and opens a sample query if one is available.
+    fn start_sampler(&mut self, tag: GpuProfileTag) -> GpuSampleQuery {
+        self.finish_sampler();
+        let sampler = GpuSampler { tag, count: 0 };
+        if let Some(query) = self.samplers.add(sampler) {
+            self.gl.begin_query(gl::SAMPLES_PASSED, query);
+        }
+        GpuSampleQuery
+    }
+
+    /// Reads back the results of this frame's queries and returns them
+    /// together with the frame id. Expected to be called outside of a frame.
+    fn build_samples(&mut self) -> (GpuFrameId, Vec<GpuTimer>, Vec<GpuSampler>) {
+        debug_assert!(!self.inside_frame);
+
+        // Timers and samplers are both read back through the same query-result call.
+        let device = &self.gl;
+        let read_result = |query: gl::GLuint| device.get_query_object_ui64v(query, gl::QUERY_RESULT);
+
+        let timers = self.timers.take(|timer, query| timer.time_ns = read_result(query));
+        let samplers = self.samplers.take(|sampler, query| sampler.count = read_result(query));
+
+        (self.frame_id, timers, samplers)
+    }
+}
+
+impl Drop for GpuFrameProfile {
+    fn drop(&mut self) { // deletes any GL query objects this profile still owns
+        self.disable_timers();
+        self.disable_samplers();
+    }
+}
+
+const NUM_PROFILE_FRAMES: usize = 4; // number of per-frame profiles GpuProfiler rotates through
+
+pub struct GpuProfiler {
+    gl: Rc<dyn gl::Gl>, // GL interface used for standalone markers
+    frames: [GpuFrameProfile; NUM_PROFILE_FRAMES], // per-frame profiles, used in rotation
+    next_frame: usize, // index into `frames` of the profile currently in use
+    debug_method: GpuDebugMethod // marker mechanism handed to GpuMarker
+}
+
+impl GpuProfiler {
+    pub fn new(gl: Rc<dyn gl::Gl>, debug_method: GpuDebugMethod) -> Self {
+        // Every frame slot shares the GL interface and the debug method.
+        let make_frame = || GpuFrameProfile::new(Rc::clone(&gl), debug_method);
+        let frames = [make_frame(), make_frame(), make_frame(), make_frame()];
+        GpuProfiler {
+            gl,
+            frames,
+            next_frame: 0,
+            debug_method,
+        }
+    }
+
+    pub fn enable_timers(&mut self) {
+        const MAX_TIMERS_PER_FRAME: i32 = 256;
+
+        self.frames
+            .iter_mut()
+            .for_each(|frame| frame.enable_timers(MAX_TIMERS_PER_FRAME));
+    }
+
+    pub fn disable_timers(&mut self) {
+        self.frames
+            .iter_mut()
+            .for_each(GpuFrameProfile::disable_timers);
+    }
+
+    pub fn enable_samplers(&mut self) {
+        const MAX_SAMPLERS_PER_FRAME: i32 = 16;
+        if cfg!(target_os = "macos") {
+            warn!("Expect macOS driver bugs related to sample queries")
+        }
+
+        self.frames
+            .iter_mut()
+            .for_each(|frame| frame.enable_samplers(MAX_SAMPLERS_PER_FRAME));
+    }
+
+    pub fn disable_samplers(&mut self) {
+        self.frames
+            .iter_mut()
+            .for_each(GpuFrameProfile::disable_samplers);
+    }
+
+    pub fn build_samples(&mut self) -> (GpuFrameId, Vec<GpuTimer>, Vec<GpuSampler>) {
+        self.frames[self.next_frame].build_samples() // reads the slot `next_frame` currently points at
+    }
+
+    pub fn begin_frame(&mut self, frame_id: GpuFrameId) {
+        self.frames[self.next_frame].begin_frame(frame_id); // opens the frame in the current slot
+    }
+
+    pub fn end_frame(&mut self) {
+        self.frames[self.next_frame].end_frame();
+        self.next_frame = (self.next_frame + 1) % self.frames.len(); // move to the next slot, wrapping around
+    }
+
+    pub fn start_timer(&mut self, tag: GpuProfileTag) -> GpuTimeQuery {
+        self.frames[self.next_frame].start_timer(tag) // timer is recorded in the current slot
+    }
+
+    pub fn start_sampler(&mut self, tag: GpuProfileTag) -> GpuSampleQuery {
+        self.frames[self.next_frame].start_sampler(tag) // sampler is recorded in the current slot
+    }
+
+    pub fn finish_sampler(&mut self, _sampler: GpuSampleQuery) {
+        self.frames[self.next_frame].finish_sampler() // the token is only consumed, never inspected
+    }
+
+    pub fn start_marker(&mut self, label: &str) -> GpuMarker {
+        GpuMarker::new(&self.gl, label, self.debug_method) // scoped marker: popped when the result is dropped
+    }
+
+    pub fn place_marker(&mut self, label: &str) {
+        GpuMarker::fire(&self.gl, label, self.debug_method) // one-off marker event
+    }
+}
+
+#[must_use]
+pub struct GpuMarker {
+    gl: Option<(Rc<dyn gl::Gl>, GpuDebugMethod)>, // Some only when a debug group was pushed and has to be popped on drop
+}
+
+impl GpuMarker {
+    /// Pushes a debug group labelled `message` using `debug_method`; the
+    /// returned marker pops that group again when it is dropped.
+    fn new(gl: &Rc<dyn gl::Gl>, message: &str, debug_method: GpuDebugMethod) -> Self {
+        match debug_method {
+            // Without a debug method nothing is pushed, so nothing needs popping later.
+            GpuDebugMethod::None => return GpuMarker { gl: None },
+            GpuDebugMethod::KHR => { gl.push_debug_group_khr(gl::DEBUG_SOURCE_APPLICATION, 0, message); }
+            GpuDebugMethod::MarkerEXT => { gl.push_group_marker_ext(message); }
+        }
+
+        GpuMarker { gl: Some((Rc::clone(gl), debug_method)) }
+    }
+
+    /// Emits a single debug event labelled `message` using `debug_method`;
+    /// unlike `new`, nothing has to be popped afterwards.
+    fn fire(gl: &Rc<dyn gl::Gl>, message: &str, debug_method: GpuDebugMethod) {
+        match debug_method {
+            GpuDebugMethod::None => {}
+            GpuDebugMethod::MarkerEXT => gl.insert_event_marker_ext(message),
+            GpuDebugMethod::KHR => gl.debug_message_insert_khr(gl::DEBUG_SOURCE_APPLICATION, gl::DEBUG_TYPE_MARKER, 0, gl::DEBUG_SEVERITY_NOTIFICATION, message),
+        }
+    }
+}
+
+impl Drop for GpuMarker {
+    /// Pops the debug group that was pushed when the marker was created,
+    /// using the matching GL call; markers that pushed nothing do nothing.
+    fn drop(&mut self) {
+        match self.gl {
+            Some((ref gl, GpuDebugMethod::KHR)) => gl.pop_debug_group_khr(),
+            Some((ref gl, GpuDebugMethod::MarkerEXT)) => gl.pop_group_marker_ext(),
+            Some((_, GpuDebugMethod::None)) | None => {}
+        }
+    }
+}
+
+#[must_use]
+pub struct GpuTimeQuery(GpuMarker); // holds the timer's debug marker; dropping it pops the marker
+#[must_use]
+pub struct GpuSampleQuery; // token returned by start_sampler() and consumed by finish_sampler()
diff --git a/gfx/wr/webrender/src/ellipse.rs b/gfx/wr/webrender/src/ellipse.rs
new file mode 100644
index 0000000000..fac6765984
--- /dev/null
+++ b/gfx/wr/webrender/src/ellipse.rs
@@ -0,0 +1,187 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::units::*;
+use euclid::Size2D;
+use std::f32::consts::FRAC_PI_2;
+
+
+/// Number of intervals used to integrate arc length; Simpson's rule needs this to be even.
+const STEP_COUNT: usize = 20;
+
+/// Represents an ellipse centred at a local space origin.
+#[derive(Debug, Clone)]
+pub struct Ellipse<U> {
+    pub radius: Size2D<f32, U>, // radii along x (width) and y (height)
+    pub total_arc_length: f32, // approximate arc length of the first quadrant, computed in new()
+}
+
+impl<U> Ellipse<U> {
+    pub fn new(radius: Size2D<f32, U>) -> Ellipse<U> {
+        // Cache the approximate arc length of the first quadrant (0 to pi/2).
+        let (rx, ry) = (radius.width, radius.height);
+        let total_arc_length = get_simpson_length(FRAC_PI_2, rx, ry);
+        Ellipse {
+            total_arc_length,
+            radius,
+        }
+    }
+
+    /// Binary search to estimate the angle of an ellipse
+    /// for a given arc length. This only searches over the
+    /// first quadrant, so the result lies in [0, pi/2].
+    pub fn find_angle_for_arc_length(&self, arc_length: f32) -> f32 {
+        // Clamp the requested arc length to [0, length of the first quadrant].
+        let target = arc_length.max(0.0).min(self.total_arc_length);
+
+        let epsilon = 0.01;
+        let (mut low, mut high) = (0.0, FRAC_PI_2);
+        let mut theta = 0.0;
+
+        while low <= high {
+            theta = 0.5 * (low + high);
+            let length = get_simpson_length(theta, self.radius.width, self.radius.height);
+
+            // Close enough to the requested length: accept this angle.
+            if (length - target).abs() < epsilon {
+                break;
+            }
+
+            // Keep the half of the interval on the side of the requested length.
+            let (next_low, next_high) = if length < target {
+                (theta, high)
+            } else {
+                (low, theta)
+            };
+
+            // If we have stopped moving along the arc, the answer that we have is as
+            // good as it is going to get. Stop here rather than loop forever.
+            if next_low == low && next_high == high {
+                break;
+            }
+            low = next_low;
+            high = next_high;
+        }
+
+        theta
+    }
+
+    /// Returns the point on the ellipse at angle `theta` together with the
+    /// tangent direction there (not normalised).
+    /// This only works for the first quadrant of the ellipse.
+    pub fn get_point_and_tangent(&self, theta: f32) -> (LayoutPoint, LayoutPoint) {
+        let (sin_theta, cos_theta) = theta.sin_cos();
+        let (rx, ry) = (self.radius.width, self.radius.height);
+
+        // The point is (rx * cos, ry * sin); the tangent is its derivative
+        // with respect to theta.
+        let point = LayoutPoint::new(rx * cos_theta, ry * sin_theta);
+        let tangent = LayoutPoint::new(-rx * sin_theta, ry * cos_theta);
+
+        (point, tangent)
+    }
+
+    pub fn contains(&self, point: LayoutPoint) -> bool {
+        self.signed_distance(point.to_vector()) <= 0.0 // a non-positive signed distance counts as contained (boundary included)
+    }
+
+    /// Find the signed distance from this ellipse given a point (non-positive means contained, see `contains`).
+    /// Taken from http://www.iquilezles.org/www/articles/ellipsedist/ellipsedist.htm
+    fn signed_distance(&self, point: LayoutVector2D) -> f32 {
+        // This algorithm fails for circles, so we handle them here.
+        if self.radius.width == self.radius.height {
+            return point.length() - self.radius.width;
+        }
+
+        let mut p = LayoutVector2D::new(point.x.abs(), point.y.abs()); // work with the point mirrored into the first quadrant
+        let mut ab = self.radius.to_vector();
+        if p.x > p.y { // swap the axes of point and radii together so that p.x <= p.y
+            p = p.yx();
+            ab = ab.yx();
+        }
+
+        let l = ab.y * ab.y - ab.x * ab.x; // zero only when both radii have equal magnitude, the case handled above
+
+        let m = ab.x * p.x / l;
+        let n = ab.y * p.y / l;
+        let m2 = m * m;
+        let n2 = n * n;
+
+        let c = (m2 + n2 - 1.0) / 3.0;
+        let c3 = c * c * c;
+
+        let q = c3 + m2 * n2 * 2.0;
+        let d = c3 + m2 * n2;
+        let g = m + m * n2;
+
+        let co = if d < 0.0 { // negative d takes the trigonometric branch, otherwise the cube-root branch
+            let p = (q / c3).acos() / 3.0;
+            let s = p.cos();
+            let t = p.sin() * (3.0_f32).sqrt();
+            let rx = (-c * (s + t + 2.0) + m2).sqrt();
+            let ry = (-c * (s - t + 2.0) + m2).sqrt();
+            (ry + l.signum() * rx + g.abs() / (rx * ry) - m) / 2.0
+        } else {
+            let h = 2.0 * m * n * d.sqrt();
+            let s = (q + h).signum() * (q + h).abs().powf(1.0 / 3.0);
+            let u = (q - h).signum() * (q - h).abs().powf(1.0 / 3.0);
+            let rx = -s - u - c * 4.0 + 2.0 * m2;
+            let ry = (s - u) * (3.0_f32).sqrt();
+            let rm = (rx * rx + ry * ry).sqrt();
+            let p = ry / (rm - rx).sqrt();
+            (p + 2.0 * g / rm - m) / 2.0
+        };
+
+        let si = (1.0 - co * co).sqrt(); // co and si: presumably cosine and sine locating the closest point — see the linked article
+        let r = LayoutVector2D::new(ab.x * co, ab.y * si); // point on the ellipse the distance is measured to
+        (r - p).length() * (p.y - r.y).signum() // magnitude |r - p|, signed by whether p lies above or below r in y
+    }
+}
+
+/// Approximates the arc length of an ellipse with radii `rx` and `ry`
+/// between angle zero and `theta` using Simpson's rule. Note that this
+/// only works over the range of [0, pi/2].
+// TODO(gw): This is a simplistic way to estimate the
+// arc length of an ellipse segment. We can probably use
+// a faster / more accurate method!
+fn get_simpson_length(theta: f32, rx: f32, ry: f32) -> f32 {
+    let step = theta / STEP_COUNT as f32;
+    let mut weighted_sum = 0.0;
+
+    for index in 0..=STEP_COUNT {
+        // Integrand: the speed of the point (rx * cos, ry * sin) as the angle advances.
+        let (sin_t, cos_t) = (index as f32 * step).sin_cos();
+        let a = rx * sin_t;
+        let b = ry * cos_t;
+        let speed = (a * a + b * b).sqrt();
+
+        // Simpson weights: 1 at both ends, 4 at odd and 2 at even interior samples.
+        let weight = match index {
+            0 | STEP_COUNT => 1.0,
+            odd if odd % 2 == 1 => 4.0,
+            _ => 2.0,
+        };
+        weighted_sum += weight * speed;
+    }
+
+    (step / 3.0) * weighted_sum
+}
+
+#[cfg(test)]
+pub mod test {
+    use super::*;
+
+    #[test]
+    fn find_angle_for_arc_length_for_long_ellipse() {
+        // Ensure that finding the angle on giant ellipses produces an answer in the
+        // first quadrant and doesn't send us into an infinite loop.
+        let ellipse = Ellipse::new(LayoutSize::new(57500.0, 25.0));
+        let theta = ellipse.find_angle_for_arc_length(55674.53);
+        assert!(theta >= 0.0 && theta <= FRAC_PI_2);
+
+        let ellipse = Ellipse::new(LayoutSize::new(25.0, 57500.0));
+        let theta = ellipse.find_angle_for_arc_length(55674.53);
+        assert!(theta >= 0.0 && theta <= FRAC_PI_2);
+    }
+}
diff --git a/gfx/wr/webrender/src/filterdata.rs b/gfx/wr/webrender/src/filterdata.rs
new file mode 100644
index 0000000000..d399b2252e
--- /dev/null
+++ b/gfx/wr/webrender/src/filterdata.rs
@@ -0,0 +1,219 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use std::{hash};
+use crate::gpu_cache::{GpuCacheHandle};
+use crate::frame_builder::FrameBuildingState;
+use crate::gpu_cache::GpuDataRequest;
+use crate::intern;
+use api::{ComponentTransferFuncType};
+
+
+pub type FilterDataHandle = intern::Handle<FilterDataIntern>; // interning handle for filter data
+
+#[derive(Debug, Clone, MallocSizeOf, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum SFilterDataComponent {
+    Identity, // pushes no GPU data
+    Table(Vec<f32>), // sample values, linearly interpolated into a 256-entry lookup table
+    Discrete(Vec<f32>), // sample values, expanded as steps into a 256-entry lookup table
+    Linear(f32, f32), // two parameters pushed as-is — presumably slope and intercept; confirm against the shader
+    Gamma(f32, f32, f32), // three parameters pushed as-is — presumably amplitude, exponent, offset; confirm
+}
+
+impl Eq for SFilterDataComponent {} // NOTE(review): the derived PartialEq compares f32 payloads, so NaN values are not reflexive
+
+impl hash::Hash for SFilterDataComponent {
+    /// Hashes the variant tag (and, for the list variants, the length)
+    /// followed by the bit pattern of every value.
+    ///
+    /// The derived `PartialEq` treats `0.0` and `-0.0` as equal, so zero is
+    /// hashed with a single bit pattern; otherwise two equal keys could hash
+    /// differently, which a `Hash` implementation must never allow.
+    fn hash<H: hash::Hasher>(&self, state: &mut H) {
+        /// Bit pattern of `val`, with `-0.0` folded into `0.0`.
+        fn bits(val: f32) -> u32 {
+            if val == 0.0 { 0.0_f32.to_bits() } else { val.to_bits() }
+        }
+
+        /// Feeds `tag`, optionally the number of values, and then the values to `state`.
+        fn feed<S: hash::Hasher>(state: &mut S, tag: i32, with_len: bool, values: &[f32]) {
+            hash::Hash::hash(&tag, state);
+            if with_len {
+                hash::Hash::hash(&values.len(), state);
+            }
+            for &val in values {
+                hash::Hash::hash(&bits(val), state);
+            }
+        }
+
+        // Variant tags: the same numbers that to_int() returns.
+        match self {
+            SFilterDataComponent::Identity => feed(state, 0, false, &[]),
+            SFilterDataComponent::Table(values) => feed(state, 1, true, values),
+            SFilterDataComponent::Discrete(values) => feed(state, 2, true, values),
+            SFilterDataComponent::Linear(a, b) => feed(state, 3, false, &[*a, *b]),
+            SFilterDataComponent::Gamma(a, b, c) => feed(state, 4, false, &[*a, *b, *c]),
+        }
+    }
+}
+
+impl SFilterDataComponent {
+    /// Returns the numeric tag of this variant, from 0 (Identity) to 4 (Gamma).
+    pub fn to_int(&self) -> u32 {
+        match *self {
+            Self::Identity => 0,
+            Self::Table(..) => 1,
+            Self::Discrete(..) => 2,
+            Self::Linear(..) => 3,
+            Self::Gamma(..) => 4,
+        }
+    }
+
+    /// Builds the component for `func_type` from its parameter list.
+    /// Panics if `values` holds fewer than 2 (Linear) or 3 (Gamma) entries.
+    pub fn from_functype_values(func_type: ComponentTransferFuncType, values: &[f32]) -> SFilterDataComponent {
+        match func_type {
+            ComponentTransferFuncType::Identity => Self::Identity,
+            ComponentTransferFuncType::Table => Self::Table(values.to_vec()),
+            ComponentTransferFuncType::Discrete => Self::Discrete(values.to_vec()),
+            ComponentTransferFuncType::Linear => Self::Linear(values[0], values[1]),
+            ComponentTransferFuncType::Gamma => Self::Gamma(values[0], values[1], values[2]),
+        }
+    }
+}
+
+#[derive(Debug, Clone, MallocSizeOf, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct SFilterData {
+    pub r_func: SFilterDataComponent, // one transfer function per channel — presumably red, green, blue, alpha
+    pub g_func: SFilterDataComponent,
+    pub b_func: SFilterDataComponent,
+    pub a_func: SFilterDataComponent, // update() pushes the four functions in r, g, b, a order
+}
+
+#[derive(Debug, Clone, MallocSizeOf, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct SFilterDataKey {
+    pub data: SFilterData, // the filter data is the whole interning key
+}
+
+impl intern::InternDebug for SFilterDataKey {} // empty impl: relies entirely on the trait's provided items
+
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+pub struct SFilterDataTemplate {
+    pub data: SFilterData, // taken from the interning key
+    pub gpu_cache_handle: GpuCacheHandle, // handle used to request this data's GPU cache entry
+}
+
+impl From<SFilterDataKey> for SFilterDataTemplate {
+    /// Moves the key's data into a template with a newly created GPU cache handle.
+    fn from(item: SFilterDataKey) -> Self {
+        let SFilterDataKey { data } = item;
+        let gpu_cache_handle = GpuCacheHandle::new();
+        SFilterDataTemplate { data, gpu_cache_handle }
+    }
+}
+
+impl SFilterData {
+    /// True when all four channel functions are `Identity`.
+    pub fn is_identity(&self) -> bool {
+        [&self.r_func, &self.g_func, &self.b_func, &self.a_func]
+            .iter()
+            .all(|func| **func == SFilterDataComponent::Identity)
+    }
+
+    /// Pushes the data of the r, g, b and a functions, in that order, into `request`.
+    pub fn update(&self, mut request: GpuDataRequest) {
+        for func in [&self.r_func, &self.g_func, &self.b_func, &self.a_func].iter() {
+            push_component_transfer_data(func, &mut request);
+        }
+        assert!(!self.is_identity());
+    }
+}
+
+impl SFilterDataTemplate {
+    /// Refreshes this template's GPU cache entry. Several primitive references may
+    /// share one interned template and each may call this in the same frame, so the
+    /// data is only pushed when the cache hands back a request, i.e. when the entry
+    /// is invalid (first use or eviction).
+    pub fn update(&mut self, frame_state: &mut FrameBuildingState) {
+        // `request` returns None while the cached entry is still valid,
+        // in which case there is nothing to upload.
+        match frame_state.gpu_cache.request(&mut self.gpu_cache_handle) {
+            Some(request) => self.data.update(request),
+            None => {}
+        }
+    }
+}
+
+#[derive(Copy, Clone, Debug, MallocSizeOf)]
+#[cfg_attr(any(feature = "serde"), derive(Deserialize, Serialize))]
+pub enum FilterDataIntern {} // uninhabited marker type; it only carries the Internable impl
+
+impl intern::Internable for FilterDataIntern {
+    type Key = SFilterDataKey; // the value that gets interned
+    type StoreData = SFilterDataTemplate; // can be built from the key via From<SFilterDataKey>
+    type InternData = (); // no extra per-item data
+    const PROFILE_COUNTER: usize = crate::profiler::INTERNED_FILTER_DATA; // profiler counter index for this interner
+}
+
+fn push_component_transfer_data(
+    func_comp: &SFilterDataComponent,
+    request: &mut GpuDataRequest,
+) { // pushes the GPU data of one channel's transfer function into `request`
+    match func_comp {
+        SFilterDataComponent::Identity => {} // identity pushes nothing
+        SFilterDataComponent::Table(values) |
+        SFilterDataComponent::Discrete(values) => {
+            // Push a 256 entry lookup table, as 64 blocks of 4 floats.
+            assert!(values.len() > 0);
+            for i in 0 .. 64 { // one pushed block per iteration
+                let mut arr = [0.0 ; 4];
+                for j in 0 .. 4 {
+                    if (values.len() == 1) || (i == 63 && j == 3) { // a single value, or the very last entry: use the last value directly
+                        arr[j] = values[values.len()-1];
+                    } else {
+                        let c = ((4*i + j) as f32)/255.0; // entry index 4*i + j scaled by 1/255; below 1.0 because the last entry is handled above
+                        match func_comp {
+                            SFilterDataComponent::Table(_) => {
+                                let n = (values.len()-1) as f32; // number of segments between neighbouring values
+                                let k = (n * c).floor() as u32;
+                                let ku = k as usize;
+                                assert!(ku < values.len()-1);
+                                arr[j] = values[ku] + (c*n - (k as f32)) * (values[ku+1] - values[ku]); // linear interpolation between values[k] and values[k+1]
+                            }
+                            SFilterDataComponent::Discrete(_) => {
+                                let n = values.len() as f32; // number of steps
+                                let k = (n * c).floor() as usize;
+                                assert!(k < values.len());
+                                arr[j] = values[k]; // step function: take the value of the step that c falls into
+                            }
+                            SFilterDataComponent::Identity |
+                            SFilterDataComponent::Linear(_,_) |
+                            SFilterDataComponent::Gamma(_,_,_) => {
+                                unreachable!();
+                            }
+                        }
+
+                    }
+                }
+
+                request.push(arr);
+            }
+        }
+        SFilterDataComponent::Linear(a, b) => {
+            request.push([*a, *b, 0.0, 0.0]); // two parameters, padded with zeros to four floats
+        }
+        SFilterDataComponent::Gamma(a, b, c) => {
+            request.push([*a, *b, *c, 0.0]); // three parameters, padded with a zero to four floats
+        }
+    }
+}
diff --git a/gfx/wr/webrender/src/frame_builder.rs b/gfx/wr/webrender/src/frame_builder.rs
new file mode 100644
index 0000000000..0bfb4cdc33
--- /dev/null
+++ b/gfx/wr/webrender/src/frame_builder.rs
@@ -0,0 +1,968 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{ColorF, DebugFlags, FontRenderMode, PremultipliedColorF};
+use api::units::*;
+use plane_split::BspSplitter;
+use crate::batch::{BatchBuilder, AlphaBatchBuilder, AlphaBatchContainer};
+use crate::clip::{ClipStore, ClipTree};
+use crate::command_buffer::CommandBufferList;
+use crate::spatial_tree::{SpatialTree, SpatialNodeIndex};
+use crate::composite::{CompositorKind, CompositeState, CompositeStatePreallocator};
+use crate::debug_item::DebugItem;
+use crate::gpu_cache::{GpuCache, GpuCacheHandle};
+use crate::gpu_types::{PrimitiveHeaders, TransformPalette, ZBufferIdGenerator};
+use crate::gpu_types::TransformData;
+use crate::internal_types::{FastHashMap, PlaneSplitter, FrameId, FrameStamp};
+use crate::picture::{DirtyRegion, SliceId, TileCacheInstance};
+use crate::picture::{SurfaceInfo, SurfaceIndex};
+use crate::picture::{SubpixelMode, RasterConfig, PictureCompositeMode};
+use crate::prepare::prepare_primitives;
+use crate::prim_store::{PictureIndex};
+use crate::prim_store::{DeferredResolve, PrimitiveInstance};
+use crate::profiler::{self, TransactionProfile};
+use crate::render_backend::{DataStores, ScratchBuffer};
+use crate::renderer::{GpuBuffer, GpuBufferBuilder};
+use crate::render_target::{RenderTarget, PictureCacheTarget, TextureCacheRenderTarget, PictureCacheTargetKind};
+use crate::render_target::{RenderTargetContext, RenderTargetKind, AlphaRenderTarget, ColorRenderTarget};
+use crate::render_task_graph::{RenderTaskGraph, Pass, SubPassSurface};
+use crate::render_task_graph::{RenderPass, RenderTaskGraphBuilder};
+use crate::render_task::{RenderTaskKind, StaticRenderTaskSurface};
+use crate::resource_cache::{ResourceCache};
+use crate::scene::{BuiltScene, SceneProperties};
+use crate::space::SpaceMapper;
+use crate::segment::SegmentBuilder;
+use crate::surface::SurfaceBuilder;
+use std::{f32, mem};
+use crate::util::{VecHelper, Preallocator};
+use crate::visibility::{update_prim_visibility, FrameVisibilityState, FrameVisibilityContext};
+
+#[derive(Clone, Copy, Debug)] // Copy is relied upon: the config is copied by value into FrameVisibilityContext
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct FrameBuilderConfig { // Settings read through `scene.config` while FrameBuilder builds a frame.
+    pub default_font_render_mode: FontRenderMode,
+    pub dual_source_blending_is_supported: bool, // forwarded to RenderTargetContext::use_dual_source_blending
+    /// True if we're running tests (i.e. via wrench).
+    pub testing: bool,
+    pub gpu_supports_fast_clears: bool, // handed to build_render_pass for every pass
+    pub gpu_supports_advanced_blend: bool, // forwarded to RenderTargetContext::use_advanced_blending
+    pub advanced_blend_is_coherent: bool, // when false, RenderTargetContext::break_advanced_blend_batches is set
+    pub gpu_supports_render_target_partial_update: bool,
+    /// Whether ImageBufferKind::TextureExternal images must first be copied
+    /// to a regular texture before rendering.
+    pub external_images_require_copy: bool,
+    pub batch_lookback_count: usize, // forwarded to RenderTargetContext::batch_lookback_count
+    pub background_color: Option<ColorF>,
+    pub compositor_kind: CompositorKind, // passed to CompositeState::new
+    pub tile_size_override: Option<DeviceIntSize>,
+    pub max_surface_override: Option<usize>,
+    pub max_depth_ids: i32, // passed to CompositeState::new and ZBufferIdGenerator::new
+    pub max_target_size: i32,
+    pub force_invalidation: bool,
+    pub is_software: bool,
+    pub low_quality_pinch_zoom: bool, // passed to CompositeState::new
+}
+
+/// A set of common / global resources that are retained between
+/// new display lists, such that any GPU cache handles can be
+/// persisted even when a new display list arrives.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+pub struct FrameGlobalResources {
+    /// The image shader block for the most common / default set of image parameters
+    /// (color white, stretch == rect.size); `update` writes WHITE, WHITE, [-1, 0, 0, 0].
+    pub default_image_handle: GpuCacheHandle,
+
+    /// A GPU cache config for drawing transparent rectangle primitives (`update`
+    /// writes a single TRANSPARENT color block). This is used to 'cut out'
+    /// overlay tiles where a compositor surface exists.
+    pub default_transparent_rect_handle: GpuCacheHandle,
+}
+
+impl FrameGlobalResources {
+    /// Returns a resource set whose two handles both come straight from
+    /// `GpuCacheHandle::new`.
+    pub fn empty() -> Self {
+        let default_image_handle = GpuCacheHandle::new();
+        let default_transparent_rect_handle = GpuCacheHandle::new();
+        Self { default_image_handle, default_transparent_rect_handle }
+    }
+
+    /// Pushes the blocks behind each handle whenever the GPU cache returns
+    /// a request for it.
+    pub fn update(&mut self, gpu_cache: &mut GpuCache) {
+        // Default image: two white colors followed by the stretch size block.
+        let image_handle = &mut self.default_image_handle;
+        if let Some(mut image_blocks) = gpu_cache.request(image_handle) {
+            image_blocks.push(PremultipliedColorF::WHITE);
+            image_blocks.push(PremultipliedColorF::WHITE);
+            // -ve means use prim rect for stretch size
+            image_blocks.push([-1.0, 0.0, 0.0, 0.0]);
+        }
+
+        // Transparent rectangle: a single transparent color block.
+        let transparent_handle = &mut self.default_transparent_rect_handle;
+        if let Some(mut rect_blocks) = gpu_cache.request(transparent_handle) {
+            rect_blocks.push(PremultipliedColorF::TRANSPARENT);
+        }
+    }
+}
+
+pub struct FrameScratchBuffer { // Vec storage lent to frame building via `take()` and handed back afterwards.
+    dirty_region_stack: Vec<DirtyRegion>, // lent to FrameBuildingState::dirty_region_stack during prepare
+    surface_stack: Vec<(PictureIndex, SurfaceIndex)>, // lent to FrameVisibilityState::surface_stack per tile cache
+}
+
+impl Default for FrameScratchBuffer {
+    /// Starts out with both stacks empty.
+    fn default() -> Self {
+        let dirty_region_stack = Vec::new();
+        let surface_stack = Vec::new();
+        Self { dirty_region_stack, surface_stack }
+    }
+}
+
+impl FrameScratchBuffer {
+    pub fn begin_frame(&mut self) { // empty both stacks before a new frame; `clear` keeps their capacity
+        self.surface_stack.clear();
+        self.dirty_region_stack.clear();
+    }
+}
+
+/// Produces the frames that are sent to the renderer.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+pub struct FrameBuilder {
+    pub globals: FrameGlobalResources, // refreshed through `globals.update(gpu_cache)` at the start of `build`
+    #[cfg_attr(feature = "capture", serde(skip))]
+    prim_headers_prealloc: Preallocator, // preallocates the PrimitiveHeaders vecs; records `headers_int` after a build
+    #[cfg_attr(feature = "capture", serde(skip))]
+    composite_state_prealloc: CompositeStatePreallocator, // preallocates, then records, each frame's CompositeState
+    #[cfg_attr(feature = "capture", serde(skip))]
+    plane_splitters: Vec<PlaneSplitter>, // retained from frame to frame to reduce per-frame allocations
+}
+
+pub struct FrameBuildingContext<'a> { // Inputs handed by shared reference to the picture graph passes and prepare step.
+    pub global_device_pixel_scale: DevicePixelScale,
+    pub scene_properties: &'a SceneProperties,
+    pub global_screen_world_rect: WorldRect, // also seeds the default dirty region pushed before prepare
+    pub spatial_tree: &'a SpatialTree,
+    pub max_local_clip: LayoutRect, // built as +/-1.0e9 (MAX_CLIP_COORD) on both axes
+    pub debug_flags: DebugFlags,
+    pub fb_config: &'a FrameBuilderConfig, // borrowed from `scene.config`
+    pub root_spatial_node_index: SpatialNodeIndex, // `spatial_tree.root_reference_frame_index()`
+}
+
+pub struct FrameBuildingState<'a> { // Mutable state passed as `&mut` to take_context / prepare_primitives / restore_context.
+    pub rg_builder: &'a mut RenderTaskGraphBuilder,
+    pub clip_store: &'a mut ClipStore,
+    pub resource_cache: &'a mut ResourceCache,
+    pub gpu_cache: &'a mut GpuCache,
+    pub transforms: &'a mut TransformPalette,
+    pub segment_builder: SegmentBuilder, // created with SegmentBuilder::new() for each frame build
+    pub surfaces: &'a mut Vec<SurfaceInfo>,
+    pub dirty_region_stack: Vec<DirtyRegion>, // storage lent by FrameScratchBuffer and returned after prepare
+    pub composite_state: &'a mut CompositeState,
+    pub num_visible_primitives: u32, // starts at 0; reported as profiler::VISIBLE_PRIMITIVES
+    pub plane_splitters: &'a mut [PlaneSplitter], // FrameBuilder::plane_splitters, reset before use
+    pub surface_builder: SurfaceBuilder, // created fresh; `finalize()` is called once prepare is done
+    pub cmd_buffers: &'a mut CommandBufferList,
+    pub clip_tree: &'a ClipTree,
+    pub frame_gpu_data: &'a mut GpuBufferBuilder,
+}
+
+impl<'a> FrameBuildingState<'a> {
+    /// Returns the innermost dirty region (top of the stack); panics if none was pushed.
+    pub fn current_dirty_region(&self) -> &DirtyRegion {
+        self.dirty_region_stack.iter().next_back().unwrap()
+    }
+
+    /// Makes `region` the innermost dirty region for the primitives visited next.
+    pub fn push_dirty_region(&mut self, region: DirtyRegion) {
+        self.dirty_region_stack.push(region)
+    }
+
+    /// Discards the innermost dirty region; panics if the stack is already empty.
+    pub fn pop_dirty_region(&mut self) {
+        drop(self.dirty_region_stack.pop().unwrap());
+    }
+}
+
+/// Immutable context of a picture when processing children.
+#[derive(Debug)]
+pub struct PictureContext { // returned by take_context, lent to prepare_primitives, consumed by restore_context
+    pub pic_index: PictureIndex,
+    pub surface_spatial_node_index: SpatialNodeIndex,
+    pub raster_spatial_node_index: SpatialNodeIndex,
+    /// The surface that this picture will render on.
+    pub surface_index: SurfaceIndex,
+    pub dirty_region_count: usize, // NOTE(review): presumably the number of dirty regions to pop on restore - confirm
+    pub subpixel_mode: SubpixelMode,
+}
+
+/// Mutable state of a picture that gets modified when
+/// the children are processed.
+pub struct PictureState { // returned by take_context and lent mutably to prepare_primitives
+    pub map_local_to_pic: SpaceMapper<LayoutPixel, PicturePixel>, // layout-pixel to picture-pixel, per its type
+    pub map_pic_to_world: SpaceMapper<PicturePixel, WorldPixel>, // picture-pixel to world-pixel, per its type
+}
+
+impl FrameBuilder {
+    /// Creates a builder with empty global resources and no retained plane splitters.
+    pub fn new() -> Self {
+        let globals = FrameGlobalResources::empty();
+        let prim_headers_prealloc = Preallocator::new(0);
+        let composite_state_prealloc = CompositeStatePreallocator::default();
+        let plane_splitters = Vec::new();
+        Self { globals, prim_headers_prealloc, composite_state_prealloc, plane_splitters }
+    }
+
+    /// Compute the contribution (bounding rectangles, and resources) of layers and their
+    /// primitives in screen space: picture graph passes, visibility, prepare, resource wait.
+    fn build_layer_screen_rects_and_cull_layers(
+        &mut self,
+        scene: &mut BuiltScene,
+        global_screen_world_rect: WorldRect,
+        resource_cache: &mut ResourceCache,
+        gpu_cache: &mut GpuCache,
+        rg_builder: &mut RenderTaskGraphBuilder,
+        global_device_pixel_scale: DevicePixelScale,
+        scene_properties: &SceneProperties,
+        transform_palette: &mut TransformPalette,
+        data_stores: &mut DataStores,
+        scratch: &mut ScratchBuffer,
+        debug_flags: DebugFlags,
+        composite_state: &mut CompositeState,
+        tile_caches: &mut FastHashMap<SliceId, Box<TileCacheInstance>>,
+        spatial_tree: &SpatialTree,
+        cmd_buffers: &mut CommandBufferList,
+        frame_gpu_data: &mut GpuBufferBuilder,
+        profile: &mut TransactionProfile,
+    ) {
+        profile_scope!("build_layer_screen_rects_and_cull_layers");
+
+        let root_spatial_node_index = spatial_tree.root_reference_frame_index();
+
+        const MAX_CLIP_COORD: f32 = 1.0e9; // half-extent of the `max_local_clip` rect built below
+
+        // Reset all plane splitters. These are retained from frame to frame to reduce
+        // per-frame allocations
+        self.plane_splitters.resize_with(scene.num_plane_splitters, BspSplitter::new);
+        for splitter in &mut self.plane_splitters {
+            splitter.reset();
+        }
+
+        let frame_context = FrameBuildingContext {
+            global_device_pixel_scale,
+            scene_properties,
+            global_screen_world_rect,
+            spatial_tree,
+            max_local_clip: LayoutRect {
+                min: LayoutPoint::new(-MAX_CLIP_COORD, -MAX_CLIP_COORD),
+                max: LayoutPoint::new(MAX_CLIP_COORD, MAX_CLIP_COORD),
+            },
+            debug_flags,
+            fb_config: &scene.config,
+            root_spatial_node_index,
+        };
+        // Picture graph passes, in order: update passes, surface assignment, bounding rects.
+        scene.picture_graph.build_update_passes(
+            &mut scene.prim_store.pictures,
+            &frame_context,
+        );
+
+        scene.picture_graph.assign_surfaces(
+            &mut scene.prim_store.pictures,
+            &mut scene.surfaces,
+            tile_caches,
+            &frame_context,
+        );
+
+        scene.picture_graph.propagate_bounding_rects(
+            &mut scene.prim_store.pictures,
+            &mut scene.surfaces,
+            &frame_context,
+        );
+
+        {
+            profile_scope!("UpdateVisibility");
+            profile_marker!("UpdateVisibility");
+            profile.start_time(profiler::FRAME_VISIBILITY_TIME);
+
+            let visibility_context = FrameVisibilityContext {
+                global_device_pixel_scale,
+                spatial_tree,
+                global_screen_world_rect,
+                debug_flags,
+                scene_properties,
+                config: scene.config,
+                root_spatial_node_index,
+            };
+
+            for pic_index in scene.tile_cache_pictures.iter().rev() { // reverse order; the prepare loop below goes forwards
+                let pic = &mut scene.prim_store.pictures[pic_index.0];
+
+                match pic.raster_config {
+                    Some(RasterConfig { surface_index, composite_mode: PictureCompositeMode::TileCache { slice_id }, .. }) => {
+                        let tile_cache = tile_caches
+                            .get_mut(&slice_id)
+                            .expect("bug: non-existent tile cache");
+                        // The surface stack storage is lent by the scratch buffer and handed back below.
+                        let mut visibility_state = FrameVisibilityState {
+                            surface_stack: scratch.frame.surface_stack.take(),
+                            resource_cache,
+                            gpu_cache,
+                            clip_store: &mut scene.clip_store,
+                            scratch,
+                            data_stores,
+                            composite_state,
+                            clip_tree: &mut scene.clip_tree,
+                        };
+
+                        // If we have a tile cache for this picture, see if any of the
+                        // relative transforms have changed, which means we need to
+                        // re-map the dependencies of any child primitives.
+                        let surface = &scene.surfaces[surface_index.0];
+                        let world_culling_rect = tile_cache.pre_update(
+                            surface.unclipped_local_rect,
+                            surface_index,
+                            &visibility_context,
+                            &mut visibility_state,
+                        );
+
+                        // Push a new surface, supplying the list of clips that should be
+                        // ignored, since they are handled by clipping when drawing this surface.
+                        visibility_state.push_surface(
+                            *pic_index,
+                            surface_index,
+                        );
+                        visibility_state.clip_tree.push_clip_root_node(tile_cache.shared_clip_node_id);
+
+                        update_prim_visibility(
+                            *pic_index,
+                            None,
+                            &world_culling_rect,
+                            &mut scene.prim_store,
+                            &mut scene.prim_instances,
+                            &mut scene.surfaces,
+                            true,
+                            &visibility_context,
+                            &mut visibility_state,
+                            tile_cache,
+                        );
+
+                        // Build the dirty region(s) for this tile cache.
+                        tile_cache.post_update(
+                            &visibility_context,
+                            &mut visibility_state,
+                        );
+
+                        visibility_state.clip_tree.pop_clip_root();
+                        visibility_state.pop_surface();
+                        visibility_state.scratch.frame.surface_stack = visibility_state.surface_stack.take(); // hand the storage back
+                    }
+                    _ => {
+                        panic!("bug: not a tile cache");
+                    }
+                }
+            }
+
+            profile.end_time(profiler::FRAME_VISIBILITY_TIME);
+        }
+
+        profile.start_time(profiler::FRAME_PREPARE_TIME);
+        // Prepare phase: every tile cache picture shares this one FrameBuildingState.
+        let mut frame_state = FrameBuildingState {
+            rg_builder,
+            clip_store: &mut scene.clip_store,
+            resource_cache,
+            gpu_cache,
+            transforms: transform_palette,
+            segment_builder: SegmentBuilder::new(),
+            surfaces: &mut scene.surfaces,
+            dirty_region_stack: scratch.frame.dirty_region_stack.take(),
+            composite_state,
+            num_visible_primitives: 0,
+            plane_splitters: &mut self.plane_splitters,
+            surface_builder: SurfaceBuilder::new(),
+            cmd_buffers,
+            clip_tree: &mut scene.clip_tree,
+            frame_gpu_data,
+        };
+
+        // Push a default dirty region which culls primitives
+        // against the screen world rect, in absence of any
+        // other dirty regions.
+        let mut default_dirty_region = DirtyRegion::new(
+            root_spatial_node_index,
+        );
+        default_dirty_region.add_dirty_region(
+            frame_context.global_screen_world_rect.cast_unit(),
+            frame_context.spatial_tree,
+        );
+        frame_state.push_dirty_region(default_dirty_region);
+
+        for pic_index in &scene.tile_cache_pictures {
+            if let Some((pic_context, mut pic_state, mut prim_list)) = scene
+                .prim_store
+                .pictures[pic_index.0]
+                .take_context(
+                    *pic_index,
+                    None,
+                    SubpixelMode::Allow,
+                    &mut frame_state,
+                    &frame_context,
+                    &mut scratch.primitive,
+                    tile_caches,
+                )
+            {
+                profile_marker!("PreparePrims");
+
+                prepare_primitives(
+                    &mut scene.prim_store,
+                    &mut prim_list,
+                    &pic_context,
+                    &mut pic_state,
+                    &frame_context,
+                    &mut frame_state,
+                    data_stores,
+                    &mut scratch.primitive,
+                    tile_caches,
+                    &mut scene.prim_instances,
+                );
+
+                let pic = &mut scene.prim_store.pictures[pic_index.0];
+                pic.restore_context(
+                    *pic_index,
+                    prim_list,
+                    pic_context,
+                    &scene.prim_instances,
+                    &frame_context,
+                    &mut frame_state,
+                );
+            }
+        }
+
+        frame_state.pop_dirty_region();
+        frame_state.surface_builder.finalize();
+        profile.end_time(profiler::FRAME_PREPARE_TIME);
+        profile.set(profiler::VISIBLE_PRIMITIVES, frame_state.num_visible_primitives);
+        // Hand the dirty region stack storage back to the scratch buffer.
+        scratch.frame.dirty_region_stack = frame_state.dirty_region_stack.take();
+
+        {
+            profile_marker!("BlockOnResources");
+
+            resource_cache.block_until_all_resources_added(
+                gpu_cache,
+                profile,
+            );
+        }
+    }
+
+    /// Builds a complete `Frame` for `scene`: begins the frame on the caches and task graph,
+    /// runs visibility and primitive preparation, batches every render pass, then collects
+    /// the results. `stamp` identifies the frame; `device_origin` positions the output rect.
+    pub fn build(
+        &mut self,
+        scene: &mut BuiltScene,
+        resource_cache: &mut ResourceCache,
+        gpu_cache: &mut GpuCache,
+        rg_builder: &mut RenderTaskGraphBuilder,
+        stamp: FrameStamp,
+        device_origin: DeviceIntPoint,
+        scene_properties: &SceneProperties,
+        data_stores: &mut DataStores,
+        scratch: &mut ScratchBuffer,
+        debug_flags: DebugFlags,
+        tile_caches: &mut FastHashMap<SliceId, Box<TileCacheInstance>>,
+        spatial_tree: &mut SpatialTree,
+        dirty_rects_are_valid: bool,
+        profile: &mut TransactionProfile,
+    ) -> Frame {
+        profile_scope!("build");
+        profile_marker!("BuildFrame");
+
+        profile.set(profiler::PRIMITIVES, scene.prim_instances.len());
+        profile.set(profiler::PICTURE_CACHE_SLICES, scene.tile_cache_config.picture_cache_slice_count);
+        scratch.begin_frame();
+        gpu_cache.begin_frame(stamp);
+        resource_cache.begin_frame(stamp, gpu_cache, profile);
+
+        // TODO(gw): Follow up patches won't clear this, as they'll be assigned
+        //           statically during scene building.
+        scene.surfaces.clear();
+
+        self.globals.update(gpu_cache);
+
+        spatial_tree.update_tree(scene_properties);
+        let mut transform_palette = spatial_tree.build_transform_palette();
+        scene.clip_store.begin_frame(&mut scratch.clip_store);
+
+        rg_builder.begin_frame(stamp.frame_id());
+
+        // TODO(dp): Remove me completely!!
+        let global_device_pixel_scale = DevicePixelScale::new(1.0);
+
+        let output_size = scene.output_rect.size();
+        let screen_world_rect = (scene.output_rect.to_f32() / global_device_pixel_scale).round_out();
+
+        let mut composite_state = CompositeState::new(
+            scene.config.compositor_kind,
+            scene.config.max_depth_ids,
+            dirty_rects_are_valid,
+            scene.config.low_quality_pinch_zoom,
+        );
+
+        self.composite_state_prealloc.preallocate(&mut composite_state);
+
+        let mut cmd_buffers = CommandBufferList::new();
+
+        // TODO(gw): Recycle backing vec buffers for gpu buffer builder between frames
+        let mut gpu_buffer_builder = GpuBufferBuilder::new();
+
+        self.build_layer_screen_rects_and_cull_layers(
+            scene,
+            screen_world_rect,
+            resource_cache,
+            gpu_cache,
+            rg_builder,
+            global_device_pixel_scale,
+            scene_properties,
+            &mut transform_palette,
+            data_stores,
+            scratch,
+            debug_flags,
+            &mut composite_state,
+            tile_caches,
+            spatial_tree,
+            &mut cmd_buffers,
+            &mut gpu_buffer_builder,
+            profile,
+        );
+
+        profile.start_time(profiler::FRAME_BATCHING_TIME);
+
+        let mut deferred_resolves = vec![];
+
+        // Finish creating the frame graph and build it.
+        let render_tasks = rg_builder.end_frame(
+            resource_cache,
+            gpu_cache,
+            &mut deferred_resolves,
+        );
+
+        let mut passes = Vec::new();
+        let mut has_texture_cache_tasks = false;
+        let mut prim_headers = PrimitiveHeaders::new();
+        self.prim_headers_prealloc.preallocate_vec(&mut prim_headers.headers_int);
+        self.prim_headers_prealloc.preallocate_vec(&mut prim_headers.headers_float);
+
+        {
+            profile_marker!("Batching");
+
+            // Used to generated a unique z-buffer value per primitive.
+            let mut z_generator = ZBufferIdGenerator::new(scene.config.max_depth_ids);
+            let use_dual_source_blending = scene.config.dual_source_blending_is_supported;
+
+            for pass in render_tasks.passes.iter().rev() {
+                let mut ctx = RenderTargetContext {
+                    global_device_pixel_scale,
+                    prim_store: &scene.prim_store,
+                    resource_cache,
+                    use_dual_source_blending,
+                    use_advanced_blending: scene.config.gpu_supports_advanced_blend,
+                    break_advanced_blend_batches: !scene.config.advanced_blend_is_coherent,
+                    batch_lookback_count: scene.config.batch_lookback_count,
+                    spatial_tree,
+                    data_stores,
+                    surfaces: &scene.surfaces,
+                    scratch: &mut scratch.primitive,
+                    screen_world_rect,
+                    globals: &self.globals,
+                    tile_caches,
+                    root_spatial_node_index: spatial_tree.root_reference_frame_index(),
+                };
+
+                let pass = build_render_pass(
+                    pass,
+                    output_size,
+                    &mut ctx,
+                    gpu_cache,
+                    &render_tasks,
+                    &scene.clip_store,
+                    &mut transform_palette,
+                    &mut prim_headers,
+                    &mut z_generator,
+                    scene.config.gpu_supports_fast_clears,
+                    &scene.prim_instances,
+                    &cmd_buffers,
+                );
+
+                has_texture_cache_tasks |= !pass.texture_cache.is_empty();
+                has_texture_cache_tasks |= !pass.picture_cache.is_empty();
+
+                passes.push(pass);
+            }
+
+            let mut ctx = RenderTargetContext {
+                global_device_pixel_scale,
+                prim_store: &scene.prim_store,
+                resource_cache,
+                use_dual_source_blending,
+                use_advanced_blending: scene.config.gpu_supports_advanced_blend,
+                break_advanced_blend_batches: !scene.config.advanced_blend_is_coherent,
+                batch_lookback_count: scene.config.batch_lookback_count,
+                spatial_tree,
+                data_stores,
+                surfaces: &scene.surfaces,
+                scratch: &mut scratch.primitive,
+                screen_world_rect,
+                globals: &self.globals,
+                tile_caches,
+                root_spatial_node_index: spatial_tree.root_reference_frame_index(),
+            };
+
+            self.build_composite_pass(
+                scene,
+                &mut ctx,
+                gpu_cache,
+                &mut deferred_resolves,
+                &mut composite_state,
+            );
+        }
+
+        profile.end_time(profiler::FRAME_BATCHING_TIME);
+
+        let gpu_cache_frame_id = gpu_cache.end_frame(profile).frame_id();
+
+        resource_cache.end_frame(profile);
+
+        self.prim_headers_prealloc.record_vec(&mut prim_headers.headers_int); // NOTE(review): headers_float is not recorded - presumably same length, confirm
+        self.composite_state_prealloc.record(&composite_state);
+
+        composite_state.end_frame();
+        scene.clip_store.end_frame(&mut scratch.clip_store);
+        scratch.end_frame();
+
+        let gpu_buffer = gpu_buffer_builder.finalize(&render_tasks);
+
+        Frame {
+            device_rect: DeviceIntRect::from_origin_and_size(device_origin, output_size),
+            passes,
+            transform_palette: transform_palette.finish(),
+            render_tasks,
+            deferred_resolves,
+            gpu_cache_frame_id,
+            has_been_rendered: false,
+            has_texture_cache_tasks,
+            prim_headers,
+            debug_items: mem::take(&mut scratch.primitive.debug_items),
+            composite_state,
+            gpu_buffer,
+        }
+    }
+
+    /// Registers every tile cache picture of `scene` as a surface on `composite_state`.
+    /// Panics if a top-level picture is not a tile cache.
+    fn build_composite_pass(
+        &self,
+        scene: &BuiltScene,
+        ctx: &RenderTargetContext,
+        gpu_cache: &mut GpuCache,
+        deferred_resolves: &mut Vec<DeferredResolve>,
+        composite_state: &mut CompositeState,
+    ) {
+        for pic_index in scene.tile_cache_pictures.iter() {
+            let pic = &ctx.prim_store.pictures[pic_index.0];
+            // Anything other than a tile cache at the top level is a bug.
+            let slice_id = match pic.raster_config {
+                Some(RasterConfig { composite_mode: PictureCompositeMode::TileCache { slice_id }, .. }) => slice_id,
+                _ => panic!("bug: found a top-level prim that isn't a tile cache"),
+            };
+
+            // Tile cache instances are added to the composite config, rather than
+            // directly added to batches. This allows them to be drawn with various
+            // present modes during render, such as partial present etc.
+            let tile_cache = &ctx.tile_caches[&slice_id];
+            let clip_mapper = SpaceMapper::new_with_target(
+                ctx.root_spatial_node_index,
+                tile_cache.spatial_node_index,
+                ctx.screen_world_rect,
+                ctx.spatial_tree,
+            );
+            let world_clip_rect = clip_mapper
+                .map(&tile_cache.local_clip_rect)
+                .expect("bug: unable to map clip rect");
+            let device_clip_rect = (world_clip_rect * ctx.global_device_pixel_scale).round();
+
+            composite_state.push_surface(
+                tile_cache,
+                device_clip_rect,
+                ctx.resource_cache,
+                gpu_cache,
+                deferred_resolves,
+            );
+        }
+    }
+}
+
+/// Processes this pass to prepare it for rendering.
+///
+/// Among other things, this allocates output regions for each of our tasks
+/// (added via `add_render_task`) in a RenderTarget and assigns it into that
+/// target.
+pub fn build_render_pass(
+    src_pass: &Pass,
+    screen_size: DeviceIntSize,
+    ctx: &mut RenderTargetContext,
+    gpu_cache: &mut GpuCache,
+    render_tasks: &RenderTaskGraph,
+    clip_store: &ClipStore,
+    transforms: &mut TransformPalette,
+    prim_headers: &mut PrimitiveHeaders,
+    z_generator: &mut ZBufferIdGenerator,
+    gpu_supports_fast_clears: bool,
+    prim_instances: &[PrimitiveInstance],
+    cmd_buffers: &CommandBufferList,
+) -> RenderPass {
+    profile_scope!("build_render_pass");
+
+    // TODO(gw): In this initial frame graph work, we try to maintain the existing
+    //           build_render_pass code as closely as possible, to make the review
+    //           simpler and reduce chance of regressions. However, future work should
+    //           include refactoring this to more closely match the built frame graph.
+    let mut pass = RenderPass::new(src_pass);
+
+    for sub_pass in &src_pass.sub_passes {
+        match sub_pass.surface {
+            SubPassSurface::Dynamic { target_kind, texture_id, used_rect } => {
+                match target_kind {
+                    RenderTargetKind::Color => {
+                        let mut target = ColorRenderTarget::new(
+                            texture_id,
+                            screen_size,
+                            gpu_supports_fast_clears,
+                            used_rect,
+                        );
+
+                        for task_id in &sub_pass.task_ids {
+                            target.add_task(
+                                *task_id,
+                                ctx,
+                                gpu_cache,
+                                render_tasks,
+                                clip_store,
+                                transforms,
+                            );
+                        }
+
+                        pass.color.targets.push(target);
+                    }
+                    RenderTargetKind::Alpha => {
+                        let mut target = AlphaRenderTarget::new(
+                            texture_id,
+                            screen_size,
+                            gpu_supports_fast_clears,
+                            used_rect,
+                        );
+
+                        for task_id in &sub_pass.task_ids {
+                            target.add_task(
+                                *task_id,
+                                ctx,
+                                gpu_cache,
+                                render_tasks,
+                                clip_store,
+                                transforms,
+                            );
+                        }
+
+                        pass.alpha.targets.push(target);
+                    }
+                }
+            }
+            SubPassSurface::Persistent { surface: StaticRenderTaskSurface::PictureCache { ref surface, .. }, .. } => {
+                assert_eq!(sub_pass.task_ids.len(), 1);
+                let task_id = sub_pass.task_ids[0];
+                let task = &render_tasks[task_id];
+                let target_rect = task.get_target_rect();
+                let mut gpu_buffer_builder = GpuBufferBuilder::new();
+
+                match task.kind {
+                    RenderTaskKind::Picture(ref pic_task) => {
+                        let cmd_buffer = cmd_buffers.get(pic_task.cmd_buffer_index);
+                        let scissor_rect = pic_task.scissor_rect.expect("bug: must be set for cache tasks");
+                        let valid_rect = pic_task.valid_rect.expect("bug: must be set for cache tasks");
+
+                        let batcher = AlphaBatchBuilder::new(
+                            screen_size,
+                            ctx.break_advanced_blend_batches,
+                            ctx.batch_lookback_count,
+                            task_id,
+                            task_id.into(),
+                        );
+
+                        let mut batch_builder = BatchBuilder::new(batcher);
+
+                        cmd_buffer.iter_prims(&mut |cmd, spatial_node_index| {
+                            batch_builder.add_prim_to_batch(
+                                cmd,
+                                spatial_node_index,
+                                ctx,
+                                gpu_cache,
+                                render_tasks,
+                                prim_headers,
+                                transforms,
+                                pic_task.raster_spatial_node_index,
+                                pic_task.surface_spatial_node_index,
+                                z_generator,
+                                prim_instances,
+                                &mut gpu_buffer_builder,
+                            );
+                        });
+
+                        let batcher = batch_builder.finalize();
+
+                        let mut batch_containers = Vec::new();
+                        let mut alpha_batch_container = AlphaBatchContainer::new(Some(scissor_rect));
+
+                        batcher.build(
+                            &mut batch_containers,
+                            &mut alpha_batch_container,
+                            target_rect,
+                            None,
+                        );
+                        debug_assert!(batch_containers.is_empty());
+
+                        let target = PictureCacheTarget {
+                            surface: surface.clone(),
+                            clear_color: pic_task.clear_color,
+                            kind: PictureCacheTargetKind::Draw {
+                                alpha_batch_container,
+                            },
+                            dirty_rect: scissor_rect,
+                            valid_rect,
+                        };
+
+                        pass.picture_cache.push(target);
+                    }
+                    RenderTaskKind::TileComposite(ref tile_task) => {
+                        let target = PictureCacheTarget {
+                            surface: surface.clone(),
+                            clear_color: Some(tile_task.clear_color),
+                            kind: PictureCacheTargetKind::Blit {
+                                task_id: tile_task.task_id.expect("bug: no source task_id set"),
+                                sub_rect_offset: tile_task.sub_rect_offset,
+                            },
+                            dirty_rect: tile_task.scissor_rect,
+                            valid_rect: tile_task.valid_rect,
+                        };
+
+                        pass.picture_cache.push(target);
+                    }
+                    _ => {
+                        unreachable!();
+                    }
+                };
+            }
+            SubPassSurface::Persistent { surface: StaticRenderTaskSurface::TextureCache { target_kind, texture, .. } } => {
+                let texture = pass.texture_cache
+                    .entry(texture)
+                    .or_insert_with(||
+                        TextureCacheRenderTarget::new(target_kind)
+                    );
+                for task_id in &sub_pass.task_ids {
+                    texture.add_task(*task_id, render_tasks);
+                }
+            }
+            SubPassSurface::Persistent { surface: StaticRenderTaskSurface::ReadOnly { .. } } => {
+                panic!("Should not create a render pass for read-only task locations.");
+            }
+        }
+    }
+
+    pass.color.build(
+        ctx,
+        gpu_cache,
+        render_tasks,
+        prim_headers,
+        transforms,
+        z_generator,
+        prim_instances,
+        cmd_buffers,
+    );
+    pass.alpha.build(
+        ctx,
+        gpu_cache,
+        render_tasks,
+        prim_headers,
+        transforms,
+        z_generator,
+        prim_instances,
+        cmd_buffers,
+    );
+
+    pass
+}
+
+/// A rendering-oriented representation of the frame built by the render backend
+/// and presented to the renderer.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct Frame {
+    /// The rectangle to show the frame in, on screen.
+    pub device_rect: DeviceIntRect,
+    pub passes: Vec<RenderPass>, // an empty list makes `is_nop()` return true
+
+    pub transform_palette: Vec<TransformData>,
+    pub render_tasks: RenderTaskGraph,
+    pub prim_headers: PrimitiveHeaders,
+
+    /// The GPU cache frame that the contents of `Self` depend on.
+    pub gpu_cache_frame_id: FrameId,
+
+    /// List of textures that we don't know about yet
+    /// from the backend thread. The render thread
+    /// will use a callback to resolve these and
+    /// patch the data structures.
+    pub deferred_resolves: Vec<DeferredResolve>,
+
+    /// True if this frame contains any render tasks
+    /// that write to the texture cache (read by `must_be_drawn`).
+    pub has_texture_cache_tasks: bool,
+
+    /// True if this frame has been drawn by the
+    /// renderer (read by `must_be_drawn`).
+    pub has_been_rendered: bool,
+
+    /// Debugging information to overlay for this frame.
+    pub debug_items: Vec<DebugItem>,
+
+    /// Contains picture cache tiles, and associated information.
+    /// Used by the renderer to composite tiles into the framebuffer,
+    /// or hand them off to an OS compositor.
+    pub composite_state: CompositeState,
+
+    /// Main GPU data buffer constructed (primarily) during the prepare
+    /// pass for primitives that were visible and dirty.
+    pub gpu_buffer: GpuBuffer,
+}
+
+impl Frame {
+    /// Reports whether this frame still has to be flushed: it writes to the
+    /// texture cache and the renderer has not drawn it yet.
+    pub fn must_be_drawn(&self) -> bool {
+        !self.has_been_rendered && self.has_texture_cache_tasks
+    }
+
+    /// Returns true if this frame doesn't alter what is on screen currently.
+    pub fn is_nop(&self) -> bool {
+        // If there are no off-screen passes, that implies that there are no
+        // picture cache tiles, and no texture cache tasks being updated. If this
+        // is the case, we can consider the frame a nop (higher level checks
+        // test if a composite is needed due to picture cache surfaces moving
+        // or external surfaces being updated).
+        self.passes.is_empty()
+    }
+}
diff --git a/gfx/wr/webrender/src/freelist.rs b/gfx/wr/webrender/src/freelist.rs
new file mode 100644
index 0000000000..aa90aba03c
--- /dev/null
+++ b/gfx/wr/webrender/src/freelist.rs
@@ -0,0 +1,264 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! A generic backing store for caches.
+//!
+//! `FreeList` is a simple vector-backed data structure where each entry in the
+//! vector contains an Option<T>. It maintains an index-based (rather than
+//! pointer-based) free list to efficiently locate the next unused entry. If all
+//! entries are occupied, insertion appends a new element to the vector.
+//!
+//! It also supports both strong and weak handle semantics. There is exactly one
+//! (non-Clonable) strong handle per occupied entry, which must be passed by
+//! value into `free()` to release an entry. Strong handles can produce an
+//! unlimited number of (Clonable) weak handles, which are used to perform
+//! lookups which may fail if the entry has been freed. A per-entry epoch ensures
+//! that weak handle lookups properly fail even if the entry has been freed and
+//! reused.
+//!
+//! TODO(gw): Add an occupied list head, for fast iteration of the occupied list
+//! to implement retain() style functionality.
+
+use std::{fmt, u32};
+use std::marker::PhantomData;
+
+#[derive(Debug, Copy, Clone, MallocSizeOf, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct Epoch(u32); // per-slot generation number; the value 0 is reserved as "always invalid"
+
+impl Epoch {
+    /// Returns the first valid epoch.
+    ///
+    /// Valid epochs begin at 1, which leaves 0 free to mean "never valid".
+    fn new() -> Self {
+        Self(1)
+    }
+
+    /// Returns the reserved always-invalid epoch (0).
+    fn invalid() -> Self {
+        Self(0)
+    }
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+pub struct FreeListHandle<M> { // strong handle: no `Clone` impl, consumed by `FreeList::free`
+    index: u32, // position of the owned slot in `FreeList::slots`
+    epoch: Epoch, // slot epoch copied into the handle when `FreeList::insert` created it
+    _marker: PhantomData<M>, // ties the handle to the marker type `M` without storing one
+}
+
+/// Compact debug output for logging: the epoch is printed as a bare number.
+impl<M> fmt::Debug for FreeListHandle<M> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let mut out = f.debug_struct("StrongHandle");
+        out.field("index", &self.index);
+        out.field("epoch", &self.epoch.0);
+        out.finish()
+    }
+}
+
+impl<M> FreeListHandle<M> {
+    /// Mints a weak handle that refers to the same slot and epoch as this
+    /// strong handle.
+    pub fn weak(&self) -> WeakFreeListHandle<M> {
+        // Only the identifying fields are copied; the marker is zero-sized.
+        let (index, epoch) = (self.index, self.epoch);
+        WeakFreeListHandle { index, epoch, _marker: PhantomData }
+    }
+
+    /// Returns a handle that carries index 0 and the always-invalid epoch.
+    pub fn invalid() -> Self {
+        let epoch = Epoch::invalid();
+        Self { index: 0, epoch, _marker: PhantomData }
+    }
+
+    /// Reports whether `weak_handle` points at the same freelist location as
+    /// this handle, i.e. both the slot index and the epoch agree.
+    pub fn matches(&self, weak_handle: &WeakFreeListHandle<M>) -> bool {
+        // Compare index and epoch together as a pair.
+        let mine = (self.index, self.epoch);
+        let theirs = (weak_handle.index, weak_handle.epoch);
+        mine == theirs
+    }
+}
+
+impl<M> Clone for WeakFreeListHandle<M> {
+    // A derived impl would add an `M: Clone` bound; this hand-written one
+    // has none, so weak handles can be cloned for any marker type.
+    // The clone carries the same index and epoch as the original.
+    fn clone(&self) -> Self {
+        let (index, epoch) = (self.index, self.epoch);
+        Self { index, epoch, _marker: PhantomData }
+    }
+}
+
+impl<M> PartialEq for WeakFreeListHandle<M> {
+    fn eq(&self, other: &Self) -> bool {
+        (self.index, self.epoch) == (other.index, other.epoch) // same slot and same epoch
+    }
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+pub struct WeakFreeListHandle<M> { // clonable handle; lookups go through `FreeList::get_opt`
+    index: u32, // position of the referenced slot in `FreeList::slots`
+    epoch: Epoch, // must equal the slot's current epoch for a lookup to succeed
+    _marker: PhantomData<M>, // ties the handle to the marker type `M` without storing one
+}
+
+/// Compact debug output for logging: the epoch is printed as a bare number.
+impl<M> fmt::Debug for WeakFreeListHandle<M> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let mut out = f.debug_struct("WeakHandle");
+        out.field("index", &self.index);
+        out.field("epoch", &self.epoch.0);
+        out.finish()
+    }
+}
+
+impl<M> WeakFreeListHandle<M> {
+    /// Returns an always-invalid handle.
+    ///
+    /// It pairs index 0 with `Epoch::invalid()`, an epoch that slots are
+    /// never created with.
+    pub fn invalid() -> Self {
+        let epoch = Epoch::invalid();
+        Self { index: 0, epoch, _marker: PhantomData }
+    }
+}
+
+#[derive(Debug, MallocSizeOf)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct Slot<T> {
+    next: Option<u32>, // index of the next free slot while this slot is on the free list
+    epoch: Epoch, // incremented by `FreeList::free` each time the slot is released
+    value: Option<T>, // `Some` while occupied; taken out again by `FreeList::free`
+}
+
+#[derive(Debug, MallocSizeOf)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct FreeList<T, M> {
+    slots: Vec<Slot<T>>, // backing storage; `new` and `clear` always leave at least one slot
+    free_list_head: Option<u32>, // index of the first free slot, if any
+    active_count: usize, // number of occupied slots, reported by `len`
+    _marker: PhantomData<M>, // marker type shared with the handles this list hands out
+}
+
+impl<T, M> FreeList<T, M> {
+    /// Mints a new `FreeList` with no entries.
+    ///
+    /// Triggers a 1-entry allocation.
+    pub fn new() -> Self {
+        // We guarantee that we never have zero entries by starting with one
+        // free entry. This allows WeakFreeListHandle::invalid() to work
+        // without adding any additional branches.
+        let first_slot = Slot {
+            next: None,
+            epoch: Epoch::new(),
+            value: None,
+        };
+        FreeList {
+            slots: vec![first_slot],
+            free_list_head: Some(0),
+            active_count: 0,
+            _marker: PhantomData,
+        }
+    }
+
+    /// Drops every entry and returns to the initial single-free-slot state.
+    ///
+    /// NOTE: epochs restart from `Epoch::new()`, so a weak handle minted before
+    /// this call can compare equal to a slot that is reused afterwards.
+    pub fn clear(&mut self) {
+        self.slots.truncate(1);
+        self.slots[0] = Slot { next: None, epoch: Epoch::new(), value: None };
+        self.free_list_head = Some(0);
+        self.active_count = 0;
+    }
+
+    /// Returns the value owned by the strong handle `id`; panics if that slot
+    /// is out of range or currently empty.
+    #[allow(dead_code)]
+    pub fn get(&self, id: &FreeListHandle<M>) -> &T {
+        self.slots[id.index as usize].value.as_ref().unwrap()
+    }
+
+    /// Mutable counterpart of `get`, with the same panics.
+    #[allow(dead_code)]
+    pub fn get_mut(&mut self, id: &FreeListHandle<M>) -> &mut T {
+        self.slots[id.index as usize].value.as_mut().unwrap()
+    }
+
+    /// Looks up the value behind a weak handle.
+    ///
+    /// Returns `None` when the slot's epoch no longer matches the handle, when
+    /// the slot is empty, or when the index lies past the end of the slot
+    /// vector (possible for a handle that outlives a `clear()`).
+    pub fn get_opt(&self, id: &WeakFreeListHandle<M>) -> Option<&T> {
+        self.slots
+            .get(id.index as usize)
+            .filter(|slot| slot.epoch == id.epoch)
+            .and_then(|slot| slot.value.as_ref())
+    }
+
+    /// Mutable counterpart of `get_opt`; stale or out-of-range handles yield `None`.
+    pub fn get_opt_mut(&mut self, id: &WeakFreeListHandle<M>) -> Option<&mut T> {
+        self.slots
+            .get_mut(id.index as usize)
+            .filter(|slot| slot.epoch == id.epoch)
+            .and_then(|slot| slot.value.as_mut())
+    }
+
+    /// Stores `item` and returns the strong handle that owns its slot.
+    ///
+    /// Reuses the head of the free list when there is one; otherwise appends a new slot.
+    pub fn insert(&mut self, item: T) -> FreeListHandle<M> {
+        self.active_count += 1;
+
+        match self.free_list_head {
+            Some(free_index) => {
+                let slot = &mut self.slots[free_index as usize];
+
+                // Remove from free list.
+                self.free_list_head = slot.next;
+                slot.next = None;
+                slot.value = Some(item);
+
+                FreeListHandle { index: free_index, epoch: slot.epoch, _marker: PhantomData }
+            }
+            None => {
+                let index = self.slots.len() as u32;
+                let epoch = Epoch::new();
+
+                self.slots.push(Slot { next: None, epoch, value: Some(item) });
+
+                FreeListHandle { index, epoch, _marker: PhantomData }
+            }
+        }
+    }
+
+    /// Releases the slot owned by `id` and returns the stored value. The
+    /// slot's epoch is bumped so outstanding weak handles stop resolving,
+    /// and the slot becomes the new head of the free list.
+    pub fn free(&mut self, id: FreeListHandle<M>) -> T {
+        self.active_count -= 1;
+        let slot = &mut self.slots[id.index as usize];
+        slot.next = self.free_list_head;
+        slot.epoch = Epoch(slot.epoch.0 + 1);
+        self.free_list_head = Some(id.index);
+        slot.value.take().unwrap()
+    }
+
+    /// Number of entries currently occupied.
+    #[allow(dead_code)]
+    pub fn len(&self) -> usize {
+        self.active_count
+    }
+}
diff --git a/gfx/wr/webrender/src/glyph_cache.rs b/gfx/wr/webrender/src/glyph_cache.rs
new file mode 100644
index 0000000000..0f895e3212
--- /dev/null
+++ b/gfx/wr/webrender/src/glyph_cache.rs
@@ -0,0 +1,208 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{FontKey, FontInstanceKey, IdNamespace};
+use glyph_rasterizer::{FontInstance, GlyphFormat, GlyphKey, GlyphRasterizer};
+use crate::internal_types::{FrameId, FrameStamp, FastHashMap};
+use crate::resource_cache::ResourceClassCache;
+use std::sync::Arc;
+use crate::texture_cache::{EvictionNotice, TextureCache};
+use crate::texture_cache::TextureCacheHandle;
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Clone, Debug)]
+pub struct CachedGlyphInfo { // payload of `GlyphCacheEntry::Cached`
+    pub format: GlyphFormat,
+    pub texture_cache_handle: TextureCacheHandle, // passed to `TextureCache::is_allocated` to detect eviction
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum GlyphCacheEntry {
+    /// A glyph that has been successfully rasterized.
+    Cached(CachedGlyphInfo),
+    /// A glyph that should not be rasterized (e.g. a space).
+    Blank,
+    /// A glyph that has been submitted to the font backend for rasterization,
+    /// but is still pending a result.
+    #[allow(dead_code)]
+    Pending,
+}
+
+impl GlyphCacheEntry {
+    /// Reports whether a cached glyph's texture cache allocation is gone.
+    /// `Blank` and `Pending` entries are never reported as evicted.
+    fn has_been_evicted(&self, texture_cache: &TextureCache) -> bool {
+        match self {
+            Self::Blank | Self::Pending => false,
+            Self::Cached(info) => !texture_cache.is_allocated(&info.texture_cache_handle),
+        }
+    }
+}
+
+#[allow(dead_code)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Clone)]
+pub enum CachedGlyphData {
+    Memory(Arc<Vec<u8>>), // bytes held in a shared, reference-counted buffer
+    Gpu, // carries no payload — NOTE(review): presumably the data lives on the GPU; confirm
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Default)]
+pub struct GlyphKeyCacheInfo {
+    eviction_notice: EvictionNotice, // polled via `check()` in `GlyphKeyCache::clear_evicted`
+    #[cfg(debug_assertions)]
+    #[allow(dead_code)]
+    #[cfg_attr(feature = "replay", serde(default))]
+    last_frame_used: FrameId, // debug builds only: set in `insert_glyph_key_cache_for_font`
+}
+
+pub type GlyphKeyCache = ResourceClassCache<GlyphKey, GlyphCacheEntry, GlyphKeyCacheInfo>; // one per font instance; `user_data` holds the `GlyphKeyCacheInfo`
+
+impl GlyphKeyCache {
+    /// Borrows the eviction notice kept in this cache's user data.
+    pub fn eviction_notice(&self) -> &EvictionNotice {
+        &self.user_data.eviction_notice
+    }
+
+    fn clear_glyphs(&mut self) {
+        self.clear(); // forwards to the cache's own `clear`
+    }
+
+    pub fn add_glyph(&mut self, key: GlyphKey, value: GlyphCacheEntry) {
+        self.insert(key, value); // forwards to the cache's own `insert`
+    }
+
+    fn clear_evicted(&mut self, texture_cache: &TextureCache) {
+        if !self.eviction_notice().check() {
+            return; // no evictions reported, so there is nothing to filter
+        }
+        self.retain(|_, entry| !entry.has_been_evicted(texture_cache));
+    }
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct GlyphCache {
+    glyph_key_caches: FastHashMap<FontInstance, GlyphKeyCache>, // one glyph key cache per font instance
+    current_frame: FrameId, // frame id taken from the stamp passed to `begin_frame`
+}
+
+impl GlyphCache {
+    /// Creates an empty cache whose current frame is the default `FrameId`.
+    pub fn new() -> Self {
+        Self {
+            glyph_key_caches: FastHashMap::default(),
+            current_frame: FrameId::default(),
+        }
+    }
+
+    /// Returns the glyph key cache for `font`, creating an empty one on first use.
+    pub fn insert_glyph_key_cache_for_font(&mut self, font: &FontInstance) -> &mut GlyphKeyCache {
+        let entry = self.glyph_key_caches.entry(font.clone());
+        let cache = entry.or_insert_with(GlyphKeyCache::new);
+        #[cfg(debug_assertions)]
+        {
+            cache.user_data.last_frame_used = self.current_frame;
+        }
+        cache
+    }
+
+    /// Mutable lookup of an existing glyph key cache; panics if `font` has none.
+    pub fn get_glyph_key_cache_for_font_mut(&mut self, font: &FontInstance) -> &mut GlyphKeyCache {
+        let found = self.glyph_key_caches.get_mut(font);
+        found.expect("BUG: Unable to find glyph key cache!")
+    }
+
+    /// Shared lookup of an existing glyph key cache; panics if `font` has none.
+    pub fn get_glyph_key_cache_for_font(&self, font: &FontInstance) -> &GlyphKeyCache {
+        let found = self.glyph_key_caches.get(font);
+        found.expect("BUG: Unable to find glyph key cache!")
+    }
+
+    /// Clears every glyph key cache and then discards the map itself.
+    pub fn clear(&mut self) {
+        for glyph_key_cache in self.glyph_key_caches.values_mut() {
+            glyph_key_cache.clear();
+        }
+        // We use this in on_memory_pressure where retaining memory allocations
+        // isn't desirable, so we completely remove the hash map instead of clearing it.
+        self.glyph_key_caches = FastHashMap::default();
+    }
+
+    /// Drops the caches of the listed font instances, notifying the rasterizer for each.
+    pub fn delete_font_instances(
+        &mut self,
+        instance_keys: &[FontInstanceKey],
+        glyph_rasterizer: &mut GlyphRasterizer,
+    ) {
+        self.glyph_key_caches.retain(|font, cache| {
+            let doomed = instance_keys.contains(&font.instance_key);
+            if doomed {
+                cache.clear_glyphs();
+                glyph_rasterizer.delete_font_instance(font);
+            }
+            !doomed
+        });
+    }
+
+    /// Drops the caches of every font instance that uses one of `font_keys`.
+    pub fn delete_fonts(&mut self, font_keys: &[FontKey]) {
+        self.glyph_key_caches.retain(|font, cache| {
+            let doomed = font_keys.contains(&font.font_key);
+            if doomed {
+                cache.clear_glyphs();
+            }
+            !doomed
+        });
+    }
+
+    /// Drops the caches of every font instance whose font key is in `namespace`.
+    pub fn clear_namespace(&mut self, namespace: IdNamespace) {
+        self.glyph_key_caches.retain(|font, cache| {
+            let doomed = font.font_key.0 == namespace;
+            if doomed {
+                cache.clear_glyphs();
+            }
+            !doomed
+        });
+    }
+
+    /// Clear out evicted entries from glyph key caches.
+    fn clear_evicted(&mut self, texture_cache: &TextureCache) {
+        self.glyph_key_caches
+            .values_mut()
+            .for_each(|cache| cache.clear_evicted(texture_cache));
+    }
+
+    /// If possible, remove entirely any empty glyph key caches.
+    fn clear_empty_caches(&mut self, glyph_rasterizer: &mut GlyphRasterizer) {
+        self.glyph_key_caches.retain(|font, cache| {
+            // Discard the glyph key cache if it has no valid glyphs.
+            let keep = !cache.is_empty();
+            if !keep {
+                glyph_rasterizer.delete_font_instance(font);
+            }
+            keep
+        });
+    }
+
+    /// Per-frame maintenance: records the frame id, purges evicted glyphs, then removes empty caches.
+    pub fn begin_frame(
+        &mut self,
+        stamp: FrameStamp,
+        texture_cache: &mut TextureCache,
+        glyph_rasterizer: &mut GlyphRasterizer,
+    ) {
+        profile_scope!("begin_frame");
+        self.current_frame = stamp.frame_id();
+        self.clear_evicted(texture_cache);
+        // Evicting glyphs may have left some caches empty; drop those too.
+        self.clear_empty_caches(glyph_rasterizer);
+    }
+}
diff --git a/gfx/wr/webrender/src/gpu_cache.rs b/gfx/wr/webrender/src/gpu_cache.rs
new file mode 100644
index 0000000000..2f158aff2c
--- /dev/null
+++ b/gfx/wr/webrender/src/gpu_cache.rs
@@ -0,0 +1,937 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! Overview of the GPU cache.
+//!
+//! The main goal of the GPU cache is to allow on-demand
+//! allocation and construction of GPU resources for the
+//! vertex shaders to consume.
+//!
+//! Every item that wants to be stored in the GPU cache
+//! should create a GpuCacheHandle that is used to refer
+//! to a cached GPU resource. Creating a handle is a
+//! cheap operation, that does *not* allocate room in the
+//! cache.
+//!
+//! On any frame when that data is required, the caller
+//! must request that handle, via ```request```. If the
+//! data is not in the cache, the user provided closure
+//! will be invoked to build the data.
+//!
+//! After ```end_frame``` has occurred, callers can
+//! use the ```get_address``` API to get the allocated
+//! address in the GPU cache of a given resource slot
+//! for this frame.
+
+use api::{DebugFlags, DocumentId, PremultipliedColorF};
+#[cfg(test)]
+use api::IdNamespace;
+use api::units::*;
+use euclid::{HomogeneousVector, Box2D};
+use crate::internal_types::{FastHashMap, FastHashSet, FrameStamp, FrameId};
+use crate::profiler::{self, TransactionProfile};
+use crate::prim_store::VECS_PER_SEGMENT;
+use crate::renderer::MAX_VERTEX_TEXTURE_WIDTH;
+use crate::util::VecHelper;
+use std::{u16, u32};
+use std::num::NonZeroU32;
+use std::ops::Add;
+use std::time::{Duration, Instant};
+
+
+/// At the time of this writing, Firefox uses about 15 GPU cache rows on
+/// startup, and then gradually works its way up to the mid-30s with normal
+/// browsing.
+pub const GPU_CACHE_INITIAL_HEIGHT: i32 = 20;
+const NEW_ROWS_PER_RESIZE: i32 = 10; // NOTE(review): presumably rows added per texture resize — confirm at use site
+
+/// The number of frames an entry can go unused before being evicted.
+const FRAMES_BEFORE_EVICTION: usize = 10;
+
+/// The ratio of utilized blocks to total blocks below which we start the clock
+/// on reclaiming memory.
+const RECLAIM_THRESHOLD: f32 = 0.2;
+
+/// The amount of time (in seconds, going by the `_S` suffix) utilization must be
+/// below the above threshold before we blow away the cache and rebuild it.
+const RECLAIM_DELAY_S: u64 = 5;
+
+#[derive(Debug, Copy, Clone, Eq, MallocSizeOf, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct Epoch(u32); // block generation counter; `next()` advances it with wrapping arithmetic
+
+impl Epoch {
+    fn next(&mut self) {
+        self.0 = self.0.wrapping_add(1); // wraps back to 0 after `u32::MAX`
+    }
+}
+
+#[derive(Debug, Copy, Clone, MallocSizeOf)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct CacheLocation {
+    block_index: BlockIndex, // which block in the block array the handle refers to
+    epoch: Epoch, // NOTE(review): presumably compared with the block's epoch to detect stale handles — confirm
+}
+
+/// A single texel in an RGBAF32 texture - 16 bytes.
+#[derive(Copy, Clone, Debug, MallocSizeOf)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct GpuBlockData {
+    data: [f32; 4], // four `f32` values = 16 bytes
+}
+
+impl GpuBlockData {
+    pub const EMPTY: Self = Self { data: [0.0, 0.0, 0.0, 0.0] }; // all-zero block
+}
+
+/// Conversion helpers for `GpuBlockData`.
+impl From<PremultipliedColorF> for GpuBlockData {
+    /// Stores the colour channels in `r, g, b, a` order.
+    fn from(c: PremultipliedColorF) -> Self {
+        let channels = [c.r, c.g, c.b, c.a];
+        Self { data: channels }
+    }
+}
+
+impl From<[f32; 4]> for GpuBlockData {
+    fn from(data: [f32; 4]) -> Self {
+        Self { data } // the array is stored as-is
+    }
+}
+
+impl<P> From<Box2D<f32, P>> for GpuBlockData {
+    /// Packs a box into one block.
+    ///
+    /// The layout is `[min.x, min.y, max.x, max.y]`.
+    fn from(r: Box2D<f32, P>) -> Self {
+        let (lo, hi) = (r.min, r.max);
+        // Minimum corner first, then the maximum corner.
+        Self {
+            data: [lo.x, lo.y, hi.x, hi.y],
+        }
+    }
+}
+
+impl<P> From<HomogeneousVector<f32, P>> for GpuBlockData {
+    /// Packs a homogeneous vector into one block.
+    ///
+    /// The layout is `[x, y, z, w]`.
+    fn from(v: HomogeneousVector<f32, P>) -> Self {
+        // Pull the components out first so the literal below stays short.
+        let (x, y, z, w) = (v.x, v.y, v.z, v.w);
+        Self {
+            data: [x, y, z, w],
+        }
+    }
+}
+
+impl From<TexelRect> for GpuBlockData {
+    /// Stores the rect as `[uv0.x, uv0.y, uv1.x, uv1.y]`.
+    fn from(tr: TexelRect) -> Self {
+        let (uv0, uv1) = (tr.uv0, tr.uv1);
+        Self { data: [uv0.x, uv0.y, uv1.x, uv1.y] }
+    }
+}
+
+
+/// A handle to a GPU resource.
+#[derive(Debug, Copy, Clone, MallocSizeOf)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct GpuCacheHandle {
+    location: Option<CacheLocation>, // `None` for a handle fresh out of `new()`
+}
+
+impl GpuCacheHandle {
+    pub fn new() -> Self {
+        Self { location: None } // a new handle starts without a cache location
+    }
+
+    pub fn as_int(self, gpu_cache: &GpuCache) -> i32 {
+        gpu_cache.get_address(&self).as_int() // resolve the address, then flatten it to one integer
+    }
+}
+
+/// A unique address in the GPU cache. These are uploaded
+/// as part of the primitive instances, to allow the vertex
+/// shader to fetch the specific data.
+#[derive(Copy, Debug, Clone, MallocSizeOf, Eq, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct GpuCacheAddress {
+    pub u: u16, // added last in `as_int`: `v * MAX_VERTEX_TEXTURE_WIDTH + u`
+    pub v: u16, // multiplied by `MAX_VERTEX_TEXTURE_WIDTH` in `as_int`
+}
+
+impl GpuCacheAddress {
+    /// Builds an address from `usize` coordinates, narrowing each with `as u16`.
+    fn new(u: usize, v: usize) -> Self {
+        Self { u: u as u16, v: v as u16 }
+    }
+
+    /// Sentinel address with both coordinates set to `u16::MAX`.
+    pub const INVALID: GpuCacheAddress = GpuCacheAddress { u: u16::MAX, v: u16::MAX };
+
+    /// Flattens the address into a single integer,
+    /// computed as `v * MAX_VERTEX_TEXTURE_WIDTH + u`.
+    pub fn as_int(self) -> i32 {
+        // TODO(gw): Temporarily encode GPU Cache addresses as a single int.
+        //           In the future, we can change the PrimitiveInstanceData struct
+        //           to use 2x u16 for the vertex attribute instead of an i32.
+        let row_width = MAX_VERTEX_TEXTURE_WIDTH as i32;
+        let (column, row) = (self.u as i32, self.v as i32);
+        row * row_width + column
+    }
+}
+
+impl Add<usize> for GpuCacheAddress {
+    type Output = GpuCacheAddress;
+
+    /// Returns the address `other` positions further along `u`, with `v`
+    /// left unchanged.
+    fn add(self, other: usize) -> GpuCacheAddress {
+        let u = self.u + other as u16;
+        GpuCacheAddress { u, ..self }
+    }
+}
+
+/// An entry in a free-list of blocks in the GPU cache.
+#[derive(Debug, MallocSizeOf)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct Block {
+    /// The location in the cache of this block.
+    address: GpuCacheAddress,
+    /// The current epoch (generation) of this block.
+    epoch: Epoch,
+    /// Index of the next block in the list this block
+    /// belongs to (either a free-list or the
+    /// occupied list).
+    next: Option<BlockIndex>,
+    /// The last frame this block was referenced.
+    last_access_time: FrameId,
+}
+
+impl Block {
+    /// Assembles a block from its parts; `frame_id` becomes the
+    /// block's `last_access_time`.
+    fn new(
+        address: GpuCacheAddress,
+        next: Option<BlockIndex>,
+        frame_id: FrameId,
+        epoch: Epoch,
+    ) -> Self {
+        let last_access_time = frame_id;
+        Self { address, epoch, next, last_access_time }
+    }
+
+    /// Moves this block to its next epoch and raises `max_epoch` to at
+    /// least the new value.
+    fn advance_epoch(&mut self, max_epoch: &mut Epoch) {
+        self.epoch.next();
+        // `max_epoch` is never lowered here.
+        max_epoch.0 = max_epoch.0.max(self.epoch.0);
+    }
+
+    /// An invalid placeholder block: zero address, epoch 0, no link and
+    /// `FrameId::INVALID` as its last access time.
+    pub const INVALID: Block = Block {
+        address: GpuCacheAddress { u: 0, v: 0 },
+        epoch: Epoch(0),
+        next: None,
+        last_access_time: FrameId::INVALID,
+    };
+}
+
+/// Represents the index of a `Block` in the block array. We only create such
+/// structs for blocks that represent the start of a chunk.
+///
+/// Because we use `Option<BlockIndex>` in a lot of places, we use a `NonZeroU32`
+/// here and avoid ever using the index zero.
+#[derive(Debug, Copy, Clone, MallocSizeOf)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct BlockIndex(NonZeroU32);
+
+impl BlockIndex {
+    fn new(idx: usize) -> Self {
+        debug_assert!(idx <= u32::MAX as usize);
+        Self(NonZeroU32::new(idx as u32).expect("Index zero forbidden")) // index zero is never used
+    }
+
+    fn get(&self) -> usize {
+        self.0.get() as usize // widen the stored non-zero `u32` back to `usize`
+    }
+}
+
+/// A row in the cache texture.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+struct Row {
+    /// The fixed size of blocks that this row supports.
+    /// Each row becomes a slab allocator for a fixed block size.
+    /// This means no dealing with fragmentation within a cache
+    /// row as items are allocated and freed.
+    block_count_per_item: usize,
+}
+
+impl Row {
+    /// Creates a row whose items each span `block_count_per_item`
+    /// blocks.
+    fn new(block_count_per_item: usize) -> Self {
+        Self { block_count_per_item }
+    }
+}
+
+/// An update operation that can be applied on the cache this frame.
+/// The list of updates is created by the render backend during frame
+/// construction. It's passed to the render thread where GL commands
+/// can be applied.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+pub enum GpuCacheUpdate {
+    Copy { // NOTE(review): the field meanings below are inferred from names — confirm against the renderer
+        block_index: usize, // presumably the first source block in `GpuCacheUpdateList::blocks`
+        block_count: usize, // presumably how many consecutive blocks to copy
+        address: GpuCacheAddress, // presumably the destination in the cache texture
+    },
+}
+
+/// Command to inform the debug display in the renderer when chunks are allocated
+/// or freed.
+#[derive(MallocSizeOf)]
+pub enum GpuCacheDebugCmd {
+    /// Describes an allocated chunk.
+    Alloc(GpuCacheDebugChunk),
+    /// Describes a freed chunk, identified by its address.
+    Free(GpuCacheAddress),
+}
+
+#[derive(Clone, MallocSizeOf)]
+pub struct GpuCacheDebugChunk { // payload of `GpuCacheDebugCmd::Alloc`
+    pub address: GpuCacheAddress,
+    pub size: usize, // NOTE(review): presumably measured in blocks — confirm
+}
+
+#[must_use]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+pub struct GpuCacheUpdateList {
+    /// The frame this update list was generated from.
+    pub frame_id: FrameId,
+    /// Whether the texture should be cleared before updates
+    /// are applied.
+    pub clear: bool,
+    /// The current height of the texture. The render thread
+    /// should resize the texture if required.
+    pub height: i32,
+    /// List of updates to apply.
+    pub updates: Vec<GpuCacheUpdate>,
+    /// A flat list of GPU blocks that are pending upload
+    /// to GPU memory.
+    pub blocks: Vec<GpuBlockData>,
+    /// Whole-state GPU block metadata for debugging.
+    #[cfg_attr(feature = "serde", serde(skip))]
+    pub debug_commands: Vec<GpuCacheDebugCmd>,
+}
+
+// Holds the free lists of fixed size blocks. Mostly
+// just serves to work around the borrow checker.
+//
+// Each field is the head of the free list for one bucket size
+// (the number in the field name is the bucket's block count);
+// `None` means that bucket currently has no free block.
+// The 341 bucket is the one non-power-of-two size; see
+// `get_actual_block_count_and_free_list` for the rationale.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+struct FreeBlockLists {
+    free_list_1: Option<BlockIndex>,
+    free_list_2: Option<BlockIndex>,
+    free_list_4: Option<BlockIndex>,
+    free_list_8: Option<BlockIndex>,
+    free_list_16: Option<BlockIndex>,
+    free_list_32: Option<BlockIndex>,
+    free_list_64: Option<BlockIndex>,
+    free_list_128: Option<BlockIndex>,
+    free_list_256: Option<BlockIndex>,
+    free_list_341: Option<BlockIndex>,
+    free_list_512: Option<BlockIndex>,
+    free_list_1024: Option<BlockIndex>,
+}
+
+impl FreeBlockLists {
+    // Creates the set of free lists with every bucket empty.
+    fn new() -> Self {
+        Self {
+            free_list_1: None,
+            free_list_2: None,
+            free_list_4: None,
+            free_list_8: None,
+            free_list_16: None,
+            free_list_32: None,
+            free_list_64: None,
+            free_list_128: None,
+            free_list_256: None,
+            free_list_341: None,
+            free_list_512: None,
+            free_list_1024: None,
+        }
+    }
+
+    // Maps a requested block count to the bucket that serves it, returning
+    // the bucket's block count together with the head of its free list.
+    //
+    // Buckets are the powers of two up to 1024, plus a 341 bucket.
+    // The 341 bucket is a bit of a cheat, since it's not quite a divisor
+    // of 1024: purecss-francine allocates many 260-block chunks, and packing
+    // three of those to a row beats giving each one half a row. As a result
+    // the allocation statistics under-report by one block for each row
+    // using 341-block buckets, which is fine.
+    //
+    // Panics if `block_count` is zero or greater than 1024.
+    fn get_actual_block_count_and_free_list(
+        &mut self,
+        block_count: usize,
+    ) -> (usize, &mut Option<BlockIndex>) {
+        debug_assert_eq!(MAX_VERTEX_TEXTURE_WIDTH, 1024, "Need to update bucketing");
+
+        if block_count == 0 {
+            panic!("Can't allocate zero sized blocks!");
+        }
+        if block_count > 1024 {
+            panic!("Can't allocate > MAX_VERTEX_TEXTURE_WIDTH per resource!");
+        }
+
+        // Everything rounds up to the next power of two, except the
+        // 257..=341 range which is served by the dedicated 341 bucket.
+        let bucket_size = if block_count > 256 && block_count <= 341 {
+            341
+        } else {
+            block_count.next_power_of_two()
+        };
+
+        let free_list = match bucket_size {
+            1 => &mut self.free_list_1,
+            2 => &mut self.free_list_2,
+            4 => &mut self.free_list_4,
+            8 => &mut self.free_list_8,
+            16 => &mut self.free_list_16,
+            32 => &mut self.free_list_32,
+            64 => &mut self.free_list_64,
+            128 => &mut self.free_list_128,
+            256 => &mut self.free_list_256,
+            341 => &mut self.free_list_341,
+            512 => &mut self.free_list_512,
+            1024 => &mut self.free_list_1024,
+            // The range checks above leave no other possible bucket size.
+            _ => unreachable!(),
+        };
+
+        (bucket_size, free_list)
+    }
+}
+
+// CPU-side representation of the GPU resource cache texture.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+struct Texture {
+    // Current texture height. Starts at GPU_CACHE_INITIAL_HEIGHT and
+    // grows by NEW_ROWS_PER_RESIZE whenever a new row is needed and
+    // every existing row is already in use.
+    height: i32,
+    // All blocks that have been created for this texture. Index 0
+    // holds an invalid placeholder so that no real block has index 0.
+    blocks: Vec<Block>,
+    // Metadata about each allocated row.
+    rows: Vec<Row>,
+    // The base Epoch for this texture.
+    base_epoch: Epoch,
+    // The maximum epoch reached. We track this along with the above so
+    // that we can rebuild the Texture and avoid collisions with handles
+    // allocated for the old texture.
+    max_epoch: Epoch,
+    // Free lists of available blocks for each supported
+    // block size in the texture. These are intrusive
+    // linked lists.
+    free_lists: FreeBlockLists,
+    // Heads of the linked lists of currently occupied blocks,
+    // one list per document. This makes it faster to iterate
+    // blocks looking for candidates to be evicted from the cache.
+    occupied_list_heads: FastHashMap<DocumentId, BlockIndex>,
+    // Pending blocks that have been written this frame
+    // and will need to be sent to the GPU.
+    pending_blocks: Vec<GpuBlockData>,
+    // Pending update commands.
+    updates: Vec<GpuCacheUpdate>,
+    // Profile stats: the number of blocks currently allocated,
+    // counted in bucket sizes rather than requested sizes.
+    allocated_block_count: usize,
+    // The stamp at which we first reached our threshold for reclaiming `GpuCache`
+    // memory, or `None` if the threshold hasn't been reached.
+    #[cfg_attr(feature = "serde", serde(skip))]
+    reached_reclaim_threshold: Option<Instant>,
+    // List of debug commands to be sent to the renderer when the GPU cache
+    // debug display is enabled.
+    #[cfg_attr(feature = "serde", serde(skip))]
+    debug_commands: Vec<GpuCacheDebugCmd>,
+    // The current debug flags for the system.
+    debug_flags: DebugFlags,
+}
+
+impl Texture {
+    // Builds an empty texture whose epochs start at `base_epoch`.
+    fn new(base_epoch: Epoch, debug_flags: DebugFlags) -> Self {
+        Self {
+            height: GPU_CACHE_INITIAL_HEIGHT,
+            // Slot 0 is a permanently invalid placeholder, so a real block
+            // never gets index 0. That is what allows BlockIndex to be a
+            // NonZeroU32, which saves memory.
+            blocks: vec![Block::INVALID],
+            rows: Vec::new(),
+            base_epoch,
+            max_epoch: base_epoch,
+            free_lists: FreeBlockLists::new(),
+            occupied_list_heads: FastHashMap::default(),
+            pending_blocks: Vec::new(),
+            updates: Vec::new(),
+            allocated_block_count: 0,
+            reached_reclaim_threshold: None,
+            debug_commands: Vec::new(),
+            debug_flags,
+        }
+    }
+
+    // Allocate a slot for `block_count` blocks and mark it as occupied.
+    //
+    // ```pending_block_index``` is where the data was pushed into the texture
+    // ```pending_blocks``` array, or `None` when no data accompanies the
+    // allocation (in which case no `GpuCacheUpdate::Copy` is queued).
+    // ```frame_stamp``` supplies the frame id recorded as the block's last
+    // access time and the document whose occupied list the block joins.
+    //
+    // Returns the location (block index and epoch) allocated for this data.
+    // Panics if `block_count` is zero or exceeds the largest bucket size.
+    fn push_data(
+        &mut self,
+        pending_block_index: Option<usize>,
+        block_count: usize,
+        frame_stamp: FrameStamp
+    ) -> CacheLocation {
+        debug_assert!(frame_stamp.is_valid());
+        // Find the appropriate free list to use based on the block size.
+        // `alloc_size` is the bucket size, which may exceed `block_count`.
+        let (alloc_size, free_list) = self.free_lists
+            .get_actual_block_count_and_free_list(block_count);
+
+        // See if we need a new row (if free-list has nothing available)
+        if free_list.is_none() {
+            // Every row of the current height is in use: grow the height.
+            // The new value is reported via `GpuCacheUpdateList::height`.
+            if self.rows.len() as i32 == self.height {
+                self.height += NEW_ROWS_PER_RESIZE;
+            }
+
+            // Create a new row. The integer division means a row of
+            // 341-block items holds three of them.
+            let items_per_row = MAX_VERTEX_TEXTURE_WIDTH / alloc_size;
+            let row_index = self.rows.len();
+            self.rows.push(Row::new(alloc_size));
+
+            // Create a ```Block``` for each possible allocation address
+            // in this row, and link it in to the free-list for this
+            // block size. Each new block is handed the index of the block
+            // created before it, so the last block created becomes the head.
+            let mut prev_block_index = None;
+            for i in 0 .. items_per_row {
+                let address = GpuCacheAddress::new(i * alloc_size, row_index);
+                let block_index = BlockIndex::new(self.blocks.len());
+                let block = Block::new(address, prev_block_index, frame_stamp.frame_id(), self.base_epoch);
+                self.blocks.push(block);
+                prev_block_index = Some(block_index);
+            }
+
+            *free_list = prev_block_index;
+        }
+
+        // Given the code above, it's now guaranteed that there is a block
+        // available in the appropriate free-list. Pull a block from the
+        // head of the list.
+        let free_block_index = free_list.take().unwrap();
+        let block = &mut self.blocks[free_block_index.get()];
+        *free_list = block.next;
+
+        // Add the block to the head of the occupied linked list of the
+        // document named by the frame stamp.
+        block.next = self.occupied_list_heads.get(&frame_stamp.document_id()).cloned();
+        block.last_access_time = frame_stamp.frame_id();
+        self.occupied_list_heads.insert(frame_stamp.document_id(), free_block_index);
+        // The statistics count the whole bucket, not just the requested blocks.
+        self.allocated_block_count += alloc_size;
+
+        if let Some(pending_block_index) = pending_block_index {
+            // Add this update to the pending list of blocks that need
+            // to be updated on the GPU.
+            self.updates.push(GpuCacheUpdate::Copy {
+                block_index: pending_block_index,
+                block_count,
+                address: block.address,
+            });
+        }
+
+        // If we're using the debug display, communicate the allocation to the
+        // renderer thread. Note that we do this regardless of whether or not
+        // pending_block_index is None (if it is, the renderer thread will fill
+        // in the data via a deferred resolve, but the block is still considered
+        // allocated).
+        if self.debug_flags.contains(DebugFlags::GPU_CACHE_DBG) {
+            self.debug_commands.push(GpuCacheDebugCmd::Alloc(GpuCacheDebugChunk {
+                address: block.address,
+                size: block_count,
+            }));
+        }
+
+        CacheLocation {
+            block_index: free_block_index,
+            epoch: block.epoch,
+        }
+    }
+
+    // Run through the list of occupied cache blocks and evict
+    // any old blocks that haven't been referenced for a while.
+    //
+    // Only the occupied list of the document named by `frame_stamp`
+    // is traversed. A block is evicted when its last access is more than
+    // FRAMES_BEFORE_EVICTION frames older than the stamp's frame id.
+    fn evict_old_blocks(&mut self, frame_stamp: FrameStamp) {
+        debug_assert!(frame_stamp.is_valid());
+        // Prune any old items from the list to make room.
+        // Traverse the occupied linked list and see
+        // which items have not been used for a long time.
+        let mut current_block = self.occupied_list_heads.get(&frame_stamp.document_id()).map(|x| *x);
+        // The most recently visited block that stayed in the occupied list,
+        // or `None` while every block visited so far has been evicted.
+        let mut prev_block: Option<BlockIndex> = None;
+
+        while let Some(index) = current_block {
+            let (next_block, should_unlink) = {
+                let block = &mut self.blocks[index.get()];
+
+                // Remember the successor now: `block.next` is overwritten
+                // below if the block moves to a free list.
+                let next_block = block.next;
+                let mut should_unlink = false;
+
+                // If this resource has not been used in the last
+                // few frames, free it from the texture and mark
+                // as empty.
+                if block.last_access_time + FRAMES_BEFORE_EVICTION < frame_stamp.frame_id() {
+                    should_unlink = true;
+
+                    // Get the row metadata from the address.
+                    let row = &mut self.rows[block.address.v as usize];
+
+                    // Use the row metadata to determine which free-list
+                    // this block belongs to.
+                    let (_, free_list) = self.free_lists
+                        .get_actual_block_count_and_free_list(row.block_count_per_item);
+
+                    // Advance the epoch so that handles still carrying the
+                    // old epoch no longer match this block, then push the
+                    // block onto the head of its free list.
+                    block.advance_epoch(&mut self.max_epoch);
+                    block.next = *free_list;
+                    *free_list = Some(index);
+
+                    self.allocated_block_count -= row.block_count_per_item;
+
+                    if self.debug_flags.contains(DebugFlags::GPU_CACHE_DBG) {
+                        let cmd = GpuCacheDebugCmd::Free(block.address);
+                        self.debug_commands.push(cmd);
+                    }
+                };
+
+                (next_block, should_unlink)
+            };
+
+            // If the block was released, we will need to remove it
+            // from the occupied linked list.
+            if should_unlink {
+                match prev_block {
+                    Some(prev_block) => {
+                        // Bypass the evicted block.
+                        self.blocks[prev_block.get()].next = next_block;
+                    }
+                    None => {
+                        // The evicted block was the list head: promote its
+                        // successor, or drop the document's entry entirely
+                        // when the list has become empty.
+                        match next_block {
+                            Some(next_block) => {
+                                self.occupied_list_heads.insert(frame_stamp.document_id(), next_block);
+                            }
+                            None => {
+                                self.occupied_list_heads.remove(&frame_stamp.document_id());
+                            }
+                        }
+                    }
+                }
+            } else {
+                prev_block = current_block;
+            }
+
+            current_block = next_block;
+        }
+    }
+
+    /// Returns the fraction of the blocks in all allocated rows that are
+    /// currently accounted as allocated.
+    fn utilization(&self) -> f32 {
+        let capacity = self.rows.len() * MAX_VERTEX_TEXTURE_WIDTH;
+        debug_assert!(capacity > 0);
+
+        let used = self.allocated_block_count as f32;
+        let ratio = used / capacity as f32;
+        debug_assert!(ratio >= 0.0 && ratio <= 1.0, "Bad ratio: {}", ratio);
+        ratio
+    }
+}
+
+
+/// A wrapper object for GPU data requests,
+/// works as a container that can only grow.
+///
+/// Blocks pushed through the request are appended to the texture's pending
+/// block list. When the request is dropped, the blocks are allocated in the
+/// cache and the resulting location is stored into the handle.
+#[must_use]
+pub struct GpuDataRequest<'a> {
+    //TODO: remove this, see
+    // https://bugzilla.mozilla.org/show_bug.cgi?id=1690546
+    // NOTE(review): the `Drop` impl writes `handle.location`, so the field
+    // is not dead; the `allow` may be unnecessary - confirm.
+    #[allow(dead_code)]
+    handle: &'a mut GpuCacheHandle,
+    /// Frame stamp captured when the request was created.
+    frame_stamp: FrameStamp,
+    /// Length of the texture's pending block list when the request was
+    /// created, i.e. the index of the first block this request pushes.
+    start_index: usize,
+    /// Upper bound on the number of pushed blocks (debug-asserted on drop).
+    max_block_count: usize,
+    /// The texture receiving the pending blocks and the allocation.
+    texture: &'a mut Texture,
+}
+
+impl<'a> GpuDataRequest<'a> {
+    /// Converts `block` into a `GpuBlockData` and appends it to the
+    /// texture's list of pending blocks.
+    pub fn push<B>(&mut self, block: B)
+    where
+        B: Into<GpuBlockData>,
+    {
+        let data: GpuBlockData = block.into();
+        self.texture.pending_blocks.push(data);
+    }
+
+    /// Writes the GPU cache data for an individual segment: the local
+    /// rect followed by the extra data.
+    pub fn write_segment(
+        &mut self,
+        local_rect: LayoutRect,
+        extra_data: [f32; 4],
+    ) {
+        // Evaluated only to reference the constant next to the code
+        // that determines how many blocks a segment takes.
+        let _ = VECS_PER_SEGMENT;
+        self.push(local_rect);
+        self.push(extra_data);
+    }
+
+    /// Number of blocks pushed since this request was created.
+    pub fn current_used_block_num(&self) -> usize {
+        let pending_len = self.texture.pending_blocks.len();
+        pending_len - self.start_index
+    }
+}
+
+impl<'a> Drop for GpuDataRequest<'a> {
+    /// Finalizes the request: allocates cache space for the blocks pushed
+    /// through it and stores the resulting location in the handle.
+    fn drop(&mut self) {
+        let written = self.current_used_block_num();
+        debug_assert!(written <= self.max_block_count);
+
+        // Hand the run of pending blocks over to the texture's update list.
+        let first_pending = Some(self.start_index);
+        let allocated = self.texture.push_data(first_pending, written, self.frame_stamp);
+        self.handle.location = Some(allocated);
+    }
+}
+
+
+/// The main LRU cache interface.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+pub struct GpuCache {
+    /// Stamp of the current frame, set by `begin_frame`.
+    /// `FrameStamp::INVALID` until the first frame begins.
+    now: FrameStamp,
+    /// CPU-side texture allocator.
+    texture: Texture,
+    /// Number of blocks requested this frame that don't
+    /// need to be re-uploaded.
+    saved_block_count: usize,
+    /// The current debug flags for the system.
+    debug_flags: DebugFlags,
+    /// Whether there is a pending clear to send with the
+    /// next update.
+    pending_clear: bool,
+    /// Indicates that prepare_for_frames has been called for this group of frames.
+    /// Used for sanity checks.
+    prepared_for_frames: bool,
+    /// This indicates that we performed a cleanup operation which requires all
+    /// documents to build a frame.
+    requires_frame_build: bool,
+    /// The set of documents that still have to build a frame in this update.
+    /// A document is removed in `end_frame`, and `bookkeep_after_frames`
+    /// asserts that the set is empty. Used for sanity checks.
+    document_frames_to_build: FastHashSet<DocumentId>,
+}
+
+impl GpuCache {
+    /// Creates an empty cache with no debug flags set and an invalid
+    /// current frame stamp.
+    pub fn new() -> Self {
+        let flags = DebugFlags::empty();
+        let texture = Texture::new(Epoch(0), flags);
+
+        Self {
+            now: FrameStamp::INVALID,
+            texture,
+            saved_block_count: 0,
+            debug_flags: flags,
+            pending_clear: false,
+            prepared_for_frames: false,
+            requires_frame_build: false,
+            document_frames_to_build: FastHashSet::default(),
+        }
+    }
+
+    /// Builds a `GpuCache` that has already begun a frame with a valid
+    /// `FrameStamp`. Unit tests that instantiate the `GpuCache` directly
+    /// can use this to avoid panics.
+    #[cfg(test)]
+    pub fn new_for_testing() -> Self {
+        let mut stamp = FrameStamp::first(DocumentId::new(IdNamespace(1), 1));
+        stamp.advance();
+
+        let mut cache = Self::new();
+        // `begin_frame` asserts that the cache was prepared for frames.
+        cache.prepared_for_frames = true;
+        cache.begin_frame(stamp);
+        cache
+    }
+
+    /// Throws away the whole GPU cache and starts over with a fresh texture.
+    /// Must not be called once gpu cache entries for the next frame have
+    /// already been requested; panics if there are pending updates.
+    pub fn clear(&mut self) {
+        assert!(self.texture.updates.is_empty(), "Clearing with pending updates");
+
+        // Start the new texture one epoch past anything the old one reached,
+        // so handles allocated for the old texture cannot collide with it.
+        let mut base_epoch = self.texture.max_epoch;
+        base_epoch.next();
+        self.texture = Texture::new(base_epoch, self.debug_flags);
+
+        self.pending_clear = true;
+        self.requires_frame_build = true;
+        self.saved_block_count = 0;
+    }
+
+    /// Returns whether a cleanup (`clear`) has been performed that requires
+    /// all documents to build a frame. The flag is reset by
+    /// `bookkeep_after_frames`.
+    pub fn requires_frame_build(&self) -> bool {
+        self.requires_frame_build
+    }
+
+    /// Marks the start of a group of frames. If utilization has been low
+    /// for long enough (see `should_reclaim_memory`), the cache is cleared.
+    pub fn prepare_for_frames(&mut self) {
+        self.prepared_for_frames = true;
+        if !self.should_reclaim_memory() {
+            return;
+        }
+
+        self.clear();
+        debug_assert!(self.document_frames_to_build.is_empty());
+        // NOTE(review): `clear()` has just replaced `self.texture` with a
+        // fresh `Texture`, whose `occupied_list_heads` is empty, so this
+        // currently records no documents - confirm whether the keys were
+        // meant to be captured before clearing.
+        self.document_frames_to_build
+            .extend(self.texture.occupied_list_heads.keys().cloned());
+    }
+
+    /// Marks the end of a group of frames started by `prepare_for_frames`
+    /// and resets the per-group flags.
+    ///
+    /// Panics if a document is still recorded as having to build a frame,
+    /// or if `prepare_for_frames` was not called for this group.
+    pub fn bookkeep_after_frames(&mut self) {
+        assert!(self.document_frames_to_build.is_empty());
+        assert!(self.prepared_for_frames);
+        self.requires_frame_build = false;
+        self.prepared_for_frames = false;
+    }
+
+    /// Begin a new frame.
+    ///
+    /// Records `stamp` as the current frame, evicts blocks of the stamp's
+    /// document that have not been used recently, and resets the per-frame
+    /// saved block counter. Panics unless `prepare_for_frames` was called.
+    pub fn begin_frame(&mut self, stamp: FrameStamp) {
+        // Pending blocks of the previous frame must have been extracted.
+        debug_assert!(self.texture.pending_blocks.is_empty());
+        assert!(self.prepared_for_frames);
+        profile_scope!("begin_frame");
+        self.now = stamp;
+        self.texture.evict_old_blocks(self.now);
+        self.saved_block_count = 0;
+    }
+
+    // Invalidate the block a handle refers to, if it still refers to one.
+    // Afterwards the next call to request() for this handle
+    // will rebuild the data and upload it to the GPU.
+    pub fn invalidate(&mut self, handle: &GpuCacheHandle) {
+        let location = match handle.location {
+            Some(ref location) => location,
+            None => return,
+        };
+        let block = match self.texture.blocks.get_mut(location.block_index.get()) {
+            Some(block) => block,
+            None => return,
+        };
+
+        // A differing epoch means the block has already been re-assigned;
+        // such blocks must be left alone.
+        if block.epoch == location.epoch {
+            block.advance_epoch(&mut self.texture.max_epoch);
+        }
+    }
+
+    /// Request a resource be added to the cache. If the resource
+    /// is already in the cache, `None` will be returned.
+    ///
+    /// Otherwise a `GpuDataRequest` is returned. The caller pushes the
+    /// resource's blocks through it, and dropping the request allocates the
+    /// blocks and stores the new location in `handle`.
+    pub fn request<'a>(&'a mut self, handle: &'a mut GpuCacheHandle) -> Option<GpuDataRequest<'a>> {
+        // The only reassignment below is on the path that returns `None`,
+        // so a returned request always carries MAX_VERTEX_TEXTURE_WIDTH.
+        let mut max_block_count = MAX_VERTEX_TEXTURE_WIDTH;
+        // Check if the allocation for this handle is still valid.
+        if let Some(ref location) = handle.location {
+            if let Some(block) = self.texture.blocks.get_mut(location.block_index.get()) {
+                // A matching epoch means the block still belongs to this handle.
+                if block.epoch == location.epoch {
+                    max_block_count = self.texture.rows[block.address.v as usize].block_count_per_item;
+                    // Count the saving once per frame, however often the
+                    // handle is requested.
+                    if block.last_access_time != self.now.frame_id() {
+                        // Mark last access time to avoid evicting this block.
+                        block.last_access_time = self.now.frame_id();
+                        self.saved_block_count += max_block_count;
+                    }
+                    return None;
+                }
+            }
+        }
+
+        debug_assert!(self.now.is_valid());
+        Some(GpuDataRequest {
+            handle,
+            frame_stamp: self.now,
+            start_index: self.texture.pending_blocks.len(),
+            texture: &mut self.texture,
+            max_block_count,
+        })
+    }
+
+    // Upload an array of data blocks to the GPU unconditionally
+    // for this frame. The cache handle will assert if the caller
+    // tries to retrieve the address of this handle on a subsequent
+    // frame. Typically used for data that changes every frame,
+    // which therefore makes no sense to try and cache.
+    pub fn push_per_frame_blocks(&mut self, blocks: &[GpuBlockData]) -> GpuCacheHandle {
+        let pending = &mut self.texture.pending_blocks;
+        let first_pending = pending.len();
+        pending.extend_from_slice(blocks);
+
+        let allocated = self.texture
+            .push_data(Some(first_pending), blocks.len(), self.now);
+        GpuCacheHandle { location: Some(allocated) }
+    }
+
+    // Allocate room in the cache for per-frame blocks without
+    // supplying their data. The render thread resolves them
+    // later via the external image callback.
+    pub fn push_deferred_per_frame_blocks(&mut self, block_count: usize) -> GpuCacheHandle {
+        let location = Some(self.texture.push_data(None, block_count, self.now));
+        GpuCacheHandle { location }
+    }
+
+    /// End the frame. Records the cache statistics in `profile`, updates
+    /// the reclaim-threshold timer and marks the current document as having
+    /// built its frame.
+    ///
+    /// Returns the stamp of the frame that just ended. The pending updates
+    /// themselves are retrieved separately via `extract_updates`.
+    pub fn end_frame(
+        &mut self,
+        profile: &mut TransactionProfile,
+    ) -> FrameStamp {
+        profile_scope!("end_frame");
+        profile.set(profiler::GPU_CACHE_ROWS_TOTAL, self.texture.rows.len());
+        profile.set(profiler::GPU_CACHE_BLOCKS_TOTAL, self.texture.allocated_block_count);
+        profile.set(profiler::GPU_CACHE_BLOCKS_SAVED, self.saved_block_count);
+
+        // Reclaiming is only considered once the texture has grown past its
+        // initial height. The row-count check also guarantees at least one
+        // row exists before `utilization()` divides by the row capacity.
+        let reached_threshold =
+            self.texture.rows.len() > (GPU_CACHE_INITIAL_HEIGHT as usize) &&
+            self.texture.utilization() < RECLAIM_THRESHOLD;
+        if reached_threshold {
+            // Keep the earliest instant at which the threshold was reached.
+            self.texture.reached_reclaim_threshold.get_or_insert_with(Instant::now);
+        } else {
+            self.texture.reached_reclaim_threshold = None;
+        }
+
+        self.document_frames_to_build.remove(&self.now.document_id());
+        self.now
+    }
+
+    /// Tells whether the cache has stayed below the utilization threshold
+    /// for more than `RECLAIM_DELAY_S` seconds, in which case it should be
+    /// blown away and rebuilt.
+    pub fn should_reclaim_memory(&self) -> bool {
+        match self.texture.reached_reclaim_threshold {
+            Some(since) => since.elapsed() > Duration::from_secs(RECLAIM_DELAY_S),
+            // The threshold is not currently reached.
+            None => false,
+        }
+    }
+
+    /// Takes the pending updates, blocks and debug commands out of the
+    /// cache, together with the pending clear flag (which is reset).
+    pub fn extract_updates(&mut self) -> GpuCacheUpdateList {
+        let clear = std::mem::replace(&mut self.pending_clear, false);
+
+        let debug_commands = self.texture.debug_commands.take_and_preallocate();
+        let updates = self.texture.updates.take_and_preallocate();
+        let blocks = self.texture.pending_blocks.take_and_preallocate();
+
+        GpuCacheUpdateList {
+            frame_id: self.now.frame_id(),
+            clear,
+            height: self.texture.height,
+            debug_commands,
+            updates,
+            blocks,
+        }
+    }
+
+    /// Sets the current debug flags for the system.
+    ///
+    /// The flags are stored both on the cache (used when a new texture is
+    /// created in `clear`) and on the current texture.
+    pub fn set_debug_flags(&mut self, flags: DebugFlags) {
+        self.debug_flags = flags;
+        self.texture.debug_flags = flags;
+    }
+
+    /// Get the actual GPU address in the texture for a given slot ID.
+    /// It's assumed at this point that the given slot has been requested
+    /// and built for this frame.
+    ///
+    /// Panics if the handle has no location. A stale handle (epoch mismatch)
+    /// or a block not accessed this frame is only caught by debug assertions.
+    pub fn get_address(&self, id: &GpuCacheHandle) -> GpuCacheAddress {
+        let location = id.location.expect("handle not requested or allocated!");
+        let block = &self.texture.blocks[location.block_index.get()];
+        debug_assert_eq!(block.epoch, location.epoch);
+        debug_assert_eq!(block.last_access_time, self.now.frame_id());
+        block.address
+    }
+}
+
+#[test]
+#[cfg(target_pointer_width = "64")]
+fn test_struct_sizes() {
+    // The global vec can end up holding a lot of blocks, so a small
+    // `Block` helps reduce memory overhead. Pin its size here.
+    let block_size = std::mem::size_of::<Block>();
+    assert_eq!(block_size, 24, "Block size changed");
+}
diff --git a/gfx/wr/webrender/src/gpu_types.rs b/gfx/wr/webrender/src/gpu_types.rs
new file mode 100644
index 0000000000..e044c9fe1a
--- /dev/null
+++ b/gfx/wr/webrender/src/gpu_types.rs
@@ -0,0 +1,904 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{AlphaType, PremultipliedColorF, YuvFormat, YuvRangedColorSpace};
+use api::units::*;
+use crate::composite::CompositeFeatures;
+use crate::segment::EdgeAaSegmentMask;
+use crate::spatial_tree::{SpatialTree, SpatialNodeIndex};
+use crate::gpu_cache::{GpuCacheAddress, GpuDataRequest};
+use crate::internal_types::FastHashMap;
+use crate::prim_store::ClipData;
+use crate::render_task::RenderTaskAddress;
+use crate::renderer::ShaderColorMode;
+use std::i32;
+use crate::util::{TransformedRectKind, MatrixHelpers};
+use glyph_rasterizer::SubpixelDirection;
+use crate::util::{ScaleOffset, pack_as_float};
+
+// Contains types that must exactly match the corresponding structures declared in GLSL.
+
+/// NOTE(review): presumably the number of vec4 GPU blocks that one
+/// transform occupies - confirm against the GLSL side.
+pub const VECS_PER_TRANSFORM: usize = 8;
+
+/// An `i32` z-buffer id, as handed out by `ZBufferIdGenerator::next`.
+#[derive(Copy, Clone, Debug, PartialEq)]
+#[repr(C)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ZBufferId(pub i32);
+
+impl ZBufferId {
+    /// Returns the sentinel id, which wraps `i32::MAX`.
+    pub fn invalid() -> Self {
+        Self(i32::MAX)
+    }
+}
+
+/// Hands out consecutive `ZBufferId`s, starting from zero.
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ZBufferIdGenerator {
+    /// The value the next call to `next()` will hand out.
+    next: i32,
+    /// Exclusive upper bound for ids; only enforced by a `debug_assert!`
+    /// in `next()`.
+    max_depth_ids: i32,
+}
+
+impl ZBufferIdGenerator {
+    /// Creates a generator whose first id is zero and whose ids are
+    /// expected to stay below `max_depth_ids`.
+    pub fn new(max_depth_ids: i32) -> Self {
+        Self { next: 0, max_depth_ids }
+    }
+
+    /// Returns the current id and advances the generator by one.
+    /// Running past `max_depth_ids` trips a debug assertion.
+    pub fn next(&mut self) -> ZBufferId {
+        debug_assert!(self.next < self.max_depth_ids);
+        let current = self.next;
+        self.next = current + 1;
+        ZBufferId(current)
+    }
+}
+
+/// Instance data for a rect-to-rect copy, going by the field names
+/// (NOTE(review): the consumer is outside this chunk - confirm).
+/// `#[repr(C)]`: the field order is part of the layout.
+#[derive(Clone, Debug)]
+#[repr(C)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct CopyInstance {
+    pub src_rect: DeviceRect,
+    pub dst_rect: DeviceRect,
+    pub dst_texture_size: DeviceSize,
+}
+
+/// Selects between local and screen space. The discriminants are fixed
+/// explicitly (`Local` = 0, `Screen` = 1).
+#[derive(Debug, Copy, Clone)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[repr(C)]
+pub enum RasterizationSpace {
+    Local = 0,
+    Screen = 1,
+}
+
+/// Stretch mode for box shadows. The discriminants are fixed explicitly
+/// (`Stretch` = 0, `Simple` = 1).
+#[derive(Debug, Copy, Clone, MallocSizeOf)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[repr(C)]
+pub enum BoxShadowStretchMode {
+    Stretch = 0,
+    Simple = 1,
+}
+
+/// Direction of a blur pass, represented as an `i32`
+/// (`Horizontal` = 0, `Vertical` = 1).
+#[repr(i32)]
+#[derive(Debug, Copy, Clone)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum BlurDirection {
+    Horizontal = 0,
+    // Implicitly 1, following `Horizontal`.
+    Vertical,
+}
+
+/// Instance data for one blur pass: the task being drawn, the task it
+/// reads from (going by the field names) and the blur direction.
+/// `#[repr(C)]`: the field order is part of the layout.
+#[derive(Clone, Debug)]
+#[repr(C)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct BlurInstance {
+    pub task_address: RenderTaskAddress,
+    pub src_task_address: RenderTaskAddress,
+    pub blur_direction: BlurDirection,
+}
+
+/// Instance data for a scaling operation, described by a target and a
+/// source rect in device space.
+/// `#[repr(C)]`: the field order is part of the layout.
+#[derive(Clone, Debug)]
+#[repr(C)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ScalingInstance {
+    pub target_rect: DeviceRect,
+    pub source_rect: DeviceRect,
+}
+
+/// Instance data for an SVG filter: the task being drawn, up to two
+/// input tasks, and packed `u16` parameters.
+/// `#[repr(C)]`: the field order is part of the layout.
+#[derive(Clone, Debug)]
+#[repr(C)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct SvgFilterInstance {
+    pub task_address: RenderTaskAddress,
+    pub input_1_task_address: RenderTaskAddress,
+    pub input_2_task_address: RenderTaskAddress,
+    // NOTE(review): the meaning of the `kind` values is defined outside
+    // this chunk - confirm against the shader.
+    pub kind: u16,
+    pub input_count: u16,
+    pub generic_int: u16,
+    pub extra_data_address: GpuCacheAddress,
+}
+
+/// The eight segments of a border: four corners followed by four edges.
+/// No discriminants are given, so they are assigned in declaration
+/// order starting from zero (`TopLeft` = 0 ... `Bottom` = 7).
+#[derive(Copy, Clone, Debug, Hash, MallocSizeOf, PartialEq, Eq)]
+#[repr(C)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum BorderSegment {
+    TopLeft,
+    TopRight,
+    BottomRight,
+    BottomLeft,
+    Left,
+    Top,
+    Right,
+    Bottom,
+}
+
+/// Instance data for drawing one border piece.
+/// `#[repr(C)]`: the field order is part of the layout.
+#[derive(Debug, Clone)]
+#[repr(C)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct BorderInstance {
+    pub task_origin: DevicePoint,
+    // Despite the name, this rect is in device space (`DeviceRect`).
+    pub local_rect: DeviceRect,
+    pub color0: PremultipliedColorF,
+    pub color1: PremultipliedColorF,
+    // NOTE(review): the bit layout of `flags` is defined by the code that
+    // builds these instances, outside this chunk.
+    pub flags: i32,
+    pub widths: DeviceSize,
+    pub radius: DeviceSize,
+    pub clip_params: [f32; 8],
+}
+
+/// The fields shared by the clip mask instance types; embedded as the
+/// first field (`common`) of `ClipMaskInstanceImage`, `ClipMaskInstanceRect`
+/// and `ClipMaskInstanceBoxShadow`.
+/// `#[repr(C)]`: the field order is part of the layout.
+#[derive(Copy, Clone, Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[repr(C)]
+pub struct ClipMaskInstanceCommon {
+    pub sub_rect: DeviceRect,
+    pub task_origin: DevicePoint,
+    pub screen_origin: DevicePoint,
+    pub device_pixel_scale: f32,
+    pub clip_transform_id: TransformPaletteId,
+    pub prim_transform_id: TransformPaletteId,
+}
+
+/// Clip mask instance for an image clip: the common fields followed by
+/// the image-specific data.
+/// `#[repr(C)]`: the field order is part of the layout.
+#[derive(Clone, Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[repr(C)]
+pub struct ClipMaskInstanceImage {
+    pub common: ClipMaskInstanceCommon,
+    pub tile_rect: LayoutRect,
+    pub resource_address: GpuCacheAddress,
+    pub local_rect: LayoutRect,
+}
+
+/// Clip mask instance for a rectangle clip: the common fields followed
+/// by the rectangle-specific data.
+/// `#[repr(C)]`: the field order is part of the layout.
+#[derive(Clone, Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[repr(C)]
+pub struct ClipMaskInstanceRect {
+    pub common: ClipMaskInstanceCommon,
+    pub local_pos: LayoutPoint,
+    pub clip_data: ClipData,
+}
+
+/// Box shadow parameters embedded in `ClipMaskInstanceBoxShadow`.
+/// `#[repr(C)]`: the field order is part of the layout.
+#[derive(Clone, Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[repr(C)]
+pub struct BoxShadowData {
+    pub src_rect_size: LayoutSize,
+    pub clip_mode: i32,
+    // NOTE(review): presumably `BoxShadowStretchMode` values stored as
+    // `i32` - confirm against the code that fills these in.
+    pub stretch_mode_x: i32,
+    pub stretch_mode_y: i32,
+    pub dest_rect: LayoutRect,
+}
+
+/// Clip mask instance for a box shadow clip: the common fields followed
+/// by the box-shadow-specific data.
+/// `#[repr(C)]`: the field order is part of the layout.
+#[derive(Clone, Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[repr(C)]
+pub struct ClipMaskInstanceBoxShadow {
+    pub common: ClipMaskInstanceCommon,
+    pub resource_address: GpuCacheAddress,
+    pub shadow_data: BoxShadowData,
+}
+
+/// A clipping primitive drawn into the clipping mask.
+/// Could be an image or a rectangle, which defines the
+/// way the address fields are treated.
+///
+/// NOTE(review): the original wording referred to a single `address`
+/// field, which does not exist; presumably `clip_data_address` and
+/// `resource_address` are meant - confirm.
+#[derive(Debug, Copy, Clone)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[repr(C)]
+pub struct ClipMaskInstance {
+    pub clip_transform_id: TransformPaletteId,
+    pub prim_transform_id: TransformPaletteId,
+    pub clip_data_address: GpuCacheAddress,
+    pub resource_address: GpuCacheAddress,
+    pub local_pos: LayoutPoint,
+    pub tile_rect: LayoutRect,
+    pub sub_rect: DeviceRect,
+    pub task_origin: DevicePoint,
+    pub screen_origin: DevicePoint,
+    pub device_pixel_scale: f32,
+}
+
+// 16 bytes per instance should be enough for anyone!
+// (Four `i32` values; the field is private, so instances are only
+// built from within this module.)
+#[repr(C)]
+#[derive(Debug, Clone)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct PrimitiveInstanceData {
+    data: [i32; 4],
+}
+
+/// Specifies that an RGB CompositeInstance's UV coordinates are normalized.
+/// Stored in `CompositeInstance::color_space_or_uv_type` via `pack_as_float`.
+const UV_TYPE_NORMALIZED: u32 = 0;
+/// Specifies that an RGB CompositeInstance's UV coordinates are not normalized.
+const UV_TYPE_UNNORMALIZED: u32 = 1;
+
+/// A GPU-friendly representation of the `ScaleOffset` type
+#[derive(Clone, Debug)]
+#[repr(C)]
+pub struct CompositorTransform {
+    /// Scale along x (`ScaleOffset::scale.x`).
+    pub sx: f32,
+    /// Scale along y (`ScaleOffset::scale.y`).
+    pub sy: f32,
+    /// Offset along x (`ScaleOffset::offset.x`).
+    pub tx: f32,
+    /// Offset along y (`ScaleOffset::offset.y`).
+    pub ty: f32,
+}
+
+impl CompositorTransform {
+    /// Returns the transform with unit scale and zero offset.
+    pub fn identity() -> Self {
+        let (sx, sy) = (1.0, 1.0);
+        let (tx, ty) = (0.0, 0.0);
+        Self { sx, sy, tx, ty }
+    }
+}
+
+impl From<ScaleOffset> for CompositorTransform {
+    /// Flattens the scale and offset components of a `ScaleOffset`
+    /// into the four floats of a `CompositorTransform`.
+    fn from(scale_offset: ScaleOffset) -> Self {
+        let scale = &scale_offset.scale;
+        let offset = &scale_offset.offset;
+        Self {
+            sx: scale.x,
+            sy: scale.y,
+            tx: offset.x,
+            ty: offset.y,
+        }
+    }
+}
+
+/// Vertex format for picture cache composite shader.
+/// When editing the members, update desc::COMPOSITE
+/// so its list of instance_attributes matches these fields.
+#[derive(Clone, Debug)]
+#[repr(C)] // C layout: field order is the in-memory order
+pub struct CompositeInstance {
+    // Picture space destination rectangle of surface
+    rect: PictureRect,
+    // Device space destination clip rect for this surface
+    clip_rect: DeviceRect,
+    // Color for solid color tiles, white otherwise
+    color: PremultipliedColorF,
+
+    // The next four floats are packed into a single vec4 (aParams)
+    z_id: f32,
+    color_space_or_uv_type: f32, // YuvColorSpace for YUV;
+                                 // UV coordinate space for RGB
+    yuv_format: f32,            // YuvFormat
+    yuv_channel_bit_depth: f32, // bit depth of the YUV channels (0.0 for non-YUV instances)
+
+    // UV rectangles for color / yuv texture planes (normalized or not, per the UV type for RGB)
+    uv_rects: [TexelRect; 3],
+
+    // A 2d scale + offset transform for the rect
+    transform: CompositorTransform,
+}
+
+impl CompositeInstance {
+    pub fn new(
+        rect: PictureRect,
+        clip_rect: DeviceRect,
+        color: PremultipliedColorF,
+        z_id: ZBufferId,
+        transform: CompositorTransform,
+    ) -> Self {
+        // Normalized UVs spanning the full texture, reused for all three planes.
+        let full_uv = TexelRect::new(0.0, 0.0, 1.0, 1.0);
+        Self {
+            rect, clip_rect, color,
+            z_id: z_id.0 as f32,
+            color_space_or_uv_type: pack_as_float(UV_TYPE_NORMALIZED),
+            // The YUV-only parameters are zeroed here.
+            yuv_format: 0.0,
+            yuv_channel_bit_depth: 0.0,
+            uv_rects: [full_uv; 3],
+            transform,
+        }
+    }
+
+    pub fn new_rgb(
+        rect: PictureRect,
+        clip_rect: DeviceRect,
+        color: PremultipliedColorF,
+        z_id: ZBufferId,
+        uv_rect: TexelRect,
+        transform: CompositorTransform,
+    ) -> Self {
+        // The caller-supplied UV rect is flagged as unnormalized and copied to every plane.
+        Self {
+            rect, clip_rect, color,
+            z_id: z_id.0 as f32,
+            color_space_or_uv_type: pack_as_float(UV_TYPE_UNNORMALIZED),
+            // The YUV-only parameters are zeroed here.
+            yuv_format: 0.0,
+            yuv_channel_bit_depth: 0.0,
+            uv_rects: [uv_rect; 3],
+            transform,
+        }
+    }
+
+    pub fn new_yuv(
+        rect: PictureRect,
+        clip_rect: DeviceRect,
+        z_id: ZBufferId,
+        yuv_color_space: YuvRangedColorSpace,
+        yuv_format: YuvFormat,
+        yuv_channel_bit_depth: u32,
+        uv_rects: [TexelRect; 3],
+        transform: CompositorTransform,
+    ) -> Self {
+        // The enum and integer parameters are stored as floats.
+        let color_space = pack_as_float(yuv_color_space as u32);
+        let format = pack_as_float(yuv_format as u32);
+        Self {
+            rect, clip_rect, uv_rects, transform,
+            color: PremultipliedColorF::WHITE,
+            z_id: z_id.0 as f32,
+            color_space_or_uv_type: color_space,
+            yuv_format: format,
+            yuv_channel_bit_depth: pack_as_float(yuv_channel_bit_depth),
+        }
+    }
+
+    // Computes the set of CompositeFeatures under which this RGB
+    // instance can be composited.
+    pub fn get_rgb_features(&self) -> CompositeFeatures {
+        // UV clamping can be avoided when normalized UVs cover the entire texture.
+        // We should try harder to determine this for unnormalized UVs too.
+        let normalized = self.color_space_or_uv_type == pack_as_float(UV_TYPE_NORMALIZED);
+        let covers_texture = self.uv_rects[0] == TexelRect::new(0.0, 0.0, 1.0, 1.0);
+        // No color modulation is needed when the instance color is white.
+        let is_white = self.color == PremultipliedColorF::WHITE;
+
+        let mut features = CompositeFeatures::empty();
+        if normalized && covers_texture {
+            features |= CompositeFeatures::NO_UV_CLAMP;
+        }
+        if is_white {
+            features |= CompositeFeatures::NO_COLOR_MODULATION;
+        }
+        features
+    }
+}
+
+/// Vertex format for issuing colored quads.
+#[derive(Debug, Clone)]
+#[repr(C)] // C layout: eight consecutive f32 values
+pub struct ClearInstance {
+    pub rect: [f32; 4], // the quad's rect as four floats (component order not visible here)
+    pub color: [f32; 4], // the quad's color as four floats (presumably RGBA -- confirm)
+}
+
+#[derive(Debug, Copy, Clone)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct PrimitiveHeaderIndex(pub i32); // index of a header, as returned by `PrimitiveHeaders::push`
+
+#[derive(Debug)]
+#[repr(C)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct PrimitiveHeaders { // two parallel lists; `push` appends one entry to each
+    // The integer-type headers, one entry per pushed primitive header.
+    pub headers_int: Vec<PrimitiveHeaderI>,
+    // The float-type headers, one entry per pushed primitive header.
+    pub headers_float: Vec<PrimitiveHeaderF>,
+}
+
+impl PrimitiveHeaders {
+    pub fn new() -> PrimitiveHeaders {
+        // Both lists start out empty and grow together through `push`.
+        let headers_int = Vec::new();
+        let headers_float = Vec::new();
+        PrimitiveHeaders { headers_int, headers_float }
+    }
+
+    // Append a primitive header and return the index it was stored at.
+    pub fn push(
+        &mut self,
+        prim_header: &PrimitiveHeader,
+        z: ZBufferId,
+        user_data: [i32; 4],
+    ) -> PrimitiveHeaderIndex {
+        // The two lists must have equal length so that one index addresses both.
+        debug_assert_eq!(self.headers_int.len(), self.headers_float.len());
+        let index = PrimitiveHeaderIndex(self.headers_float.len() as i32);
+        let float_part = PrimitiveHeaderF {
+            local_rect: prim_header.local_rect,
+            local_clip_rect: prim_header.local_clip_rect,
+        };
+        self.headers_float.push(float_part);
+        let int_part = PrimitiveHeaderI {
+            z,
+            unused: 0,
+            specific_prim_address: prim_header.specific_prim_address.as_int(),
+            transform_id: prim_header.transform_id,
+            user_data,
+        };
+        self.headers_int.push(int_part);
+        index
+    }
+}
+
+// This is a convenience type used to make it easier to pass
+// the common parts around during batching.
+#[derive(Debug)]
+pub struct PrimitiveHeader {
+    pub local_rect: LayoutRect, // copied into `PrimitiveHeaderF` by `PrimitiveHeaders::push`
+    pub local_clip_rect: LayoutRect, // copied into `PrimitiveHeaderF` by `PrimitiveHeaders::push`
+    pub specific_prim_address: GpuCacheAddress, // stored as an int in `PrimitiveHeaderI`
+    pub transform_id: TransformPaletteId, // copied into `PrimitiveHeaderI`
+}
+
+// f32 parts of a primitive header
+#[derive(Debug)]
+#[repr(C)] // C layout: fields are stored in declaration order
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct PrimitiveHeaderF {
+    pub local_rect: LayoutRect, // taken from `PrimitiveHeader::local_rect`
+    pub local_clip_rect: LayoutRect, // taken from `PrimitiveHeader::local_clip_rect`
+}
+
+// i32 parts of a primitive header
+// TODO(gw): Compress parts of these down to u16
+#[derive(Debug)]
+#[repr(C)] // C layout: fields are stored in declaration order
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct PrimitiveHeaderI {
+    pub z: ZBufferId, // z value passed to `PrimitiveHeaders::push`
+    pub specific_prim_address: i32, // `GpuCacheAddress::as_int()` of the primitive's address
+    pub transform_id: TransformPaletteId, // taken from `PrimitiveHeader::transform_id`
+    pub unused: i32,                    // To ensure required 16 byte alignment of vertex textures
+    pub user_data: [i32; 4], // caller-defined data passed to `PrimitiveHeaders::push`
+}
+
+pub struct GlyphInstance { // builder input for glyph instance data; see `build`
+    pub prim_header_index: PrimitiveHeaderIndex, // index of the primitive header the glyphs use
+}
+
+impl GlyphInstance {
+    pub fn new( // wraps the given primitive header index
+        prim_header_index: PrimitiveHeaderIndex,
+    ) -> Self {
+        GlyphInstance {
+            prim_header_index,
+        }
+    }
+
+    // TODO(gw): Some of these fields can be moved to the primitive
+    //           header since they are constant, and some can be
+    //           compressed to a smaller size.
+    pub fn build(&self, // packs the arguments into the four 32-bit slots of the instance data
+        render_task: RenderTaskAddress,
+        clip_task: RenderTaskAddress,
+        subpx_dir: SubpixelDirection,
+        glyph_index_in_text_run: i32,
+        glyph_uv_rect: GpuCacheAddress,
+        color_mode: ShaderColorMode,
+    ) -> PrimitiveInstanceData {
+        PrimitiveInstanceData {
+            data: [
+                self.prim_header_index.0 as i32, // slot 0: primitive header index
+                ((render_task.0 as i32) << 16) // slot 1: render task shifted up by 16 bits ...
+                | clip_task.0 as i32, // ... OR'ed with the clip task
+                (subpx_dir as u32 as i32) << 24 // slot 2: subpixel direction from bit 24 ...
+                | (color_mode as u32 as i32) << 16 // ... color mode from bit 16 ...
+                | glyph_index_in_text_run, // ... OR'ed with the glyph index (assumed to fit below bit 16)
+                glyph_uv_rect.as_int(), // slot 3: glyph UV rect address as an int
+            ],
+        }
+    }
+}
+
+pub struct SplitCompositeInstance { // converted to `PrimitiveInstanceData` via `From`, one slot per field
+    pub prim_header_index: PrimitiveHeaderIndex, // goes into slot 0
+    pub polygons_address: i32, // goes into slot 1
+    pub z: ZBufferId, // goes into slot 2
+    pub render_task_address: RenderTaskAddress, // goes into slot 3 (cast to i32)
+}
+
+impl From<SplitCompositeInstance> for PrimitiveInstanceData {
+    fn from(instance: SplitCompositeInstance) -> Self {
+        // Each field occupies one 32-bit slot of the instance data,
+        // in the same order the fields are declared.
+        let header = instance.prim_header_index.0;
+        let polygons = instance.polygons_address;
+        let z = instance.z.0;
+        let task = instance.render_task_address.0 as i32;
+
+        Self { data: [header, polygons, z, task] }
+    }
+}
+
+bitflags! {
+    // Note: This can use up to 12 bits due to how it will be packed in the
+    // instance data (shifted left by 16, below the edge flags at bit 28).
+
+    /// Flags that define how the common brush shader
+    /// code should process this instance.
+    #[cfg_attr(feature = "capture", derive(Serialize))]
+    #[cfg_attr(feature = "replay", derive(Deserialize))]
+    #[derive(MallocSizeOf)]
+    pub struct BrushFlags: u16 {
+        /// Apply perspective interpolation to UVs
+        const PERSPECTIVE_INTERPOLATION = 1; // bit 0
+        /// Do interpolation relative to segment rect,
+        /// rather than primitive rect.
+        const SEGMENT_RELATIVE = 2; // bit 1
+        /// Repeat UVs horizontally.
+        const SEGMENT_REPEAT_X = 4; // bit 2
+        /// Repeat UVs vertically.
+        const SEGMENT_REPEAT_Y = 8; // bit 3
+        /// Horizontally follow border-image-repeat: round.
+        const SEGMENT_REPEAT_X_ROUND = 16; // bit 4
+        /// Vertically follow border-image-repeat: round.
+        const SEGMENT_REPEAT_Y_ROUND = 32; // bit 5
+        /// Middle (fill) area of a border-image-repeat.
+        const SEGMENT_NINEPATCH_MIDDLE = 64; // bit 6
+        /// The extra segment data is a texel rect.
+        const SEGMENT_TEXEL_RECT = 128; // bit 7
+        /// Whether to force the anti-aliasing when the primitive
+        /// is axis-aligned.
+        const FORCE_AA = 256; // bit 8
+    }
+}
+
+/// Convenience structure to encode into PrimitiveInstanceData.
+pub struct BrushInstance {
+    pub prim_header_index: PrimitiveHeaderIndex, // slot 0
+    pub render_task_address: RenderTaskAddress, // slot 1, shifted up by 16 bits
+    pub clip_task_address: RenderTaskAddress, // slot 1, OR'ed in unshifted
+    pub segment_index: i32, // slot 2, OR'ed in unshifted
+    pub edge_flags: EdgeAaSegmentMask, // slot 2, from bit 28
+    pub brush_flags: BrushFlags, // slot 2, from bit 16
+    pub resource_address: i32, // slot 3
+}
+
+impl From<BrushInstance> for PrimitiveInstanceData {
+    fn from(instance: BrushInstance) -> Self { // packs the brush fields into four 32-bit slots
+        PrimitiveInstanceData {
+            data: [
+                instance.prim_header_index.0, // slot 0: primitive header index
+                ((instance.render_task_address.0 as i32) << 16) // slot 1: render task shifted up by 16 bits ...
+                | instance.clip_task_address.0 as i32, // ... OR'ed with the clip task
+                instance.segment_index // slot 2: segment index ...
+                | ((instance.brush_flags.bits() as i32) << 16) // ... brush flags from bit 16 ...
+                | ((instance.edge_flags.bits() as i32) << 28), // ... edge flags from bit 28
+                instance.resource_address, // slot 3: resource address
+            ]
+        }
+    }
+}
+
+/// Convenience structure to encode into the image brush's user data.
+#[derive(Copy, Clone, Debug)]
+pub struct ImageBrushData {
+    pub color_mode: ShaderColorMode, // encoded in slot 0, unshifted
+    pub alpha_type: AlphaType, // encoded in slot 0, shifted up by 16 bits
+    pub raster_space: RasterizationSpace, // encoded in slot 1
+    pub opacity: f32, // encoded in slot 2 via `get_shader_opacity`
+}
+
+impl ImageBrushData {
+    #[inline]
+    pub fn encode(&self) -> [i32; 4] {
+        // Slot 0 combines the color mode with the alpha type shifted up by 16 bits.
+        let modes = self.color_mode as i32 | ((self.alpha_type as i32) << 16);
+        let raster_space = self.raster_space as i32;
+        let opacity = get_shader_opacity(self.opacity);
+        // The last slot is left at zero.
+        [modes, raster_space, opacity, 0]
+    }
+}
+
+// Represents the information about a transform palette
+// entry that is passed to shaders. It includes an index
+// into the transform palette, and a set of flags. The
+// only flag currently used determines whether the
+// transform is axis-aligned (and thus should have
+// pixel snapping applied).
+#[derive(Copy, Debug, Clone, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[repr(C)]
+pub struct TransformPaletteId(pub u32); // low 24 bits: palette index; bits 24 and up: transform kind
+
+impl TransformPaletteId {
+    /// The id of the identity transform (index zero, kind bits zero).
+    pub const IDENTITY: Self = TransformPaletteId(0);
+
+    /// Decode the transform kind held in the bits above the low 24.
+    pub fn transform_kind(&self) -> TransformedRectKind {
+        match self.0 >> 24 {
+            0 => TransformedRectKind::AxisAligned,
+            _ => TransformedRectKind::Complex,
+        }
+    }
+
+    /// Return a copy of this id whose kind bits are replaced by `kind`, keeping
+    /// the low 24 index bits. This can be useful in cases where we don't want
+    /// shaders to consider certain transforms axis-aligned (i.e. perspective
+    /// warp) even though we may still want to for the general case.
+    pub fn override_transform_kind(&self, kind: TransformedRectKind) -> Self {
+        let index_bits = self.0 & 0x00FF_FFFF;
+        TransformPaletteId(index_bits | ((kind as u32) << 24))
+    }
+}
+
+/// The GPU data payload for a transform palette entry.
+#[derive(Debug, Clone)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[repr(C)] // C layout: the forward transform is followed by its inverse
+pub struct TransformData {
+    transform: LayoutToPictureTransform, // the registered layout -> picture transform
+    inv_transform: PictureToLayoutTransform, // its inverse (identity if the inversion failed)
+}
+
+impl TransformData {
+    fn invalid() -> Self {
+        // The placeholder entry uses the identity in both directions.
+        let transform = LayoutToPictureTransform::identity();
+        let inv_transform = PictureToLayoutTransform::identity();
+        TransformData { transform, inv_transform }
+    }
+}
+
+// Extra data stored about each transform palette entry (kept CPU-side in `TransformPalette::metadata`).
+#[derive(Clone)]
+pub struct TransformMetadata {
+    transform_kind: TransformedRectKind, // kind that gets encoded into the `TransformPaletteId`
+}
+
+impl TransformMetadata {
+    pub fn invalid() -> Self {
+        // The placeholder entry is classified as axis-aligned.
+        let transform_kind = TransformedRectKind::AxisAligned;
+        Self { transform_kind }
+    }
+}
+
+#[derive(Debug, Hash, Eq, PartialEq)] // Hash + Eq: used as the key of `TransformPalette::map`
+struct RelativeTransformKey {
+    from_index: SpatialNodeIndex, // node the relative transform starts from (the child)
+    to_index: SpatialNodeIndex, // node the relative transform maps into (the parent)
+}
+
+// Stores a contiguous list of TransformData structs, that
+// are ready for upload to the GPU.
+// TODO(gw): For now, this only stores the complete local
+//           to world transform for each spatial node. In
+//           the future, the transform palette will support
+//           specifying a coordinate system that the transform
+//           should be relative to.
+pub struct TransformPalette { // NOTE(review): the TODO above may be stale; `get_index` registers relative transforms
+    transforms: Vec<TransformData>, // GPU payloads, handed out by `finish`
+    metadata: Vec<TransformMetadata>, // per-entry metadata, same indices as `transforms`
+    map: FastHashMap<RelativeTransformKey, usize>, // (from, to) node pair -> index of its entry
+}
+
+impl TransformPalette {
+    pub fn new( // creates a palette holding only the placeholder entry at index 0
+        count: usize,
+    ) -> Self {
+        let _ = VECS_PER_TRANSFORM; // value is discarded; presumably just keeps the constant referenced
+
+        let mut transforms = Vec::with_capacity(count);
+        let mut metadata = Vec::with_capacity(count);
+
+        transforms.push(TransformData::invalid()); // index 0: identity placeholder
+        metadata.push(TransformMetadata::invalid()); // index 0: axis-aligned placeholder
+
+        TransformPalette {
+            transforms,
+            metadata,
+            map: FastHashMap::default(),
+        }
+    }
+
+    pub fn finish(self) -> Vec<TransformData> { // consumes the palette, yielding the GPU payload list
+        self.transforms
+    }
+
+    fn get_index( // index of the child -> parent relative transform, registering it on first use
+        &mut self,
+        child_index: SpatialNodeIndex,
+        parent_index: SpatialNodeIndex,
+        spatial_tree: &SpatialTree,
+    ) -> usize {
+        if child_index == parent_index {
+            0 // same node: reuse the entry at index 0
+        } else {
+            let key = RelativeTransformKey {
+                from_index: child_index,
+                to_index: parent_index,
+            };
+
+            let metadata = &mut self.metadata; // borrowed separately so the closure below can use them
+            let transforms = &mut self.transforms; // while `self.map` is mutably borrowed by `entry`
+
+            *self.map
+                .entry(key)
+                .or_insert_with(|| { // only runs the first time this (child, parent) pair is seen
+                    let transform = spatial_tree.get_relative_transform(
+                        child_index,
+                        parent_index,
+                    )
+                    .into_transform()
+                    .with_destination::<PicturePixel>();
+
+                    register_transform(
+                        metadata,
+                        transforms,
+                        transform,
+                    )
+                })
+        }
+    }
+
+    // Get a transform palette id for the relative transform between two spatial nodes.
+    // TODO(gw): In the future, it will be possible to specify
+    //           a coordinate system id here, to allow retrieving
+    //           transforms in the local space of a given spatial node.
+    pub fn get_id(
+        &mut self,
+        from_index: SpatialNodeIndex,
+        to_index: SpatialNodeIndex,
+        spatial_tree: &SpatialTree,
+    ) -> TransformPaletteId {
+        let index = self.get_index(
+            from_index,
+            to_index,
+            spatial_tree,
+        );
+        let transform_kind = self.metadata[index].transform_kind as u32;
+        TransformPaletteId(
+            (index as u32) | // palette index in the low bits (assumed to fit in 24 bits)
+            (transform_kind << 24) // transform kind from bit 24
+        )
+    }
+
+    pub fn get_custom( // registers `transform` as a new entry (no map lookup) and returns its id
+        &mut self,
+        transform: LayoutToPictureTransform,
+    ) -> TransformPaletteId {
+        let index = register_transform(
+            &mut self.metadata,
+            &mut self.transforms,
+            transform,
+        );
+
+        let transform_kind = self.metadata[index].transform_kind as u32;
+        TransformPaletteId(
+            (index as u32) | // palette index in the low bits (assumed to fit in 24 bits)
+            (transform_kind << 24) // transform kind from bit 24
+        )
+    }
+}
+
+// Texture cache resources can be either a simple rect, or define
+// a polygon within a rect by specifying a UV coordinate for each
+// corner. This is useful for rendering screen-space rasterized
+// off-screen surfaces.
+#[derive(Debug, Copy, Clone)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum UvRectKind {
+    // The 2d bounds of the texture cache entry define the
+    // valid UV space for this texture cache entry.
+    Rect,
+    // The four vertices below define a quad within
+    // the texture cache entry rect. The shader can
+    // use a bilerp() to correctly interpolate a
+    // UV coord in the vertex shader.
+    Quad {
+        top_left: DeviceHomogeneousVector, // uploaded first by `ImageSource::write_gpu_blocks`
+        top_right: DeviceHomogeneousVector, // uploaded second
+        bottom_left: DeviceHomogeneousVector, // uploaded third
+        bottom_right: DeviceHomogeneousVector, // uploaded fourth
+    },
+}
+
+#[derive(Debug, Copy, Clone)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ImageSource {
+    pub p0: DevicePoint, // first corner; written as the x/y of the first GPU block
+    pub p1: DevicePoint, // second corner; written as the z/w of the first GPU block
+    // TODO: It appears that only glyphs make use of user_data (to store glyph offset
+    // and scale).
+    // Perhaps we should separate the two so we don't have to push an empty unused vec4
+    // for all image sources.
+    pub user_data: [f32; 4], // written as the second GPU block
+    pub uv_rect_kind: UvRectKind, // `Quad` adds four more GPU blocks, `Rect` adds none
+}
+
+impl ImageSource {
+    pub fn write_gpu_blocks(&self, request: &mut GpuDataRequest) {
+        // see fetch_image_resource in GLSL
+        // has to be VECS_PER_IMAGE_RESOURCE vectors
+        // Block 0 holds the p0/p1 corners, block 1 the user data.
+        let bounds = [self.p0.x, self.p0.y, self.p1.x, self.p1.y];
+        request.push(bounds);
+        request.push(self.user_data);
+
+        // A plain rect needs nothing more; a quad also uploads its four corners.
+        match self.uv_rect_kind {
+            UvRectKind::Rect => {}
+            UvRectKind::Quad { top_left, top_right, bottom_left, bottom_right } => {
+                // see fetch_image_resource_extra in GLSL
+                //Note: we really need only 3 components per point here: X, Y, and W
+                request.push(top_left);
+                request.push(top_right);
+                request.push(bottom_left);
+                request.push(bottom_right);
+            }
+        }
+    }
+}
+
+// Append `transform`, its inverse and its metadata to the palette
+// lists, returning the index the new entry was stored at.
+fn register_transform(
+    metadatas: &mut Vec<TransformMetadata>,
+    transforms: &mut Vec<TransformData>,
+    transform: LayoutToPictureTransform,
+) -> usize {
+    let index = transforms.len();
+    let transform_kind = transform.transform_kind();
+
+    // TODO: refactor the calling code to not even try
+    // registering a non-invertible transform.
+    // Until then, a non-invertible transform is paired with an identity inverse.
+    let inv_transform = match transform.inverse() {
+        Some(inverse) => inverse,
+        None => PictureToLayoutTransform::identity(),
+    };
+
+    metadatas.push(TransformMetadata {
+        transform_kind,
+    });
+    transforms.push(TransformData {
+        transform,
+        inv_transform,
+    });
+    index
+}
+
+pub fn get_shader_opacity(opacity: f32) -> i32 {
+    f32::round(65535.0 * opacity) as i32 // scale by 65535, round to nearest, then cast
+}
diff --git a/gfx/wr/webrender/src/hit_test.rs b/gfx/wr/webrender/src/hit_test.rs
new file mode 100644
index 0000000000..34a7e7e404
--- /dev/null
+++ b/gfx/wr/webrender/src/hit_test.rs
@@ -0,0 +1,420 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{BorderRadius, ClipMode, HitTestResultItem, HitTestResult, ItemTag, PrimitiveFlags};
+use api::{PipelineId, ApiHitTester};
+use api::units::*;
+use crate::clip::{rounded_rectangle_contains_point, ClipNodeId, ClipTreeBuilder};
+use crate::clip::{polygon_contains_point, ClipItemKey, ClipItemKeyKind};
+use crate::prim_store::PolygonKey;
+use crate::scene_builder_thread::Interners;
+use crate::spatial_tree::{SpatialNodeIndex, SpatialTree, get_external_scroll_offset};
+use crate::internal_types::{FastHashMap, LayoutPrimitiveInfo};
+use std::sync::{Arc, Mutex};
+use crate::util::{LayoutToWorldFastTransform};
+
+pub struct SharedHitTester {
+    // We don't really need a mutex here. We could do with some sort of
+    // atomic-atomic-ref-counted pointer (an Arc which would let the pointer
+    // be swapped atomically like an AtomicPtr).
+    // In practice this shouldn't cause performance issues, though.
+    hit_tester: Mutex<Arc<HitTester>>, // current hit tester; replaced wholesale by `update`
+}
+
+impl SharedHitTester {
+    pub fn new() -> Self {
+        // Start out with a hit tester for an empty scene.
+        let initial = Arc::new(HitTester::empty());
+        SharedHitTester { hit_tester: Mutex::new(initial) }
+    }
+
+    pub fn get_ref(&self) -> Arc<HitTester> {
+        // Clone the Arc while the lock is held; panics if the mutex is poisoned.
+        Arc::clone(&self.hit_tester.lock().unwrap())
+    }
+
+    pub(crate) fn update(&self, new_hit_tester: Arc<HitTester>) {
+        // Swap in the new tester under the lock; panics if the mutex is poisoned.
+        *self.hit_tester.lock().unwrap() = new_hit_tester;
+    }
+}
+
+impl ApiHitTester for SharedHitTester {
+    fn hit_test(&self, point: WorldPoint) -> HitTestResult {
+        // Test against a snapshot of whichever hit tester is current right now.
+        let tester = self.get_ref();
+        tester.hit_test(HitTest::new(point))
+    }
+}
+
+/// A copy of important spatial node data to use during hit testing. This is a copy of
+/// data from the SpatialTree that will persist as a new frame is under construction,
+/// allowing hit tests consistent with the currently rendered frame.
+#[derive(MallocSizeOf)]
+struct HitTestSpatialNode {
+    /// The pipeline id of this node.
+    pipeline_id: PipelineId,
+
+    /// World transform for content transformed by this node.
+    world_content_transform: LayoutToWorldFastTransform,
+
+    /// World viewport transform for content transformed by this node.
+    world_viewport_transform: LayoutToWorldFastTransform,
+
+    /// The accumulated external scroll offset for this spatial node.
+    external_scroll_offset: LayoutVector2D,
+}
+
+#[derive(MallocSizeOf)]
+struct HitTestClipNode {
+    /// A point must be inside this region (and those of all parent clip nodes) to be
+    /// considered clipped in for the purposes of a hit test.
+    region: HitTestRegion,
+    /// The positioning node for this clip
+    spatial_node_index: SpatialNodeIndex,
+    /// Parent clip node (`ClipNodeId::NONE` ends the chain during hit testing)
+    parent: ClipNodeId,
+}
+
+impl HitTestClipNode {
+    fn new(
+        item: &ClipItemKey,
+        interners: &Interners,
+        parent: ClipNodeId,
+    ) -> Self {
+        let spatial_node_index = item.spatial_node_index;
+        let region = match item.kind {
+            ClipItemKeyKind::Rectangle(rect, mode) =>
+                HitTestRegion::Rectangle(rect.into(), mode),
+            ClipItemKeyKind::RoundedRectangle(rect, radius, mode) =>
+                HitTestRegion::RoundedRectangle(rect.into(), radius.into(), mode),
+            ClipItemKeyKind::ImageMask(rect, _, polygon_handle) => match polygon_handle {
+                // With a polygon attached, retrieve its data from the interner.
+                Some(handle) => {
+                    let polygon = &interners.polygon[handle];
+                    HitTestRegion::Polygon(rect.into(), *polygon)
+                }
+                // Otherwise only the mask rect is used, as a plain clip-in rectangle.
+                None => HitTestRegion::Rectangle(rect.into(), ClipMode::Clip),
+            },
+            // Box shadow clips get a region that `contains` treats as always passing.
+            ClipItemKeyKind::BoxShadow(..) => HitTestRegion::Invalid,
+        };
+
+        Self {
+            region,
+            spatial_node_index,
+            parent,
+        }
+    }
+}
+
+#[derive(Clone, MallocSizeOf)]
+struct HitTestingItem { // one hit-testable primitive, as recorded by `HitTestingScene::add_item`
+    rect: LayoutRect, // the primitive's rect; tested against the point in the item's layout space
+    tag: ItemTag, // reported back in the `HitTestResultItem` on a hit
+    animation_id: u64, // reported back in the `HitTestResultItem` on a hit
+    is_backface_visible: bool, // from `PrimitiveFlags::IS_BACKFACE_VISIBLE`
+    spatial_node_index: SpatialNodeIndex, // spatial node that positions this item
+    clip_node_id: ClipNodeId, // first clip node of the chain checked during hit testing
+}
+
+impl HitTestingItem {
+    fn new(
+        tag: ItemTag,
+        animation_id: u64,
+        info: &LayoutPrimitiveInfo,
+        spatial_node_index: SpatialNodeIndex,
+        clip_node_id: ClipNodeId,
+    ) -> HitTestingItem {
+        // Only the rect and the backface-visibility flag are taken from `info`.
+        let rect = info.rect;
+        let is_backface_visible = info.flags.contains(PrimitiveFlags::IS_BACKFACE_VISIBLE);
+        Self {
+            rect, tag, animation_id,
+            is_backface_visible,
+            spatial_node_index, clip_node_id,
+        }
+    }
+}
+
+/// Statistics about allocation sizes of current hit tester,
+/// used to pre-allocate size of the next hit tester.
+pub struct HitTestingSceneStats {
+    pub clip_nodes_count: usize, // number of clip nodes to pre-allocate for
+    pub items_count: usize, // number of hit testing items to pre-allocate for
+}
+
+impl HitTestingSceneStats {
+    pub fn empty() -> Self {
+        // With no previous scene to learn from, both counts are zero.
+        let clip_nodes_count = 0;
+        let items_count = 0;
+        Self { clip_nodes_count, items_count }
+    }
+}
+
+#[derive(MallocSizeOf, Debug, Copy, Clone)]
+pub struct ClipNodeIndex(u32); // newtype around a u32; not referenced by the code visible in this file section
+
+/// Defines the immutable part of a hit tester for a given scene.
+/// The hit tester is recreated each time a frame is built, since
+/// it relies on the current values of the spatial tree.
+/// However, the clip chain and item definitions don't change,
+/// so they are created once per scene, and shared between
+/// hit tester instances via Arc.
+#[derive(MallocSizeOf)]
+pub struct HitTestingScene {
+    clip_nodes: FastHashMap<ClipNodeId, HitTestClipNode>, // clip nodes reachable from the items, keyed by id
+
+    /// List of hit testing primitives, in the order they were added.
+    items: Vec<HitTestingItem>,
+}
+
+impl HitTestingScene {
+    /// Construct a new hit testing scene, pre-allocating both the clip node
+    /// map and the item list to the sizes provided by previous scene stats.
+    pub fn new(stats: &HitTestingSceneStats) -> Self {
+        HitTestingScene {
+            clip_nodes: FastHashMap::with_capacity_and_hasher(stats.clip_nodes_count, Default::default()),
+            items: Vec::with_capacity(stats.items_count),
+        }
+    }
+
+    /// Get stats about the current scene allocation sizes.
+    pub fn get_stats(&self) -> HitTestingSceneStats {
+        HitTestingSceneStats {
+            clip_nodes_count: self.clip_nodes.len(),
+            items_count: self.items.len(),
+        }
+    }
+
+    fn add_clip_node( // records `clip_node_id` and, recursively, its parent chain
+        &mut self,
+        clip_node_id: ClipNodeId,
+        clip_tree_builder: &ClipTreeBuilder,
+        interners: &Interners,
+    ) {
+        if clip_node_id == ClipNodeId::NONE {
+            return;
+        }
+        // A node that is already recorded had its parent chain recorded with it.
+        if !self.clip_nodes.contains_key(&clip_node_id) {
+            let src_clip_node = clip_tree_builder.get_node(clip_node_id);
+            let clip_item = &interners.clip[src_clip_node.handle];
+
+            let clip_node = HitTestClipNode::new(
+                &clip_item.key,
+                interners,
+                src_clip_node.parent,
+            );
+
+            self.clip_nodes.insert(clip_node_id, clip_node);
+            // Record the ancestors too, so hit testing can walk the parent chain.
+            self.add_clip_node(
+                src_clip_node.parent,
+                clip_tree_builder,
+                interners,
+            );
+        }
+    }
+
+    /// Add a hit testing primitive, recording the clip nodes it depends on first.
+    pub fn add_item(
+        &mut self,
+        tag: ItemTag,
+        anim_id: u64,
+        info: &LayoutPrimitiveInfo,
+        spatial_node_index: SpatialNodeIndex,
+        clip_node_id: ClipNodeId,
+        clip_tree_builder: &ClipTreeBuilder,
+        interners: &Interners,
+    ) {
+        self.add_clip_node(
+            clip_node_id,
+            clip_tree_builder,
+            interners,
+        );
+
+        let item = HitTestingItem::new(
+            tag,
+            anim_id,
+            info,
+            spatial_node_index,
+            clip_node_id,
+        );
+
+        self.items.push(item);
+    }
+}
+
+#[derive(MallocSizeOf)]
+enum HitTestRegion { // shape a clip node tests points against; see `contains`
+    Invalid, // built for box-shadow clips; `contains` always returns true for it
+    Rectangle(LayoutRect, ClipMode), // plain rect; `ClipOut` inverts the test
+    RoundedRectangle(LayoutRect, BorderRadius, ClipMode), // rounded rect; `ClipOut` inverts the test
+    Polygon(LayoutRect, PolygonKey), // polygon of an image mask, together with the mask rect
+}
+
+impl HitTestRegion {
+    fn contains(&self, point: &LayoutPoint) -> bool {
+        // Run the geometric test for the shape first, then apply the clip mode.
+        let (inside, mode) = match *self {
+            HitTestRegion::Invalid => return true,
+            HitTestRegion::Polygon(rect, polygon) =>
+                return polygon_contains_point(point, &rect, &polygon),
+            HitTestRegion::Rectangle(ref rectangle, mode) => (rectangle.contains(*point), mode),
+            HitTestRegion::RoundedRectangle(rect, radii, mode) =>
+                (rounded_rectangle_contains_point(point, &rect, &radii), mode),
+        };
+        match mode {
+            ClipMode::Clip => inside,
+            ClipMode::ClipOut => !inside,
+        }
+    }
+}
+
+#[derive(MallocSizeOf)]
+pub struct HitTester { // a scene's items and clips plus a snapshot of the spatial tree
+    #[ignore_malloc_size_of = "Arc"]
+    scene: Arc<HitTestingScene>, // shared, immutable items and clip nodes
+    spatial_nodes: FastHashMap<SpatialNodeIndex, HitTestSpatialNode>, // filled by `read_spatial_tree`
+}
+
+impl HitTester {
+    pub fn empty() -> Self { // hit tester over an empty scene, with no spatial nodes
+        HitTester {
+            scene: Arc::new(HitTestingScene::new(&HitTestingSceneStats::empty())),
+            spatial_nodes: FastHashMap::default(),
+        }
+    }
+
+    pub fn new( // builds a hit tester for `scene`, snapshotting `spatial_tree`
+        scene: Arc<HitTestingScene>,
+        spatial_tree: &SpatialTree,
+    ) -> HitTester {
+        let mut hit_tester = HitTester {
+            scene,
+            spatial_nodes: FastHashMap::default(),
+        };
+        hit_tester.read_spatial_tree(spatial_tree);
+        hit_tester
+    }
+
+    fn read_spatial_tree( // replaces `spatial_nodes` with a copy of the tree's per-node data
+        &mut self,
+        spatial_tree: &SpatialTree,
+    ) {
+        self.spatial_nodes.clear();
+        self.spatial_nodes.reserve(spatial_tree.spatial_node_count());
+
+        spatial_tree.visit_nodes(|index, node| {
+            //TODO: avoid inverting more than necessary:
+            //  - if the coordinate system is non-invertible, no need to try any of these concrete transforms
+            //  - if there are other places where inversion is needed, let's not repeat the step
+
+            self.spatial_nodes.insert(index, HitTestSpatialNode {
+                pipeline_id: node.pipeline_id,
+                world_content_transform: spatial_tree
+                    .get_world_transform(index)
+                    .into_fast_transform(),
+                world_viewport_transform: spatial_tree
+                    .get_world_viewport_transform(index)
+                    .into_fast_transform(),
+                external_scroll_offset: get_external_scroll_offset(spatial_tree, index),
+            });
+        });
+    }
+
+    pub fn hit_test(&self, test: HitTest) -> HitTestResult { // collects the items hit by `test.point`
+        let mut result = HitTestResult::default();
+
+        let mut current_spatial_node_index = SpatialNodeIndex::INVALID;
+        let mut point_in_layer = None; // cached projection of the point for the current spatial node
+
+        // For each hit test primitive, visiting the most recently added first
+        for item in self.scene.items.iter().rev() {
+            let scroll_node = &self.spatial_nodes[&item.spatial_node_index]; // indexing panics if the node is missing
+            let pipeline_id = scroll_node.pipeline_id;
+
+            // Update the cached point in layer space, if the spatial node
+            // changed since last primitive.
+            if item.spatial_node_index != current_spatial_node_index {
+                point_in_layer = scroll_node
+                    .world_content_transform
+                    .inverse()
+                    .and_then(|inverted| inverted.project_point2d(test.point));
+                current_spatial_node_index = item.spatial_node_index;
+            }
+
+            // Only consider hit tests on layers whose transform could be inverted and projected.
+            let point_in_layer = match point_in_layer {
+                Some(p) => p,
+                None => continue,
+            };
+
+            // If the item's rect doesn't contain this point, it's not a valid
+            // hit (the item's clips are checked separately below).
+            if !item.rect.contains(point_in_layer) {
+                continue;
+            }
+
+            // See if any of the clips for this primitive cull out the item.
+            let mut current_clip_node_id = item.clip_node_id;
+            let mut is_valid = true;
+
+            while current_clip_node_id != ClipNodeId::NONE { // walk up the clip parent chain
+                let clip_node = &self.scene.clip_nodes[&current_clip_node_id];
+
+                let transform = self
+                    .spatial_nodes[&clip_node.spatial_node_index]
+                    .world_content_transform;
+                if let Some(transformed_point) = transform // clips whose transform can't be inverted are skipped
+                    .inverse()
+                    .and_then(|inverted| inverted.project_point2d(test.point))
+                {
+                    if !clip_node.region.contains(&transformed_point) {
+                        is_valid = false;
+                        break;
+                    }
+                }
+
+                current_clip_node_id = clip_node.parent;
+            }
+
+            if !is_valid {
+                continue;
+            }
+
+            // Don't hit items with backface-visibility:hidden if they are facing the back.
+            if !item.is_backface_visible && scroll_node.world_content_transform.is_backface_visible() {
+                continue;
+            }
+
+            result.items.push(HitTestResultItem {
+                pipeline: pipeline_id,
+                tag: item.tag,
+                animation_id: item.animation_id,
+            });
+        }
+
+        result.items.dedup(); // drops consecutive duplicate entries only
+        result
+    }
+}
+
+#[derive(MallocSizeOf)]
+pub struct HitTest { // parameters of a single hit test query
+    point: WorldPoint, // the point to test, in world space
+}
+
+impl HitTest {
+    /// Creates a hit test for the given point.
+    ///
+    /// The point is expressed in world space, as its `WorldPoint`
+    /// type indicates.
+    pub fn new(point: WorldPoint) -> HitTest {
+        Self { point }
+    }
+}
diff --git a/gfx/wr/webrender/src/image_source.rs b/gfx/wr/webrender/src/image_source.rs
new file mode 100644
index 0000000000..c2223ab6f1
--- /dev/null
+++ b/gfx/wr/webrender/src/image_source.rs
@@ -0,0 +1,93 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! This module contains the logic to obtain a primitive's source texture and uv rect.
+//!
+//! Currently this is a somewhat involved process because the code grew into having ad-hoc
+//! ways to store this information depending on how the image data is produced. The goal
+//! is for any textured primitive to be able to read from any source (texture cache, render
+//! tasks, etc.) without primitive-specific code.
+
+use crate::api::ExternalImageType;
+use crate::api::units::*;
+use crate::gpu_cache::GpuCache;
+use crate::prim_store::DeferredResolve;
+use crate::renderer::BLOCKS_PER_UV_RECT;
+use crate::render_task_cache::RenderTaskCacheEntryHandle;
+use crate::resource_cache::{ResourceCache, ImageRequest, CacheItem};
+use crate::internal_types::{TextureSource, DeferredResolveIndex};
+
+/// Resolve a resource cache's image request into a texture cache item.
+///
+/// Three outcomes are possible:
+/// - the image key has no properties in the resource cache: an invalid item is returned;
+/// - the image is an external texture: a `DeferredResolve` is appended to
+///   `deferred_resolves` and the returned item refers to it by index;
+/// - otherwise the item comes from `ResourceCache::get_cached_image`, falling back to an
+///   invalid item when there is no usable texture entry.
+///
+/// # Panics
+///
+/// Panics if the external image type is `ExternalImageType::Buffer`; those are expected
+/// to be handled by the resource cache and go through the non-external case.
+pub fn resolve_image(
+    request: ImageRequest,
+    resource_cache: &ResourceCache,
+    gpu_cache: &mut GpuCache,
+    deferred_resolves: &mut Vec<DeferredResolve>,
+) -> CacheItem {
+    // Unknown image key: there is nothing to resolve.
+    let image_properties = match resource_cache.get_image_properties(request.key) {
+        Some(properties) => properties,
+        None => return CacheItem::invalid(),
+    };
+
+    // Check if this is an external image that needs to be resolved by the render thread.
+    let external_image = match image_properties.external_image {
+        Some(external_image) => external_image,
+        None => {
+            // No usable texture entry for the image key: just return an invalid texture.
+            return resource_cache
+                .get_cached_image(request)
+                .unwrap_or_else(|_| CacheItem::invalid());
+        }
+    };
+
+    // This is an external texture - we will add it to the deferred resolves list to be
+    // patched by the render thread...
+    let uv_rect_handle = gpu_cache.push_deferred_per_frame_blocks(BLOCKS_PER_UV_RECT);
+    let resolve_index = DeferredResolveIndex(deferred_resolves.len() as u32);
+
+    let buffer_kind = match external_image.image_type {
+        ExternalImageType::TextureHandle(target) => target,
+        // The ExternalImageType::Buffer should be handled by resource_cache.
+        // It should go through the non-external case.
+        ExternalImageType::Buffer => panic!("Unexpected non-texture handle type"),
+    };
+
+    let cache_item = CacheItem {
+        texture_id: TextureSource::External(resolve_index, buffer_kind),
+        uv_rect_handle,
+        uv_rect: DeviceIntRect::from_size(image_properties.descriptor.size),
+        user_data: [0.0; 4],
+    };
+
+    deferred_resolves.push(DeferredResolve {
+        image_properties,
+        address: gpu_cache.get_address(&uv_rect_handle),
+        rendering: request.rendering,
+    });
+
+    cache_item
+}
+
+/// Look up the texture cache item backing a cached render task.
+pub fn resolve_cached_render_task(
+    handle: &RenderTaskCacheEntryHandle,
+    resource_cache: &ResourceCache,
+) -> CacheItem {
+    // `handle` is already a reference, so it is passed through as-is.
+    let entry = resource_cache.get_cached_render_task(handle);
+    resource_cache.get_texture_cache_item(&entry.handle)
+}
diff --git a/gfx/wr/webrender/src/image_tiling.rs b/gfx/wr/webrender/src/image_tiling.rs
new file mode 100644
index 0000000000..e2fb3b05b9
--- /dev/null
+++ b/gfx/wr/webrender/src/image_tiling.rs
@@ -0,0 +1,823 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use crate::api::TileSize;
+use crate::api::units::*;
+use crate::segment::EdgeAaSegmentMask;
+use euclid::{point2, size2};
+use std::i32;
+use std::ops::Range;
+
+/// Collapse a repeated primitive into a non-repeated one where possible.
+///
+/// On each axis, if the stride (stretch size plus spacing, read before any spacing
+/// is reset) is at least the primitive's extent, only one repetition is within the
+/// primitive rect: the spacing is zeroed and the rect is clamped to one stretch size.
+pub fn simplify_repeated_primitive(
+    stretch_size: &LayoutSize,
+    tile_spacing: &mut LayoutSize,
+    prim_rect: &mut LayoutRect,
+) {
+    let stride_x = stretch_size.width + tile_spacing.width;
+    let stride_y = stretch_size.height + tile_spacing.height;
+    if stride_x >= prim_rect.width() {
+        tile_spacing.width = 0.0;
+        prim_rect.max.x = (prim_rect.min.x + stretch_size.width).min(prim_rect.max.x);
+    }
+    if stride_y >= prim_rect.height() {
+        tile_spacing.height = 0.0;
+        prim_rect.max.y = (prim_rect.min.y + stretch_size.height).min(prim_rect.max.y);
+    }
+}
+
+/// One repetition of a repeated primitive.
+pub struct Repetition {
+    /// Origin of this repetition.
+    pub origin: LayoutPoint,
+    /// Edges of the repetition grid this repetition lies on.
+    pub edge_flags: EdgeAaSegmentMask,
+}
+
+/// Iterates over the repetitions of a grid row by row, left to right.
+pub struct RepetitionIterator {
+    current_x: i32,
+    x_count: i32,
+    current_y: i32,
+    y_count: i32,
+    row_flags: EdgeAaSegmentMask,
+    current_origin: LayoutPoint,
+    initial_origin: LayoutPoint,
+    stride: LayoutSize,
+}
+
+impl RepetitionIterator {
+    /// Total number of repetitions (rows times columns), computed in `usize` so
+    /// that the product of two large `i32` counts cannot overflow.
+    pub fn num_repetitions(&self) -> usize {
+        (self.y_count.max(0) as usize).saturating_mul(self.x_count.max(0) as usize)
+    }
+}
+
+impl Iterator for RepetitionIterator {
+    type Item = Repetition;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        // End of the current row: move to the next one, or stop after the last.
+        if self.current_x == self.x_count {
+            self.current_y += 1;
+            if self.current_y >= self.y_count {
+                return None;
+            }
+            self.current_x = 0;
+            self.current_origin.x = self.initial_origin.x;
+            self.current_origin.y += self.stride.height;
+            self.row_flags = if self.current_y == self.y_count - 1 {
+                EdgeAaSegmentMask::BOTTOM
+            } else {
+                EdgeAaSegmentMask::empty()
+            };
+        }
+
+        let mut edge_flags = self.row_flags;
+        if self.current_x == 0 {
+            edge_flags |= EdgeAaSegmentMask::LEFT;
+        }
+        if self.current_x == self.x_count - 1 {
+            edge_flags |= EdgeAaSegmentMask::RIGHT;
+        }
+
+        let origin = self.current_origin;
+        self.current_origin.x += self.stride.width;
+        self.current_x += 1;
+        Some(Repetition { origin, edge_flags })
+    }
+}
+
+/// Build an iterator over the repetitions of a primitive that cover its visible part.
+///
+/// Repetitions are placed every `stride` starting at `prim_rect.min`; iteration starts
+/// at the last repetition origin at or before the visible area's min corner. The
+/// iterator is empty if `prim_rect` and `visible_rect` do not intersect.
+///
+/// Panics if they intersect and `stride` is not strictly positive on both axes.
+pub fn repetitions(
+    prim_rect: &LayoutRect,
+    visible_rect: &LayoutRect,
+    stride: LayoutSize,
+) -> RepetitionIterator {
+    let visible_rect = match prim_rect.intersection(visible_rect) {
+        Some(rect) => rect,
+        None => {
+            return RepetitionIterator {
+                current_origin: LayoutPoint::zero(),
+                initial_origin: LayoutPoint::zero(),
+                current_x: 0,
+                current_y: 0,
+                x_count: 0,
+                y_count: 0,
+                stride,
+                row_flags: EdgeAaSegmentMask::empty(),
+            }
+        }
+    };
+
+    assert!(stride.width > 0.0);
+    assert!(stride.height > 0.0);
+
+    // Whole strides between the primitive's min and the visible area's min, per axis.
+    let whole_strides = |prim_min: f32, visible_min: f32, step: f32| {
+        if visible_min > prim_min { f32::floor((visible_min - prim_min) / step) } else { 0.0 }
+    };
+    let nx = whole_strides(prim_rect.min.x, visible_rect.min.x, stride.width);
+    let ny = whole_strides(prim_rect.min.y, visible_rect.min.y, stride.height);
+
+    let x0 = prim_rect.min.x + nx * stride.width;
+    let y0 = prim_rect.min.y + ny * stride.height;
+
+    let x_count = f32::ceil((visible_rect.max.x - x0) / stride.width) as i32;
+    let y_count = f32::ceil((visible_rect.max.y - y0) / stride.height) as i32;
+
+    // The first row is always the top row; it is also the bottom one if it is alone.
+    let mut row_flags = EdgeAaSegmentMask::TOP;
+    if y_count == 1 {
+        row_flags |= EdgeAaSegmentMask::BOTTOM;
+    }
+
+    RepetitionIterator {
+        current_origin: LayoutPoint::new(x0, y0),
+        initial_origin: LayoutPoint::new(x0, y0),
+        current_x: 0,
+        current_y: 0,
+        x_count,
+        y_count,
+        row_flags,
+        stride,
+    }
+}
+
+/// A tile of a tiled image, as produced by `TileIterator`.
+#[derive(Debug)]
+pub struct Tile {
+    /// Rect of the tile in layout space.
+    pub rect: LayoutRect,
+    /// Offset of the tile, in number of tiles.
+    pub offset: TileOffset,
+    /// Edges of the image this tile lies on.
+    pub edge_flags: EdgeAaSegmentMask,
+}
+
+/// Tiling parameters along a single axis.
+#[derive(Debug)]
+pub struct TileIteratorExtent {
+    /// Range of visible tiles to iterate over in number of tiles.
+    tile_range: Range<i32>,
+    /// Range of tiles of the full image including tiles that are culled out.
+    image_tiles: Range<i32>,
+    /// Size of the first tile in layout space.
+    first_tile_layout_size: f32,
+    /// Size of the last tile in layout space.
+    last_tile_layout_size: f32,
+    /// Position of blob point (0, 0) in layout space.
+    layout_tiling_origin: f32,
+    /// Position of the top-left corner of the primitive rect in layout space.
+    layout_prim_start: f32,
+}
+
+/// Iterates over the visible tiles of an image, row by row.
+#[derive(Debug)]
+pub struct TileIterator {
+    current_tile: TileOffset,
+    x: TileIteratorExtent,
+    y: TileIteratorExtent,
+    regular_tile_size: LayoutSize,
+}
+
+impl Iterator for TileIterator {
+    type Item = Tile;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        // If we reach the end of a row, reset to the beginning of the next row.
+        if self.current_tile.x >= self.x.tile_range.end {
+            self.current_tile.y += 1;
+            self.current_tile.x = self.x.tile_range.start;
+        }
+
+        // Stop iterating if we reach the last tile. We may start here if there
+        // were no tiles to iterate over.
+        let row_is_empty = self.current_tile.x >= self.x.tile_range.end;
+        let past_last_row = self.current_tile.y >= self.y.tile_range.end;
+        if row_is_empty || past_last_row {
+            return None;
+        }
+
+        let offset = self.current_tile;
+        let (x, y) = (&self.x, &self.y);
+        // Start from a regular tile on the tiling grid, then fix up boundary tiles.
+        let mut rect = LayoutRect::from_origin_and_size(
+            LayoutPoint::new(
+                x.layout_tiling_origin + offset.x as f32 * self.regular_tile_size.width,
+                y.layout_tiling_origin + offset.y as f32 * self.regular_tile_size.height,
+            ),
+            self.regular_tile_size,
+        );
+        let mut edge_flags = EdgeAaSegmentMask::empty();
+
+        if offset.x == x.image_tiles.start {
+            edge_flags |= EdgeAaSegmentMask::LEFT;
+            rect.min.x = x.layout_prim_start;
+            // TODO(nical) we may not need to do this.
+            rect.max.x = rect.min.x + x.first_tile_layout_size;
+        }
+        if offset.x == x.image_tiles.end - 1 {
+            edge_flags |= EdgeAaSegmentMask::RIGHT;
+            rect.max.x = rect.min.x + x.last_tile_layout_size;
+        }
+        if offset.y == y.image_tiles.start {
+            edge_flags |= EdgeAaSegmentMask::TOP;
+            rect.min.y = y.layout_prim_start;
+            rect.max.y = rect.min.y + y.first_tile_layout_size;
+        }
+        if offset.y == y.image_tiles.end - 1 {
+            edge_flags |= EdgeAaSegmentMask::BOTTOM;
+            rect.max.y = rect.min.y + y.last_tile_layout_size;
+        }
+
+        assert!(offset.y < y.tile_range.end);
+        self.current_tile.x += 1;
+        Some(Tile { rect, offset, edge_flags })
+    }
+}
+
+/// Build an iterator over the visible tiles of a tiled image primitive.
+///
+/// `prim_rect` and `visible_rect` are in layout space; `image_rect` and
+/// `device_tile_size` (the regular tile size) are in device pixels. Tiles are
+/// produced for the part of the primitive inside `visible_rect` only, and the
+/// iterator is empty if the two layout rects do not intersect.
+///
+/// The comment at the top of the body explains how boundary tiles are handled.
+pub fn tiles(
+    prim_rect: &LayoutRect,
+    visible_rect: &LayoutRect,
+    image_rect: &DeviceIntRect,
+    device_tile_size: i32,
+) -> TileIterator {
+    // The image resource is tiled. We have to generate an image primitive
+    // for each tile.
+    // We need to do this because the image is broken up into smaller tiles in the texture
+    // cache and the image shader is not able to work with this type of sparse representation.
+
+    // The tiling logic works as follows:
+    //
+    //  +-#################-+  -+
+    //  | #//|    |    |//# |   | image size
+    //  | #//|    |    |//# |   |
+    //  +-#--+----+----+--#-+   |  -+
+    //  | #//|    |    |//# |   |   | regular tile size
+    //  | #//|    |    |//# |   |   |
+    //  +-#--+----+----+--#-+   |  -+-+
+    //  | #//|////|////|//# |   |     | "leftover" height
+    //  | ################# |  -+  ---+
+    //  +----+----+----+----+
+    //
+    // In the ascii diagram above, a large image is split into tiles of almost regular size.
+    // The tiles on the edges (hatched in the diagram) can be smaller than the regular tiles
+    // and are handled separately in the code (we'll call them boundary tiles).
+    //
+    // Each generated segment corresponds to a tile in the texture cache, with the
+    // assumption that the boundary tiles are sized to fit their own irregular size in the
+    // texture cache.
+    //
+    // Because we can have very large virtual images we iterate over the visible portion of
+    // the image in layer space instead of iterating over all device tiles.
+
+    let visible_rect = match prim_rect.intersection(visible_rect) {
+        Some(rect) => rect,
+        None => {
+            // Nothing is visible: both axes get an empty tile range.
+            let empty_extent = |layout_prim_start: f32| TileIteratorExtent {
+                tile_range: 0..0,
+                image_tiles: 0..0,
+                first_tile_layout_size: 0.0,
+                last_tile_layout_size: 0.0,
+                layout_tiling_origin: 0.0,
+                layout_prim_start,
+            };
+            return TileIterator {
+                current_tile: TileOffset::zero(),
+                x: empty_extent(prim_rect.min.x),
+                y: empty_extent(prim_rect.min.y),
+                regular_tile_size: LayoutSize::zero(),
+            };
+        }
+    };
+
+    // Size of regular tiles in layout space.
+    let layout_tile_size = LayoutSize::new(
+        device_tile_size as f32 / image_rect.width() as f32 * prim_rect.width(),
+        device_tile_size as f32 / image_rect.height() as f32 * prim_rect.height(),
+    );
+
+    // The decomposition logic is exactly the same on each axis so we reduce
+    // this to a 1-dimensional problem in an attempt to make the code simpler.
+
+    let x_extent = tiles_1d(
+        layout_tile_size.width,
+        visible_rect.x_range(),
+        prim_rect.min.x,
+        image_rect.x_range(),
+        device_tile_size,
+    );
+
+    let y_extent = tiles_1d(
+        layout_tile_size.height,
+        visible_rect.y_range(),
+        prim_rect.min.y,
+        image_rect.y_range(),
+        device_tile_size,
+    );
+
+    TileIterator {
+        current_tile: point2(x_extent.tile_range.start, y_extent.tile_range.start),
+        x: x_extent,
+        y: y_extent,
+        regular_tile_size: layout_tile_size,
+    }
+}
+
+/// Decompose tiles along an arbitrary axis.
+///
+/// This does most of the heavy lifting needed for `tiles` but in a single dimension for
+/// the sake of simplicity since the problem is independent on the x and y axes.
+/// The `layout_*` parameters are in layout space, the `device_*` ones in device pixels.
+fn tiles_1d(
+    layout_tile_size: f32,
+    layout_visible_range: Range<f32>,
+    layout_prim_start: f32,
+    device_image_range: Range<i32>,
+    device_tile_size: i32,
+) -> TileIteratorExtent {
+    // A few sanity checks.
+    debug_assert!(layout_tile_size > 0.0);
+    debug_assert!(layout_visible_range.end >= layout_visible_range.start);
+    debug_assert!(device_image_range.end > device_image_range.start);
+    debug_assert!(device_tile_size > 0);
+
+    // Sizes of the boundary tiles in pixels.
+    let first_tile_device_size = first_tile_size_1d(&device_image_range, device_tile_size);
+    let last_tile_device_size = last_tile_size_1d(&device_image_range, device_tile_size);
+
+    // Converts a length in device pixels to layout space.
+    let to_layout = |device: i32| device as f32 * layout_tile_size / device_tile_size as f32;
+
+    // [start..end[ Range of tiles of this row/column (in number of tiles) without
+    // taking culling into account.
+    let image_tiles = tile_range_1d(&device_image_range, device_tile_size);
+
+    // Position in layout space of tile (0, 0): the primitive start minus the layout
+    // offset of tile (0, 0) with respect to the top-left corner of the display item.
+    let layout_tiling_origin = layout_prim_start - to_layout(device_image_range.start);
+
+    // [start..end[ Range of the visible tiles (because of culling).
+    let tile_coord = |layout_pos: f32| (layout_pos - layout_tiling_origin) / layout_tile_size;
+    let visible_tiles_start = f32::floor(tile_coord(layout_visible_range.start)) as i32;
+    let visible_tiles_end = f32::ceil(tile_coord(layout_visible_range.end)) as i32;
+
+    // Combine the above two to get the tiles in the image that are visible this frame,
+    // making sure the resulting range is never inverted.
+    let tiles_end = i32::min(image_tiles.end, visible_tiles_end);
+    let tiles_start = i32::min(i32::max(image_tiles.start, visible_tiles_start), tiles_end);
+
+    // The size in layout space of the boundary tiles. If a boundary tile was culled
+    // out, the new first/last tile is a regularly sized tile.
+    let first_tile_layout_size = if tiles_start == image_tiles.start {
+        to_layout(first_tile_device_size)
+    } else {
+        layout_tile_size
+    };
+    let last_tile_layout_size = if tiles_end == image_tiles.end {
+        to_layout(last_tile_device_size)
+    } else {
+        layout_tile_size
+    };
+
+    TileIteratorExtent {
+        tile_range: tiles_start..tiles_end,
+        image_tiles,
+        first_tile_layout_size,
+        last_tile_layout_size,
+        layout_tiling_origin,
+        layout_prim_start,
+    }
+}
+
+/// Compute the range of tiles (in number of tiles) that intersect the provided
+/// image range (in pixels) in an arbitrary dimension.
+///
+/// The start is rounded down and the end is rounded up, so that partially covered
+/// tiles on either side are included. For example with a tile size of 256 the pixel
+/// range `-1..257` maps to the tile range `-1..2`.
+///
+/// ```ignore
+///
+///         0
+///         :
+///   #-+---+---+---+---+---+--#
+///   # |   |   |   |   |   |  #
+///   #-+---+---+---+---+---+--#
+/// ^       :                   ^
+///
+///  +------------------------+  image_range
+///        +---+  regular_tile_size
+///
+/// ```
+fn tile_range_1d(
+    image_range: &Range<i32>,
+    regular_tile_size: i32,
+) -> Range<i32> {
+    // Integer division truncates towards zero so with negative values if the first/last
+    // tile isn't a full tile we can get offset by one which we account for here, by
+    // subtracting one from the start or adding one to the end as needed.
+    let start = image_range.start / regular_tile_size
+        - i32::from(image_range.start % regular_tile_size < 0);
+
+    let end = image_range.end / regular_tile_size
+        + i32::from(image_range.end % regular_tile_size > 0);
+
+    start..end
+}
+
+/// Size of the first boundary tile in pixels.
+///
+/// It can be smaller than the regular tile size if the image is not a multiple
+/// of the regular tile size.
+fn first_tile_size_1d(
+    image_range: &Range<i32>,
+    regular_tile_size: i32,
+) -> i32 {
+    let image_size = image_range.end - image_range.start;
+    // We have to account for how the % operation behaves for negative values.
+    let up_to_grid_line = match image_range.start % regular_tile_size {
+        //             .      #------+------+      .
+        //             .      #//////|      |      .
+        0 => regular_tile_size,
+        //   (zero) -> 0      .   #--+------+      .
+        //             .      .   #//|      |      .
+        // %(m):                  ~~>
+        m if m > 0 => regular_tile_size - m,
+        //             .      .   #--+------+      0 <- (zero)
+        //             .      .   #//|      |      .
+        // %(m):                  <~~
+        m => -m,
+    };
+    // The image itself may be smaller than that, when it is a single partial tile
+    // that is not aligned with the tile grid.
+    i32::min(up_to_grid_line, image_size)
+}
+
+/// Size of the last boundary tile in pixels.
+///
+/// It can be smaller than the regular tile size if the image is not a multiple
+/// of the regular tile size.
+fn last_tile_size_1d(
+    image_range: &Range<i32>,
+    regular_tile_size: i32,
+) -> i32 {
+    let image_size = image_range.end - image_range.start;
+    // We have to account for how the modulo operation behaves for negative values.
+    let from_grid_line = match image_range.end % regular_tile_size {
+        //                    +------+------#      .
+        // tiles:      .      |      |//////#      .
+        0 => regular_tile_size,
+        //             .      +------+--#   .      0 <- (zero)
+        //             .      |      |//#   .      .
+        // modulo (m):                   <~~
+        m if m < 0 => regular_tile_size + m,
+        //   (zero) -> 0      +------+--#   .      .
+        //             .      |      |//#   .      .
+        // modulo (m):                ~~>
+        m => m,
+    };
+    // The image itself may be smaller than that, when it is a single partial tile
+    // that is not aligned with the tile grid.
+    i32::min(from_grid_line, image_size)
+}
+
+/// Compute the device-space rect of the tile at offset `tile` in the image.
+pub fn compute_tile_rect(
+    image_rect: &DeviceIntRect,
+    regular_tile_size: TileSize,
+    tile: TileOffset,
+) -> DeviceIntRect {
+    let tile_size = regular_tile_size as i32;
+    let origin = point2(
+        compute_tile_origin_1d(image_rect.x_range(), tile_size, tile.x as i32),
+        compute_tile_origin_1d(image_rect.y_range(), tile_size, tile.y as i32),
+    );
+    let size = size2(
+        compute_tile_size_1d(image_rect.x_range(), tile_size, tile.x as i32),
+        compute_tile_size_1d(image_rect.y_range(), tile_size, tile.y as i32),
+    );
+    DeviceIntRect::from_origin_and_size(origin, size)
+}
+
+/// Origin of a tile along one axis: the first tile of the image starts at the image
+/// start, every other tile starts on the regular tile grid.
+fn compute_tile_origin_1d(
+    img_range: Range<i32>,
+    regular_tile_size: i32,
+    tile_offset: i32,
+) -> i32 {
+    match tile_range_1d(&img_range, regular_tile_size).start {
+        first if first == tile_offset => img_range.start,
+        _ => tile_offset * regular_tile_size,
+    }
+}
+
+/// Compute the width and height in pixels of a tile depending on its position in the image.
+pub fn compute_tile_size(
+    image_rect: &DeviceIntRect,
+    regular_tile_size: TileSize,
+    tile: TileOffset,
+) -> DeviceIntSize {
+    let tile_size = regular_tile_size as i32;
+    let width = compute_tile_size_1d(image_rect.x_range(), tile_size, tile.x as i32);
+    let height = compute_tile_size_1d(image_rect.y_range(), tile_size, tile.y as i32);
+
+    size2(width, height)
+}
+
+/// Size in pixels of a tile along one axis, given its offset in number of tiles.
+///
+/// Most tiles are going to have the regular size, except for tiles around the edges
+/// that are shrunk to fit the image data.
+///
+/// Panics if the computed size is not strictly positive.
+fn compute_tile_size_1d(
+    img_range: Range<i32>,
+    regular_tile_size: i32,
+    tile_offset: i32,
+) -> i32 {
+    let tile_range = tile_range_1d(&img_range, regular_tile_size);
+    let actual_size = match tile_offset {
+        offset if offset == tile_range.start => first_tile_size_1d(&img_range, regular_tile_size),
+        offset if offset == tile_range.end - 1 => last_tile_size_1d(&img_range, regular_tile_size),
+        _ => regular_tile_size,
+    };
+
+    assert!(actual_size > 0);
+    actual_size
+}
+
+/// Compute the range of tiles that intersect `visible_area`, in number of tiles.
+pub fn compute_tile_range(
+    visible_area: &DeviceIntRect,
+    tile_size: u16,
+) -> TileRange {
+    let tile_size = i32::from(tile_size);
+    let columns = tile_range_1d(&visible_area.x_range(), tile_size);
+    let rows = tile_range_1d(&visible_area.y_range(), tile_size);
+    TileRange {
+        min: point2(columns.start, rows.start),
+        max: point2(columns.end, rows.end),
+    }
+}
+
+/// Invoke `callback` with the offset of every tile in `range`, row by row.
+pub fn for_each_tile_in_range(
+    range: &TileRange,
+    mut callback: impl FnMut(TileOffset),
+) {
+    for y in range.y_range() {
+        // Visit the whole row at `y` before moving on to the next one.
+        range.x_range().for_each(|x| callback(point2(x, y)));
+    }
+}
+
+/// Compute which tiles remain valid after an image's bounds change.
+///
+/// Returns `None` if the bounds did not change, an empty (zero) range if the previous
+/// and new rects do not intersect, and otherwise the range of tiles within the
+/// intersection, excluding partial tiles along the edges that moved.
+pub fn compute_valid_tiles_if_bounds_change(
+    prev_rect: &DeviceIntRect,
+    new_rect: &DeviceIntRect,
+    tile_size: u16,
+) -> Option<TileRange> {
+    let intersection = match prev_rect.intersection(new_rect) {
+        Some(rect) => rect,
+        None => return Some(TileRange::zero()),
+    };
+
+    // Which edges differ between the previous and the new bounds.
+    let left = prev_rect.min.x != new_rect.min.x;
+    let right = prev_rect.max.x != new_rect.max.x;
+    let top = prev_rect.min.y != new_rect.min.y;
+    let bottom = prev_rect.max.y != new_rect.max.y;
+
+    if !(left || right || top || bottom) {
+        // Bounds have not changed.
+        return None;
+    }
+
+    // Express the intersection in tile units.
+    let inv_tile_size = 1.0 / (tile_size as f32);
+    let tiles = intersection.cast::<f32>().scale(inv_tile_size, inv_tile_size);
+
+    // Round inwards along edges that changed and outwards along edges that did not.
+    let round_min = |changed: bool, v: f32| if changed { f32::ceil(v) } else { f32::floor(v) };
+    let round_max = |changed: bool, v: f32| if changed { f32::floor(v) } else { f32::ceil(v) };
+
+    Some(TileRange {
+        min: point2(round_min(left, tiles.min.x) as i32, round_min(top, tiles.min.y) as i32),
+        max: point2(round_max(right, tiles.max.x) as i32, round_max(bottom, tiles.max.y) as i32),
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::collections::HashSet;
+    use euclid::rect;
+
+    // Runs `tiles` and checks some additional invariants before calling `callback`.
+    fn checked_for_each_tile(
+        prim_rect: &LayoutRect,
+        visible_rect: &LayoutRect,
+        device_image_rect: &DeviceIntRect,
+        device_tile_size: i32,
+        callback: &mut dyn FnMut(&LayoutRect, TileOffset, EdgeAaSegmentMask),
+    ) {
+        let mut coverage = LayoutRect::zero();
+        let mut seen_tiles = HashSet::new();
+        for tile in tiles(
+            prim_rect,
+            visible_rect,
+            device_image_rect,
+            device_tile_size,
+        ) {
+            // Make sure we don't get sent duplicate tiles.
+            assert!(!seen_tiles.contains(&tile.offset));
+            seen_tiles.insert(tile.offset);
+            coverage = coverage.union(&tile.rect);
+            assert!(prim_rect.contains_box(&tile.rect));
+            callback(&tile.rect, tile.offset, tile.edge_flags);
+        }
+        assert!(prim_rect.contains_box(&coverage));
+        assert!(coverage.contains_box(&visible_rect.intersection(&prim_rect).unwrap_or(LayoutRect::zero())));
+    }
+
+    #[test]
+    fn basic() {
+        let mut count = 0;
+        checked_for_each_tile(&rect(0., 0., 1000., 1000.).to_box2d(),
+            &rect(75., 75., 400., 400.).to_box2d(),
+            &rect(0, 0, 400, 400).to_box2d(),
+            36,
+            &mut |_tile_rect, _tile_offset, _tile_flags| {
+                count += 1;
+            },
+        );
+        assert_eq!(count, 36);
+    }
+
+    #[test]
+    fn empty() {
+        let mut count = 0;
+        checked_for_each_tile(&rect(0., 0., 74., 74.).to_box2d(),
+            &rect(75., 75., 400., 400.).to_box2d(),
+            &rect(0, 0, 400, 400).to_box2d(),
+            36,
+            &mut |_tile_rect, _tile_offset, _tile_flags| {
+              count += 1;
+            },
+        );
+        assert_eq!(count, 0);
+    }
+
+    #[test]
+    fn test_tiles_1d() {
+        // Exactly one full tile at positive offset.
+        let result = tiles_1d(64.0, -10000.0..10000.0, 0.0, 0..64, 64);
+        assert_eq!(result.tile_range.start, 0);
+        assert_eq!(result.tile_range.end, 1);
+        assert_eq!(result.first_tile_layout_size, 64.0);
+        assert_eq!(result.last_tile_layout_size, 64.0);
+
+        // Exactly one full tile at negative offset.
+        let result = tiles_1d(64.0, -10000.0..10000.0, -64.0, -64..0, 64);
+        assert_eq!(result.tile_range.start, -1);
+        assert_eq!(result.tile_range.end, 0);
+        assert_eq!(result.first_tile_layout_size, 64.0);
+        assert_eq!(result.last_tile_layout_size, 64.0);
+
+        // Two full tiles at negative and positive offsets.
+        let result = tiles_1d(64.0, -10000.0..10000.0, -64.0, -64..64, 64);
+        assert_eq!(result.tile_range.start, -1);
+        assert_eq!(result.tile_range.end, 1);
+        assert_eq!(result.first_tile_layout_size, 64.0);
+        assert_eq!(result.last_tile_layout_size, 64.0);
+
+        // One partial tile at positive offset, non-zero origin, culled out.
+        let result = tiles_1d(64.0, -100.0..10.0, 64.0, 64..310, 64);
+        assert_eq!(result.tile_range.start, result.tile_range.end);
+
+        // Two tiles at negative and positive offsets, one of which is culled out.
+        // The remaining tile is partially culled but it should still generate a full tile.
+        let result = tiles_1d(64.0, 10.0..10000.0, -64.0, -64..64, 64);
+        assert_eq!(result.tile_range.start, 0);
+        assert_eq!(result.tile_range.end, 1);
+        assert_eq!(result.first_tile_layout_size, 64.0);
+        assert_eq!(result.last_tile_layout_size, 64.0);
+        let result = tiles_1d(64.0, -10000.0..-10.0, -64.0, -64..64, 64);
+        assert_eq!(result.tile_range.start, -1);
+        assert_eq!(result.tile_range.end, 0);
+        assert_eq!(result.first_tile_layout_size, 64.0);
+        assert_eq!(result.last_tile_layout_size, 64.0);
+
+        // Stretched tile in layout space: device tile size is 64 and layout tile size is 128.
+        // So the resulting tile sizes in layout space should be multiplied by two.
+        let result = tiles_1d(128.0, -10000.0..10000.0, -64.0, -64..32, 64);
+        assert_eq!(result.tile_range.start, -1);
+        assert_eq!(result.tile_range.end, 1);
+        assert_eq!(result.first_tile_layout_size, 128.0);
+        assert_eq!(result.last_tile_layout_size, 64.0);
+
+        // The visible range spans two tile widths, but the image only has a single tile.
+        let result = tiles_1d(10.0, 0.0..20.0, 0.0, 0..64, 64);
+        assert_eq!(result.tile_range.start, 0);
+        assert_eq!(result.tile_range.end, 1);
+        assert_eq!(result.first_tile_layout_size, 10.0);
+        assert_eq!(result.last_tile_layout_size, 10.0);
+
+        // Same as above, with the primitive and visible range at negative layout offsets.
+        let result = tiles_1d(10.0, -20.0..0.0, -20.0, 0..64, 64);
+        assert_eq!(result.tile_range.start, 0);
+        assert_eq!(result.tile_range.end, 1);
+        assert_eq!(result.first_tile_layout_size, 10.0);
+        assert_eq!(result.last_tile_layout_size, 10.0);
+    }
+
+    #[test]
+    fn test_tile_range_1d() {
+        assert_eq!(tile_range_1d(&(0..256), 256), 0..1);
+        assert_eq!(tile_range_1d(&(0..257), 256), 0..2);
+        assert_eq!(tile_range_1d(&(-1..257), 256), -1..2);
+        assert_eq!(tile_range_1d(&(-256..256), 256), -1..1);
+        assert_eq!(tile_range_1d(&(-20..-10), 6), -4..-1);
+        assert_eq!(tile_range_1d(&(20..100), 256), 0..1);
+    }
+
+    #[test]
+    fn test_first_last_tile_size_1d() {
+        assert_eq!(first_tile_size_1d(&(0..10), 64), 10);
+        assert_eq!(first_tile_size_1d(&(-20..0), 64), 20);
+
+        assert_eq!(last_tile_size_1d(&(0..10), 64), 10);
+        assert_eq!(last_tile_size_1d(&(-20..0), 64), 20);
+    }
+
+    #[test]
+    fn doubly_partial_tiles() {
+        // In the following tests the image is a single tile and none of the sides of the tile
+        // align with the tile grid.
+        // This can only happen when we have a single non-aligned partial tile and no regular
+        // tiles.
+        assert_eq!(first_tile_size_1d(&(300..310), 64), 10);
+        assert_eq!(first_tile_size_1d(&(-20..-10), 64), 10);
+
+        assert_eq!(last_tile_size_1d(&(300..310), 64), 10);
+        assert_eq!(last_tile_size_1d(&(-20..-10), 64), 10);
+
+
+        // One partial tile at positive offset, non-zero origin.
+        let result = tiles_1d(64.0, -10000.0..10000.0, 0.0, 300..310, 64);
+        assert_eq!(result.tile_range.start, 4);
+        assert_eq!(result.tile_range.end, 5);
+        assert_eq!(result.first_tile_layout_size, 10.0);
+        assert_eq!(result.last_tile_layout_size, 10.0);
+    }
+
+    #[test]
+    fn smaller_than_tile_size_at_origin() {
+        let r = compute_tile_rect(
+            &rect(0, 0, 80, 80).to_box2d(),
+            256,
+            point2(0, 0),
+        );
+
+        assert_eq!(r, rect(0, 0, 80, 80).to_box2d());
+    }
+
+    #[test]
+    fn smaller_than_tile_size_with_offset() {
+        let r = compute_tile_rect(
+            &rect(20, 20, 80, 80).to_box2d(),
+            256,
+            point2(0, 0),
+        );
+
+        assert_eq!(r, rect(20, 20, 80, 80).to_box2d());
+    }
+}
diff --git a/gfx/wr/webrender/src/intern.rs b/gfx/wr/webrender/src/intern.rs
new file mode 100644
index 0000000000..d865a93eee
--- /dev/null
+++ b/gfx/wr/webrender/src/intern.rs
@@ -0,0 +1,469 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! The interning module provides a generic data structure
+//! interning container. It is similar in concept to a
+//! traditional string interning container, but it is
+//! specialized to the WR thread model.
+//!
+//! There is an Interner structure, that lives in the
+//! scene builder thread, and a DataStore structure
+//! that lives in the frame builder thread.
+//!
+//! Hashing, interning and handle creation is done by
+//! the interner structure during scene building.
+//!
+//! Delta changes for the interner are pushed during
+//! a transaction to the frame builder. The frame builder
+//! is then able to access the content of the interned
+//! handles quickly, via array indexing.
+//!
+//! Epoch tracking ensures that the garbage collection
+//! step which the interner uses to remove items is
+//! only invoked on items that the frame builder thread
+//! is no longer referencing.
+//!
+//! Items in the data store are stored in a traditional
+//! free-list structure, for content access and memory
+//! usage efficiency.
+//!
+//! The epoch is incremented each time a scene is
+//! built. The most recently used scene epoch is
+//! stored inside each handle. This is then used for
+//! cache invalidation.
+
+use crate::internal_types::FastHashMap;
+use malloc_size_of::MallocSizeOf;
+use std::fmt::Debug;
+use std::hash::Hash;
+use std::marker::PhantomData;
+use std::{ops, u64};
+use crate::util::VecHelper;
+use crate::profiler::TransactionProfile;
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Copy, Clone, Hash, MallocSizeOf, PartialEq, Eq)]
+struct Epoch(u32);
+
+/// A list of updates to be applied to the data store,
+/// provided by the interning structure.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+pub struct UpdateList<S> {
+    /// Items to insert.
+    pub insertions: Vec<Insertion<S>>,
+
+    /// Items to remove.
+    pub removals: Vec<Removal>,
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+pub struct Insertion<S> {
+    pub index: usize,
+    pub uid: ItemUid,
+    pub value: S,
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+pub struct Removal {
+    pub index: usize,
+    pub uid: ItemUid,
+}
+
+impl<S> UpdateList<S> {
+    fn new() -> UpdateList<S> {
+        UpdateList {
+            insertions: Vec::new(),
+            removals: Vec::new(),
+        }
+    }
+
+    fn take_and_preallocate(&mut self) -> UpdateList<S> {
+        UpdateList {
+            insertions: self.insertions.take_and_preallocate(),
+            removals: self.removals.take_and_preallocate(),
+        }
+    }
+}
+
+/// A globally unique identifier
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Copy, Clone, Eq, Hash, MallocSizeOf, PartialEq)]
+pub struct ItemUid {
+    uid: u64,
+}
+
+impl ItemUid {
+    // Intended for debug usage only
+    pub fn get_uid(&self) -> u64 {
+        self.uid
+    }
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Hash, MallocSizeOf, PartialEq, Eq)]
+pub struct Handle<I> {
+    index: u32,
+    epoch: Epoch,
+    _marker: PhantomData<I>,
+}
+
+impl<I> Clone for Handle<I> {
+    fn clone(&self) -> Self {
+        Handle {
+            index: self.index,
+            epoch: self.epoch,
+            _marker: self._marker,
+        }
+    }
+}
+
+impl<I> Copy for Handle<I> {}
+
+impl<I> Handle<I> {
+    pub fn uid(&self) -> ItemUid {
+        ItemUid {
+            // Pack the freelist index (high 32 bits) and the epoch the item was
+            // interned in (low 32 bits) into a stable unique id for the element.
+            uid: ((self.index as u64) << 32) | self.epoch.0 as u64
+        }
+    }
+
+    pub const INVALID: Self = Handle { index: !0, epoch: Epoch(!0), _marker: PhantomData };
+}
+
+pub trait InternDebug {
+    fn on_interned(&self, _uid: ItemUid) {}
+}
+
+/// The data store lives in the frame builder thread. It
+/// contains a free-list of items for fast access.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+pub struct DataStore<I: Internable> {
+    items: Vec<Option<I::StoreData>>,
+}
+
+impl<I: Internable> Default for DataStore<I> {
+    fn default() -> Self {
+        DataStore {
+            items: Vec::new(),
+        }
+    }
+}
+
+impl<I: Internable> DataStore<I> {
+    /// Apply the insertions, then the removals, sent by the scene builder
+    /// thread to this data store, and record the slot count in `profile`.
+    pub fn apply_updates(
+        &mut self,
+        update_list: UpdateList<I::Key>,
+        profile: &mut TransactionProfile,
+    ) {
+        for insertion in update_list.insertions {
+            self.items
+                .entry(insertion.index)
+                .set(Some(insertion.value.into())); // convert the key into its StoreData
+        }
+
+        for removal in update_list.removals {
+            self.items[removal.index] = None; // vacate the slot; the list never shrinks here
+        }
+
+        profile.set(I::PROFILE_COUNTER, self.items.len()); // counts all slots, vacated ones included
+    }
+}
+
+/// Retrieve an item from the store via handle
+impl<I: Internable> ops::Index<Handle<I>> for DataStore<I> {
+    type Output = I::StoreData;
+    fn index(&self, handle: Handle<I>) -> &I::StoreData {
+        self.items[handle.index as usize].as_ref().expect("Bad datastore lookup")
+    }
+}
+
+/// Retrieve a mutable item from the store via handle.
+/// Panics with "Bad datastore lookup" if the handle's slot is empty.
+impl<I: Internable> ops::IndexMut<Handle<I>> for DataStore<I> {
+    fn index_mut(&mut self, handle: Handle<I>) -> &mut I::StoreData {
+        self.items[handle.index as usize].as_mut().expect("Bad datastore lookup")
+    }
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+struct ItemDetails<I> {
+    /// Epoch in which this element was first interned
+    interned_epoch: Epoch,
+    /// Last epoch this element was referenced (used to GC intern items)
+    last_used_epoch: Epoch,
+    /// Index into the freelist this item is located
+    index: usize,
+    /// Type marker for create_handle method
+    _marker: PhantomData<I>,
+}
+
+impl<I> ItemDetails<I> {
+    /// Construct a stable handle value from the item details
+    fn create_handle(&self) -> Handle<I> {
+        Handle {
+            index: self.index as u32,
+            epoch: self.interned_epoch,
+            _marker: PhantomData,
+        }
+    }
+}
+
+/// The main interning data structure. This lives in the
+/// scene builder thread, and handles hashing and interning
+/// unique data structures. It also manages a free-list for
+/// the items in the data store, which is synchronized via
+/// an update list of additions / removals.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+pub struct Interner<I: Internable> {
+    /// Uniquely map an interning key to a handle
+    map: FastHashMap<I::Key, ItemDetails<I>>,
+    /// List of free slots in the data store for re-use.
+    free_list: Vec<usize>,
+    /// Pending list of updates that need to be applied.
+    update_list: UpdateList<I::Key>,
+    /// The current epoch for the interner.
+    current_epoch: Epoch,
+    /// The information associated with each interned
+    /// item that can be accessed by the interner.
+    local_data: Vec<I::InternData>,
+}
+
+impl<I: Internable> Default for Interner<I> {
+    fn default() -> Self {
+        Interner {
+            map: FastHashMap::default(),
+            free_list: Vec::new(),
+            update_list: UpdateList::new(),
+            current_epoch: Epoch(1),
+            local_data: Vec::new(),
+        }
+    }
+}
+
+impl<I: Internable> Interner<I> {
+    /// Intern a data structure, and return a handle to
+    /// that data. The handle can then be stored in the
+    /// frame builder, and safely accessed via the data
+    /// store that lives in the frame builder thread.
+    /// The provided closure is invoked to build the
+    /// local data about an interned structure if the
+    /// key isn't already interned.
+    pub fn intern<F>(
+        &mut self,
+        data: &I::Key,
+        fun: F,
+    ) -> Handle<I> where F: FnOnce() -> I::InternData {
+        // Use get_mut rather than entry here to avoid
+        // cloning the (sometimes large) key in the common
+        // case, where the data already exists in the interner.
+        if let Some(details) = self.map.get_mut(data) {
+            // Update the last referenced frame for this element
+            details.last_used_epoch = self.current_epoch;
+            // Return a stable handle value for dependency checking
+            return details.create_handle();
+        }
+
+        // We need to intern a new data item. First, find out
+        // if there is a spare slot in the free-list that we
+        // can use. Otherwise, append to the end of the list.
+        let index = match self.free_list.pop() {
+            Some(index) => index,
+            None => self.local_data.len(),
+        };
+
+        // Generate a handle for access via the data store.
+        let handle = Handle {
+            index: index as u32,
+            epoch: self.current_epoch,
+            _marker: PhantomData,
+        };
+
+        let uid = handle.uid();
+
+        // Add a pending update to insert the new data.
+        self.update_list.insertions.push(Insertion {
+            index,
+            uid,
+            value: data.clone(),
+        });
+
+        #[cfg(debug_assertions)]
+        data.on_interned(uid);
+
+        // Store this handle so the next time it is
+        // interned, it gets re-used.
+        self.map.insert(data.clone(), ItemDetails {
+            interned_epoch: self.current_epoch,
+            last_used_epoch: self.current_epoch,
+            index,
+            _marker: PhantomData,
+        });
+
+        // Create the local data for this item that is
+        // being interned.
+        self.local_data.entry(index).set(fun());
+
+        handle
+    }
+
+    /// Take the pending list of updates that need to be applied to the
+    /// data store, run a GC step that expires entries not referenced for
+    /// more than 10 epochs, and then advance to the next epoch.
+    pub fn end_frame_and_get_pending_updates(&mut self) -> UpdateList<I::Key> {
+        let mut update_list = self.update_list.take_and_preallocate();
+
+        let free_list = &mut self.free_list;
+        let current_epoch = self.current_epoch.0;
+
+        // First, run a GC step. Walk through the handles, and
+        // if we find any that haven't been used for some time,
+        // remove them. If this ever shows up in profiles, we
+        // can make the GC step partial (scan only part of the
+        // map each frame). It also might make sense in the
+        // future to adjust how long items remain in the cache
+        // based on the current size of the list.
+        self.map.retain(|_, details| {
+            if details.last_used_epoch.0 + 10 < current_epoch { // idle for more than 10 epochs
+                // To expire an item:
+                //  - Add index to the free-list for re-use.
+                //  - Add an update to the data store to invalidate this slot.
+                //  - Remove from the hash map.
+                free_list.push(details.index);
+                update_list.removals.push(Removal {
+                    index: details.index,
+                    uid: details.create_handle().uid(),
+                });
+                return false;
+            }
+
+            true
+        });
+
+        // Begin the next epoch
+        self.current_epoch = Epoch(self.current_epoch.0 + 1);
+
+        update_list
+    }
+}
+
+/// Retrieve the local data for an item from the interner via handle
+impl<I: Internable> ops::Index<Handle<I>> for Interner<I> {
+    type Output = I::InternData;
+    fn index(&self, handle: Handle<I>) -> &I::InternData {
+        &self.local_data[handle.index as usize]
+    }
+}
+
+/// Meta-macro to enumerate the various interner identifiers and types.
+///
+/// IMPORTANT: Keep this synchronized with the list in mozilla-central located at
+/// gfx/webrender_bindings/webrender_ffi.h
+///
+/// Note that this could be a lot less verbose if concat_idents! were stable. :-(
+#[macro_export]
+macro_rules! enumerate_interners {
+    ($macro_name: ident) => {
+        $macro_name! {
+            clip: ClipIntern,
+            prim: PrimitiveKeyKind,
+            normal_border: NormalBorderPrim,
+            image_border: ImageBorder,
+            image: Image,
+            yuv_image: YuvImage,
+            line_decoration: LineDecoration,
+            linear_grad: LinearGradient,
+            radial_grad: RadialGradient,
+            conic_grad: ConicGradient,
+            picture: Picture,
+            text_run: TextRun,
+            filter_data: FilterDataIntern,
+            backdrop_capture: BackdropCapture,
+            backdrop_render: BackdropRender,
+            polygon: PolygonIntern,
+        }
+    }
+}
+
+macro_rules! declare_interning_memory_report {
+    ( $( $name:ident: $ty:ident, )+ ) => {
+        ///
+        #[repr(C)]
+        #[derive(AddAssign, Clone, Debug, Default)]
+        pub struct InternerSubReport {
+            $(
+                ///
+                pub $name: usize,
+            )+
+        }
+    }
+}
+
+enumerate_interners!(declare_interning_memory_report);
+
+/// Memory report for interning-related data structures.
+/// cbindgen:derive-eq=false
+/// cbindgen:derive-ostream=false
+#[repr(C)]
+#[derive(Clone, Debug, Default)]
+pub struct InterningMemoryReport {
+    ///
+    pub interners: InternerSubReport,
+    ///
+    pub data_stores: InternerSubReport,
+}
+
+impl ::std::ops::AddAssign for InterningMemoryReport {
+    fn add_assign(&mut self, other: InterningMemoryReport) {
+        self.interners += other.interners;
+        self.data_stores += other.data_stores;
+    }
+}
+
+// The trick to make trait bounds configurable by features.
+mod dummy {
+    #[cfg(not(feature = "capture"))]
+    pub trait Serialize {}
+    #[cfg(not(feature = "capture"))]
+    impl<T> Serialize for T {}
+    #[cfg(not(feature = "replay"))]
+    pub trait Deserialize<'a> {}
+    #[cfg(not(feature = "replay"))]
+    impl<'a, T> Deserialize<'a> for T {}
+}
+#[cfg(feature = "capture")]
+use serde::Serialize as InternSerialize;
+#[cfg(not(feature = "capture"))]
+use self::dummy::Serialize as InternSerialize;
+#[cfg(feature = "replay")]
+use serde::Deserialize as InternDeserialize;
+#[cfg(not(feature = "replay"))]
+use self::dummy::Deserialize as InternDeserialize;
+
+/// Implement `Internable` for a type that wants to participate in interning.
+pub trait Internable: MallocSizeOf {
+    type Key: Eq + Hash + Clone + Debug + MallocSizeOf + InternDebug + InternSerialize + for<'a> InternDeserialize<'a>;
+    type StoreData: From<Self::Key> + MallocSizeOf + InternSerialize + for<'a> InternDeserialize<'a>;
+    type InternData: MallocSizeOf + InternSerialize + for<'a> InternDeserialize<'a>;
+
+    // Profile counter indices, see the list in profiler.rs
+    const PROFILE_COUNTER: usize;
+}
diff --git a/gfx/wr/webrender/src/internal_types.rs b/gfx/wr/webrender/src/internal_types.rs
new file mode 100644
index 0000000000..6dae6ce651
--- /dev/null
+++ b/gfx/wr/webrender/src/internal_types.rs
@@ -0,0 +1,758 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{ColorF, DocumentId, ExternalImageId, PrimitiveFlags, Parameter, RenderReasons};
+use api::{ImageFormat, NotificationRequest, Shadow, FilterOp, ImageBufferKind};
+use api::units::*;
+use api;
+use crate::render_api::DebugCommand;
+use crate::composite::NativeSurfaceOperation;
+use crate::device::TextureFilter;
+use crate::renderer::{FullFrameStats, PipelineInfo};
+use crate::gpu_cache::GpuCacheUpdateList;
+use crate::frame_builder::Frame;
+use crate::profiler::TransactionProfile;
+use crate::spatial_tree::SpatialNodeIndex;
+use crate::prim_store::PrimitiveInstanceIndex;
+use fxhash::FxHasher;
+use plane_split::BspSplitter;
+use smallvec::SmallVec;
+use std::{usize, i32};
+use std::collections::{HashMap, HashSet};
+use std::f32;
+use std::hash::BuildHasherDefault;
+use std::path::PathBuf;
+use std::sync::Arc;
+use std::time::{UNIX_EPOCH, SystemTime};
+
+#[cfg(any(feature = "capture", feature = "replay"))]
+use crate::capture::CaptureConfig;
+#[cfg(feature = "capture")]
+use crate::capture::ExternalCaptureImage;
+#[cfg(feature = "replay")]
+use crate::capture::PlainExternalImage;
+
+pub type FastHashMap<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher>>;
+pub type FastHashSet<K> = HashSet<K, BuildHasherDefault<FxHasher>>;
+
+#[derive(Copy, Clone, Hash, MallocSizeOf, PartialEq, PartialOrd, Debug, Eq, Ord)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct FrameId(usize);
+
+impl FrameId {
+    /// Returns a FrameId corresponding to the first frame.
+    ///
+    /// Note that we use 0 as the internal id here because the current code
+    /// increments the frame id at the beginning of the frame, rather than
+    /// at the end, and we want the first frame to be 1. It would probably
+    /// be sensible to move the advance() call to after frame-building, and
+    /// then make this method return FrameId(1).
+    pub fn first() -> Self {
+        FrameId(0)
+    }
+
+    /// Returns the backing usize for this FrameId.
+    pub fn as_usize(&self) -> usize {
+        self.0
+    }
+
+    /// Advances this FrameId to the next frame.
+    pub fn advance(&mut self) {
+        self.0 += 1;
+    }
+
+    /// An invalid sentinel FrameId, which will always compare less than
+    /// any valid FrameId.
+    pub const INVALID: FrameId = FrameId(0);
+}
+
+impl Default for FrameId {
+    fn default() -> Self {
+        FrameId::INVALID
+    }
+}
+
+impl ::std::ops::Add<usize> for FrameId {
+    type Output = Self;
+    fn add(self, other: usize) -> FrameId {
+        FrameId(self.0 + other)
+    }
+}
+
+impl ::std::ops::Sub<usize> for FrameId {
+    type Output = Self;
+    fn sub(self, other: usize) -> FrameId {
+        assert!(self.0 >= other, "Underflow subtracting FrameIds");
+        FrameId(self.0 - other)
+    }
+}
+
+/// Identifier to track a sequence of frames.
+///
+/// This is effectively a `FrameId` with a ridealong timestamp corresponding
+/// to when advance() was called, which allows for more nuanced cache eviction
+/// decisions. As such, we use the `FrameId` for equality and comparison, since
+/// we should never have two `FrameStamps` with the same id but different
+/// timestamps.
+#[derive(Copy, Clone, Debug, MallocSizeOf)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct FrameStamp {
+    id: FrameId,
+    time: SystemTime,
+    document_id: DocumentId,
+}
+
+impl Eq for FrameStamp {}
+
+impl PartialEq for FrameStamp {
+    fn eq(&self, other: &Self) -> bool {
+        // We should not be checking equality unless the documents are the same
+        debug_assert!(self.document_id == other.document_id);
+        self.id == other.id
+    }
+}
+
+impl PartialOrd for FrameStamp {
+    fn partial_cmp(&self, other: &Self) -> Option<::std::cmp::Ordering> {
+        self.id.partial_cmp(&other.id)
+    }
+}
+
+impl FrameStamp {
+    /// Gets the FrameId in this stamp.
+    pub fn frame_id(&self) -> FrameId {
+        self.id
+    }
+
+    /// Gets the time associated with this FrameStamp.
+    pub fn time(&self) -> SystemTime {
+        self.time
+    }
+
+    /// Gets the DocumentId in this stamp.
+    pub fn document_id(&self) -> DocumentId {
+        self.document_id
+    }
+
+    pub fn is_valid(&self) -> bool {
+        // If any fields are their default values, the whole struct should equal INVALID
+        debug_assert!((self.time != UNIX_EPOCH && self.id != FrameId(0) && self.document_id != DocumentId::INVALID) ||
+                      *self == Self::INVALID);
+        self.document_id != DocumentId::INVALID
+    }
+
+    /// Returns a FrameStamp corresponding to the first frame.
+    pub fn first(document_id: DocumentId) -> Self {
+        FrameStamp {
+            id: FrameId::first(),
+            time: SystemTime::now(),
+            document_id,
+        }
+    }
+
+    /// Advances to a new frame.
+    pub fn advance(&mut self) {
+        self.id.advance();
+        self.time = SystemTime::now();
+    }
+
+    /// An invalid sentinel FrameStamp.
+    pub const INVALID: FrameStamp = FrameStamp {
+        id: FrameId(0),
+        time: UNIX_EPOCH,
+        document_id: DocumentId::INVALID,
+    };
+}
+
+/// Custom field embedded inside the Polygon struct of the plane-split crate.
+#[derive(Copy, Clone, Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+pub struct PlaneSplitAnchor {
+    pub spatial_node_index: SpatialNodeIndex,
+    pub instance_index: PrimitiveInstanceIndex,
+}
+
+impl PlaneSplitAnchor {
+    pub fn new(
+        spatial_node_index: SpatialNodeIndex,
+        instance_index: PrimitiveInstanceIndex,
+    ) -> Self {
+        PlaneSplitAnchor {
+            spatial_node_index,
+            instance_index,
+        }
+    }
+}
+
+impl Default for PlaneSplitAnchor {
+    fn default() -> Self {
+        PlaneSplitAnchor {
+            spatial_node_index: SpatialNodeIndex::INVALID,
+            instance_index: PrimitiveInstanceIndex(!0),
+        }
+    }
+}
+
+/// A concrete plane splitter type used in WebRender.
+pub type PlaneSplitter = BspSplitter<PlaneSplitAnchor>;
+
+/// An index into the scene's list of plane splitters
+#[derive(Debug, Copy, Clone)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+pub struct PlaneSplitterIndex(pub usize);
+
+/// An arbitrary threshold below which we assume opacity is invisible.
+const OPACITY_EPSILON: f32 = 0.001;
+
+/// Equivalent to api::FilterOp with added internal information
+#[derive(Clone, Debug, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum Filter {
+    Identity,
+    Blur {
+        width: f32,
+        height: f32,
+        should_inflate: bool,
+    },
+    Brightness(f32),
+    Contrast(f32),
+    Grayscale(f32),
+    HueRotate(f32),
+    Invert(f32),
+    Opacity(api::PropertyBinding<f32>, f32),
+    Saturate(f32),
+    Sepia(f32),
+    DropShadows(SmallVec<[Shadow; 1]>),
+    ColorMatrix(Box<[f32; 20]>),
+    SrgbToLinear,
+    LinearToSrgb,
+    ComponentTransfer,
+    Flood(ColorF),
+}
+
+impl Filter {
+    pub fn is_visible(&self) -> bool {
+        match *self {
+            Filter::Identity |
+            Filter::Blur { .. } |
+            Filter::Brightness(..) |
+            Filter::Contrast(..) |
+            Filter::Grayscale(..) |
+            Filter::HueRotate(..) |
+            Filter::Invert(..) |
+            Filter::Saturate(..) |
+            Filter::Sepia(..) |
+            Filter::DropShadows(..) |
+            Filter::ColorMatrix(..) |
+            Filter::SrgbToLinear |
+            Filter::LinearToSrgb |
+            Filter::ComponentTransfer  => true,
+            Filter::Opacity(_, amount) => {
+                amount > OPACITY_EPSILON
+            },
+            Filter::Flood(color) => {
+                color.a > OPACITY_EPSILON
+            }
+        }
+    }
+
+    pub fn is_noop(&self) -> bool {
+        match *self {
+            Filter::Identity => false, // this is intentional
+            Filter::Blur { width, height, .. } => width == 0.0 && height == 0.0,
+            Filter::Brightness(amount) => amount == 1.0,
+            Filter::Contrast(amount) => amount == 1.0,
+            Filter::Grayscale(amount) => amount == 0.0,
+            Filter::HueRotate(amount) => amount == 0.0,
+            Filter::Invert(amount) => amount == 0.0,
+            Filter::Opacity(api::PropertyBinding::Value(amount), _) => amount >= 1.0,
+            Filter::Saturate(amount) => amount == 1.0,
+            Filter::Sepia(amount) => amount == 0.0,
+            Filter::DropShadows(ref shadows) => {
+                for shadow in shadows {
+                    if shadow.offset.x != 0.0 || shadow.offset.y != 0.0 || shadow.blur_radius != 0.0 {
+                        return false;
+                    }
+                }
+
+                true
+            }
+            Filter::ColorMatrix(ref matrix) => {
+                **matrix == [
+                    1.0, 0.0, 0.0, 0.0,
+                    0.0, 1.0, 0.0, 0.0,
+                    0.0, 0.0, 1.0, 0.0,
+                    0.0, 0.0, 0.0, 1.0,
+                    0.0, 0.0, 0.0, 0.0
+                ]
+            }
+            Filter::Opacity(api::PropertyBinding::Binding(..), _) |
+            Filter::SrgbToLinear |
+            Filter::LinearToSrgb |
+            Filter::ComponentTransfer |
+            Filter::Flood(..) => false,
+        }
+    }
+
+
+    pub fn as_int(&self) -> i32 {
+        // Must be kept in sync with brush_blend.glsl
+        match *self {
+            Filter::Identity => 0, // matches `Contrast(1)`
+            Filter::Contrast(..) => 0,
+            Filter::Grayscale(..) => 1,
+            Filter::HueRotate(..) => 2,
+            Filter::Invert(..) => 3,
+            Filter::Saturate(..) => 4,
+            Filter::Sepia(..) => 5,
+            Filter::Brightness(..) => 6,
+            Filter::ColorMatrix(..) => 7,
+            Filter::SrgbToLinear => 8,
+            Filter::LinearToSrgb => 9,
+            Filter::Flood(..) => 10,
+            Filter::ComponentTransfer => 11,
+            Filter::Blur { .. } => 12,
+            Filter::DropShadows(..) => 13,
+            Filter::Opacity(..) => 14,
+        }
+    }
+}
+
+impl From<FilterOp> for Filter {
+    fn from(op: FilterOp) -> Self {
+        match op {
+            FilterOp::Identity => Filter::Identity,
+            FilterOp::Blur(width, height) => Filter::Blur { width, height, should_inflate: true },
+            FilterOp::Brightness(b) => Filter::Brightness(b),
+            FilterOp::Contrast(c) => Filter::Contrast(c),
+            FilterOp::Grayscale(g) => Filter::Grayscale(g),
+            FilterOp::HueRotate(h) => Filter::HueRotate(h),
+            FilterOp::Invert(i) => Filter::Invert(i),
+            FilterOp::Opacity(binding, opacity) => Filter::Opacity(binding, opacity),
+            FilterOp::Saturate(s) => Filter::Saturate(s),
+            FilterOp::Sepia(s) => Filter::Sepia(s),
+            FilterOp::ColorMatrix(mat) => Filter::ColorMatrix(Box::new(mat)),
+            FilterOp::SrgbToLinear => Filter::SrgbToLinear,
+            FilterOp::LinearToSrgb => Filter::LinearToSrgb,
+            FilterOp::ComponentTransfer => Filter::ComponentTransfer,
+            FilterOp::DropShadow(shadow) => Filter::DropShadows(smallvec![shadow]),
+            FilterOp::Flood(color) => Filter::Flood(color),
+        }
+    }
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Clone, Copy, Debug, Eq, Hash, MallocSizeOf, PartialEq)]
+pub enum Swizzle {
+    Rgba,
+    Bgra,
+}
+
+impl Default for Swizzle {
+    fn default() -> Self {
+        Swizzle::Rgba
+    }
+}
+
+/// Swizzle settings of the texture cache.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Clone, Copy, Debug, Eq, Hash, MallocSizeOf, PartialEq)]
+pub struct SwizzleSettings {
+    /// Swizzle required on sampling a texture with BGRA8 format.
+    pub bgra8_sampling_swizzle: Swizzle,
+}
+
+/// An ID for a texture that is owned by the `texture_cache` module.
+///
+/// This can include atlases or standalone textures allocated via the texture
+/// cache (e.g.  if an image is too large to be added to an atlas). The texture
+/// cache manages the allocation and freeing of these IDs, and the rendering
+/// thread maintains a map from cache texture ID to native texture.
+///
+/// We never reuse IDs. Note that the ID is currently stored in a u32.
+#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct CacheTextureId(pub u32);
+
+impl CacheTextureId {
+    pub const INVALID: CacheTextureId = CacheTextureId(!0);
+}
+
+#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct DeferredResolveIndex(pub u32);
+
+/// Identifies the source of an input texture to a shader.
+#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum TextureSource {
+    /// Equivalent to `None`, allowing us to avoid using `Option`s everywhere.
+    Invalid,
+    /// An entry in the texture cache.
+    TextureCache(CacheTextureId, Swizzle),
+    /// An external image texture, managed by the embedding.
+    External(DeferredResolveIndex, ImageBufferKind),
+    /// Select a dummy 1x1 white texture. This can be used by image
+    /// shaders that want to draw a solid color.
+    Dummy,
+}
+
+impl TextureSource {
+    pub fn image_buffer_kind(&self) -> ImageBufferKind {
+        match *self {
+            TextureSource::TextureCache(..) => ImageBufferKind::Texture2D,
+
+            TextureSource::External(_, image_buffer_kind) => image_buffer_kind,
+
+            // Render tasks use texture arrays for now.
+            TextureSource::Dummy => ImageBufferKind::Texture2D,
+
+            TextureSource::Invalid => ImageBufferKind::Texture2D,
+        }
+    }
+
+    #[inline]
+    pub fn is_compatible(
+        &self,
+        other: &TextureSource,
+    ) -> bool {
+        *self == TextureSource::Invalid ||
+        *other == TextureSource::Invalid ||
+        self == other
+    }
+}
+
+#[derive(Copy, Clone, Debug, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct RenderTargetInfo {
+    pub has_depth: bool,
+}
+
+#[derive(Debug)]
+pub enum TextureUpdateSource {
+    External {
+        id: ExternalImageId,
+        channel_index: u8,
+    },
+    Bytes { data: Arc<Vec<u8>> },
+    /// Clears the target area, rather than uploading any pixels. Used when the
+    /// texture cache debug display is active.
+    DebugClear,
+}
+
+/// Command to allocate, reallocate, or free a texture for the texture cache.
+#[derive(Debug)]
+pub struct TextureCacheAllocation {
+    /// The virtual ID (i.e. distinct from device ID) of the texture.
+    pub id: CacheTextureId,
+    /// Details corresponding to the operation in question.
+    pub kind: TextureCacheAllocationKind,
+}
+
+/// A little bit of extra information to make memory reports more useful
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum TextureCacheCategory {
+    Atlas,
+    Standalone,
+    PictureTile,
+    RenderTarget,
+}
+
+/// Information used when allocating / reallocating.
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+pub struct TextureCacheAllocInfo {
+    pub width: i32,
+    pub height: i32,
+    pub format: ImageFormat,
+    pub filter: TextureFilter,
+    pub target: ImageBufferKind,
+    /// Indicates whether this corresponds to one of the shared texture caches.
+    pub is_shared_cache: bool,
+    /// If true, this texture requires a depth target.
+    pub has_depth: bool,
+    pub category: TextureCacheCategory
+}
+
+/// Sub-operation-specific information for allocation operations.
+#[derive(Debug)]
+pub enum TextureCacheAllocationKind {
+    /// Performs an initial texture allocation.
+    Alloc(TextureCacheAllocInfo),
+    /// Reallocates the texture without preserving its contents.
+    Reset(TextureCacheAllocInfo),
+    /// Frees the texture and the corresponding cache ID.
+    Free,
+}
+
+/// Command to update the contents of the texture cache.
+#[derive(Debug)]
+pub struct TextureCacheUpdate {
+    pub rect: DeviceIntRect,
+    pub stride: Option<i32>,
+    pub offset: i32,
+    pub format_override: Option<ImageFormat>,
+    pub source: TextureUpdateSource,
+}
+
+/// Command to copy a rectangle of pixels from one cache texture to another.
+#[derive(Debug)]
+pub struct TextureCacheCopy {
+    pub src_rect: DeviceIntRect,
+    pub dst_rect: DeviceIntRect,
+}
+
+/// Atomic set of commands to manipulate the texture cache, generated on the
+/// RenderBackend thread and executed on the Renderer thread.
+///
+/// The list of allocation operations is processed before the updates. This is
+/// important to allow coalescing of certain allocation operations.
+#[derive(Default)]
+pub struct TextureUpdateList {
+    /// Indicates that there was some kind of cleanup clear operation. Used for
+    /// sanity checks.
+    pub clears_shared_cache: bool,
+    /// Commands to alloc/realloc/free the textures. Processed before the updates.
+    pub allocations: Vec<TextureCacheAllocation>,
+    /// Commands to update the contents of the textures. Processed after the allocations.
+    pub updates: FastHashMap<CacheTextureId, Vec<TextureCacheUpdate>>,
+    /// Commands to move items within the cache, keyed by (source, destination)
+    /// texture. These are applied before everything else in the update list.
+    pub copies: FastHashMap<(CacheTextureId, CacheTextureId), Vec<TextureCacheCopy>>,
+}
+
+impl TextureUpdateList {
+    /// Mints a new, empty `TextureUpdateList`.
+    ///
+    /// Equivalent to `Default::default()`: no pending allocations, updates
+    /// or copies, and the `clears_shared_cache` flag unset.
+    pub fn new() -> Self {
+        // The derived `Default` yields `false` for the flag and empty
+        // collections, which is exactly the initial state wanted here.
+        Self::default()
+    }
+
+    /// Returns true if this is a no-op (no allocations, updates or copies to apply).
+    pub fn is_nop(&self) -> bool {
+        self.allocations.is_empty() && self.updates.is_empty() && self.copies.is_empty()
+    }
+
+    /// Sets the clears_shared_cache flag for renderer-side sanity checks.
+    #[inline]
+    pub fn note_clear(&mut self) {
+        self.clears_shared_cache = true;
+    }
+
+    /// Queues `update` for the texture identified by `id`.
+    #[inline]
+    pub fn push_update(&mut self, id: CacheTextureId, update: TextureCacheUpdate) {
+        // Look up the per-texture list, creating an empty one the first time
+        // this texture is seen in the current batch.
+        let pending = self.updates.entry(id).or_default();
+        pending.push(update);
+    }
+
+    /// Sends a command to the Renderer to clear the portion of the shared region
+    /// we just freed. Used when the texture cache debugger is enabled.
+    ///
+    /// The cleared area is the `width` x `height` rectangle at `origin` in the
+    /// texture identified by `id`.
+    #[cold]
+    pub fn push_debug_clear(
+        &mut self,
+        id: CacheTextureId,
+        origin: DeviceIntPoint,
+        width: i32,
+        height: i32,
+    ) {
+        let rect = DeviceIntRect::from_origin_and_size(origin, DeviceIntSize::new(width, height));
+        // A debug clear uploads no pixels, so the fields describing the
+        // uploaded data are left at `None` / zero.
+        let source = TextureUpdateSource::DebugClear;
+        let clear = TextureCacheUpdate { rect, stride: None, offset: 0, format_override: None, source };
+        self.push_update(id, clear);
+    }
+
+
+    /// Pushes an allocation operation onto the list.
+    ///
+    /// In debug builds, asserts that no allocation command for `id` is pending.
+    pub fn push_alloc(&mut self, id: CacheTextureId, info: TextureCacheAllocInfo) {
+        debug_assert!(self.allocations.iter().all(|pending| pending.id != id));
+        let kind = TextureCacheAllocationKind::Alloc(info);
+        self.allocations.push(TextureCacheAllocation { id, kind });
+    }
+
+    /// Pushes a reallocation operation onto the list, potentially coalescing
+    /// with previous operations.
+    ///
+    /// Panics if a free command for `id` is already pending in this list.
+    pub fn push_reset(&mut self, id: CacheTextureId, info: TextureCacheAllocInfo) {
+        self.debug_assert_coalesced(id);
+        // Drop any unapplied updates to the to-be-reset texture.
+        self.updates.remove(&id);
+        let existing = self.allocations.iter_mut().find(|pending| pending.id == id);
+        match existing {
+            // Coalesce into a previous alloc or realloc: keep its kind, replace its info.
+            Some(pending) => match &mut pending.kind {
+                TextureCacheAllocationKind::Alloc(current) |
+                TextureCacheAllocationKind::Reset(current) => *current = info,
+                TextureCacheAllocationKind::Free => panic!("Resetting freed texture"),
+            },
+            // Nothing pending for this texture, so queue a fresh reset.
+            None => self.allocations.push(TextureCacheAllocation {
+                id,
+                kind: TextureCacheAllocationKind::Reset(info),
+            }),
+        }
+    }
+
+    /// Pushes a free operation onto the list, potentially coalescing with
+    /// previous operations.
+    ///
+    /// Panics if a free command for `id` is already pending in this list.
+    pub fn push_free(&mut self, id: CacheTextureId) {
+        self.debug_assert_coalesced(id);
+
+        // Drop any unapplied updates to the to-be-freed texture.
+        self.updates.remove(&id);
+
+        // Take any pending allocation command for this texture out of the list.
+        let pending_index = self.allocations.iter().position(|pending| pending.id == id);
+        let pending_kind = pending_index.map(|index| self.allocations.remove(index).kind);
+
+        match pending_kind {
+            // Allocating and freeing in the same batch collapses to a no-op.
+            Some(TextureCacheAllocationKind::Alloc(..)) => return,
+            Some(TextureCacheAllocationKind::Free) => panic!("Double free"),
+            // A pending reset is replaced by the free; otherwise just queue it.
+            Some(TextureCacheAllocationKind::Reset(..)) | None => {}
+        }
+        let kind = TextureCacheAllocationKind::Free;
+        self.allocations.push(TextureCacheAllocation { id, kind });
+    }
+
+    /// Push a copy operation from one texture to another.
+    ///
+    /// The source and destination rectangles must have the same size (checked
+    /// in debug builds). Copies are applied before every other operation in
+    /// the texture update list.
+    pub fn push_copy(
+        &mut self,
+        src_id: CacheTextureId, src_rect: &DeviceIntRect,
+        dst_id: CacheTextureId, dst_rect: &DeviceIntRect,
+    ) {
+        debug_assert_eq!(src_rect.size(), dst_rect.size());
+        // Copies are grouped per (source, destination) texture pair.
+        let copy = TextureCacheCopy { src_rect: *src_rect, dst_rect: *dst_rect };
+        self.copies
+            .entry((src_id, dst_id))
+            .or_default()
+            .push(copy);
+    }
+
+    /// Debug-only check that at most one allocation command is pending for `id`.
+    fn debug_assert_coalesced(&self, id: CacheTextureId) {
+        // A second match for `id` means an earlier push failed to coalesce.
+        let duplicated = || self.allocations.iter().filter(|a| a.id == id).nth(1).is_some();
+        debug_assert!(!duplicated(), "Allocations should have been coalesced");
+    }
+}
+
+/// A list of updates built by the render backend that should be applied
+/// by the renderer thread.
+pub struct ResourceUpdateList {
+    /// List of OS native surface create / destroy operations to apply.
+    pub native_surface_updates: Vec<NativeSurfaceOperation>,
+
+    /// Atomic set of texture cache updates to apply.
+    pub texture_updates: TextureUpdateList,
+}
+
+impl ResourceUpdateList {
+    /// Returns true if there are no native surface or texture updates to apply.
+    pub fn is_nop(&self) -> bool {
+        self.native_surface_updates.is_empty() && self.texture_updates.is_nop()
+    }
+}
+
+/// Wraps a frame_builder::Frame, but conceptually could hold more information
+pub struct RenderedDocument {
+    pub frame: Frame,
+    pub is_new_scene: bool,
+    pub profile: TransactionProfile,
+    pub render_reasons: RenderReasons,
+    pub frame_stats: Option<FullFrameStats>
+}
+
+pub enum DebugOutput {
+    #[cfg(feature = "capture")]
+    SaveCapture(CaptureConfig, Vec<ExternalCaptureImage>),
+    #[cfg(feature = "replay")]
+    LoadCapture(CaptureConfig, Vec<PlainExternalImage>),
+}
+
+#[allow(dead_code)]
+pub enum ResultMsg {
+    DebugCommand(DebugCommand),
+    DebugOutput(DebugOutput),
+    RefreshShader(PathBuf),
+    UpdateGpuCache(GpuCacheUpdateList),
+    UpdateResources {
+        resource_updates: ResourceUpdateList,
+        memory_pressure: bool,
+    },
+    PublishPipelineInfo(PipelineInfo),
+    PublishDocument(
+        DocumentId,
+        RenderedDocument,
+        ResourceUpdateList,
+    ),
+    AppendNotificationRequests(Vec<NotificationRequest>),
+    SetParameter(Parameter),
+    ForceRedraw,
+}
+
+/// Primitive metadata we pass around in a bunch of places
+#[derive(Copy, Clone, Debug)]
+pub struct LayoutPrimitiveInfo {
+    /// NOTE: this is *ideally* redundant with the clip_rect
+    /// but that's an ongoing project, so for now it exists and is used :(
+    pub rect: LayoutRect,
+    pub clip_rect: LayoutRect,
+    pub flags: PrimitiveFlags,
+}
+
+impl LayoutPrimitiveInfo {
+    /// Builds primitive info from the given `rect` and `clip_rect`, leaving
+    /// `flags` at `PrimitiveFlags::default()`.
+    pub fn with_clip_rect(rect: LayoutRect, clip_rect: LayoutRect) -> Self {
+        let flags = PrimitiveFlags::default();
+
+        LayoutPrimitiveInfo { rect, clip_rect, flags }
+    }
+}
+
+// In some cases (e.g. printing) a pipeline is referenced multiple times by
+// a parent display list. This allows us to distinguish between them.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Copy, Clone, PartialEq, Debug, Eq, Hash)]
+pub struct PipelineInstanceId(u32);
+
+impl PipelineInstanceId {
+    /// Wraps the raw `id` value in a `PipelineInstanceId`, used to tell apart
+    /// multiple references to the same pipeline.
+    pub fn new(id: u32) -> Self { Self(id) }
+}
diff --git a/gfx/wr/webrender/src/lib.rs b/gfx/wr/webrender/src/lib.rs
new file mode 100644
index 0000000000..023cd7aad6
--- /dev/null
+++ b/gfx/wr/webrender/src/lib.rs
@@ -0,0 +1,194 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*!
+A GPU based renderer for the web.
+
+It serves as an experimental render backend for [Servo](https://servo.org/),
+but it can also be used as such in a standalone application.
+
+# External dependencies
+WebRender currently depends on [FreeType](https://www.freetype.org/)
+
+# Api Structure
+The main entry point to WebRender is the [`crate::Renderer`].
+
+By calling [`Renderer::new(...)`](crate::Renderer::new) you get a [`Renderer`], as well as
+a [`RenderApiSender`](api::RenderApiSender). Your [`Renderer`] is responsible for rendering
+the previously processed frames onto the screen.
+
+By calling [`yourRenderApiSender.create_api()`](api::RenderApiSender::create_api), you'll
+get a [`RenderApi`](api::RenderApi) instance, which is responsible for managing resources
+and documents. A worker thread is used internally to untie the workload from the application
+thread and therefore be able to make better use of multicore systems.
+
+## Frame
+
+What is referred to as a `frame`, is the current geometry on the screen.
+A new Frame is created by calling [`set_display_list()`](api::Transaction::set_display_list)
+on the [`RenderApi`](api::RenderApi). When the geometry is processed, the application will be
+informed via a [`RenderNotifier`](api::RenderNotifier), a callback which you pass to
+[`Renderer::new`].
+More information about [stacking contexts][stacking_contexts].
+
+[`set_display_list()`](api::Transaction::set_display_list) also needs to be supplied with
+[`BuiltDisplayList`](api::BuiltDisplayList)s, which are obtained by finalizing a
+[`DisplayListBuilder`](api::DisplayListBuilder). They are used to draw your geometry, and are
+not limited to trivial geometry: a display list can also store
+[`StackingContext`](api::StackingContext)s, which are nestable.
+
+[stacking_contexts]: https://developer.mozilla.org/en-US/docs/Web/CSS/CSS_Positioning/Understanding_z_index/The_stacking_context
+*/
+
+#![cfg_attr(feature = "cargo-clippy", allow(clippy::unreadable_literal, clippy::new_without_default, clippy::too_many_arguments))]
+
+
+// Cribbed from the |matches| crate, for simplicity.
+macro_rules! matches {
+    ($expression:expr, $($pattern:tt)+) => {
+        match $expression {
+            $($pattern)+ => true,
+            _ => false
+        }
+    }
+}
+
+#[macro_use]
+extern crate bitflags;
+#[macro_use]
+extern crate lazy_static;
+#[macro_use]
+extern crate log;
+#[macro_use]
+extern crate malloc_size_of_derive;
+#[cfg(feature = "serde")]
+#[macro_use]
+extern crate serde;
+#[macro_use]
+extern crate tracy_rs;
+#[macro_use]
+extern crate derive_more;
+extern crate malloc_size_of;
+extern crate svg_fmt;
+
+#[macro_use]
+mod profiler;
+mod telemetry;
+
+mod batch;
+mod border;
+mod box_shadow;
+#[cfg(any(feature = "capture", feature = "replay"))]
+mod capture;
+mod clip;
+mod space;
+mod spatial_tree;
+mod command_buffer;
+mod composite;
+mod compositor;
+mod debug_colors;
+mod debug_font_data;
+mod debug_item;
+mod device;
+mod ellipse;
+mod filterdata;
+mod frame_builder;
+mod freelist;
+mod glyph_cache;
+mod gpu_cache;
+mod gpu_types;
+mod hit_test;
+mod internal_types;
+mod lru_cache;
+mod picture;
+mod picture_graph;
+mod prepare;
+mod prim_store;
+mod print_tree;
+mod render_backend;
+mod render_target;
+mod render_task_graph;
+mod render_task_cache;
+mod render_task;
+mod renderer;
+mod resource_cache;
+mod scene;
+mod scene_builder_thread;
+mod scene_building;
+mod screen_capture;
+mod segment;
+mod spatial_node;
+mod surface;
+mod texture_pack;
+mod texture_cache;
+mod tile_cache;
+mod util;
+mod visibility;
+mod api_resources;
+mod image_tiling;
+mod image_source;
+mod rectangle_occlusion;
+mod picture_textures;
+
+///
+pub mod intern;
+///
+pub mod render_api;
+
+pub mod shader_source {
+    include!(concat!(env!("OUT_DIR"), "/shaders.rs"));
+}
+
+extern crate bincode;
+extern crate byteorder;
+pub extern crate euclid;
+extern crate fxhash;
+extern crate gleam;
+extern crate num_traits;
+extern crate plane_split;
+extern crate rayon;
+#[cfg(feature = "ron")]
+extern crate ron;
+#[macro_use]
+extern crate smallvec;
+extern crate time;
+#[cfg(all(feature = "capture", feature = "png"))]
+extern crate png;
+#[cfg(test)]
+extern crate rand;
+
+pub extern crate api;
+extern crate webrender_build;
+
+#[doc(hidden)]
+pub use crate::composite::{CompositorConfig, Compositor, CompositorCapabilities, CompositorSurfaceTransform};
+pub use crate::composite::{NativeSurfaceId, NativeTileId, NativeSurfaceInfo, PartialPresentCompositor};
+pub use crate::composite::{MappableCompositor, MappedTileInfo, SWGLCompositeSurfaceInfo, WindowVisibility};
+pub use crate::device::{UploadMethod, VertexUsageHint, get_gl_target, get_unoptimized_shader_source};
+pub use crate::device::{ProgramBinary, ProgramCache, ProgramCacheObserver, FormatDesc};
+pub use crate::device::Device;
+pub use crate::profiler::{ProfilerHooks, set_profiler_hooks};
+pub use crate::renderer::{
+    CpuProfile, DebugFlags, GpuProfile, GraphicsApi,
+    GraphicsApiInfo, PipelineInfo, Renderer, RendererError, RenderResults,
+    RendererStats, Shaders, SharedShaders, ShaderPrecacheFlags,
+    MAX_VERTEX_TEXTURE_WIDTH,
+};
+pub use crate::renderer::init::{WebRenderOptions, create_webrender_instance, AsyncPropertySampler, SceneBuilderHooks, ONE_TIME_USAGE_HINT};
+pub use crate::hit_test::SharedHitTester;
+pub use crate::internal_types::FastHashMap;
+pub use crate::screen_capture::{AsyncScreenshotHandle, RecordedFrameHandle};
+pub use crate::texture_cache::TextureCacheConfig;
+pub use api as webrender_api;
+pub use webrender_build::shader::ProgramSourceDigest;
+pub use crate::picture::{TileDescriptor, TileId, InvalidationReason};
+pub use crate::picture::{PrimitiveCompareResult, CompareHelperResult};
+pub use crate::picture::{TileNode, TileNodeKind, TileOffset};
+pub use crate::intern::ItemUid;
+pub use crate::render_api::*;
+pub use crate::tile_cache::{PictureCacheDebugInfo, DirtyTileDebugInfo, TileDebugInfo, SliceDebugInfo};
+pub use glyph_rasterizer;
+
+#[cfg(feature = "sw_compositor")]
+pub use crate::compositor::sw_compositor;
diff --git a/gfx/wr/webrender/src/lru_cache.rs b/gfx/wr/webrender/src/lru_cache.rs
new file mode 100644
index 0000000000..d53119b77d
--- /dev/null
+++ b/gfx/wr/webrender/src/lru_cache.rs
@@ -0,0 +1,675 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use crate::freelist::{FreeList, FreeListHandle, WeakFreeListHandle};
+use std::{mem, num};
+
+/*
+  This module implements a least recently used cache structure, which is
+  used by the texture cache to manage the lifetime of items inside the
+  texture cache. It has a few special pieces of functionality that the
+  texture cache requires, but should be usable as a general LRU cache
+  type if useful in other areas.
+
+  The cache is implemented with two types of backing freelists. These allow
+  random access to the underlying data, while being efficient in both
+  memory access and allocation patterns.
+
+  The "entries" freelist stores the elements being cached (for example, the
+  CacheEntry structure for the texture cache). These elements are stored
+  in arbitrary order, reusing empty slots in the freelist where possible.
+
+  The "lru_index" freelists store the LRU tracking information. Although the
+  tracking elements are stored in arbitrary order inside a freelist for
+  efficiency, they use next/prev links to represent a doubly-linked list,
+  kept sorted in order of recent use. The next link is also used to store
+  the current freelist within the array when the element is not occupied.
+
+  The LRU cache allows having multiple LRU "partitions". Every entry is tracked
+  by exactly one partition at any time; all partitions refer to entries in the
+  shared freelist. Entries can move between partitions, if replace_or_insert is
+  called with a new partition index for an existing handle.
+  The partitioning is used by the texture cache so that, for example, allocating
+  more glyph entries does not cause eviction of image entries (which go into
+  a different shared texture). If an existing handle's entry is reallocated with
+  a new size, it might need to move from a shared texture to a standalone
+  texture; in this case the handle will move to a different LRU partition.
+ */
+
+/// Stores the data supplied by the user to be cached, and an index
+/// into the LRU tracking freelist for this element.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+struct LRUCacheEntry<T> {
+    /// The LRU partition that tracks this entry.
+    partition_index: u8,
+
+    /// The location of the LRU tracking element for this cache entry in the
+    /// right LRU partition.
+    lru_index: ItemIndex,
+
+    /// The cached data provided by the caller for this element.
+    value: T,
+}
+
+/// The main public interface to the LRU cache
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+pub struct LRUCache<T, M> {
+    /// A free list of cache entries, each recording its partition and LRU tracking index
+    entries: FreeList<LRUCacheEntry<T>, M>,
+    /// The LRU tracking lists, one per partition, each allowing O(1) access to its oldest element
+    lru: Vec<LRUTracker<FreeListHandle<M>>>,
+}
+
+impl<T, M> LRUCache<T, M> {
+    /// Construct a new LRU cache with `lru_partition_count` empty partitions.
+    /// Panics if the count exceeds what a `u8` partition index can address.
+    pub fn new(lru_partition_count: usize) -> Self {
+        assert!(lru_partition_count <= u8::MAX as usize + 1);
+        let entries = FreeList::new();
+        let lru = std::iter::repeat_with(LRUTracker::new).take(lru_partition_count).collect();
+        LRUCache { entries, lru }
+    }
+
+    /// Insert a new element into the cache. Returns a weak handle for callers to
+    /// access the data, since the lifetime is managed by the LRU algorithm and it
+    /// may be evicted at any time.
+    pub fn push_new(
+        &mut self,
+        partition_index: u8,
+        value: T,
+    ) -> WeakFreeListHandle<M> {
+        // Inserting is a two-step process: the LRU tracking index is only known
+        // once the strong handle for the cached data exists, so the entry is
+        // first stored with placeholder tracking fields that are patched below.
+        let placeholder_index = ItemIndex(num::NonZeroU32::new(1).unwrap());
+        let strong_handle = self.entries.insert(LRUCacheEntry {
+            partition_index: 0,
+            lru_index: placeholder_index,
+            value,
+        });
+
+        // The caller only ever gets a weak handle; it must be derived before
+        // the strong handle is given away below.
+        let weak_handle = strong_handle.weak();
+
+        // Hand the strong handle over to the partition's LRU tracker, and record
+        // in the cache entry which partition and tracking slot now own it.
+        let entry = self.entries.get_mut(&strong_handle);
+        let lru_index = self.lru[partition_index as usize].push_new(strong_handle);
+        entry.partition_index = partition_index;
+        entry.lru_index = lru_index;
+
+        weak_handle
+    }
+
+    /// Get immutable access to the data at a given slot. Since this takes a weak
+    /// handle, it may have been evicted, so returns an Option.
+    pub fn get_opt(
+        &self,
+        handle: &WeakFreeListHandle<M>,
+    ) -> Option<&T> {
+        // A stale handle yields `None` here; otherwise expose only the cached
+        // value, not the LRU bookkeeping stored alongside it.
+        let entry = self.entries.get_opt(handle)?;
+
+        Some(&entry.value)
+    }
+
+    /// Get mutable access to the data at a given slot. Since this takes a weak
+    /// handle, it may have been evicted, so returns an Option.
+    pub fn get_opt_mut(
+        &mut self,
+        handle: &WeakFreeListHandle<M>,
+    ) -> Option<&mut T> {
+        // A stale handle yields `None` here; otherwise expose only the cached
+        // value, not the LRU bookkeeping stored alongside it.
+        let entry = self.entries.get_opt_mut(handle)?;
+
+        Some(&mut entry.value)
+    }
+
+    /// Return a reference to the oldest item in the given partition, keeping it
+    /// in the cache. If that partition is empty, this will return None.
+    pub fn peek_oldest(&self, partition_index: u8) -> Option<&T> {
+        let tracker = &self.lru[partition_index as usize];
+        // The front of the tracker holds the strong handle of the oldest entry.
+        let oldest = tracker.peek_front()?;
+        let entry = self.entries.get(oldest);
+
+        Some(&entry.value)
+    }
+
+    /// Remove the oldest item from the given partition. This is used to select
+    /// elements to be evicted. If that partition is empty, this will return None.
+    pub fn pop_oldest(
+        &mut self,
+        partition_index: u8,
+    ) -> Option<T> {
+        let tracker = &mut self.lru[partition_index as usize];
+        // Popping the tracker hands back the strong handle, which is what
+        // allows the entry itself to be freed.
+        let strong_handle = tracker.pop_front()?;
+        let entry = self.entries.free(strong_handle);
+        Some(entry.value)
+    }
+
+    /// This is a special case of `push_new`, which is a requirement for the texture
+    /// cache. Sometimes, we want to replace the content of an existing handle if it
+    /// exists, or insert a new element if the handle is invalid (for example, if an
+    /// image is resized and it moves to a new location in the texture atlas). This
+    /// method returns the old cache entry if it existed, so it can be freed by the caller.
+    #[must_use]
+    pub fn replace_or_insert(
+        &mut self,
+        handle: &mut WeakFreeListHandle<M>,
+        partition_index: u8,
+        data: T,
+    ) -> Option<T> {
+        if let Some(entry) = self.entries.get_opt_mut(handle) {
+            let current_partition = entry.partition_index;
+            if current_partition != partition_index {
+                // Move the tracking item (and the strong handle it owns) out of
+                // its current partition and into the requested one.
+                let strong_handle = self.lru[current_partition as usize].remove(entry.lru_index);
+                entry.lru_index = self.lru[partition_index as usize].push_new(strong_handle);
+                entry.partition_index = partition_index;
+            }
+            // Swap in the new data, handing the previous value back.
+            return Some(mem::replace(&mut entry.value, data));
+        }
+
+        // The handle is stale: insert a fresh entry and retarget the handle.
+        *handle = self.push_new(partition_index, data);
+        None
+    }
+
+    /// Manually evict a specific item. Returns the evicted data, or None if the
+    /// handle no longer refers to a live entry.
+    pub fn remove(&mut self, handle: &WeakFreeListHandle<M>) -> Option<T> {
+        let entry = self.entries.get_opt_mut(handle)?;
+        // Unlinking from the owning partition yields the strong handle needed
+        // to free the entry itself.
+        let strong_handle = self.lru[entry.partition_index as usize].remove(entry.lru_index);
+        Some(self.entries.free(strong_handle).value)
+    }
+
+    /// This is used by the calling code to signal that the element that this handle
+    /// references has been used on this frame. Internally, it updates the links in
+    /// the LRU tracking element to move this item to the end of the LRU list. Returns
+    /// the underlying data in case the client wants to mutate it.
+    pub fn touch(
+        &mut self,
+        handle: &WeakFreeListHandle<M>,
+    ) -> Option<&mut T> {
+        // Nothing to do for a handle whose entry was already evicted.
+        let entry = self.entries.get_opt_mut(handle)?;
+
+        // Re-link the entry's tracking item as the newest one in the partition
+        // that currently tracks it.
+        let tracker = &mut self.lru[entry.partition_index as usize];
+        tracker.mark_used(entry.lru_index);
+        Some(&mut entry.value)
+    }
+
+    /// Try to validate that the state of the cache is consistent, by checking
+    /// the LRU tracking list of every partition in turn.
+    #[cfg(test)]
+    fn validate(&self) {
+        // Each partition has its own tracking list, validated independently.
+        self.lru.iter().for_each(|tracker| tracker.validate());
+    }
+}
+
+/// Index of an LRU tracking element
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, MallocSizeOf)]
+struct ItemIndex(num::NonZeroU32);
+
+impl ItemIndex {
+    fn as_usize(&self) -> usize {
+        self.0.get() as usize
+    }
+}
+
+/// Stores a strong handle controlling the lifetime of the data in the LRU
+/// cache, and a doubly-linked list node specifying where in the current LRU
+/// order this element exists. These items are themselves backed by a freelist
+/// to minimize heap allocations and improve cache access patterns.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, MallocSizeOf)]
+struct Item<H> {
+    prev: Option<ItemIndex>,
+    next: Option<ItemIndex>,
+    handle: Option<H>,
+}
+
+/// Internal implementation of the LRU tracking list
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+struct LRUTracker<H> {
+    /// Current head of the list - this is the oldest item that will be evicted next.
+    head: Option<ItemIndex>,
+    /// Current tail of the list - this is the most recently used element.
+    tail: Option<ItemIndex>,
+    /// As tracking items are removed, they are stored in a freelist, to minimize heap allocations
+    free_list_head: Option<ItemIndex>,
+    /// The freelist that stores all the LRU tracking items
+    items: Vec<Item<H>>,
+}
+
+impl<H> LRUTracker<H> where H: std::fmt::Debug {
+    /// Construct a new LRU tracker, holding no items and with nothing on its
+    /// freelist.
+    fn new() -> Self {
+        // Slot 0 is a dummy entry that is never used. Reserving it ensures the
+        // NonZeroU32 property is respected, and we never create an ItemIndex(0).
+        let dummy = Item {
+            prev: None,
+            next: None,
+            handle: None,
+        };
+
+        // Both the LRU list and the freelist of recycled slots start out empty.
+        LRUTracker {
+            head: None,
+            tail: None,
+            free_list_head: None,
+            items: vec![dummy],
+        }
+    }
+
+    /// Internal function that takes an item index, and links it to the
+    /// end of the tracker list (makes it the newest item).
+    fn link_as_new_tail(
+        &mut self,
+        item_index: ItemIndex,
+    ) {
+        // Head and tail must be either both set or both unset; anything else
+        // is an invalid state, detected before the list is modified.
+        let old_tail = match (self.head, self.tail) {
+            (Some(..), Some(tail)) => Some(tail),
+            (None, None) => None,
+            (Some(..), None) | (None, Some(..)) => unreachable!(),
+        };
+
+        // The new item always ends the list, preceded by the old tail (if any).
+        let item = &mut self.items[item_index.as_usize()];
+        item.prev = old_tail;
+        item.next = None;
+
+        match old_tail {
+            // Non-empty list: append after the previous tail.
+            Some(tail) => self.items[tail.as_usize()].next = Some(item_index),
+            // Empty list: the item is the head as well as the tail.
+            None => self.head = Some(item_index),
+        }
+
+        // Either way, the item is now the most recently used one.
+        self.tail = Some(item_index);
+    }
+
+    /// Internal function that takes an LRU item index, and removes it from
+    /// the current doubly linked list. Used during removal of items, and also
+    /// when items are moved to the back of the list as they're touched.
+    ///
+    /// The `prev` / `next` links stored in the item itself are not cleared here.
+    fn unlink(
+        &mut self,
+        item_index: ItemIndex,
+    ) {
+        // Snapshot both neighbours before any link is rewritten, so that each
+        // side can be patched to point at the other.
+        let item = &self.items[item_index.as_usize()];
+        let (next, prev) = (item.next, item.prev);
+
+        // Patch the backward link of the successor, or move the tail back if
+        // the item was the newest one.
+        if let Some(successor) = next {
+            self.items[successor.as_usize()].prev = prev;
+        } else {
+            debug_assert_eq!(self.tail, Some(item_index));
+            self.tail = prev;
+        }
+
+        // Patch the forward link of the predecessor, or move the head forward
+        // if the item was the oldest one.
+        if let Some(predecessor) = prev {
+            self.items[predecessor.as_usize()].next = next;
+        } else {
+            debug_assert_eq!(self.head, Some(item_index));
+            self.head = next;
+        }
+    }
+
+    /// Push a new LRU tracking item on to the back of the list, marking
+    /// it as the most recent item.
+    ///
+    /// Returns the index of the tracking item that now owns `handle`.
+    fn push_new(
+        &mut self,
+        handle: H,
+    ) -> ItemIndex {
+        // See if there is a slot available in the current free list
+        let item_index = if let Some(index) = self.free_list_head {
+            // Reuse the slot at the head of the freelist, and advance the
+            // freelist to the slot it was chained to.
+            let item = &mut self.items[index.as_usize()];
+
+            assert!(item.handle.is_none());
+            item.handle = Some(handle);
+            self.free_list_head = item.next;
+
+            index
+        } else {
+            // No free slots available, push to the end of the array. The new
+            // slot's index is the array length before the push.
+            let index = ItemIndex(num::NonZeroU32::new(self.items.len() as u32).unwrap());
+
+            self.items.push(Item {
+                prev: None,
+                next: None,
+                handle: Some(handle),
+            });
+
+            index
+        };
+
+        // Now link this element into the LRU list
+        self.link_as_new_tail(item_index);
+
+        item_index
+    }
+
+    /// Returns a reference to the oldest element, or None if the list is empty.
+    fn peek_front(&self) -> Option<&H> {
+        Some(self.items[self.head?.as_usize()].handle.as_ref().unwrap())
+    }
+
+    /// Remove the oldest element from the front of the LRU list. Returns None
+    /// if the list is empty.
+    fn pop_front(
+        &mut self,
+    ) -> Option<H> {
+        // Detach the current head from the list, if there is one.
+        let item_index = match (self.head, self.tail) {
+            (Some(head), Some(tail)) => {
+                if head == tail {
+                    // Head and tail are the same - removing the only element
+                    self.head = None;
+                    self.tail = None;
+                } else {
+                    // Update the head of the list, popping the first element off
+                    let new_head = self.items[head.as_usize()].next.unwrap();
+                    self.head = Some(new_head);
+                    self.items[new_head.as_usize()].prev = None;
+                }
+
+                head
+            }
+            (None, None) => {
+                // List is empty
+                return None;
+            }
+            (Some(..), None) | (None, Some(..)) => {
+                // Invalid state
+                unreachable!();
+            }
+        };
+
+        // Add this item to the freelist for later use
+        let item = &mut self.items[item_index.as_usize()];
+        item.next = self.free_list_head;
+        self.free_list_head = Some(item_index);
+
+        // Return the handle to the user
+        Some(item.handle.take().unwrap())
+    }
+
+    /// Explicitly drop the entry at `index` from LRU tracking and return
+    /// the handle it held. Used when an element migrates from one LRU
+    /// partition to another.
+    ///
+    /// Panics if the slot at `index` does not currently hold a handle.
+    fn remove(&mut self, index: ItemIndex) -> H {
+        // Detach the entry from the LRU list
+        self.unlink(index);
+
+        let slot = &mut self.items[index.as_usize()];
+        let taken = slot.handle.take().unwrap();
+
+        // Chain the vacated slot onto the front of the free list
+        slot.next = self.free_list_head;
+        self.free_list_head = Some(index);
+        taken
+    }
+
+    /// Called to mark that the item at `index` was used on this frame. The
+    /// tracking item is unlinked, and then re-linked at the back of the list.
+    fn mark_used(
+        &mut self,
+        index: ItemIndex,
+    ) {
+        self.unlink(index); // take the item out of its current position
+        self.link_as_new_tail(index); // re-insert it as the tail
+    }
+
+    /// Check that the LRU list and the free list are internally consistent,
+    /// panicking on the first violated invariant. Only compiled for tests,
+    /// where it is called between cache operations.
+    #[cfg(test)]
+    fn validate(&self) {
+        use std::collections::HashSet;
+
+        // Head and tail must either both be set or both be empty
+        assert_eq!(self.head.is_some(), self.tail.is_some());
+
+        // If there is a head, the prev of the head must be none
+        if let Some(head) = self.head {
+            assert!(self.items[head.as_usize()].prev.is_none());
+        }
+
+        // If there is a tail, the next of the tail must be none
+        if let Some(tail) = self.tail {
+            assert!(self.items[tail.as_usize()].next.is_none());
+        }
+
+        // Collect all free and valid items, both in forwards and reverse order.
+        // Every walk asserts that `insert` reports a new index, so a cycle in
+        // any of the lists fails the assert instead of looping forever.
+        let mut free_items = Vec::new();
+        let mut free_items_set = HashSet::new();
+        let mut valid_items_front = Vec::new();
+        let mut valid_items_front_set = HashSet::new();
+        let mut valid_items_reverse = Vec::new();
+        let mut valid_items_reverse_set = HashSet::new();
+
+        let mut current = self.free_list_head;
+        while let Some(index) = current {
+            let item = &self.items[index.as_usize()];
+            // A slot on the free list must have given up its handle
+            assert!(item.handle.is_none());
+            free_items.push(index);
+            assert!(free_items_set.insert(index));
+            current = item.next;
+        }
+
+        current = self.head;
+        while let Some(index) = current {
+            let item = &self.items[index.as_usize()];
+            // A slot linked into the LRU list must hold a handle
+            assert!(item.handle.is_some());
+            valid_items_front.push(index);
+            assert!(valid_items_front_set.insert(index));
+            current = item.next;
+        }
+
+        current = self.tail;
+        while let Some(index) = current {
+            let item = &self.items[index.as_usize()];
+            valid_items_reverse.push(index);
+            assert!(valid_items_reverse_set.insert(index));
+            current = item.prev;
+        }
+
+        // Length of the array should equal free + valid items count + 1 (dummy entry)
+        assert_eq!(free_items.len() + valid_items_front.len() + 1, self.items.len());
+
+        // Should be same number of items whether iterating forwards or reverse
+        assert_eq!(valid_items_front.len(), valid_items_reverse.len());
+
+        // Ensure there are no items considered in the free list that are also in the valid list
+        assert!(free_items_set.is_disjoint(&valid_items_reverse_set));
+        assert!(free_items_set.is_disjoint(&valid_items_front_set));
+
+        // Ensure that the ordering is exactly the same, regardless of iteration direction
+        for (i0, i1) in valid_items_front.iter().zip(valid_items_reverse.iter().rev()) {
+            assert_eq!(i0, i1);
+        }
+    }
+}
+
+#[test]
+fn test_lru_tracker_push_peek() {
+    // Push elements, peek and ensure:
+    // - peek_oldest returns None before first element pushed
+    // - peek_oldest returns oldest element
+    // - subsequent calls to peek_oldest return same element (nothing was removed)
+    struct CacheMarker;
+    const NUM_ELEMENTS: usize = 50;
+
+    let mut cache: LRUCache<usize, CacheMarker> = LRUCache::new(1);
+    cache.validate();
+
+    assert_eq!(cache.peek_oldest(0), None);
+
+    for i in 0 .. NUM_ELEMENTS {
+        cache.push_new(0, i);
+    }
+    cache.validate();
+
+    assert_eq!(cache.peek_oldest(0), Some(&0));
+    assert_eq!(cache.peek_oldest(0), Some(&0));
+
+    cache.pop_oldest(0); // - once the oldest element is popped, peek_oldest returns the next one
+    assert_eq!(cache.peek_oldest(0), Some(&1));
+}
+
+#[test]
+fn test_lru_tracker_push_pop() {
+    // Push elements, pop them all off and ensure:
+    // - Returned in oldest-first order (the same order they were pushed in)
+    // - pop_oldest returns None after last element popped
+    struct CacheMarker;
+    const NUM_ELEMENTS: usize = 50;
+
+    let mut cache: LRUCache<usize, CacheMarker> = LRUCache::new(1);
+    cache.validate();
+
+    for i in 0 .. NUM_ELEMENTS {
+        cache.push_new(0, i);
+    }
+    cache.validate();
+
+    for i in 0 .. NUM_ELEMENTS {
+        assert_eq!(cache.pop_oldest(0), Some(i));
+    }
+    cache.validate();
+
+    assert_eq!(cache.pop_oldest(0), None);
+}
+
+#[test]
+fn test_lru_tracker_push_touch_pop() {
+    // Push elements, touch even handles, pop them all off and ensure:
+    // - Returned in correct order (untouched odd values first, then the touched even values)
+    // - pop_oldest returns None after last element popped
+    struct CacheMarker;
+    const NUM_ELEMENTS: usize = 50;
+
+    let mut cache: LRUCache<usize, CacheMarker> = LRUCache::new(1);
+    let mut handles = Vec::new();
+    cache.validate();
+
+    for i in 0 .. NUM_ELEMENTS {
+        handles.push(cache.push_new(0, i));
+    }
+    cache.validate();
+
+    for i in 0 .. NUM_ELEMENTS/2 {
+        cache.touch(&handles[i*2]); // touch the elements holding even values, in ascending order
+    }
+    cache.validate();
+
+    for i in 0 .. NUM_ELEMENTS/2 {
+        assert_eq!(cache.pop_oldest(0), Some(i*2+1)); // odd values were never touched, so they are expected first
+    }
+    cache.validate();
+    for i in 0 .. NUM_ELEMENTS/2 {
+        assert_eq!(cache.pop_oldest(0), Some(i*2)); // touched values follow, in the order they were touched
+    }
+    cache.validate();
+
+    assert_eq!(cache.pop_oldest(0), None);
+}
+
+#[test]
+fn test_lru_tracker_push_get() {
+    // Push elements, ensure:
+    // - get access via weak handles works for every pushed element
+    struct CacheMarker;
+    const NUM_ELEMENTS: usize = 50;
+
+    let mut cache: LRUCache<usize, CacheMarker> = LRUCache::new(1);
+    let mut handles = Vec::new();
+    cache.validate();
+
+    for i in 0 .. NUM_ELEMENTS {
+        handles.push(cache.push_new(0, i));
+    }
+    cache.validate();
+
+    for (i, handle) in handles.iter().enumerate() {
+        assert_eq!(cache.get_opt(handle), Some(&i));
+    }
+    cache.validate();
+}
+
+#[test]
+fn test_lru_tracker_push_replace_get() {
+    // Push elements, replace contents, ensure:
+    // - each element was replaced with new data correctly (checked for every handle)
+    // - replace_or_insert works for invalid handles
+    struct CacheMarker;
+    const NUM_ELEMENTS: usize = 50;
+
+    let mut cache: LRUCache<usize, CacheMarker> = LRUCache::new(1);
+    let mut handles = Vec::new();
+    cache.validate();
+
+    for i in 0 .. NUM_ELEMENTS {
+        handles.push(cache.push_new(0, i));
+    }
+    cache.validate();
+
+    for i in 0 .. NUM_ELEMENTS {
+        assert_eq!(cache.replace_or_insert(&mut handles[i], 0, i * 2), Some(i));
+    }
+    cache.validate();
+
+    for (i, handle) in handles.iter().enumerate() {
+        assert_eq!(cache.get_opt(handle), Some(&(i * 2)));
+    }
+    cache.validate();
+
+    let mut empty_handle = WeakFreeListHandle::invalid();
+    assert_eq!(cache.replace_or_insert(&mut empty_handle, 0, 100), None);
+    assert_eq!(cache.get_opt(&empty_handle), Some(&100));
+}
diff --git a/gfx/wr/webrender/src/picture.rs b/gfx/wr/webrender/src/picture.rs
new file mode 100644
index 0000000000..31d14b7243
--- /dev/null
+++ b/gfx/wr/webrender/src/picture.rs
@@ -0,0 +1,7379 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! A picture represents a dynamically rendered image.
+//!
+//! # Overview
+//!
+//! A picture consists of:
+//!
+//! - A number of primitives that are drawn onto the picture.
+//! - A composite operation describing how to composite this
+//!   picture into its parent.
+//! - A configuration describing how to draw the primitives on
+//!   this picture (e.g. in screen space or local space).
+//!
+//! The tree of pictures is generated during scene building.
+//!
+//! Depending on their composite operations pictures can be rendered into
+//! intermediate targets or folded into their parent picture.
+//!
+//! ## Picture caching
+//!
+//! Pictures can be cached to reduce the amount of rasterization happening per
+//! frame.
+//!
+//! When picture caching is enabled, the scene is cut into a small number of slices,
+//! typically:
+//!
+//! - content slice
+//! - UI slice
+//! - background UI slice which is hidden by the other two slices most of the time.
+//!
+//! Each of these slices is made up of fixed-size large tiles of 1024x512 pixels
+//! (or 1024x32 / 32x1024 for horizontal / vertical scroll bar tiles).
+//!
+//! Tiles are either content that is rasterized and cached in a texture, or "clear
+//! tiles" that contain only a solid color rectangle rendered directly during the
+//! composite pass.
+//!
+//! ## Invalidation
+//!
+//! Each tile keeps track of the elements that affect it, which can be:
+//!
+//! - primitives
+//! - clips
+//! - image keys
+//! - opacity bindings
+//! - transforms
+//!
+//! These dependency lists are built each frame and compared to the previous frame to
+//! see if the tile changed.
+//!
+//! The tile's primitive dependency information is organized in a quadtree, each node
+//! storing an index buffer of tile primitive dependencies.
+//!
+//! The union of the invalidated leaves of each quadtree produces a per-tile dirty rect
+//! which defines the scissor rect used when replaying the tile's drawing commands and
+//! can be used for partial present.
+//!
+//! ## Display List shape
+//!
+//! WR will first look for an iframe item in the root stacking context to apply
+//! picture caching to. If that's not found, it will apply to the entire root
+//! stacking context of the display list. Apart from that, the format of the
+//! display list is not important to picture caching. Each time a new scroll root
+//! is encountered, a new picture cache slice will be created. If the display
+//! list contains more than some arbitrary number of slices (currently 8), the
+//! content will all be squashed into a single slice, in order to save GPU memory
+//! and compositing performance.
+//!
+//! ## Compositor Surfaces
+//!
+//! Sometimes, a primitive would prefer to exist as a native compositor surface.
+//! This allows a large and/or regularly changing primitive (such as a video, or
+//! webgl canvas) to be updated each frame without invalidating the content of
+//! tiles, and can provide a significant performance win and battery saving.
+//!
+//! Since drawing a primitive as a compositor surface alters the ordering of
+//! primitives in a tile, we use 'overlay tiles' to ensure correctness. If a
+//! tile has a compositor surface, _and_ that tile has primitives that overlap
+//! the compositor surface rect, the tile switches to be drawn in alpha mode.
+//!
+//! We rely on only promoting compositor surfaces that are opaque primitives.
+//! With this assumption, the tile(s) that intersect the compositor surface get
+//! a 'cutout' in the rectangle where the compositor surface exists (not the
+//! entire tile), allowing that tile to be drawn as an alpha tile after the
+//! compositor surface.
+//!
+//! Tiles are only drawn in overlay mode if there is content that exists on top
+//! of the compositor surface. Otherwise, we can draw the tiles in the normal fast
+//! path before the compositor surface is drawn. Use of the per-tile valid and
+//! dirty rects ensures that we do a minimal amount of per-pixel work here to
+//! blend the overlay tile (this is not always optimal right now, but will be
+//! improved as a follow up).
+
+use api::{MixBlendMode, PremultipliedColorF, FilterPrimitiveKind};
+use api::{PropertyBinding, PropertyBindingId, FilterPrimitive, RasterSpace};
+use api::{DebugFlags, ImageKey, ColorF, ColorU, PrimitiveFlags};
+use api::{ImageRendering, ColorDepth, YuvRangedColorSpace, YuvFormat, AlphaType};
+use api::units::*;
+use crate::command_buffer::PrimitiveCommand;
+use crate::box_shadow::BLUR_SAMPLE_SCALE;
+use crate::clip::{ClipStore, ClipChainInstance, ClipLeafId, ClipNodeId, ClipTreeBuilder};
+use crate::spatial_tree::{SpatialTree, CoordinateSpaceMapping, SpatialNodeIndex, VisibleFace};
+use crate::composite::{CompositorKind, CompositeState, NativeSurfaceId, NativeTileId, CompositeTileSurface, tile_kind};
+use crate::composite::{ExternalSurfaceDescriptor, ExternalSurfaceDependency, CompositeTileDescriptor, CompositeTile};
+use crate::composite::{CompositorTransformIndex};
+use crate::debug_colors;
+use euclid::{vec3, Point2D, Scale, Vector2D, Box2D};
+use euclid::approxeq::ApproxEq;
+use crate::filterdata::SFilterData;
+use crate::intern::ItemUid;
+use crate::internal_types::{FastHashMap, FastHashSet, PlaneSplitter, Filter, FrameId};
+use crate::internal_types::{PlaneSplitterIndex, PlaneSplitAnchor, TextureSource};
+use crate::frame_builder::{FrameBuildingContext, FrameBuildingState, PictureState, PictureContext};
+use crate::gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle};
+use crate::gpu_types::{UvRectKind, ZBufferId};
+use plane_split::{Clipper, Polygon};
+use crate::prim_store::{PrimitiveTemplateKind, PictureIndex, PrimitiveInstance, PrimitiveInstanceKind};
+use crate::prim_store::{ColorBindingStorage, ColorBindingIndex, PrimitiveScratchBuffer};
+use crate::print_tree::{PrintTree, PrintTreePrinter};
+use crate::render_backend::DataStores;
+use crate::render_task_graph::RenderTaskId;
+use crate::render_target::RenderTargetKind;
+use crate::render_task::{BlurTask, RenderTask, RenderTaskLocation, BlurTaskCache};
+use crate::render_task::{StaticRenderTaskSurface, RenderTaskKind};
+use crate::renderer::BlendMode;
+use crate::resource_cache::{ResourceCache, ImageGeneration, ImageRequest};
+use crate::space::SpaceMapper;
+use crate::scene::SceneProperties;
+use crate::spatial_tree::CoordinateSystemId;
+use crate::surface::{SurfaceDescriptor, SurfaceTileDescriptor};
+use smallvec::SmallVec;
+use std::{mem, u8, marker, u32};
+use std::sync::atomic::{AtomicUsize, Ordering};
+use std::collections::hash_map::Entry;
+use std::ops::Range;
+use crate::picture_textures::PictureCacheTextureHandle;
+use crate::util::{MaxRect, VecHelper, MatrixHelpers, Recycler, ScaleOffset};
+use crate::filterdata::{FilterDataHandle};
+use crate::tile_cache::{SliceDebugInfo, TileDebugInfo, DirtyTileDebugInfo};
+use crate::visibility::{PrimitiveVisibilityFlags, FrameVisibilityContext};
+use crate::visibility::{VisibilityState, FrameVisibilityState};
+use crate::scene_building::{SliceFlags};
+
+// Maximum blur radius for blur filter (different than box-shadow blur).
+// Taken from FilterNodeSoftware.cpp in Gecko.
+const MAX_BLUR_RADIUS: f32 = 100.;
+
+/// Specify whether a surface allows subpixel AA text rendering.
+#[derive(Debug, Copy, Clone)]
+pub enum SubpixelMode {
+    /// This surface allows subpixel AA text
+    Allow,
+    /// Subpixel AA text cannot be drawn on this surface
+    Deny,
+    /// Subpixel AA can be drawn on this surface, if not intersecting
+    /// with the excluded regions, and inside the allowed rect.
+    Conditional {
+        allowed_rect: PictureRect, // the "allowed rect" referred to above, expressed as a PictureRect
+    },
+}
+
+/// A comparable transform matrix, that compares with epsilon checks.
+#[derive(Debug, Clone)]
+struct MatrixKey {
+    m: [f32; 16], // the 16 matrix components, as returned by `to_array()` on the source transform
+}
+
+impl PartialEq for MatrixKey {
+    /// Approximate equality: every pair of corresponding components must
+    /// pass `approx_eq_eps` with the EPSILON tolerance below. A single
+    /// failing pair makes the two keys unequal.
+    fn eq(&self, other: &Self) -> bool {
+        const EPSILON: f32 = 0.001;
+
+        // TODO(gw): It's possible that we may need to adjust the epsilon
+        //           to be tighter on most of the matrix, except the
+        //           translation parts?
+        let mut pairs = self.m.iter().zip(other.m.iter());
+
+        // `all` stops at the first pair that fails the tolerance check
+        pairs.all(|(lhs, rhs)| lhs.approx_eq_eps(rhs, &EPSILON))
+    }
+}
+
+/// A comparable scale-offset, that compares with epsilon checks.
+#[derive(Debug, Clone)]
+struct ScaleOffsetKey {
+    sx: f32, // copied from `scale.x` of the source scale-offset
+    sy: f32, // copied from `scale.y`
+    tx: f32, // copied from `offset.x`
+    ty: f32, // copied from `offset.y`
+}
+
+impl PartialEq for ScaleOffsetKey {
+    fn eq(&self, other: &Self) -> bool {
+        const EPSILON: f32 = 0.001;
+
+        // Compare scale then offset component-wise, stopping at the first mismatch
+        [(self.sx, other.sx), (self.sy, other.sy), (self.tx, other.tx), (self.ty, other.ty)]
+            .iter()
+            .all(|(lhs, rhs)| lhs.approx_eq_eps(rhs, &EPSILON))
+    }
+}
+
+/// A comparable version of a coordinate space mapping (compared by value, not hashed).
+/// Used to determine if a transform dependency for a tile has changed.
+#[derive(Debug, PartialEq, Clone)]
+enum TransformKey {
+    Local,
+    ScaleOffset {
+        so: ScaleOffsetKey,
+    },
+    Transform {
+        m: MatrixKey,
+    }
+}
+
+impl<Src, Dst> From<CoordinateSpaceMapping<Src, Dst>> for TransformKey {
+    /// Build the comparable key that corresponds to `transform`, copying
+    /// out only the numeric components of the mapping.
+    fn from(transform: CoordinateSpaceMapping<Src, Dst>) -> TransformKey {
+        match &transform {
+            // No transform data to record
+            CoordinateSpaceMapping::Local => TransformKey::Local,
+            // Keep the x/y scale and offset components
+            CoordinateSpaceMapping::ScaleOffset(scale_offset) => {
+                let so = ScaleOffsetKey {
+                    sx: scale_offset.scale.x,
+                    sy: scale_offset.scale.y,
+                    tx: scale_offset.offset.x,
+                    ty: scale_offset.offset.y,
+                };
+
+                TransformKey::ScaleOffset { so }
+            }
+            // Keep the full matrix, flattened to an array
+            CoordinateSpaceMapping::Transform(matrix) => {
+                let m = MatrixKey { m: matrix.to_array() };
+                TransformKey::Transform { m }
+            }
+        }
+    }
+}
+
+/// Unit for tile coordinates.
+#[derive(Hash, Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)]
+pub struct TileCoordinate;
+
+// Geometry types for tile coordinates.
+pub type TileOffset = Point2D<i32, TileCoordinate>;
+pub type TileRect = Box2D<i32, TileCoordinate>;
+
+/// The maximum number of compositor surfaces that are allowed per picture cache. This
+/// is an arbitrary number that should be enough for common cases, but low enough to
+/// prevent performance and memory usage drastically degrading in pathological cases.
+const MAX_COMPOSITOR_SURFACES: usize = 4;
+
+/// The size in device pixels of a normal cached tile.
+pub const TILE_SIZE_DEFAULT: DeviceIntSize = DeviceIntSize {
+    width: 1024,
+    height: 512,
+    _unit: marker::PhantomData,
+};
+
+/// The size in device pixels of a tile for horizontal scroll bars
+pub const TILE_SIZE_SCROLLBAR_HORIZONTAL: DeviceIntSize = DeviceIntSize {
+    width: 1024,
+    height: 32,
+    _unit: marker::PhantomData,
+};
+
+/// The size in device pixels of a tile for vertical scroll bars
+pub const TILE_SIZE_SCROLLBAR_VERTICAL: DeviceIntSize = DeviceIntSize {
+    width: 32,
+    height: 1024,
+    _unit: marker::PhantomData,
+};
+
+/// The maximum size per axis of a surface,
+///  in WorldPixel coordinates.
+const MAX_SURFACE_SIZE: usize = 4096;
+/// Maximum size of a compositor surface.
+const MAX_COMPOSITOR_SURFACES_SIZE: f32 = 8192.0;
+
+/// The maximum number of sub-dependencies (e.g. clips, transforms) we can handle
+/// per-primitive. If a primitive has more than this, it will invalidate every frame.
+const MAX_PRIM_SUB_DEPS: usize = u8::MAX as usize;
+
+/// Used to get unique tile IDs, even when the tile cache is
+/// destroyed between display lists / scenes.
+static NEXT_TILE_ID: AtomicUsize = AtomicUsize::new(0);
+
+fn clamp(value: i32, low: i32, high: i32) -> i32 {
+    std::cmp::min(std::cmp::max(value, low), high) // raise to `low` first, then cap at `high`
+}
+
+fn clampf(value: f32, low: f32, high: f32) -> f32 {
+    f32::min(f32::max(value, low), high) // raise to `low` first, then cap at `high`
+}
+
+/// An index into the prims array in a TileDescriptor.
+#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct PrimitiveDependencyIndex(pub u32);
+
+/// Information about the state of a binding.
+#[derive(Debug)]
+pub struct BindingInfo<T> {
+    /// The current value retrieved from dynamic scene properties.
+    value: T,
+    /// True if it was changed (or is new) since the last frame build.
+    changed: bool,
+}
+
+/// Information stored in a tile descriptor for a binding.
+#[derive(Debug, PartialEq, Clone, Copy)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum Binding<T> {
+    Value(T),
+    Binding(PropertyBindingId),
+}
+
+impl<T> From<PropertyBinding<T>> for Binding<T> {
+    fn from(binding: PropertyBinding<T>) -> Binding<T> {
+        match binding {
+            PropertyBinding::Binding(key, _) => Binding::Binding(key.id), // only the key's id is kept; the second field is discarded
+            PropertyBinding::Value(value) => Binding::Value(value), // a fixed value is carried over unchanged
+        }
+    }
+}
+
+pub type OpacityBinding = Binding<f32>;
+pub type OpacityBindingInfo = BindingInfo<f32>;
+
+pub type ColorBinding = Binding<ColorU>;
+pub type ColorBindingInfo = BindingInfo<ColorU>;
+
+/// A dependency for a transform is defined by the spatial node index + frame it was used
+#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct SpatialNodeKey {
+    spatial_node_index: SpatialNodeIndex,
+    frame_id: FrameId,
+}
+
+/// A helper for comparing spatial nodes between frames. The comparisons
+/// are done by-value, so that if the shape of the spatial node tree
+/// changes, invalidations aren't done simply due to the spatial node
+/// index changing between display lists.
+struct SpatialNodeComparer {
+    /// The root spatial node index of the tile cache
+    ref_spatial_node_index: SpatialNodeIndex,
+    /// Maintains a map of currently active transform keys
+    spatial_nodes: FastHashMap<SpatialNodeKey, TransformKey>,
+    /// A cache of recent comparisons between prev and current spatial nodes
+    compare_cache: FastHashMap<(SpatialNodeKey, SpatialNodeKey), bool>,
+    /// A set of frames that we need to retain spatial node entries for
+    referenced_frames: FastHashSet<FrameId>,
+}
+
+impl SpatialNodeComparer {
+    /// Construct an empty comparer; the reference node is INVALID until `next_frame` sets it
+    fn new() -> Self {
+        SpatialNodeComparer {
+            ref_spatial_node_index: SpatialNodeIndex::INVALID,
+            spatial_nodes: FastHashMap::default(),
+            compare_cache: FastHashMap::default(),
+            referenced_frames: FastHashSet::default(),
+        }
+    }
+
+    /// Advance to the next frame, pruning stale entries and resetting per-frame state
+    fn next_frame(
+        &mut self,
+        ref_spatial_node_index: SpatialNodeIndex,
+    ) {
+        // Drop any node information for unreferenced frames, to ensure that the
+        // hashmap doesn't grow indefinitely!
+        let referenced_frames = &self.referenced_frames;
+        self.spatial_nodes.retain(|key, _| {
+            referenced_frames.contains(&key.frame_id)
+        });
+
+        // Update the root spatial node for this comparer
+        self.ref_spatial_node_index = ref_spatial_node_index;
+        self.compare_cache.clear(); // cached comparison results are discarded on every frame advance
+        self.referenced_frames.clear(); // frames must be re-registered via `retain_for_frame`
+    }
+
+    /// Register a transform that is used, and build the transform key for it if new.
+    fn register_used_transform(
+        &mut self,
+        spatial_node_index: SpatialNodeIndex,
+        frame_id: FrameId,
+        spatial_tree: &SpatialTree,
+    ) {
+        let key = SpatialNodeKey {
+            spatial_node_index,
+            frame_id,
+        };
+
+        if let Entry::Vacant(entry) = self.spatial_nodes.entry(key) { // an existing entry for this (node, frame) is left untouched
+            entry.insert(
+                get_transform_key(
+                    spatial_node_index,
+                    self.ref_spatial_node_index,
+                    spatial_tree,
+                )
+            );
+        }
+    }
+
+    /// Return true if the transforms for two given spatial nodes are considered equivalent
+    fn are_transforms_equivalent(
+        &mut self,
+        prev_spatial_node_key: &SpatialNodeKey,
+        curr_spatial_node_key: &SpatialNodeKey,
+    ) -> bool {
+        let key = (*prev_spatial_node_key, *curr_spatial_node_key);
+        let spatial_nodes = &self.spatial_nodes;
+
+        *self.compare_cache
+            .entry(key)
+            .or_insert_with(|| { // only runs when this (prev, curr) pair has no cached result yet
+                let prev = &spatial_nodes[&prev_spatial_node_key]; // map indexing panics if the key was never registered
+                let curr = &spatial_nodes[&curr_spatial_node_key];
+                curr == prev
+            })
+    }
+
+    /// Ensure that the comparer won't GC any nodes for a given frame id
+    fn retain_for_frame(&mut self, frame_id: FrameId) {
+        self.referenced_frames.insert(frame_id);
+    }
+}
+
+/// Immutable context passed to picture cache tiles during pre_update
+struct TilePreUpdateContext {
+    /// Maps from picture cache coords -> world space coords.
+    pic_to_world_mapper: SpaceMapper<PicturePixel, WorldPixel>,
+
+    /// The optional background color of the picture cache instance
+    background_color: Option<ColorF>,
+
+    /// The visible part of the screen in world coords.
+    global_screen_world_rect: WorldRect,
+
+    /// Current size of tiles in picture units.
+    tile_size: PictureSize,
+
+    /// The current frame id for this picture cache
+    frame_id: FrameId,
+}
+
+/// Immutable context passed to picture cache tiles during update_dirty_and_valid_rects
+struct TileUpdateDirtyContext<'a> {
+    /// Maps from picture cache coords -> world space coords.
+    pic_to_world_mapper: SpaceMapper<PicturePixel, WorldPixel>,
+
+    /// Global scale factor from world -> device pixels.
+    global_device_pixel_scale: DevicePixelScale,
+
+    /// Information about opacity bindings from the picture cache.
+    opacity_bindings: &'a FastHashMap<PropertyBindingId, OpacityBindingInfo>,
+
+    /// Information about color bindings from the picture cache.
+    color_bindings: &'a FastHashMap<PropertyBindingId, ColorBindingInfo>,
+
+    /// The local rect of the overall picture cache
+    local_rect: PictureRect,
+
+    /// If true, the scale factor of the root transform for this picture
+    /// cache changed, so we need to invalidate the tile and re-render.
+    invalidate_all: bool,
+}
+
+/// Mutable state passed to picture cache tiles during update_dirty_and_valid_rects
+struct TileUpdateDirtyState<'a> {
+    /// Allow access to the texture cache for requesting tiles
+    resource_cache: &'a mut ResourceCache,
+
+    /// Current configuration and setup for compositing all the picture cache tiles in renderer.
+    composite_state: &'a mut CompositeState,
+
+    /// A cache of comparison results to avoid re-computation during invalidation.
+    compare_cache: &'a mut FastHashMap<PrimitiveComparisonKey, PrimitiveCompareResult>,
+
+    /// Information about transform node differences from last frame.
+    spatial_node_comparer: &'a mut SpatialNodeComparer,
+}
+
+/// Immutable context passed to picture cache tiles during post_update
+struct TilePostUpdateContext {
+    /// The local clip rect (in picture space) of the entire picture cache
+    local_clip_rect: PictureRect,
+
+    /// The calculated backdrop information for this cache instance.
+    backdrop: Option<BackdropInfo>,
+
+    /// Current size in device pixels of tiles for this cache
+    current_tile_size: DeviceIntSize,
+
+    /// Pre-allocated z-id to assign to tiles during post_update.
+    z_id: ZBufferId,
+}
+
+/// Mutable state passed to picture cache tiles during post_update
+struct TilePostUpdateState<'a> {
+    /// Allow access to the texture cache for requesting tiles
+    resource_cache: &'a mut ResourceCache,
+
+    /// Current configuration and setup for compositing all the picture cache tiles in renderer.
+    composite_state: &'a mut CompositeState,
+}
+
+/// Information about the dependencies of a single primitive instance.
+struct PrimitiveDependencyInfo {
+    /// Unique content identifier of the primitive.
+    prim_uid: ItemUid,
+
+    /// The (conservative) clipped area in picture space this primitive occupies.
+    prim_clip_box: PictureBox2D,
+
+    /// Image keys this primitive depends on.
+    images: SmallVec<[ImageDependency; 8]>,
+
+    /// Opacity bindings this primitive depends on.
+    opacity_bindings: SmallVec<[OpacityBinding; 4]>,
+
+    /// Color binding this primitive depends on, if any.
+    color_binding: Option<ColorBinding>,
+
+    /// Clips that this primitive depends on.
+    clips: SmallVec<[ItemUid; 8]>,
+
+    /// Spatial nodes referenced by the clip dependencies of this primitive.
+    spatial_nodes: SmallVec<[SpatialNodeIndex; 4]>,
+}
+
+impl PrimitiveDependencyInfo {
+    /// Create the dependency record for a primitive. Only the uid and the
+    /// clip box are supplied by the caller; every dependency list starts
+    /// out empty and no color binding is set.
+    fn new(prim_uid: ItemUid, prim_clip_box: PictureBox2D) -> Self {
+        // Fields are listed in declaration order
+        PrimitiveDependencyInfo {
+            prim_uid,
+            prim_clip_box,
+            images: SmallVec::new(),
+            opacity_bindings: SmallVec::new(),
+            color_binding: None,
+            clips: SmallVec::new(),
+            spatial_nodes: SmallVec::new(),
+        }
+    }
+}
+
+/// A stable ID for a given tile, to help debugging. These are also used
+/// as unique identifiers for tile surfaces when using a native compositor.
+#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Ord, Eq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct TileId(pub usize);
+
+/// Uniquely identifies a tile within a picture cache slice
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Copy, Clone, PartialEq, Hash, Eq)]
+pub struct TileKey {
+    // Tile index (x,y)
+    pub tile_offset: TileOffset,
+    // Sub-slice (z)
+    pub sub_slice_index: SubSliceIndex,
+}
+
+/// A descriptor for the kind of texture that a picture cache tile will
+/// be drawn into.
+#[derive(Debug)]
+pub enum SurfaceTextureDescriptor {
+    /// When using the WR compositor, the tile is drawn into an entry
+    /// in the WR texture cache.
+    TextureCache {
+        handle: Option<PictureCacheTextureHandle>,
+    },
+    /// When using an OS compositor, the tile is drawn into a native
+    /// surface identified by arbitrary id.
+    Native {
+        /// The arbitrary id of this tile.
+        id: Option<NativeTileId>,
+    },
+}
+
+/// The resolved form of a `SurfaceTextureDescriptor` (see `SurfaceTextureDescriptor::resolve`):
+/// the optional handle / id has been replaced by a concrete texture source or native tile
+/// id that can be used by the batching and compositing code in the renderer.
+#[derive(Clone, Debug, Eq, PartialEq, Hash)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum ResolvedSurfaceTexture {
+    TextureCache {
+        /// The texture source to draw to, looked up from the picture texture cache.
+        texture: TextureSource,
+    },
+    Native {
+        /// The arbitrary id of this tile.
+        id: NativeTileId,
+        /// The size of the tile in device pixels.
+        size: DeviceIntSize,
+    }
+}
+
+impl SurfaceTextureDescriptor {
+    /// Create a resolved surface texture for this descriptor. `size` is only
+    /// recorded for native surfaces. Panics if the backing handle / id is unset.
+    pub fn resolve(
+        &self,
+        resource_cache: &ResourceCache,
+        size: DeviceIntSize,
+    ) -> ResolvedSurfaceTexture {
+        match self {
+            SurfaceTextureDescriptor::TextureCache { handle } => {
+                let handle = handle.as_ref().expect("bug: texture cache handle not allocated");
+                let texture = resource_cache.picture_textures.get_texture_source(handle);
+
+                ResolvedSurfaceTexture::TextureCache { texture }
+            }
+            SurfaceTextureDescriptor::Native { id } => {
+                ResolvedSurfaceTexture::Native {
+                    id: id.expect("bug: native surface not allocated"),
+                    size,
+                }
+            }
+        }
+    }
+}
+
+/// The backing surface for this tile: a texture, a solid color, or a clear.
+#[derive(Debug)]
+pub enum TileSurface {
+    Texture {
+        /// Descriptor for the surface that this tile draws into.
+        descriptor: SurfaceTextureDescriptor,
+    },
+    Color {
+        color: ColorF,
+    },
+    Clear,
+}
+
+impl TileSurface {
+    fn kind(&self) -> &'static str {
+        match self {
+            Self::Texture { .. } => "Texture",
+            Self::Color { .. } => "Color",
+            Self::Clear => "Clear",
+        }
+    }
+}
+
+/// Optional extra information returned by is_same when
+/// logging is enabled.
+#[derive(Debug, Copy, Clone, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum CompareHelperResult<T> {
+    /// The compared dependency sections match
+    Equal,
+    /// The previous and current dependency counts differ
+    Count {
+        prev_count: u8,
+        curr_count: u8,
+    },
+    /// The count was the sentinel value, so the items can't be compared
+    Sentinel,
+    /// Two items are not equal
+    NotEqual {
+        prev: T,
+        curr: T,
+    },
+    /// User callback returned true on item
+    PredicateTrue {
+        curr: T
+    },
+}
+
+/// The result of a primitive dependency comparison. Size is a u8
+/// since this is a hot path in the code, and keeping the data small
+/// is a performance win.
+#[derive(Debug, Copy, Clone, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[repr(u8)]
+pub enum PrimitiveCompareResult {
+    /// Primitives match
+    Equal,
+    /// Something in the PrimitiveDescriptor was different (its `PartialEq` checks uid and clip box)
+    Descriptor,
+    /// The clip node content or spatial node changed
+    Clip,
+    /// The value of the transform changed
+    Transform,
+    /// An image dependency was dirty
+    Image,
+    /// The value of an opacity binding changed
+    OpacityBinding,
+    /// The value of a color binding changed
+    ColorBinding,
+}
+
+/// Debugging information about why a tile was invalidated
+#[derive(Debug,Clone)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum InvalidationReason {
+    /// The background color changed
+    BackgroundColor,
+    /// The opaque state of the backing native surface changed
+    SurfaceOpacityChanged,
+    /// There was no backing texture (evicted or never rendered)
+    NoTexture,
+    /// There was no backing native surface (never rendered, or recreated)
+    NoSurface,
+    /// The primitive count in the dependency list was different
+    PrimCount,
+    /// The content of one of the primitives was different
+    Content,
+    /// The compositor type changed
+    CompositorKindChanged,
+    /// The valid region of the tile changed
+    ValidRectChanged,
+    /// The overall scale of the picture cache changed
+    ScaleChanged,
+    /// The content of the sampling surface changed
+    SurfaceContentChanged,
+}
+
+/// Information about a cached tile.
+pub struct Tile {
+    /// The grid position of this tile within the picture cache
+    pub tile_offset: TileOffset,
+    /// The current world rect of this tile.
+    pub world_tile_rect: WorldRect,
+    /// The current local rect of this tile.
+    pub local_tile_rect: PictureRect,
+    /// The picture space dirty rect for this tile.
+    pub local_dirty_rect: PictureRect,
+    /// The device space dirty rect for this tile.
+    /// TODO(gw): We have multiple dirty rects available due to the quadtree above. In future,
+    ///           expose these as multiple dirty rects, which will help in some cases.
+    pub device_dirty_rect: DeviceRect,
+    /// World space rect that contains the valid pixel region of this tile.
+    pub world_valid_rect: WorldRect,
+    /// Device space rect that contains the valid pixel region of this tile.
+    pub device_valid_rect: DeviceRect,
+    /// Uniquely describes the content of this tile, in a way that can be
+    /// (reasonably) efficiently hashed and compared.
+    pub current_descriptor: TileDescriptor,
+    /// The content descriptor for this tile from the previous frame.
+    pub prev_descriptor: TileDescriptor,
+    /// Handle to the backing surface for this tile.
+    pub surface: Option<TileSurface>,
+    /// If true, this tile is marked valid, and the existing texture
+    /// cache handle can be used. Tiles are invalidated during the
+    /// build_dirty_regions method.
+    pub is_valid: bool,
+    /// If true, this tile intersects with the currently visible screen rect, and
+    /// will be drawn (`post_update` also clears this for tiles that end up empty).
+    pub is_visible: bool,
+    /// The tile id is stable between display lists and / or frames,
+    /// if the tile is retained. Useful for debugging tile evictions.
+    pub id: TileId,
+    /// If true, the tile was determined to be opaque, which means blending
+    /// can be disabled when drawing it.
+    pub is_opaque: bool,
+    /// Root node of the quadtree dirty rect tracker.
+    root: TileNode,
+    /// The last rendered background color on this tile.
+    background_color: Option<ColorF>,
+    /// The first reason the tile was invalidated this frame.
+    invalidation_reason: Option<InvalidationReason>,
+    /// The local space valid rect for all primitives that affect this tile.
+    pub local_valid_rect: PictureBox2D,
+    /// z-buffer id for this tile
+    pub z_id: ZBufferId,
+    /// The last frame this tile had its dependencies updated (dependency updating is
+    /// skipped if a tile is off-screen).
+    pub last_updated_frame_id: FrameId,
+    /// Rects with (composite mode, surface) lists; cleared in `pre_update`. NOTE(review): confirm meaning.
+    pub sub_graphs: Vec<(PictureRect, Vec<(PictureCompositeMode, SurfaceIndex)>)>,
+}
+
+impl Tile {
+    /// Construct a new, invalid tile at `tile_offset`. Rects start out zeroed, no surface is
+    /// attached, and the tile takes the next id from the `NEXT_TILE_ID` counter.
+    fn new(tile_offset: TileOffset) -> Self {
+        let id = TileId(NEXT_TILE_ID.fetch_add(1, Ordering::Relaxed));
+        Self {
+            tile_offset,
+            world_tile_rect: WorldRect::zero(),
+            local_tile_rect: PictureRect::zero(),
+            local_dirty_rect: PictureRect::zero(),
+            device_dirty_rect: DeviceRect::zero(),
+            world_valid_rect: WorldRect::zero(),
+            device_valid_rect: DeviceRect::zero(),
+            current_descriptor: TileDescriptor::new(),
+            prev_descriptor: TileDescriptor::new(),
+            surface: None,
+            is_valid: false,
+            is_visible: false,
+            id,
+            is_opaque: false,
+            root: TileNode::new_leaf(Vec::new()),
+            background_color: None,
+            invalidation_reason: None,
+            local_valid_rect: PictureBox2D::zero(),
+            z_id: ZBufferId::invalid(),
+            last_updated_frame_id: FrameId::INVALID,
+            sub_graphs: Vec::new(),
+        }
+    }
+
+    /// Print this tile's id, local rect, background color, invalidation reason and current descriptor.
+    fn print(&self, pt: &mut dyn PrintTreePrinter) {
+        pt.new_level(format!("Tile {:?}", self.id));
+        pt.add_item(format!("local_tile_rect: {:?}", self.local_tile_rect));
+        pt.add_item(format!("background_color: {:?}", self.background_color));
+        pt.add_item(format!("invalidation_reason: {:?}", self.invalidation_reason));
+        self.current_descriptor.print(pt);
+        pt.end_level();
+    }
+
+    /// Compare the previous and current tile descriptors; returns the resulting dirty rect.
+    fn update_dirty_rects(
+        &mut self,
+        ctx: &TileUpdateDirtyContext,
+        state: &mut TileUpdateDirtyState,
+        invalidation_reason: &mut Option<InvalidationReason>,
+        frame_context: &FrameVisibilityContext,
+    ) -> PictureRect {
+        let prev = &self.prev_descriptor;
+        let curr = &self.current_descriptor;
+        let mut comparer = PrimitiveComparer::new(
+            prev,
+            curr,
+            state.resource_cache,
+            state.spatial_node_comparer,
+            ctx.opacity_bindings,
+            ctx.color_bindings,
+        );
+        let mut found_dirty = PictureBox2D::zero();
+        self.root.update_dirty_rects(
+            &prev.prims,
+            &curr.prims,
+            &mut comparer,
+            &mut found_dirty,
+            state.compare_cache,
+            invalidation_reason,
+            frame_context,
+        );
+        found_dirty
+    }
+
+    /// Invalidate a tile based on change in content. This was documented as needing to be
+    /// called even if the tile is not currently visible on screen (possibly improvable by
+    /// changing how ComparableVec is used). NOTE(review): `update_dirty_and_valid_rects`
+    /// returns early for non-visible tiles before calling this - confirm which is intended.
+    fn update_content_validity(
+        &mut self,
+        ctx: &TileUpdateDirtyContext,
+        state: &mut TileUpdateDirtyState,
+        frame_context: &FrameVisibilityContext,
+    ) {
+        // Check if the contents of the primitives, clips, and other dependencies
+        // are the same. The compare cache is reset before the comparison runs.
+        state.compare_cache.clear();
+        let mut invalidation_reason = None;
+        let dirty_rect = self.update_dirty_rects(
+            ctx,
+            state,
+            &mut invalidation_reason,
+            frame_context,
+        );
+        if !dirty_rect.is_empty() {
+            self.invalidate(
+                Some(dirty_rect),
+                invalidation_reason.expect("bug: no invalidation_reason"),
+            );
+        }
+        if ctx.invalidate_all {
+            self.invalidate(None, InvalidationReason::ScaleChanged);
+        }
+        // TODO(gw): We can avoid invalidating the whole tile in some cases here,
+        //           but it should be a fairly rare invalidation case.
+        if self.current_descriptor.local_valid_rect != self.prev_descriptor.local_valid_rect {
+            self.invalidate(None, InvalidationReason::ValidRectChanged);
+            state.composite_state.dirty_rects_are_valid = false;
+        }
+    }
+
+    /// Mark this tile invalid and update its dirty rect. With `Some(rect)` the rect is
+    /// unioned into the existing dirty rect; with `None` the dirty rect becomes the
+    /// whole tile rect. Only the first `reason` recorded is kept.
+    fn invalidate(
+        &mut self,
+        invalidation_rect: Option<PictureRect>,
+        reason: InvalidationReason,
+    ) {
+        self.is_valid = false;
+
+        // Either grow the dirty rect, or replace it with the full tile rect.
+        self.local_dirty_rect = match invalidation_rect {
+            Some(rect) => self.local_dirty_rect.union(&rect),
+            None => self.local_tile_rect,
+        };
+
+        // Keep the first reason recorded since it was last reset; later
+        // invalidations must not overwrite it.
+        if self.invalidation_reason.is_none() {
+            self.invalidation_reason = Some(reason);
+        }
+    }
+
+    /// Called during pre_update of a tile cache instance. Recomputes the tile rects and visibility and,
+    /// for visible tiles only, swaps and clears the descriptors ahead of primitive dependency calculations.
+    fn pre_update(
+        &mut self,
+        ctx: &TilePreUpdateContext,
+    ) {
+        self.local_tile_rect = PictureRect::from_origin_and_size(
+            PicturePoint::new(
+                self.tile_offset.x as f32 * ctx.tile_size.width,
+                self.tile_offset.y as f32 * ctx.tile_size.height,
+            ),
+            ctx.tile_size,
+        );
+        // TODO(gw): This is a hack / fix for Box2D::union in euclid not working with
+        //           zero sized rect accumulation. Once that lands, we'll revert this
+        //           to be zero. Until then the accumulator starts as an inverted box.
+        self.local_valid_rect = PictureBox2D::new(
+            PicturePoint::new( 1.0e32,  1.0e32),
+            PicturePoint::new(-1.0e32, -1.0e32),
+        );
+        self.invalidation_reason  = None;
+        self.sub_graphs.clear();
+
+        self.world_tile_rect = ctx.pic_to_world_mapper
+            .map(&self.local_tile_rect)
+            .expect("bug: map local tile rect");
+
+        // Check if this tile is currently on screen.
+        self.is_visible = self.world_tile_rect.intersects(&ctx.global_screen_world_rect);
+
+        // If the tile isn't visible, early exit, skipping the normal set up to
+        // validate dependencies. Instead, we will only compare the current tile
+        // dependencies the next time it comes into view.
+        if !self.is_visible {
+            return;
+        }
+
+        if ctx.background_color != self.background_color {
+            self.invalidate(None, InvalidationReason::BackgroundColor);
+            self.background_color = ctx.background_color;
+        }
+
+        // Move the existing dependencies into prev_descriptor and clear the current ones,
+        // so that once they are rebuilt we can compare if the tile has the same content.
+        mem::swap(
+            &mut self.current_descriptor,
+            &mut self.prev_descriptor,
+        );
+        self.current_descriptor.clear();
+        self.root.clear(self.local_tile_rect);
+
+        // Since this tile is determined to be visible, it will get updated
+        // dependencies, so update the frame id we are storing dependencies for.
+        self.last_updated_frame_id = ctx.frame_id;
+    }
+
+    /// Add dependencies for a given primitive to this tile. Does nothing while the tile is
+    /// not visible. Otherwise grows the tile's valid rect, appends the primitive's dependency
+    /// lists to the current descriptor, and registers it with the dirty rect quadtree.
+    fn add_prim_dependency(
+        &mut self,
+        info: &PrimitiveDependencyInfo,
+    ) {
+        // If this tile isn't currently visible, we don't want to update the dependencies
+        // for this tile, as an optimization, since it won't be drawn anyway.
+        if !self.is_visible {
+            return;
+        }
+
+        // Incorporate the bounding rect of the primitive in the local valid rect
+        // for this tile. This is used to minimize the size of the scissor rect
+        // during rasterization and the draw rect during composition of partial tiles.
+        self.local_valid_rect = self.local_valid_rect.union(&info.prim_clip_box);
+
+        // Include any image keys this tile depends on.
+        self.current_descriptor.images.extend_from_slice(&info.images);
+
+        // Include any opacity bindings this primitive depends on.
+        self.current_descriptor.opacity_bindings.extend_from_slice(&info.opacity_bindings);
+
+        // Include any clip nodes that this primitive depends on.
+        self.current_descriptor.clips.extend_from_slice(&info.clips);
+
+        // Include any transforms that this primitive depends on.
+        for spatial_node_index in &info.spatial_nodes {
+            self.current_descriptor.transforms.push(SpatialNodeKey {
+                spatial_node_index: *spatial_node_index,
+                frame_id: self.last_updated_frame_id,
+            });
+        }
+
+        // Include any color bindings this primitive depends on.
+        if let Some(color_binding) = info.color_binding {
+            self.current_descriptor.color_bindings.insert(
+                self.current_descriptor.color_bindings.len(), color_binding);
+        }
+
+        // TODO(gw): The prim_clip_rect can be impacted by the clip rect of the display port,
+        //           which can cause invalidations when a new display list with changed
+        //           display port is received. To work around this, clamp the prim clip rect
+        //           to the tile boundaries - if the clip hasn't affected the tile, then the
+        //           changed clip can't affect the content of the primitive on this tile.
+        //           In future, we could consider supplying the display port clip from Gecko
+        //           in a different way (e.g. as a scroll frame clip) which still provides
+        //           the desired clip for checkerboarding, but doesn't require this extra
+        //           work below.
+
+        // TODO(gw): This is a hot part of the code - we could probably optimize further by:
+        //           - Using min/max instead of clamps below (if we guarantee the rects are well formed)
+
+        let tile_p0 = self.local_tile_rect.min;
+        let tile_p1 = self.local_tile_rect.max;
+
+        let prim_clip_box = PictureBox2D::new(
+            PicturePoint::new(
+                clampf(info.prim_clip_box.min.x, tile_p0.x, tile_p1.x),
+                clampf(info.prim_clip_box.min.y, tile_p0.y, tile_p1.y),
+            ),
+            PicturePoint::new(
+                clampf(info.prim_clip_box.max.x, tile_p0.x, tile_p1.x),
+                clampf(info.prim_clip_box.max.y, tile_p0.y, tile_p1.y),
+            ),
+        );
+
+        // Update the tile descriptor, used for tile comparison during scene swaps.
+        let prim_index = PrimitiveDependencyIndex(self.current_descriptor.prims.len() as u32);
+
+        // We know that the casts below will never overflow because the array lengths are
+        // truncated to MAX_PRIM_SUB_DEPS during update_prim_dependencies.
+        debug_assert!(info.spatial_nodes.len() <= MAX_PRIM_SUB_DEPS);
+        debug_assert!(info.clips.len() <= MAX_PRIM_SUB_DEPS);
+        debug_assert!(info.images.len() <= MAX_PRIM_SUB_DEPS);
+        debug_assert!(info.opacity_bindings.len() <= MAX_PRIM_SUB_DEPS);
+
+        self.current_descriptor.prims.push(PrimitiveDescriptor {
+            prim_uid: info.prim_uid,
+            prim_clip_box,
+            transform_dep_count: info.spatial_nodes.len() as u8,
+            clip_dep_count: info.clips.len() as u8,
+            image_dep_count: info.images.len() as u8,
+            opacity_binding_dep_count: info.opacity_bindings.len() as u8,
+            color_binding_dep_count: u8::from(info.color_binding.is_some()),
+        });
+
+        // Add this primitive to the dirty rect quadtree, using its unclamped clip box.
+        self.root.add_prim(prim_index, &info.prim_clip_box);
+    }
+
+    /// Computes the valid rects (picture, world and device space) for a visible tile, then
+    /// invalidates it based on content changes, after primitive dependencies have been updated.
+    fn update_dirty_and_valid_rects(
+        &mut self,
+        ctx: &TileUpdateDirtyContext,
+        state: &mut TileUpdateDirtyState,
+        frame_context: &FrameVisibilityContext,
+    ) {
+        // Register the frame id of this tile with the spatial node comparer, to ensure
+        // that it doesn't GC any spatial nodes from the comparer that are referenced
+        // by this tile. Must be done before we early exit below, so that we retain
+        // spatial node info even for tiles that are currently not visible.
+        state.spatial_node_comparer.retain_for_frame(self.last_updated_frame_id);
+
+        // If tile is not visible, just early out from here - we don't update dependencies
+        // so don't want to invalidate, merge, split etc. The tile won't need to be drawn
+        // (and thus updated / invalidated) until it is on screen again.
+        if !self.is_visible {
+            return;
+        }
+
+        // Start from the valid rect accumulated from the primitives added to this tile.
+        self.current_descriptor.local_valid_rect = self.local_valid_rect;
+
+        // TODO(gw): In theory, the local tile rect should always have an
+        //           intersection with the overall picture rect. In practice,
+        //           due to some accuracy issues with how fract_offset (and
+        //           fp accuracy) are used in the calling method, this isn't
+        //           always true. In this case, it's safe to set the local
+        //           valid rect to zero, which means it will be clipped out
+        //           and not affect the scene. In future, we should fix the
+        //           accuracy issue above, so that this assumption holds, but
+        //           it shouldn't have any noticeable effect on performance
+        //           or memory usage (textures should never get allocated).
+        self.current_descriptor.local_valid_rect = self.local_tile_rect
+            .intersection(&ctx.local_rect)
+            .and_then(|r| r.intersection(&self.current_descriptor.local_valid_rect))
+            .unwrap_or_else(PictureRect::zero);
+
+        // The device_valid_rect is referenced during `update_content_validity` so it
+        // must be updated here first.
+        self.world_valid_rect = ctx.pic_to_world_mapper
+            .map(&self.current_descriptor.local_valid_rect)
+            .expect("bug: map local valid rect");
+
+        // The device rect is guaranteed to be aligned on a device pixel - the round
+        // is just to deal with float accuracy. However, the valid rect is not
+        // always aligned to a device pixel. To handle this, round out to get all
+        // required pixels, and intersect with the tile device rect.
+        let device_rect = (self.world_tile_rect * ctx.global_device_pixel_scale).round();
+        self.device_valid_rect = (self.world_valid_rect * ctx.global_device_pixel_scale)
+            .round_out()
+            .intersection(&device_rect)
+            .unwrap_or_else(DeviceRect::zero);
+
+        // Invalidate the tile based on the content changing.
+        self.update_content_validity(ctx, state, frame_context);
+    }
+
+    /// Called during tile cache instance post_update. Culls empty tiles, updates the opacity state,
+    /// considers splitting / merging the dirty rect quadtree, and sets up the tile's backing surface.
+    fn post_update(
+        &mut self,
+        ctx: &TilePostUpdateContext,
+        state: &mut TilePostUpdateState,
+        frame_context: &FrameVisibilityContext,
+    ) {
+        // If tile is not visible, just early out from here - we don't update dependencies
+        // so don't want to invalidate, merge, split etc. The tile won't need to be drawn
+        // (and thus updated / invalidated) until it is on screen again.
+        if !self.is_visible {
+            return;
+        }
+
+        // If there are no primitives there is no need to draw or cache it.
+        // Bug 1719232 - The final device valid rect does not always describe a non-empty
+        // region. Cull the tile as a workaround.
+        if self.current_descriptor.prims.is_empty() || self.device_valid_rect.is_empty() {
+            // If there is a native compositor surface allocated for this (now empty) tile
+            // it must be freed here, otherwise the stale tile with previous contents will
+            // be composited. If the tile subsequently gets new primitives added to it, the
+            // surface will be re-allocated when it's added to the composite draw list.
+            if let Some(TileSurface::Texture { descriptor: SurfaceTextureDescriptor::Native { mut id, .. }, .. }) = self.surface.take() {
+                if let Some(id) = id.take() {
+                    state.resource_cache.destroy_compositor_tile(id);
+                }
+            }
+
+            self.is_visible = false;
+            return;
+        }
+
+        // Check if this tile can be considered opaque. Opacity state must be updated only
+        // after all early out checks have been performed. Otherwise, we might miss updating
+        // the native surface next time this tile becomes visible.
+        let clipped_rect = self.current_descriptor.local_valid_rect
+            .intersection(&ctx.local_clip_rect)
+            .unwrap_or_else(PictureRect::zero);
+
+        let has_opaque_bg_color = self.background_color.map_or(false, |c| c.a >= 1.0);
+        let has_opaque_backdrop = ctx.backdrop.map_or(false, |b| b.opaque_rect.contains_box(&clipped_rect));
+        let is_opaque = has_opaque_bg_color || has_opaque_backdrop;
+
+        // Set the correct z_id for this tile
+        self.z_id = ctx.z_id;
+
+        if is_opaque != self.is_opaque {
+            // If opacity changed, the native compositor surface and all tiles get invalidated.
+            // (this does nothing if not using native compositor mode).
+            // TODO(gw): This property probably changes very rarely, so it is OK to invalidate
+            //           everything in this case. If it turns out that this isn't true, we could
+            //           consider other options, such as per-tile opacity (natively supported
+            //           on CoreAnimation, and supported if backed by non-virtual surfaces in
+            //           DirectComposition).
+            if let Some(TileSurface::Texture { descriptor: SurfaceTextureDescriptor::Native { ref mut id, .. }, .. }) = self.surface {
+                if let Some(id) = id.take() {
+                    state.resource_cache.destroy_compositor_tile(id);
+                }
+            }
+
+            // Invalidate the entire tile to force a redraw.
+            self.invalidate(None, InvalidationReason::SurfaceOpacityChanged);
+            self.is_opaque = is_opaque;
+        }
+
+        // Check if the selected composite mode supports dirty rect updates. For Draw composite
+        // mode, we can always update the content with smaller dirty rects, unless there is a
+        // driver bug to workaround. For native composite mode, we can only use dirty rects if
+        // the compositor supports partial surface updates.
+        let (supports_dirty_rects, supports_simple_prims) = match state.composite_state.compositor_kind {
+            CompositorKind::Draw { .. } => {
+                (frame_context.config.gpu_supports_render_target_partial_update, true)
+            }
+            CompositorKind::Native { capabilities, .. } => {
+                (capabilities.max_update_rects > 0, false)
+            }
+        };
+
+        // TODO(gw): Consider using smaller tiles and/or tile splits for
+        //           native compositors that don't support dirty rects.
+        if supports_dirty_rects {
+            // Only allow splitting for normal content sized tiles
+            if ctx.current_tile_size == state.resource_cache.picture_textures.default_tile_size() {
+                let max_split_level = 3;
+
+                // Consider splitting / merging dirty regions
+                self.root.maybe_merge_or_split(
+                    0,
+                    &self.current_descriptor.prims,
+                    max_split_level,
+                );
+            }
+        }
+
+        // The dirty rect will be set correctly by now. If the underlying platform
+        // doesn't support partial updates, and this tile isn't valid, force the dirty
+        // rect to be the size of the entire tile.
+        if !self.is_valid && !supports_dirty_rects {
+            self.local_dirty_rect = self.local_tile_rect;
+        }
+
+        // See if this tile is a simple color, in which case we can just draw
+        // it as a rect, and avoid allocating a texture surface and drawing it.
+        // TODO(gw): Initial native compositor interface doesn't support simple
+        //           color tiles. We can definitely support this in DC, so this
+        //           should be added as a follow up.
+        let is_simple_prim =
+            ctx.backdrop.map_or(false, |b| b.kind.is_some()) &&
+            self.current_descriptor.prims.len() == 1 &&
+            self.is_opaque &&
+            supports_simple_prims;
+
+        // Set up the backing surface for this tile.
+        let surface = if is_simple_prim {
+            // If we determine the tile can be represented by a color, set the
+            // surface unconditionally (this will drop any previously used
+            // texture cache backing surface).
+            match ctx.backdrop.unwrap().kind {
+                Some(BackdropKind::Color { color }) => {
+                    TileSurface::Color {
+                        color,
+                    }
+                }
+                Some(BackdropKind::Clear) => {
+                    TileSurface::Clear
+                }
+                None => {
+                    // This should be prevented by the is_simple_prim check above.
+                    unreachable!();
+                }
+            }
+        } else {
+            // If this tile will be backed by a surface, we want to retain
+            // the texture handle from the previous frame, if possible. If
+            // the tile was previously a color, or not set, then just set
+            // up a new texture cache handle.
+            match self.surface.take() {
+                Some(TileSurface::Texture { descriptor }) => {
+                    // Reuse the existing descriptor
+                    TileSurface::Texture {
+                        descriptor,
+                    }
+                }
+                Some(TileSurface::Color { .. }) | Some(TileSurface::Clear) | None => {
+                    // This is the case where we are constructing a tile surface that
+                    // involves drawing to a texture. Create the correct surface
+                    // descriptor depending on the compositing mode that will read
+                    // the output.
+                    let descriptor = match state.composite_state.compositor_kind {
+                        CompositorKind::Draw { .. } => {
+                            // For a texture cache entry, create an invalid handle that
+                            // will be allocated when update_picture_cache is called.
+                            SurfaceTextureDescriptor::TextureCache {
+                                handle: None,
+                            }
+                        }
+                        CompositorKind::Native { .. } => {
+                            // Create a native surface descriptor, but don't allocate
+                            // a surface yet. The surface is allocated *after* occlusion
+                            // culling occurs, so that only visible tiles allocate GPU memory.
+                            SurfaceTextureDescriptor::Native {
+                                id: None,
+                            }
+                        }
+                    };
+
+                    TileSurface::Texture {
+                        descriptor,
+                    }
+                }
+            }
+        };
+
+        // Store the current surface backing info for use during batching.
+        self.surface = Some(surface);
+    }
+}
+
+/// Defines a key that uniquely identifies a primitive instance.
+#[derive(Debug, Copy, Clone)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct PrimitiveDescriptor {
+    /// Uniquely identifies the content of the primitive template.
+    pub prim_uid: ItemUid,
+    /// The clip rect for this primitive (`Tile::add_prim_dependency` stores it clamped
+    /// to the tile bounds). Included here in dependencies since there is no entry in
+    /// the clip chain dependencies for the local clip rect.
+    pub prim_clip_box: PictureBox2D,
+    /// The number of extra dependencies of each kind that this primitive has (not compared by `PartialEq`).
+    transform_dep_count: u8,
+    image_dep_count: u8,
+    opacity_binding_dep_count: u8,
+    clip_dep_count: u8,
+    color_binding_dep_count: u8,
+}
+
+impl PartialEq for PrimitiveDescriptor {
+    /// Two descriptors compare equal when their uids match and every edge
+    /// of their clip boxes agrees to within `EPSILON`. The dependency
+    /// counts stored alongside them do not take part in the
+    /// comparison.
+    fn eq(&self, other: &Self) -> bool {
+        const EPSILON: f32 = 0.001;
+
+        // Different uids can never match, whatever the clip boxes look like.
+        if self.prim_uid != other.prim_uid {
+            return false;
+        }
+
+        let ours = &self.prim_clip_box;
+        let theirs = &other.prim_clip_box;
+
+        // `&&` short-circuits, so the clip box edges are only compared
+        // for as long as they keep matching.
+        ours.min.x.approx_eq_eps(&theirs.min.x, &EPSILON)
+            && ours.min.y.approx_eq_eps(&theirs.min.y, &EPSILON)
+            && ours.max.x.approx_eq_eps(&theirs.max.x, &EPSILON)
+            && ours.max.y.approx_eq_eps(&theirs.max.y, &EPSILON)
+    }
+}
+
+/// Cursor-style helper that walks two dependency arrays (previous frame and
+/// current frame) in lock-step so that matching sections can be compared.
+struct CompareHelper<'a, T> where T: Copy {
+    /// Start of the section under inspection in `curr_items`.
+    offset_curr: usize,
+    /// Start of the section under inspection in `prev_items`.
+    offset_prev: usize,
+    /// Dependencies recorded for the current frame.
+    curr_items: &'a [T],
+    /// Dependencies recorded for the previous frame.
+    prev_items: &'a [T],
+}
+
+impl<'a, T> CompareHelper<'a, T> where T: Copy + PartialEq {
+    /// Build a helper over the previous / current dependency arrays, with both
+    /// cursors positioned at the start.
+    fn new(
+        prev_items: &'a [T],
+        curr_items: &'a [T],
+    ) -> Self {
+        Self {
+            prev_items,
+            curr_items,
+            offset_prev: 0,
+            offset_curr: 0,
+        }
+    }
+
+    /// Move both cursors back to the start of their arrays.
+    fn reset(&mut self) {
+        self.offset_curr = 0;
+        self.offset_prev = 0;
+    }
+
+    /// Test whether the sections at the two cursors are the same.
+    ///
+    /// The sections are considered different if their lengths differ, or if the
+    /// length equals the `MAX_PRIM_SUB_DEPS` sentinel. Two empty sections are
+    /// always the same. Otherwise `f(prev, curr)` is called for each pair of
+    /// items in order, and the first `false` ends the comparison.
+    ///
+    /// Panics if a section extends past the end of its array.
+    fn is_same<F>(
+        &self,
+        prev_count: u8,
+        curr_count: u8,
+        mut f: F,
+    ) -> bool where F: FnMut(&T, &T) -> bool {
+        // Sections of different length can never match.
+        if prev_count != curr_count {
+            return false;
+        }
+
+        let count = curr_count as usize;
+
+        // Nothing to compare when both sections are empty.
+        if count == 0 {
+            return true;
+        }
+        // The sentinel count means the dependencies could not be recorded, so
+        // they must be treated as changed.
+        if count == MAX_PRIM_SUB_DEPS {
+            return false;
+        }
+
+        let curr_section = &self.curr_items[self.offset_curr .. self.offset_curr + count];
+        let prev_section = &self.prev_items[self.offset_prev .. self.offset_prev + count];
+
+        // Both sections hold exactly `count` items, so zipping visits every pair.
+        prev_section
+            .iter()
+            .zip(curr_section.iter())
+            .all(|(prev, curr)| f(prev, curr))
+    }
+
+    /// Advance the cursor into the previous-frame array by `count` items.
+    fn advance_prev(&mut self, count: u8) {
+        self.offset_prev += usize::from(count);
+    }
+
+    /// Advance the cursor into the current-frame array by `count` items.
+    fn advance_curr(&mut self, count: u8) {
+        self.offset_curr += usize::from(count);
+    }
+}
+
+/// Uniquely describes the content of this tile, in a way that can be
+/// (reasonably) efficiently hashed and compared.
+///
+/// Each entry in `prims` carries per-primitive dependency counts; the
+/// dependency lists themselves (`clips`, `images`, `opacity_bindings`,
+/// `transforms`, `color_bindings`) are stored here as flat vectors.
+#[cfg_attr(any(feature="capture",feature="replay"), derive(Clone))]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct TileDescriptor {
+    /// List of primitive instance unique identifiers. The uid is guaranteed
+    /// to uniquely describe the content of the primitive template, while
+    /// the other parameters describe the clip chain and instance params.
+    pub prims: Vec<PrimitiveDescriptor>,
+
+    /// List of clip node descriptors (the uid of each clip that this tile
+    /// depends on).
+    clips: Vec<ItemUid>,
+
+    /// List of image keys that this tile depends on, along with the image
+    /// generation recorded for each.
+    images: Vec<ImageDependency>,
+
+    /// The set of opacity bindings that this tile depends on.
+    // TODO(gw): Ugh, get rid of all opacity binding support!
+    opacity_bindings: Vec<OpacityBinding>,
+
+    /// List of the effects of transforms that we care about
+    /// tracking for this tile.
+    transforms: Vec<SpatialNodeKey>,
+
+    /// Picture space rect that contains the valid pixel region of this tile.
+    /// Reset to a zero rect by `new` and `clear`.
+    pub local_valid_rect: PictureRect,
+
+    /// List of the effects of color that we care about
+    /// tracking for this tile.
+    color_bindings: Vec<ColorBinding>,
+}
+
+impl TileDescriptor {
+    /// Create a descriptor with no dependencies and a zero-sized valid rect.
+    fn new() -> Self {
+        Self {
+            prims: Vec::new(),
+            clips: Vec::new(),
+            images: Vec::new(),
+            opacity_bindings: Vec::new(),
+            transforms: Vec::new(),
+            color_bindings: Vec::new(),
+            local_valid_rect: PictureRect::zero(),
+        }
+    }
+
+    /// Dump this descriptor to a tree printer for debugging.
+    ///
+    /// The primitive list is always printed; every other dependency list gets
+    /// its own level only when it is non-empty.
+    fn print(&self, pt: &mut dyn PrintTreePrinter) {
+        pt.new_level(String::from("current_descriptor"));
+
+        // Primitives: uid, clip box and the per-kind dependency counts.
+        pt.new_level(String::from("prims"));
+        for desc in self.prims.iter() {
+            let clip_box = &desc.prim_clip_box;
+
+            pt.new_level(format!("prim uid={}", desc.prim_uid.get_uid()));
+            pt.add_item(format!(
+                "clip: p0={},{} p1={},{}",
+                clip_box.min.x,
+                clip_box.min.y,
+                clip_box.max.x,
+                clip_box.max.y,
+            ));
+            pt.add_item(format!(
+                "deps: t={} i={} o={} c={} color={}",
+                desc.transform_dep_count,
+                desc.image_dep_count,
+                desc.opacity_binding_dep_count,
+                desc.clip_dep_count,
+                desc.color_binding_dep_count,
+            ));
+            pt.end_level();
+        }
+        pt.end_level();
+
+        // Clip uids.
+        if !self.clips.is_empty() {
+            pt.new_level(String::from("clips"));
+            for clip_uid in self.clips.iter() {
+                pt.new_level(format!("clip uid={}", clip_uid.get_uid()));
+                pt.end_level();
+            }
+            pt.end_level();
+        }
+
+        // Image keys and their generations.
+        if !self.images.is_empty() {
+            pt.new_level(String::from("images"));
+            for image in self.images.iter() {
+                pt.new_level(format!("key={:?}", image.key));
+                pt.add_item(format!("generation={:?}", image.generation));
+                pt.end_level();
+            }
+            pt.end_level();
+        }
+
+        // Opacity bindings.
+        if !self.opacity_bindings.is_empty() {
+            pt.new_level(String::from("opacity_bindings"));
+            for binding in self.opacity_bindings.iter() {
+                pt.new_level(format!("binding={:?}", binding));
+                pt.end_level();
+            }
+            pt.end_level();
+        }
+
+        // Transform (spatial node) dependencies.
+        if !self.transforms.is_empty() {
+            pt.new_level(String::from("transforms"));
+            for spatial_node in self.transforms.iter() {
+                pt.new_level(format!("spatial_node={:?}", spatial_node));
+                pt.end_level();
+            }
+            pt.end_level();
+        }
+
+        // Color bindings.
+        if !self.color_bindings.is_empty() {
+            pt.new_level(String::from("color_bindings"));
+            for binding in self.color_bindings.iter() {
+                pt.new_level(format!("binding={:?}", binding));
+                pt.end_level();
+            }
+            pt.end_level();
+        }
+
+        pt.end_level();
+    }
+
+    /// Drop all recorded dependencies so they can be rebuilt from scratch.
+    /// The vectors keep their allocations; the valid rect goes back to zero.
+    fn clear(&mut self) {
+        self.local_valid_rect = PictureRect::zero();
+
+        self.prims.clear();
+        self.clips.clear();
+        self.images.clear();
+        self.opacity_bindings.clear();
+        self.transforms.clear();
+        self.color_bindings.clear();
+    }
+}
+
+/// Represents the dirty region of a tile cache picture.
+#[derive(Clone)]
+pub struct DirtyRegion {
+    /// World-space union of every dirty rect added since the last reset.
+    pub combined: WorldRect,
+
+    /// Spatial node of the picture cache this region represents.
+    spatial_node_index: SpatialNodeIndex,
+}
+
+impl DirtyRegion {
+    /// Create an empty dirty region for the given picture cache spatial node.
+    pub fn new(
+        spatial_node_index: SpatialNodeIndex,
+    ) -> Self {
+        Self {
+            spatial_node_index,
+            combined: WorldRect::zero(),
+        }
+    }
+
+    /// Empty the dirty region and rebind it to `spatial_node_index`.
+    pub fn reset(
+        &mut self,
+        spatial_node_index: SpatialNodeIndex,
+    ) {
+        self.spatial_node_index = spatial_node_index;
+        self.combined = WorldRect::zero();
+    }
+
+    /// Map a picture-space rect to world space and merge it into `combined`.
+    ///
+    /// Panics (with the message "bug") if the rect cannot be mapped.
+    pub fn add_dirty_region(
+        &mut self,
+        rect_in_pic_space: PictureRect,
+        spatial_tree: &SpatialTree,
+    ) {
+        let root_index = spatial_tree.root_reference_frame_index();
+
+        // Mapper between this picture cache's spatial node and the root
+        // reference frame, with unbounded world-space bounds.
+        let pic_to_world = SpaceMapper::new_with_target(
+            root_index,
+            self.spatial_node_index,
+            WorldRect::max_rect(),
+            spatial_tree,
+        );
+
+        let dirty_world_rect = pic_to_world
+            .map(&rect_in_pic_space)
+            .expect("bug");
+
+        // Grow the overall dirty rect to include the new region.
+        self.combined = self.combined.union(&dirty_world_rect);
+    }
+}
+
+// TODO(gw): Tidy this up by:
+//      - Rename Clear variant to something more appropriate to what it does
+//      - Add an Other variant for things like opaque gradient backdrops
+/// The kind of backdrop recorded for a slice (see `BackdropInfo::kind`).
+#[derive(Debug, Copy, Clone)]
+pub enum BackdropKind {
+    /// A backdrop described by a single color.
+    Color {
+        /// The color of the backdrop.
+        color: ColorF,
+    },
+    /// NOTE(review): the exact meaning of this variant is not visible here; the
+    /// TODO above suggests the name does not reflect what it does - confirm at
+    /// the use sites.
+    Clear,
+}
+
+/// Information about the opaque backdrop that was calculated for this slice.
+#[derive(Debug, Copy, Clone)]
+pub struct BackdropInfo {
+    /// Picture space rectangle that is known to be opaque. Used to decide
+    /// where subpixel AA is allowed and where alpha blending can be turned off.
+    pub opaque_rect: PictureRect,
+    /// Set when the backdrop covers the whole slice with an opaque color, in
+    /// which case it can serve as the clear color for the slice's tiles.
+    pub spanning_opaque_color: Option<ColorF>,
+    /// Kind of the backdrop, if one was found.
+    pub kind: Option<BackdropKind>,
+    /// Picture space rectangle of the backdrop; meaningful when `kind` is set.
+    pub backdrop_rect: PictureRect,
+}
+
+impl BackdropInfo {
+    /// Backdrop info describing "no backdrop": zero rects, no color, no kind.
+    fn empty() -> Self {
+        let zero_rect = PictureRect::zero();
+
+        Self {
+            kind: None,
+            spanning_opaque_color: None,
+            opaque_rect: zero_rect,
+            backdrop_rect: zero_rect,
+        }
+    }
+}
+
+/// Represents the native surfaces created for a picture cache, if using
+/// a native compositor. Both an opaque and an alpha surface are always
+/// created, but tiles are added to a surface based on current opacity. If
+/// the calculated opacity of a tile changes, the tile is invalidated and
+/// attached to a different native surface. This means that we don't
+/// need to invalidate the entire surface if only some tiles are changing
+/// opacity. It also means we can take advantage of opaque tiles on cache
+/// slices where only some of the tiles are opaque. There is an assumption
+/// that creating a native surface is cheap, and only when a tile is added
+/// to a surface is there a significant cost. This assumption holds true
+/// for the current native compositor implementations on Windows and Mac.
+pub struct NativeSurface {
+    /// Native surface for opaque tiles
+    pub opaque: NativeSurfaceId,
+    /// Native surface for alpha tiles
+    pub alpha: NativeSurfaceId,
+}
+
+/// Hash key for an external native compositor surface. Used as the key type
+/// of `TileCacheInstance::external_native_surface_cache`.
+#[derive(PartialEq, Eq, Hash)]
+pub struct ExternalNativeSurfaceKey {
+    /// The YUV/RGB image keys that are used to draw this surface.
+    pub image_keys: [ImageKey; 3],
+    /// The current device size of the surface. Because the size is part of
+    /// the key, a surface of a different size is a different cache entry.
+    pub size: DeviceIntSize,
+    /// True if this is an 'external' compositor surface created via
+    /// Compositor::create_external_surface.
+    pub is_external_surface: bool,
+}
+
+/// Information about a native compositor surface cached between frames.
+/// Stored as the value type of `TileCacheInstance::external_native_surface_cache`.
+pub struct ExternalNativeSurface {
+    /// If true, the surface was used this frame. Used for a simple form
+    /// of GC to remove old surfaces.
+    pub used_this_frame: bool,
+    /// The native compositor surface handle. `TileCacheInstance::destroy`
+    /// passes it to `ResourceCache::destroy_compositor_surface`.
+    pub native_surface_id: NativeSurfaceId,
+    /// List of image keys, and current image generations, that are drawn in this surface.
+    /// The image generations are used to check if the compositor surface is dirty and
+    /// needs to be updated.
+    pub image_dependencies: [ImageDependency; 3],
+}
+
+/// The key that identifies a tile cache instance. For now, it is simply the
+/// index that the slice was given when it was created during scene building.
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct SliceId(usize);
+
+impl SliceId {
+    /// Wrap a slice index in a `SliceId`.
+    pub fn new(index: usize) -> Self {
+        Self(index)
+    }
+}
+
+/// Information that is required to reuse or create a new tile cache. Created
+/// during scene building and passed to the render backend / frame builder.
+///
+/// Consumed by `TileCacheInstance::new` (fresh cache) and
+/// `TileCacheInstance::prepare_for_new_scene` (retained cache).
+pub struct TileCacheParams {
+    /// Index of the slice (also effectively the key of the tile cache, though
+    /// we use SliceId where that matters)
+    pub slice: usize,
+    /// Flags describing content of this cache (e.g. scrollbars)
+    pub slice_flags: SliceFlags,
+    /// The anchoring spatial node / scroll root
+    pub spatial_node_index: SpatialNodeIndex,
+    /// Optional background color of this tilecache. If present, can be used as an optimization
+    /// to enable opaque blending and/or subpixel AA in more places.
+    pub background_color: Option<ColorF>,
+    /// Node in the clip-tree that defines where we exclude clips from child prims
+    pub shared_clip_node_id: ClipNodeId,
+    /// Clip leaf that is used to build the clip-chain for this tile cache.
+    pub shared_clip_leaf_id: Option<ClipLeafId>,
+    /// Virtual surface sizes are always square, so this represents both the width and height
+    pub virtual_surface_size: i32,
+    /// The number of compositor surfaces that are being requested for this tile cache.
+    /// This is only a suggestion - the tile cache will clamp this to a reasonable number
+    /// (at most `MAX_COMPOSITOR_SURFACES`) and only promote a limited number of surfaces.
+    pub compositor_surface_count: usize,
+}
+
+/// Defines which sub-slice (effectively a z-index) a primitive exists on within
+/// a picture cache instance.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
+pub struct SubSliceIndex(u8);
+
+impl SubSliceIndex {
+    /// The index of the primary (first) sub-slice.
+    pub const DEFAULT: SubSliceIndex = SubSliceIndex(0);
+
+    /// Create a sub-slice index from a position in the sub-slice list.
+    ///
+    /// The index is stored as a `u8`. Passing a value above `u8::MAX` is a
+    /// logic error: it trips a debug assertion, and in release builds the
+    /// value is truncated to its low 8 bits (as it always has been).
+    pub fn new(index: usize) -> Self {
+        // Make silent truncation visible in debug builds rather than letting
+        // two distinct sub-slices alias the same index.
+        debug_assert!(
+            index <= u8::MAX as usize,
+            "sub-slice index {} does not fit in a u8",
+            index,
+        );
+        SubSliceIndex(index as u8)
+    }
+
+    /// Return true if this sub-slice is the primary sub-slice (for now, we assume
+    /// that only the primary sub-slice may be opaque and support subpixel AA, for example).
+    pub fn is_primary(&self) -> bool {
+        self.0 == 0
+    }
+}
+
+/// Wrapper struct around an external surface descriptor with a little more information
+/// that the picture caching code needs.
+pub struct CompositorSurface {
+    /// External surface descriptor used by compositing logic
+    pub descriptor: ExternalSurfaceDescriptor,
+    /// The compositor surface rect + any intersecting prims. Later prims that intersect
+    /// with this must be added to the next sub-slice.
+    prohibited_rect: PictureRect,
+    /// If the compositor surface content is opaque.
+    pub is_opaque: bool,
+}
+
+/// A SubSlice represents a potentially overlapping set of tiles within a picture cache. Most
+/// picture cache instances will have only a single sub-slice. The exception to this is when
+/// a picture cache has compositor surfaces, in which case sub slices are used to interleave
+/// content under or over the compositor surface(s).
+pub struct SubSlice {
+    /// Tiles present in this sub-slice, keyed by tile offset.
+    pub tiles: FastHashMap<TileOffset, Box<Tile>>,
+    /// The allocated compositor surfaces for this picture cache. May be None if
+    /// not using native compositor, or if the surface was destroyed and needs
+    /// to be reallocated next time this surface contains valid tiles.
+    pub native_surface: Option<NativeSurface>,
+    /// Compositor surfaces that have been promoted from primitives in this
+    /// tile cache.
+    pub compositor_surfaces: Vec<CompositorSurface>,
+    /// Visible tiles to be composited for this sub-slice.
+    pub composite_tiles: Vec<CompositeTile>,
+    /// Compositor descriptors of visible, opaque tiles (used by composite_state.push_surface)
+    pub opaque_tile_descriptors: Vec<CompositeTileDescriptor>,
+    /// Compositor descriptors of visible, alpha tiles (used by composite_state.push_surface)
+    pub alpha_tile_descriptors: Vec<CompositeTileDescriptor>,
+}
+
+impl SubSlice {
+    /// Construct an empty sub-slice: no tiles, no native surface, empty lists.
+    fn new() -> Self {
+        Self {
+            tiles: FastHashMap::default(),
+            native_surface: None,
+            compositor_surfaces: Vec::new(),
+            composite_tiles: Vec::new(),
+            opaque_tile_descriptors: Vec::new(),
+            alpha_tile_descriptors: Vec::new(),
+        }
+    }
+
+    /// Clear the per-frame lists (compositor surfaces, composite tiles and the
+    /// opaque / alpha tile descriptors). Tiles and the native surface are kept.
+    /// These lists are built per-frame, since APZ may change whether an image
+    /// is suitable to be a compositor surface.
+    fn reset(&mut self) {
+        self.alpha_tile_descriptors.clear();
+        self.opaque_tile_descriptors.clear();
+        self.composite_tiles.clear();
+        self.compositor_surfaces.clear();
+    }
+
+    /// Rebuild the tile grid so it covers exactly `new_tile_rect`.
+    ///
+    /// Tiles that already exist at an offset inside the new rect are carried
+    /// over; missing ones are created. The tiles that fell outside the new
+    /// rect are returned to the caller.
+    fn resize(&mut self, new_tile_rect: TileRect) -> FastHashMap<TileOffset, Box<Tile>> {
+        // Swap in an empty map; whatever is left in `previous` once the new
+        // grid is populated is the set of tiles that are no longer needed.
+        let mut previous = mem::replace(&mut self.tiles, FastHashMap::default());
+        self.tiles.reserve(new_tile_rect.area() as usize);
+
+        let columns = new_tile_rect.min.x .. new_tile_rect.max.x;
+        let rows = new_tile_rect.min.y .. new_tile_rect.max.y;
+
+        for y in rows {
+            for x in columns.clone() {
+                let key = TileOffset::new(x, y);
+                let tile = match previous.remove(&key) {
+                    Some(existing) => existing,
+                    None => Box::new(Tile::new(key)),
+                };
+                self.tiles.insert(key, tile);
+            }
+        }
+
+        previous
+    }
+}
+
+/// A native compositor surface associated with the backdrop of a tile cache
+/// (held in `TileCacheInstance::backdrop_surface`).
+pub struct BackdropSurface {
+    /// Handle of the native surface. `TileCacheInstance::destroy` passes it
+    /// to `ResourceCache::destroy_compositor_surface`.
+    pub id: NativeSurfaceId,
+    /// NOTE(review): presumably the backdrop color the surface was created
+    /// with - confirm at the site that constructs this struct.
+    color: ColorF,
+    /// NOTE(review): presumably the device-space rect covered by the surface -
+    /// confirm at the site that constructs this struct.
+    pub device_rect: DeviceRect,
+}
+
+/// Represents a cache of tiles that make up a picture primitive.
+pub struct TileCacheInstance {
+    /// Index of the tile cache / slice for this frame builder. It's determined
+    /// by the setup_picture_caching method during flattening, which splits the
+    /// picture tree into multiple slices. It's used as a simple input to the tile
+    /// keys. It does mean we invalidate tiles if a new layer gets inserted / removed
+    /// between display lists - this seems very unlikely to occur on most pages, but
+    /// can be revisited if we ever notice that.
+    pub slice: usize,
+    /// Propagated information about the slice
+    pub slice_flags: SliceFlags,
+    /// The currently selected tile size to use for this cache
+    pub current_tile_size: DeviceIntSize,
+    /// The list of sub-slices in this tile cache. Always holds at least one
+    /// entry: the count is the clamped compositor surface count plus one.
+    pub sub_slices: Vec<SubSlice>,
+    /// The positioning node for this tile cache.
+    pub spatial_node_index: SpatialNodeIndex,
+    /// List of opacity bindings, with some extra information
+    /// about whether they changed since last frame.
+    opacity_bindings: FastHashMap<PropertyBindingId, OpacityBindingInfo>,
+    /// Switch back and forth between old and new bindings hashmaps to avoid re-allocating.
+    old_opacity_bindings: FastHashMap<PropertyBindingId, OpacityBindingInfo>,
+    /// A helper to compare transforms between previous and current frame.
+    spatial_node_comparer: SpatialNodeComparer,
+    /// List of color bindings, with some extra information
+    /// about whether they changed since last frame.
+    color_bindings: FastHashMap<PropertyBindingId, ColorBindingInfo>,
+    /// Switch back and forth between old and new bindings hashmaps to avoid re-allocating.
+    old_color_bindings: FastHashMap<PropertyBindingId, ColorBindingInfo>,
+    /// The current dirty region tracker for this picture.
+    pub dirty_region: DirtyRegion,
+    /// Current size of tiles in picture units.
+    tile_size: PictureSize,
+    /// Tile coords of the currently allocated grid. Reset to zero when the
+    /// number of sub-slices changes in `prepare_for_new_scene`.
+    tile_rect: TileRect,
+    /// Pre-calculated versions of the tile_rect above, used to speed up the
+    /// calculations in get_tile_coords_for_rect.
+    tile_bounds_p0: TileOffset,
+    tile_bounds_p1: TileOffset,
+    /// Local rect (unclipped) of the picture this cache covers.
+    pub local_rect: PictureRect,
+    /// The local clip rect, from the shared clips of this picture.
+    pub local_clip_rect: PictureRect,
+    /// The screen rect, transformed to local picture space.
+    pub screen_rect_in_pic_space: PictureRect,
+    /// The surface index that this tile cache will be drawn into.
+    surface_index: SurfaceIndex,
+    /// The background color from the renderer. If this is set opaque, we know it's
+    /// fine to clear the tiles to this and allow subpixel text on the first slice.
+    pub background_color: Option<ColorF>,
+    /// Information about the calculated backdrop content of this cache.
+    pub backdrop: BackdropInfo,
+    /// The allowed subpixel mode for this surface, which depends on the detected
+    /// opacity of the background.
+    pub subpixel_mode: SubpixelMode,
+    /// Node in the clip-tree that defines where we exclude clips from child prims
+    pub shared_clip_node_id: ClipNodeId,
+    /// Clip leaf that is used to build the clip-chain for this tile cache.
+    pub shared_clip_leaf_id: Option<ClipLeafId>,
+    /// The number of frames until this cache next evaluates what tile size to use.
+    /// If a picture rect size is regularly changing just around a size threshold,
+    /// we don't want to constantly invalidate and reallocate different tile size
+    /// configuration each frame.
+    frames_until_size_eval: usize,
+    /// For DirectComposition, virtual surfaces don't support negative coordinates. However,
+    /// picture cache tile coordinates can be negative. To handle this, we apply an offset
+    /// to each tile in DirectComposition. We want to change this as little as possible,
+    /// to avoid invalidating tiles. However, if we have a picture cache tile coordinate
+    /// which is outside the virtual surface bounds, we must change this to allow
+    /// correct remapping of the coordinates passed to BeginDraw in DC.
+    virtual_offset: DeviceIntPoint,
+    /// Keep around the hash map used as compare_cache to avoid reallocating it each
+    /// frame.
+    compare_cache: FastHashMap<PrimitiveComparisonKey, PrimitiveCompareResult>,
+    /// The currently considered tile size override. Used to check if we should
+    /// re-evaluate tile size, even if the frame timer hasn't expired.
+    tile_size_override: Option<DeviceIntSize>,
+    /// A cache of compositor surfaces that are retained between frames
+    pub external_native_surface_cache: FastHashMap<ExternalNativeSurfaceKey, ExternalNativeSurface>,
+    /// Current frame ID of this tile cache instance. Used for book-keeping / garbage collecting
+    frame_id: FrameId,
+    /// Registered transform in CompositeState for this picture cache
+    pub transform_index: CompositorTransformIndex,
+    /// Current transform mapping local picture space to compositor surface space
+    local_to_surface: ScaleOffset,
+    /// If true, we need to invalidate all tiles during `post_update`
+    invalidate_all_tiles: bool,
+    /// Current transform mapping compositor surface space to final device space
+    surface_to_device: ScaleOffset,
+    /// The current raster scale for tiles in this cache
+    current_raster_scale: f32,
+    /// Depth of off-screen surfaces that are currently pushed during dependency updates
+    current_surface_traversal_depth: usize,
+    /// A list of extra dirty invalidation tests that can only be checked once we
+    /// know the dirty rect of all tiles
+    deferred_dirty_tests: Vec<DeferredDirtyTest>,
+    /// NOTE(review): presumably set once primitives have been seen after the
+    /// backdrop primitive during dependency updates - confirm at the use sites.
+    found_prims_after_backdrop: bool,
+    /// Native compositor surface for the backdrop associated with this cache,
+    /// if there is one. Released in `destroy`.
+    pub backdrop_surface: Option<BackdropSurface>,
+}
+
+/// Outcome of an attempt to promote something to a compositor surface.
+// NOTE(review): the producers / consumers of this enum are outside the visible
+// part of the file; the description above is inferred from the name only.
+enum SurfacePromotionResult {
+    /// The promotion did not happen.
+    Failed,
+    /// The promotion succeeded.
+    Success,
+}
+
+impl TileCacheInstance {
+    pub fn new(params: TileCacheParams) -> Self {
+        // One sub-slice is always needed, plus one per requested compositor
+        // surface. The request is clamped to an arbitrary limit so that we
+        // never create a huge number of OS compositor tiles and sub-slices.
+        let sub_slice_count = params.compositor_surface_count.min(MAX_COMPOSITOR_SURFACES) + 1;
+        let sub_slices: Vec<SubSlice> = (0 .. sub_slice_count)
+            .map(|_| SubSlice::new())
+            .collect();
+
+        // Default to centering the virtual offset in the middle of the DC
+        // virtual surface (which is square, so one size covers both axes).
+        let half_virtual_size = params.virtual_surface_size / 2;
+        let virtual_offset = DeviceIntPoint::new(half_virtual_size, half_virtual_size);
+
+        Self {
+            // State taken from the scene builder parameters.
+            slice: params.slice,
+            slice_flags: params.slice_flags,
+            spatial_node_index: params.spatial_node_index,
+            background_color: params.background_color,
+            shared_clip_node_id: params.shared_clip_node_id,
+            shared_clip_leaf_id: params.shared_clip_leaf_id,
+            dirty_region: DirtyRegion::new(params.spatial_node_index),
+            sub_slices,
+            virtual_offset,
+
+            // Binding / transform tracking starts out empty.
+            opacity_bindings: FastHashMap::default(),
+            old_opacity_bindings: FastHashMap::default(),
+            color_bindings: FastHashMap::default(),
+            old_color_bindings: FastHashMap::default(),
+            spatial_node_comparer: SpatialNodeComparer::new(),
+            compare_cache: FastHashMap::default(),
+            external_native_surface_cache: FastHashMap::default(),
+            deferred_dirty_tests: Vec::new(),
+
+            // Geometry is unknown until the first update.
+            current_tile_size: DeviceIntSize::zero(),
+            tile_size: PictureSize::zero(),
+            tile_rect: TileRect::zero(),
+            tile_bounds_p0: TileOffset::zero(),
+            tile_bounds_p1: TileOffset::zero(),
+            local_rect: PictureRect::zero(),
+            local_clip_rect: PictureRect::zero(),
+            screen_rect_in_pic_space: PictureRect::zero(),
+            local_to_surface: ScaleOffset::identity(),
+            surface_to_device: ScaleOffset::identity(),
+            current_raster_scale: 1.0,
+
+            // Remaining per-frame / bookkeeping state.
+            surface_index: SurfaceIndex(0),
+            backdrop: BackdropInfo::empty(),
+            subpixel_mode: SubpixelMode::Allow,
+            frames_until_size_eval: 0,
+            tile_size_override: None,
+            frame_id: FrameId::INVALID,
+            transform_index: CompositorTransformIndex::INVALID,
+            // A brand new cache has no valid tile content yet.
+            invalidate_all_tiles: true,
+            current_surface_traversal_depth: 0,
+            found_prims_after_backdrop: false,
+            backdrop_surface: None,
+        }
+    }
+
+    /// Return the total number of tiles allocated by this tile cache
+    pub fn tile_count(&self) -> usize {
+        // Every sub-slice allocates a full grid of `tile_rect` tiles.
+        let tiles_per_sub_slice = self.tile_rect.area() as usize;
+        let sub_slice_count = self.sub_slices.len();
+
+        tiles_per_sub_slice * sub_slice_count
+    }
+
+    /// Trims memory held by the tile cache, such as native surfaces.
+    pub fn memory_pressure(&mut self, resource_cache: &mut ResourceCache) {
+        for sub_slice in self.sub_slices.iter_mut() {
+            // Release the native tile behind every tile that has one.
+            for tile in sub_slice.tiles.values_mut() {
+                // Resetting the id to None with take() ensures that a new
+                // tile will be allocated during the next frame build.
+                let released_id = match tile.surface {
+                    Some(TileSurface::Texture {
+                        descriptor: SurfaceTextureDescriptor::Native { ref mut id, .. },
+                        ..
+                    }) => id.take(),
+                    _ => None,
+                };
+
+                if let Some(tile_id) = released_id {
+                    resource_cache.destroy_compositor_tile(tile_id);
+                }
+            }
+
+            // Then release the opaque / alpha native surfaces of the sub-slice.
+            if let Some(NativeSurface { opaque, alpha }) = sub_slice.native_surface.take() {
+                resource_cache.destroy_compositor_surface(opaque);
+                resource_cache.destroy_compositor_surface(alpha);
+            }
+        }
+    }
+
+    /// Reset this tile cache with the updated parameters from a new scene
+    /// that has arrived. This allows the tile cache to be retained across
+    /// new scenes.
+    pub fn prepare_for_new_scene(
+        &mut self,
+        params: TileCacheParams,
+        resource_cache: &mut ResourceCache,
+    ) {
+        // We should only receive updated state for matching slice key
+        assert_eq!(self.slice, params.slice);
+
+        // One sub-slice is always present, plus one per compositor surface
+        // requested by the new scene (clamped to the supported maximum).
+        let required_sub_slice_count = params.compositor_surface_count.min(MAX_COMPOSITOR_SURFACES) + 1;
+        let current_sub_slice_count = self.sub_slices.len();
+
+        if current_sub_slice_count != required_sub_slice_count {
+            // The set of sub-slices is changing, so drop the recorded tile rect.
+            self.tile_rect = TileRect::zero();
+
+            if current_sub_slice_count < required_sub_slice_count {
+                // Grow: append fresh, empty sub-slices.
+                self.sub_slices.resize_with(required_sub_slice_count, SubSlice::new);
+            } else {
+                // Shrink: detach the surplus sub-slices and release any native
+                // compositor tiles / surfaces that they own.
+                for mut surplus in self.sub_slices.split_off(required_sub_slice_count) {
+                    for tile in surplus.tiles.values_mut() {
+                        if let Some(TileSurface::Texture {
+                            descriptor: SurfaceTextureDescriptor::Native { id, .. },
+                            ..
+                        }) = &mut tile.surface {
+                            if let Some(tile_id) = id.take() {
+                                resource_cache.destroy_compositor_tile(tile_id);
+                            }
+                        }
+                    }
+
+                    if let Some(native_surface) = surplus.native_surface {
+                        resource_cache.destroy_compositor_surface(native_surface.opaque);
+                        resource_cache.destroy_compositor_surface(native_surface.alpha);
+                    }
+                }
+            }
+        }
+
+        // Store the parameters from the scene builder for this slice. Other
+        // params in the tile cache are retained and reused, or are always
+        // updated during pre/post_update.
+        self.spatial_node_index = params.spatial_node_index;
+        self.slice_flags = params.slice_flags;
+        self.background_color = params.background_color;
+        self.shared_clip_node_id = params.shared_clip_node_id;
+        self.shared_clip_leaf_id = params.shared_clip_leaf_id;
+
+        // Since the slice flags may have changed, ensure we re-evaluate the
+        // appropriate tile size for this cache next update.
+        self.frames_until_size_eval = 0;
+    }
+
+    /// Destroy any manually managed resources before this picture cache is
+    /// destroyed, such as native compositor surfaces.
+    pub fn destroy(
+        self,
+        resource_cache: &mut ResourceCache,
+    ) {
+        // Opaque / alpha native surfaces owned by each sub-slice.
+        for sub_slice in self.sub_slices.into_iter() {
+            let native_surface = match sub_slice.native_surface {
+                Some(native_surface) => native_surface,
+                None => continue,
+            };
+
+            resource_cache.destroy_compositor_surface(native_surface.opaque);
+            resource_cache.destroy_compositor_surface(native_surface.alpha);
+        }
+
+        // Compositor surfaces that were being retained between frames.
+        for (_key, cached_surface) in self.external_native_surface_cache.into_iter() {
+            resource_cache.destroy_compositor_surface(cached_surface.native_surface_id);
+        }
+
+        // The backdrop surface, if one was allocated.
+        if let Some(ref backdrop) = self.backdrop_surface {
+            resource_cache.destroy_compositor_surface(backdrop.id);
+        }
+    }
+
+    /// Map a picture-space rect to the (min, max) tile offsets that cover it,
+    /// with both corners clamped to the cached bounds of the tile grid.
+    fn get_tile_coords_for_rect(
+        &self,
+        rect: &PictureRect,
+    ) -> (TileOffset, TileOffset) {
+        let tile_w = self.tile_size.width;
+        let tile_h = self.tile_size.height;
+        let (lo, hi) = (&self.tile_bounds_p0, &self.tile_bounds_p1);
+
+        // Round the min corner down and the max corner up to whole tiles, then
+        // clamp each axis to avoid looping over irrelevant tiles later on.
+        let p0 = TileOffset::new(
+            clamp((rect.min.x / tile_w).floor() as i32, lo.x, hi.x),
+            clamp((rect.min.y / tile_h).floor() as i32, lo.y, hi.y),
+        );
+
+        let p1 = TileOffset::new(
+            clamp((rect.max.x / tile_w).ceil() as i32, lo.x, hi.x),
+            clamp((rect.max.y / tile_h).ceil() as i32, lo.y, hi.y),
+        );
+
+        (p0, p1)
+    }
+
+    /// Update transforms, opacity / color bindings and tile rects; returns the union of visible tiles' world rects.
+    pub fn pre_update(
+        &mut self,
+        pic_rect: PictureRect,
+        surface_index: SurfaceIndex,
+        frame_context: &FrameVisibilityContext,
+        frame_state: &mut FrameVisibilityState,
+    ) -> WorldRect {
+        self.surface_index = surface_index;
+        self.local_rect = pic_rect;
+        self.local_clip_rect = PictureRect::max_rect();
+        self.deferred_dirty_tests.clear();
+
+        for sub_slice in &mut self.sub_slices {
+            sub_slice.reset();
+        }
+
+        // Reset the opaque rect + subpixel mode, as they are calculated
+        // during the prim dependency checks.
+        self.backdrop = BackdropInfo::empty();
+
+        // Calculate the screen rect in picture space, for later comparison against
+        // backdrops, and prims potentially covering backdrops.
+        let pic_to_world_mapper = SpaceMapper::new_with_target(
+            frame_context.root_spatial_node_index,
+            self.spatial_node_index,
+            frame_context.global_screen_world_rect,
+            frame_context.spatial_tree,
+        );
+        self.screen_rect_in_pic_space = pic_to_world_mapper
+            .unmap(&frame_context.global_screen_world_rect)
+            .expect("unable to unmap screen rect");
+
+        // If there is a valid set of shared clips, build a clip chain instance for this,
+        // which will provide a local clip rect. This is useful for establishing things
+        // like whether the backdrop rect supplied by Gecko can be considered opaque.
+        if let Some(shared_clip_leaf_id) = self.shared_clip_leaf_id {
+            let map_local_to_surface = SpaceMapper::new(
+                self.spatial_node_index,
+                pic_rect,
+            );
+
+            frame_state.clip_store.set_active_clips(
+                self.spatial_node_index,
+                map_local_to_surface.ref_spatial_node_index,
+                shared_clip_leaf_id,
+                frame_context.spatial_tree,
+                &mut frame_state.data_stores.clip,
+                &frame_state.clip_tree,
+            );
+
+            let clip_chain_instance = frame_state.clip_store.build_clip_chain_instance(
+                pic_rect.cast_unit(),
+                &map_local_to_surface,
+                &pic_to_world_mapper,
+                frame_context.spatial_tree,
+                frame_state.gpu_cache,
+                frame_state.resource_cache,
+                frame_context.global_device_pixel_scale,
+                &frame_context.global_screen_world_rect,
+                &mut frame_state.data_stores.clip,
+                true,
+            );
+
+            // Ensure that if the entire picture cache is clipped out, the local
+            // clip rect is zero. This makes sure we don't register any occluders
+            // that are actually off-screen.
+            self.local_clip_rect = clip_chain_instance.map_or(PictureRect::zero(), |clip_chain_instance| {
+                clip_chain_instance.pic_coverage_rect
+            });
+        }
+
+        // Advance the current frame ID counter for this picture cache (must be done
+        // after any retained prev state is taken above).
+        self.frame_id.advance();
+
+        // Notify the spatial node comparer that a new frame has started, and the
+        // current reference spatial node for this tile cache.
+        self.spatial_node_comparer.next_frame(self.spatial_node_index);
+
+        // At the start of the frame, step through each current compositor surface
+        // and mark it as unused. Later, this is used to free old compositor surfaces.
+        // TODO(gw): In future, we might make this more sophisticated - for example,
+        //           retaining them for >1 frame if unused, or retaining them in some
+        //           kind of pool to reduce future allocations.
+        for external_native_surface in self.external_native_surface_cache.values_mut() {
+            external_native_surface.used_this_frame = false;
+        }
+
+        // Only evaluate what tile size to use fairly infrequently, so that we don't end
+        // up constantly invalidating and reallocating tiles if the picture rect size is
+        // changing near a threshold value.
+        if self.frames_until_size_eval == 0 ||
+           self.tile_size_override != frame_context.config.tile_size_override {
+
+            // Work out what size tile is appropriate for this picture cache.
+            let desired_tile_size = match frame_context.config.tile_size_override {
+                Some(tile_size_override) => {
+                    tile_size_override
+                }
+                None => {
+                    if self.slice_flags.contains(SliceFlags::IS_SCROLLBAR) {
+                        if pic_rect.width() <= pic_rect.height() {
+                            TILE_SIZE_SCROLLBAR_VERTICAL
+                        } else {
+                            TILE_SIZE_SCROLLBAR_HORIZONTAL
+                        }
+                    } else {
+                        frame_state.resource_cache.picture_textures.default_tile_size()
+                    }
+                }
+            };
+
+            // If the desired tile size has changed, then invalidate and drop any
+            // existing tiles.
+            if desired_tile_size != self.current_tile_size {
+                for sub_slice in &mut self.sub_slices {
+                    // Destroy any native surfaces on the tiles that will be dropped due
+                    // to resizing.
+                    if let Some(native_surface) = sub_slice.native_surface.take() {
+                        frame_state.resource_cache.destroy_compositor_surface(native_surface.opaque);
+                        frame_state.resource_cache.destroy_compositor_surface(native_surface.alpha);
+                    }
+                    sub_slice.tiles.clear();
+                }
+                self.tile_rect = TileRect::zero();
+                self.current_tile_size = desired_tile_size;
+            }
+
+            // Reset the countdown until the desired tile size is next evaluated.
+            // The frame count used here is an arbitrary value.
+            self.frames_until_size_eval = 120;
+            self.tile_size_override = frame_context.config.tile_size_override;
+        }
+
+        // Get the complete scale-offset from local space to device space
+        let local_to_device = get_relative_scale_offset(
+            self.spatial_node_index,
+            frame_context.root_spatial_node_index,
+            frame_context.spatial_tree,
+        );
+
+        // Get the compositor transform, which depends on pinch-zoom mode
+        let mut surface_to_device = local_to_device;
+
+        if frame_context.config.low_quality_pinch_zoom {
+            surface_to_device.scale.x /= self.current_raster_scale;
+            surface_to_device.scale.y /= self.current_raster_scale;
+        } else {
+            surface_to_device.scale.x = 1.0;
+            surface_to_device.scale.y = 1.0;
+        }
+
+        // Use that compositor transform to calculate a relative local to surface
+        let local_to_surface = local_to_device.accumulate(&surface_to_device.inverse());
+
+        const EPSILON: f32 = 0.001;
+        let compositor_translation_changed =
+            !surface_to_device.offset.x.approx_eq_eps(&self.surface_to_device.offset.x, &EPSILON) ||
+            !surface_to_device.offset.y.approx_eq_eps(&self.surface_to_device.offset.y, &EPSILON);
+        let compositor_scale_changed =
+            !surface_to_device.scale.x.approx_eq_eps(&self.surface_to_device.scale.x, &EPSILON) ||
+            !surface_to_device.scale.y.approx_eq_eps(&self.surface_to_device.scale.y, &EPSILON);
+        let surface_scale_changed =
+            !local_to_surface.scale.x.approx_eq_eps(&self.local_to_surface.scale.x, &EPSILON) ||
+            !local_to_surface.scale.y.approx_eq_eps(&self.local_to_surface.scale.y, &EPSILON);
+
+        if compositor_translation_changed ||
+           compositor_scale_changed ||
+           surface_scale_changed ||
+           frame_context.config.force_invalidation {
+            frame_state.composite_state.dirty_rects_are_valid = false;
+        }
+
+        self.surface_to_device = surface_to_device;
+        self.local_to_surface = local_to_surface;
+        self.invalidate_all_tiles = surface_scale_changed || frame_context.config.force_invalidation;
+
+        // Do a hacky diff of opacity binding values from the last frame. This is
+        // used later on during tile invalidation tests.
+        let current_properties = frame_context.scene_properties.float_properties();
+        mem::swap(&mut self.opacity_bindings, &mut self.old_opacity_bindings);
+
+        self.opacity_bindings.clear();
+        for (id, value) in current_properties {
+            let changed = match self.old_opacity_bindings.get(id) {
+                Some(old_property) => !old_property.value.approx_eq(value),
+                None => true,
+            };
+            self.opacity_bindings.insert(*id, OpacityBindingInfo {
+                value: *value,
+                changed,
+            });
+        }
+
+        // Do a hacky diff of color binding values from the last frame. This is
+        // used later on during tile invalidation tests.
+        let current_properties = frame_context.scene_properties.color_properties();
+        mem::swap(&mut self.color_bindings, &mut self.old_color_bindings);
+
+        self.color_bindings.clear();
+        for (id, value) in current_properties {
+            let changed = match self.old_color_bindings.get(id) {
+                Some(old_property) => old_property.value != (*value).into(),
+                None => true,
+            };
+            self.color_bindings.insert(*id, ColorBindingInfo {
+                value: (*value).into(),
+                changed,
+            });
+        }
+
+        let world_tile_size = WorldSize::new(
+            self.current_tile_size.width as f32 / frame_context.global_device_pixel_scale.0,
+            self.current_tile_size.height as f32 / frame_context.global_device_pixel_scale.0,
+        );
+
+        self.tile_size = PictureSize::new(
+            world_tile_size.width / self.local_to_surface.scale.x,
+            world_tile_size.height / self.local_to_surface.scale.y,
+        );
+
+        // Inflate the needed rect a bit, so that we retain tiles that we have drawn
+        // but have just recently gone off-screen. This means that we avoid re-drawing
+        // tiles if the user is scrolling up and down small amounts, at the cost of
+        // a bit of extra texture memory.
+        let desired_rect_in_pic_space = self.screen_rect_in_pic_space
+            .inflate(0.0, 1.0 * self.tile_size.height);
+
+        let needed_rect_in_pic_space = desired_rect_in_pic_space
+            .intersection(&pic_rect)
+            .unwrap_or_else(Box2D::zero);
+
+        let p0 = needed_rect_in_pic_space.min;
+        let p1 = needed_rect_in_pic_space.max;
+
+        let x0 = (p0.x / self.tile_size.width).floor() as i32;
+        let x1 = (p1.x / self.tile_size.width).ceil() as i32;
+
+        let y0 = (p0.y / self.tile_size.height).floor() as i32;
+        let y1 = (p1.y / self.tile_size.height).ceil() as i32;
+
+        let new_tile_rect = TileRect {
+            min: TileOffset::new(x0, y0),
+            max: TileOffset::new(x1, y1),
+        };
+
+        // Determine whether the current bounds of the tile grid will exceed the
+        // bounds of the DC virtual surface, taking into account the current
+        // virtual offset. If so, we need to invalidate all tiles, and set up
+        // a new virtual offset, centered around the current tile grid.
+
+        let virtual_surface_size = frame_context.config.compositor_kind.get_virtual_surface_size();
+        // We only need to invalidate in this case if the underlying platform
+        // uses virtual surfaces.
+        if virtual_surface_size > 0 {
+            // Get the extremities of the tile grid after virtual offset is applied
+            let tx0 = self.virtual_offset.x + x0 * self.current_tile_size.width;
+            let ty0 = self.virtual_offset.y + y0 * self.current_tile_size.height;
+            let tx1 = self.virtual_offset.x + (x1+1) * self.current_tile_size.width;
+            let ty1 = self.virtual_offset.y + (y1+1) * self.current_tile_size.height;
+
+            let need_new_virtual_offset = tx0 < 0 ||
+                                          ty0 < 0 ||
+                                          tx1 >= virtual_surface_size ||
+                                          ty1 >= virtual_surface_size;
+
+            if need_new_virtual_offset {
+                // Calculate a new virtual offset, centered around the middle of the
+                // current tile grid. This means we won't need to invalidate and get
+                // a new offset for a long time!
+                self.virtual_offset = DeviceIntPoint::new(
+                    (virtual_surface_size/2) - ((x0 + x1) / 2) * self.current_tile_size.width,
+                    (virtual_surface_size/2) - ((y0 + y1) / 2) * self.current_tile_size.height,
+                );
+
+                // Invalidate all native tile surfaces. They will be re-allocated next time
+                // they are scheduled to be rasterized.
+                for sub_slice in &mut self.sub_slices {
+                    for tile in sub_slice.tiles.values_mut() {
+                        if let Some(TileSurface::Texture { descriptor: SurfaceTextureDescriptor::Native { ref mut id, .. }, .. }) = tile.surface {
+                            if let Some(id) = id.take() {
+                                frame_state.resource_cache.destroy_compositor_tile(id);
+                                tile.surface = None;
+                                // Invalidate the entire tile to force a redraw.
+                                // TODO(gw): Add a new invalidation reason for virtual offset changing
+                                tile.invalidate(None, InvalidationReason::CompositorKindChanged);
+                            }
+                        }
+                    }
+
+                    // Destroy the native virtual surfaces. They will be re-allocated next time a tile
+                    // that references them is scheduled to draw.
+                    if let Some(native_surface) = sub_slice.native_surface.take() {
+                        frame_state.resource_cache.destroy_compositor_surface(native_surface.opaque);
+                        frame_state.resource_cache.destroy_compositor_surface(native_surface.alpha);
+                    }
+                }
+            }
+        }
+
+        // Rebuild the tile grid if the picture cache rect has changed.
+        if new_tile_rect != self.tile_rect {
+            for sub_slice in &mut self.sub_slices {
+                let mut old_tiles = sub_slice.resize(new_tile_rect);
+
+                // When old tiles remain after the resize, dirty rects are not valid.
+                if !old_tiles.is_empty() {
+                    frame_state.composite_state.dirty_rects_are_valid = false;
+                }
+
+                // Any old tiles that remain after the resize above are going to be dropped. For
+                // simple composite mode, the texture cache handle will expire and be collected
+                // by the texture cache. For native compositor mode, we need to explicitly
+                // invoke a callback to the client to destroy that surface.
+                frame_state.composite_state.destroy_native_tiles(
+                    old_tiles.values_mut(),
+                    frame_state.resource_cache,
+                );
+            }
+        }
+
+        // This is duplicated information from tile_rect, but cached here to avoid
+        // redundant calculations during get_tile_coords_for_rect
+        self.tile_bounds_p0 = TileOffset::new(x0, y0);
+        self.tile_bounds_p1 = TileOffset::new(x1, y1);
+        self.tile_rect = new_tile_rect;
+
+        let mut world_culling_rect = WorldRect::zero();
+
+        let mut ctx = TilePreUpdateContext {
+            pic_to_world_mapper,
+            background_color: self.background_color,
+            global_screen_world_rect: frame_context.global_screen_world_rect,
+            tile_size: self.tile_size,
+            frame_id: self.frame_id,
+        };
+
+        // Pre-update each tile
+        for sub_slice in &mut self.sub_slices {
+            for tile in sub_slice.tiles.values_mut() {
+                tile.pre_update(&ctx);
+
+                // Only include the tiles that are currently in view into the world culling
+                // rect. This is a very important optimization for a couple of reasons:
+                // (1) Primitives that intersect with tiles in the grid that are not currently
+                //     visible can be skipped from primitive preparation, clip chain building
+                //     and tile dependency updates.
+                // (2) When we need to allocate an off-screen surface for a child picture (for
+                //     example a CSS filter) we clip the size of the GPU surface to the world
+                //     culling rect below (to ensure we draw enough of it to be sampled by any
+                //     tiles that reference it). Making the world culling rect only affected
+                //     by visible tiles (rather than the entire virtual tile display port) can
+                //     result in allocating _much_ smaller GPU surfaces for cases where the
+                //     true off-screen surface size is very large.
+                if tile.is_visible {
+                    world_culling_rect = world_culling_rect.union(&tile.world_tile_rect);
+                }
+            }
+
+            // The background color can only be applied to the first sub-slice.
+            ctx.background_color = None;
+        }
+
+        // If the compositor mode has changed, we need to drop all incompatible tiles.
+        match frame_context.config.compositor_kind {
+            CompositorKind::Draw { .. } => {
+                for sub_slice in &mut self.sub_slices {
+                    for tile in sub_slice.tiles.values_mut() {
+                        if let Some(TileSurface::Texture { descriptor: SurfaceTextureDescriptor::Native { ref mut id, .. }, .. }) = tile.surface {
+                            if let Some(id) = id.take() {
+                                frame_state.resource_cache.destroy_compositor_tile(id);
+                            }
+                            tile.surface = None;
+                            // Invalidate the entire tile to force a redraw.
+                            tile.invalidate(None, InvalidationReason::CompositorKindChanged);
+                        }
+                    }
+
+                    if let Some(native_surface) = sub_slice.native_surface.take() {
+                        frame_state.resource_cache.destroy_compositor_surface(native_surface.opaque);
+                        frame_state.resource_cache.destroy_compositor_surface(native_surface.alpha);
+                    }
+                }
+
+                for (_, external_surface) in self.external_native_surface_cache.drain() {
+                    frame_state.resource_cache.destroy_compositor_surface(external_surface.native_surface_id)
+                }
+            }
+            CompositorKind::Native { .. } => {
+                // This branch is also hit when the compositor mode has not changed,
+                // so we still need to check whether there are incompatible tiles.
+                for sub_slice in &mut self.sub_slices {
+                    for tile in sub_slice.tiles.values_mut() {
+                        if let Some(TileSurface::Texture { descriptor: SurfaceTextureDescriptor::TextureCache { .. }, .. }) = tile.surface {
+                            tile.surface = None;
+                            // Invalidate the entire tile to force a redraw.
+                            tile.invalidate(None, InvalidationReason::CompositorKindChanged);
+                        }
+                    }
+                }
+            }
+        }
+
+        world_culling_rect
+    }
+
+    /// Check whether a primitive is eligible to be promoted to a compositor
+    /// surface. Every requirement below must hold, otherwise `Failed` is returned.
+    fn can_promote_to_surface(
+        &mut self,
+        flags: PrimitiveFlags,
+        prim_clip_chain: &ClipChainInstance,
+        prim_spatial_node_index: SpatialNodeIndex,
+        is_root_tile_cache: bool,
+        sub_slice_index: usize,
+        frame_context: &FrameVisibilityContext,
+    ) -> SurfacePromotionResult {
+        let rejected_early =
+            // The primitive has to _want_ to be promoted in the first place.
+            !flags.contains(PrimitiveFlags::PREFER_COMPOSITOR_SURFACE) ||
+            // For now, only a small (arbitrary) number of compositor surfaces
+            // is supported.
+            sub_slice_index == MAX_COMPOSITOR_SURFACES ||
+            // A primitive with a complex clip can't be promoted directly (this
+            // might become possible in limited cases in future, some native
+            // compositors do support rounded rect clips, for example).
+            prim_clip_chain.needs_mask ||
+            // Anything that is not on the root picture cache has some kind of
+            // complex effect (such as a filter, mix-blend-mode or 3d transform).
+            !is_root_tile_cache;
+
+        if rejected_early {
+            return SurfacePromotionResult::Failed;
+        }
+
+        // The transform between the primitive's spatial node and the root
+        // spatial node must be a plain 2d scale + translation.
+        let prim_mapper: SpaceMapper<PicturePixel, WorldPixel> = SpaceMapper::new_with_target(
+            frame_context.root_spatial_node_index,
+            prim_spatial_node_index,
+            frame_context.global_screen_world_rect,
+            &frame_context.spatial_tree,
+        );
+        if !prim_mapper.get_transform().is_2d_scale_translation() {
+            return SurfacePromotionResult::Failed;
+        }
+
+        // Finally, slices flagged as atomic never promote their primitives to
+        // compositor surfaces.
+        if self.slice_flags.contains(SliceFlags::IS_ATOMIC) {
+            SurfacePromotionResult::Failed
+        } else {
+            SurfacePromotionResult::Success
+        }
+    }
+
+    /// Request every non-dummy YUV image key, then hand over to
+    /// `setup_compositor_surfaces_impl` (always passing `is_opaque = true`).
+    fn setup_compositor_surfaces_yuv(
+        &mut self,
+        sub_slice_index: usize,
+        prim_info: &mut PrimitiveDependencyInfo,
+        flags: PrimitiveFlags,
+        local_prim_rect: LayoutRect,
+        prim_spatial_node_index: SpatialNodeIndex,
+        pic_coverage_rect: PictureRect,
+        frame_context: &FrameVisibilityContext,
+        image_dependencies: &[ImageDependency;3],
+        api_keys: &[ImageKey; 3],
+        resource_cache: &mut ResourceCache,
+        composite_state: &mut CompositeState,
+        gpu_cache: &mut GpuCache,
+        image_rendering: ImageRendering,
+        color_depth: ColorDepth,
+        color_space: YuvRangedColorSpace,
+        format: YuvFormat,
+    ) -> bool {
+        // TODO: See comment in setup_compositor_surfaces_rgb.
+        for &key in api_keys.iter().filter(|&&key| key != ImageKey::DUMMY) {
+            let request = ImageRequest {
+                key,
+                rendering: image_rendering,
+                tile: None,
+            };
+            resource_cache.request_image(request, gpu_cache);
+        }
+
+        let dependency = ExternalSurfaceDependency::Yuv {
+            image_dependencies: *image_dependencies,
+            color_space,
+            format,
+            channel_bit_depth: color_depth.bit_depth(),
+        };
+        self.setup_compositor_surfaces_impl(
+            sub_slice_index,
+            prim_info,
+            flags,
+            local_prim_rect,
+            prim_spatial_node_index,
+            pic_coverage_rect,
+            frame_context,
+            dependency,
+            api_keys,
+            resource_cache,
+            composite_state,
+            image_rendering,
+            true,
+        )
+    }
+
+    /// Request the RGB image, read its opacity from the image properties (treated as
+    /// non-opaque when none are available) and hand over to `setup_compositor_surfaces_impl`.
+    fn setup_compositor_surfaces_rgb(
+        &mut self,
+        sub_slice_index: usize,
+        prim_info: &mut PrimitiveDependencyInfo,
+        flags: PrimitiveFlags,
+        local_prim_rect: LayoutRect,
+        prim_spatial_node_index: SpatialNodeIndex,
+        pic_coverage_rect: PictureRect,
+        frame_context: &FrameVisibilityContext,
+        image_dependency: ImageDependency,
+        api_key: ImageKey,
+        resource_cache: &mut ResourceCache,
+        composite_state: &mut CompositeState,
+        gpu_cache: &mut GpuCache,
+        image_rendering: ImageRendering,
+    ) -> bool {
+        // Only the first slot holds a real key; the remaining slots stay as dummy keys.
+        let api_keys = [api_key, ImageKey::DUMMY, ImageKey::DUMMY];
+
+        // TODO: The picture compositing code requires images promoted into
+        // their own picture cache slices to be requested every frame even if
+        // they are not visible. However the image updates are only reached on
+        // the prepare pass for visible primitives. So we make sure to trigger
+        // an image request when promoting the image here.
+        let request = ImageRequest {
+            key: api_key,
+            rendering: image_rendering,
+            tile: None,
+        };
+        resource_cache.request_image(request, gpu_cache);
+
+        let is_opaque = match resource_cache.get_image_properties(api_key) {
+            Some(properties) => properties.descriptor.is_opaque(),
+            None => false,
+        };
+
+        self.setup_compositor_surfaces_impl(
+            sub_slice_index,
+            prim_info,
+            flags,
+            local_prim_rect,
+            prim_spatial_node_index,
+            pic_coverage_rect,
+            frame_context,
+            ExternalSurfaceDependency::Rgb { image_dependency },
+            &api_keys,
+            resource_cache,
+            composite_state,
+            image_rendering,
+            is_opaque,
+        )
+    }
+
+    // returns false if composition is not available for this surface,
+    // and the non-compositor path should be used to draw it instead.
+    fn setup_compositor_surfaces_impl(
+        &mut self,
+        sub_slice_index: usize,
+        prim_info: &mut PrimitiveDependencyInfo,
+        flags: PrimitiveFlags,
+        local_prim_rect: LayoutRect,
+        prim_spatial_node_index: SpatialNodeIndex,
+        pic_coverage_rect: PictureRect,
+        frame_context: &FrameVisibilityContext,
+        dependency: ExternalSurfaceDependency,
+        api_keys: &[ImageKey; 3],
+        resource_cache: &mut ResourceCache,
+        composite_state: &mut CompositeState,
+        image_rendering: ImageRendering,
+        is_opaque: bool,
+    ) -> bool {
+        let map_local_to_surface = SpaceMapper::new_with_target(
+            self.spatial_node_index,
+            prim_spatial_node_index,
+            self.local_rect,
+            frame_context.spatial_tree,
+        );
+
+        // Map the primitive local rect into picture space.
+        let prim_rect = match map_local_to_surface.map(&local_prim_rect) {
+            Some(rect) => rect,
+            None => return true,
+        };
+
+        // If the rect is invalid, no need to create dependencies.
+        if prim_rect.is_empty() {
+            return true;
+        }
+
+        let pic_to_world_mapper = SpaceMapper::new_with_target(
+            frame_context.root_spatial_node_index,
+            self.spatial_node_index,
+            frame_context.global_screen_world_rect,
+            frame_context.spatial_tree,
+        );
+
+        let world_clip_rect = pic_to_world_mapper
+            .map(&prim_info.prim_clip_box)
+            .expect("bug: unable to map clip to world space");
+
+        let is_visible = world_clip_rect.intersects(&frame_context.global_screen_world_rect);
+        if !is_visible {
+            return true;
+        }
+
+        let prim_offset = ScaleOffset::from_offset(local_prim_rect.min.to_vector().cast_unit());
+
+        let local_prim_to_device = get_relative_scale_offset(
+            prim_spatial_node_index,
+            frame_context.root_spatial_node_index,
+            frame_context.spatial_tree,
+        );
+
+        let normalized_prim_to_device = prim_offset.accumulate(&local_prim_to_device);
+
+        let local_to_surface = ScaleOffset::identity();
+        let surface_to_device = normalized_prim_to_device;
+
+        let compositor_transform_index = composite_state.register_transform(
+            local_to_surface,
+            surface_to_device,
+        );
+
+        let surface_size = composite_state.get_surface_rect(
+            &local_prim_rect,
+            &local_prim_rect,
+            compositor_transform_index,
+        ).size();
+
+        let clip_rect = (world_clip_rect * frame_context.global_device_pixel_scale).round();
+
+        if surface_size.width >= MAX_COMPOSITOR_SURFACES_SIZE ||
+           surface_size.height >= MAX_COMPOSITOR_SURFACES_SIZE {
+           return false;
+        }
+
+        // If this primitive is an external image, and supports being used
+        // directly by a native compositor, then lookup the external image id
+        // so we can pass that through.
+        let external_image_id = if flags.contains(PrimitiveFlags::SUPPORTS_EXTERNAL_COMPOSITOR_SURFACE)
+            && image_rendering == ImageRendering::Auto {
+            resource_cache.get_image_properties(api_keys[0])
+                .and_then(|properties| properties.external_image)
+                .and_then(|image| Some(image.id))
+        } else {
+            None
+        };
+
+        // When using native compositing, we need to find an existing native surface
+        // handle to use, or allocate a new one. For existing native surfaces, we can
+        // also determine whether this needs to be updated, depending on whether the
+        // image generation(s) of the planes have changed since last composite.
+        let (native_surface_id, update_params) = match composite_state.compositor_kind {
+            CompositorKind::Draw { .. } => {
+                (None, None)
+            }
+            CompositorKind::Native { .. } => {
+                let native_surface_size = surface_size.to_i32();
+
+                let key = ExternalNativeSurfaceKey {
+                    image_keys: *api_keys,
+                    size: native_surface_size,
+                    is_external_surface: external_image_id.is_some(),
+                };
+
+                let native_surface = self.external_native_surface_cache
+                    .entry(key)
+                    .or_insert_with(|| {
+                        // No existing surface, so allocate a new compositor surface.
+                        let native_surface_id = match external_image_id {
+                            Some(_external_image) => {
+                                // If we have a suitable external image, then create an external
+                                // surface to attach to.
+                                resource_cache.create_compositor_external_surface(is_opaque)
+                            }
+                            None => {
+                                // Otherwise create a normal compositor surface and a single
+                                // compositor tile that covers the entire surface.
+                                let native_surface_id =
+                                resource_cache.create_compositor_surface(
+                                    DeviceIntPoint::zero(),
+                                    native_surface_size,
+                                    is_opaque,
+                                );
+
+                                let tile_id = NativeTileId {
+                                    surface_id: native_surface_id,
+                                    x: 0,
+                                    y: 0,
+                                };
+                                resource_cache.create_compositor_tile(tile_id);
+
+                                native_surface_id
+                            }
+                        };
+
+                        ExternalNativeSurface {
+                            used_this_frame: true,
+                            native_surface_id,
+                            image_dependencies: [ImageDependency::INVALID; 3],
+                        }
+                    });
+
+                // Mark that the surface is referenced this frame so that the
+                // backing native surface handle isn't freed.
+                native_surface.used_this_frame = true;
+
+                let update_params = match external_image_id {
+                    Some(external_image) => {
+                        // If this is an external image surface, then there's no update
+                        // to be done. Just attach the current external image to the surface
+                        // and we're done.
+                        resource_cache.attach_compositor_external_image(
+                            native_surface.native_surface_id,
+                            external_image,
+                        );
+                        None
+                    }
+                    None => {
+                        // If the image dependencies match, there is no need to update
+                        // the backing native surface.
+                        match dependency {
+                            ExternalSurfaceDependency::Yuv{ image_dependencies, .. } => {
+                                if image_dependencies == native_surface.image_dependencies {
+                                    None
+                                } else {
+                                    Some(native_surface_size)
+                                }
+                            },
+                            ExternalSurfaceDependency::Rgb{ image_dependency, .. } => {
+                                if image_dependency == native_surface.image_dependencies[0] {
+                                    None
+                                } else {
+                                    Some(native_surface_size)
+                                }
+                            },
+                        }
+                    }
+                };
+
+                (Some(native_surface.native_surface_id), update_params)
+            }
+        };
+
+        // For compositor surfaces, if we didn't find an earlier sub-slice to add to,
+        // we know we can append to the current slice.
+        assert!(sub_slice_index < self.sub_slices.len() - 1);
+        let sub_slice = &mut self.sub_slices[sub_slice_index];
+
+        // Each compositor surface allocates a unique z-id
+        sub_slice.compositor_surfaces.push(CompositorSurface {
+            prohibited_rect: pic_coverage_rect,
+            is_opaque,
+            descriptor: ExternalSurfaceDescriptor {
+                local_surface_size: local_prim_rect.size(),
+                local_rect: prim_rect,
+                local_clip_rect: prim_info.prim_clip_box,
+                dependency,
+                image_rendering,
+                clip_rect,
+                transform_index: compositor_transform_index,
+                z_id: ZBufferId::invalid(),
+                native_surface_id,
+                update_params,
+            },
+        });
+
+        true
+    }
+
+    /// Notify the tile cache that dependency updates are descending into an
+    /// off-screen surface, supplying an estimate of that surface's local rect.
+    ///
+    /// This is a workaround that lets the picture cache treat the primitives of an
+    /// off-screen surface as a single atomic unit. The intent is to eventually drop
+    /// it by storing primitive dependencies _within_ each owning picture, which is
+    /// work that child picture caching needs anyway.
+    ///
+    /// For an outermost surface (traversal depth of zero), and only when more than
+    /// one sub-slice exists, the estimated rect is mapped via a `SpaceMapper` built
+    /// from this cache's spatial node and the surface's spatial node. Every
+    /// compositor surface whose prohibited rect intersects the mapped rect has that
+    /// prohibited rect grown to include it. The traversal depth is always
+    /// incremented, and is expected to be balanced by a call to `pop_surface`.
+    pub fn push_surface(
+        &mut self,
+        estimated_local_rect: LayoutRect,
+        surface_spatial_node_index: SpatialNodeIndex,
+        spatial_tree: &SpatialTree,
+    ) {
+        let is_outermost_surface = self.current_surface_traversal_depth == 0;
+        // More than one sub-slice implies compositor surfaces are present, which is
+        // the only case where sub-slice regions need evaluating.
+        let has_extra_sub_slices = self.sub_slices.len() > 1;
+
+        if is_outermost_surface && has_extra_sub_slices {
+            let surface_mapper = SpaceMapper::new_with_target(
+                self.spatial_node_index,
+                surface_spatial_node_index,
+                self.local_rect,
+                spatial_tree,
+            );
+
+            if let Some(pic_rect) = surface_mapper.map(&estimated_local_rect) {
+                // Walk the sub-slices in order, stopping at the first one whose
+                // compositor surfaces the rect does not touch (prims should land on
+                // the primary surface where possible, so they get subpixel AA).
+                for sub_slice in self.sub_slices.iter_mut() {
+                    let mut overlaps_any = false;
+
+                    let overlapping = sub_slice
+                        .compositor_surfaces
+                        .iter_mut()
+                        .filter(|surface| pic_rect.intersects(&surface.prohibited_rect));
+
+                    // Every overlapping surface is grown, not just the first one.
+                    for surface in overlapping {
+                        surface.prohibited_rect = surface.prohibited_rect.union(&pic_rect);
+                        overlaps_any = true;
+                    }
+
+                    if !overlaps_any {
+                        break;
+                    }
+                }
+            }
+        }
+
+        self.current_surface_traversal_depth += 1;
+    }
+
+    /// Pop an off-screen surface off the stack during dependency updates.
+    ///
+    /// Decrements the surface traversal depth that `push_surface` increments, so
+    /// each call here should pair with exactly one earlier `push_surface` call.
+    // NOTE(review): nothing here guards against an unbalanced pop; the counter's
+    // type is not visible in this chunk, so confirm how a pop at depth zero behaves.
+    pub fn pop_surface(&mut self) {
+        self.current_surface_traversal_depth -= 1;
+    }
+
+    /// Update the dependencies for each tile for a given primitive instance.
+    pub fn update_prim_dependencies(
+        &mut self,
+        prim_instance: &mut PrimitiveInstance,
+        prim_spatial_node_index: SpatialNodeIndex,
+        local_prim_rect: LayoutRect,
+        frame_context: &FrameVisibilityContext,
+        data_stores: &DataStores,
+        clip_store: &ClipStore,
+        pictures: &[PicturePrimitive],
+        resource_cache: &mut ResourceCache,
+        color_bindings: &ColorBindingStorage,
+        surface_stack: &[(PictureIndex, SurfaceIndex)],
+        composite_state: &mut CompositeState,
+        gpu_cache: &mut GpuCache,
+        scratch: &mut PrimitiveScratchBuffer,
+        is_root_tile_cache: bool,
+        surfaces: &mut [SurfaceInfo],
+    ) {
+        // This primitive exists on the last element on the current surface stack.
+        profile_scope!("update_prim_dependencies");
+        let prim_surface_index = surface_stack.last().unwrap().1;
+        let prim_clip_chain = &prim_instance.vis.clip_chain;
+
+        // Accumulate the exact (clipped) local rect in to the parent surface
+        let mut surface = &mut surfaces[prim_surface_index.0];
+        surface.clipped_local_rect = surface.clipped_local_rect.union(&prim_clip_chain.pic_coverage_rect);
+
+        // If the primitive is directly drawn onto this picture cache surface, then
+        // the pic_coverage_rect is in the same space. If not, we need to map it from
+        // the surface space into the picture cache space.
+        let on_picture_surface = prim_surface_index == self.surface_index;
+        let pic_coverage_rect = if on_picture_surface {
+            prim_clip_chain.pic_coverage_rect
+        } else {
+            // We want to get the rect in the tile cache surface space that this primitive
+            // occupies, in order to enable correct invalidation regions. Each surface
+            // that exists in the chain between this primitive and the tile cache surface
+            // may have an arbitrary inflation factor (for example, in the case of a series
+            // of nested blur elements). To account for this, step through the current
+            // surface stack, mapping the primitive rect into each surface space, including
+            // the inflation factor from each intermediate surface.
+            let mut current_pic_coverage_rect = prim_clip_chain.pic_coverage_rect;
+            let mut current_spatial_node_index = surfaces[prim_surface_index.0]
+                .surface_spatial_node_index;
+
+            for (pic_index, surface_index) in surface_stack.iter().rev() {
+                let surface = &surfaces[surface_index.0];
+                let pic = &pictures[pic_index.0];
+
+                let map_local_to_surface = SpaceMapper::new_with_target(
+                    surface.surface_spatial_node_index,
+                    current_spatial_node_index,
+                    surface.unclipped_local_rect,
+                    frame_context.spatial_tree,
+                );
+
+                // Map the rect into the parent surface, and inflate if this surface requires
+                // it. If the rect can't be mapped (e.g. due to an invalid transform) then
+                // just bail out from the dependencies and cull this primitive.
+                current_pic_coverage_rect = match map_local_to_surface.map(&current_pic_coverage_rect) {
+                    Some(rect) => {
+                        // TODO(gw): The casts here are a hack. We have some interface inconsistencies
+                        //           between layout/picture rects which don't really work with the
+                        //           current unit system, since sometimes the local rect of a picture
+                        //           is a LayoutRect, and sometimes it's a PictureRect. Consider how
+                        //           we can improve this?
+                        pic.composite_mode.as_ref().unwrap().get_coverage(
+                            surface,
+                            Some(rect.cast_unit()),
+                        ).cast_unit()
+                    }
+                    None => {
+                        return;
+                    }
+                };
+
+                current_spatial_node_index = surface.surface_spatial_node_index;
+            }
+
+            current_pic_coverage_rect
+        };
+
+        // Get the tile coordinates in the picture space.
+        let (p0, p1) = self.get_tile_coords_for_rect(&pic_coverage_rect);
+
+        // If the primitive is outside the tiling rects, it's known to not
+        // be visible.
+        if p0.x == p1.x || p0.y == p1.y {
+            return;
+        }
+
+        // Build the list of resources that this primitive has dependencies on.
+        let mut prim_info = PrimitiveDependencyInfo::new(
+            prim_instance.uid(),
+            pic_coverage_rect,
+        );
+
+        let mut sub_slice_index = self.sub_slices.len() - 1;
+
+        // Only need to evaluate sub-slice regions if we have compositor surfaces present
+        if sub_slice_index > 0 {
+            // Find the first sub-slice we can add this primitive to (we want to add
+            // prims to the primary surface if possible, so they get subpixel AA).
+            for (i, sub_slice) in self.sub_slices.iter_mut().enumerate() {
+                let mut intersects_prohibited_region = false;
+
+                for surface in &mut sub_slice.compositor_surfaces {
+                    if pic_coverage_rect.intersects(&surface.prohibited_rect) {
+                        surface.prohibited_rect = surface.prohibited_rect.union(&pic_coverage_rect);
+
+                        intersects_prohibited_region = true;
+                    }
+                }
+
+                if !intersects_prohibited_region {
+                    sub_slice_index = i;
+                    break;
+                }
+            }
+        }
+
+        // Include the prim spatial node, if differs relative to cache root.
+        if prim_spatial_node_index != self.spatial_node_index {
+            prim_info.spatial_nodes.push(prim_spatial_node_index);
+        }
+
+        // If there was a clip chain, add any clip dependencies to the list for this tile.
+        let clip_instances = &clip_store
+            .clip_node_instances[prim_clip_chain.clips_range.to_range()];
+        for clip_instance in clip_instances {
+            let clip = &data_stores.clip[clip_instance.handle];
+
+            prim_info.clips.push(clip_instance.handle.uid());
+
+            // If the clip has the same spatial node, the relative transform
+            // will always be the same, so there's no need to depend on it.
+            if clip.item.spatial_node_index != self.spatial_node_index
+                && !prim_info.spatial_nodes.contains(&clip.item.spatial_node_index) {
+                prim_info.spatial_nodes.push(clip.item.spatial_node_index);
+            }
+        }
+
+        // Certain primitives may select themselves to be a backdrop candidate, which is
+        // then applied below.
+        let mut backdrop_candidate = None;
+
+        // For pictures, we don't (yet) know the valid clip rect, so we can't correctly
+        // use it to calculate the local bounding rect for the tiles. If we include them
+        // then we may calculate a bounding rect that is too large, since it won't include
+        // the clip bounds of the picture. Excluding them from the bounding rect here
+        // fixes any correctness issues (the clips themselves are considered when we
+        // consider the bounds of the primitives that are *children* of the picture),
+        // however it does potentially result in some un-necessary invalidations of a
+        // tile (in cases where the picture local rect affects the tile, but the clip
+        // rect eventually means it doesn't affect that tile).
+        // TODO(gw): Get picture clips earlier (during the initial picture traversal
+        //           pass) so that we can calculate these correctly.
+        match prim_instance.kind {
+            PrimitiveInstanceKind::Picture { pic_index,.. } => {
+                // Pictures can depend on animated opacity bindings.
+                let pic = &pictures[pic_index.0];
+                if let Some(PictureCompositeMode::Filter(Filter::Opacity(binding, _))) = pic.composite_mode {
+                    prim_info.opacity_bindings.push(binding.into());
+                }
+            }
+            PrimitiveInstanceKind::Rectangle { data_handle, color_binding_index, .. } => {
+                // Rectangles can only form a backdrop candidate if they are known opaque.
+                // TODO(gw): We could resolve the opacity binding here, but the common
+                //           case for background rects is that they don't have animated opacity.
+                let color = match data_stores.prim[data_handle].kind {
+                    PrimitiveTemplateKind::Rectangle { color, .. } => {
+                        frame_context.scene_properties.resolve_color(&color)
+                    }
+                    _ => unreachable!(),
+                };
+                if color.a >= 1.0 {
+                    backdrop_candidate = Some(BackdropInfo {
+                        opaque_rect: pic_coverage_rect,
+                        spanning_opaque_color: None,
+                        kind: Some(BackdropKind::Color { color }),
+                        backdrop_rect: pic_coverage_rect,
+                    });
+                }
+
+                if color_binding_index != ColorBindingIndex::INVALID {
+                    prim_info.color_binding = Some(color_bindings[color_binding_index].into());
+                }
+            }
+            PrimitiveInstanceKind::Image { data_handle, ref mut is_compositor_surface, .. } => {
+                let image_key = &data_stores.image[data_handle];
+                let image_data = &image_key.kind;
+
+                let mut promote_to_surface = false;
+                match self.can_promote_to_surface(image_key.common.flags,
+                                                  prim_clip_chain,
+                                                  prim_spatial_node_index,
+                                                  is_root_tile_cache,
+                                                  sub_slice_index,
+                                                  frame_context) {
+                    SurfacePromotionResult::Failed => {
+                    }
+                    SurfacePromotionResult::Success => {
+                        promote_to_surface = true;
+                    }
+                }
+
+                // Native OS compositors (DC and CA, at least) support premultiplied alpha
+                // only. If we have an image that's not pre-multiplied alpha, we can't promote it.
+                if image_data.alpha_type == AlphaType::Alpha {
+                    promote_to_surface = false;
+                }
+
+                if let Some(image_properties) = resource_cache.get_image_properties(image_data.key) {
+                    // For an image to be a possible opaque backdrop, it must:
+                    // - Have a valid, opaque image descriptor
+                    // - Not use tiling (since they can fail to draw)
+                    // - Not have any spacing / padding
+                    // - Have opaque alpha in the instance (flattened) color
+                    if image_properties.descriptor.is_opaque() &&
+                       image_properties.tiling.is_none() &&
+                       image_data.tile_spacing == LayoutSize::zero() &&
+                       image_data.color.a >= 1.0 {
+                        backdrop_candidate = Some(BackdropInfo {
+                            opaque_rect: pic_coverage_rect,
+                            spanning_opaque_color: None,
+                            kind: None,
+                            backdrop_rect: PictureRect::zero(),
+                        });
+                    }
+                }
+
+                if promote_to_surface {
+                    promote_to_surface = self.setup_compositor_surfaces_rgb(
+                        sub_slice_index,
+                        &mut prim_info,
+                        image_key.common.flags,
+                        local_prim_rect,
+                        prim_spatial_node_index,
+                        pic_coverage_rect,
+                        frame_context,
+                        ImageDependency {
+                            key: image_data.key,
+                            generation: resource_cache.get_image_generation(image_data.key),
+                        },
+                        image_data.key,
+                        resource_cache,
+                        composite_state,
+                        gpu_cache,
+                        image_data.image_rendering,
+                    );
+                }
+
+                *is_compositor_surface = promote_to_surface;
+
+                if promote_to_surface {
+                    prim_instance.vis.state = VisibilityState::Culled;
+                    return;
+                } else {
+                    prim_info.images.push(ImageDependency {
+                        key: image_data.key,
+                        generation: resource_cache.get_image_generation(image_data.key),
+                    });
+                }
+            }
+            PrimitiveInstanceKind::YuvImage { data_handle, ref mut is_compositor_surface, .. } => {
+                let prim_data = &data_stores.yuv_image[data_handle];
+                let mut promote_to_surface = match self.can_promote_to_surface(
+                                            prim_data.common.flags,
+                                            prim_clip_chain,
+                                            prim_spatial_node_index,
+                                            is_root_tile_cache,
+                                            sub_slice_index,
+                                            frame_context) {
+                    SurfacePromotionResult::Failed => false,
+                    SurfacePromotionResult::Success => true,
+                };
+
+                // TODO(gw): When we support RGBA images for external surfaces, we also
+                //           need to check if opaque (YUV images are implicitly opaque).
+
+                // If this primitive is being promoted to a surface, construct an external
+                // surface descriptor for use later during batching and compositing. We only
+                // add the image keys for this primitive as a dependency if this is _not_
+                // a promoted surface, since we don't want the tiles to invalidate when the
+                // video content changes, if it's a compositor surface!
+                if promote_to_surface {
+                    // Build dependency for each YUV plane, with current image generation for
+                    // later detection of when the composited surface has changed.
+                    let mut image_dependencies = [ImageDependency::INVALID; 3];
+                    for (key, dep) in prim_data.kind.yuv_key.iter().cloned().zip(image_dependencies.iter_mut()) {
+                        *dep = ImageDependency {
+                            key,
+                            generation: resource_cache.get_image_generation(key),
+                        }
+                    }
+
+                    promote_to_surface = self.setup_compositor_surfaces_yuv(
+                        sub_slice_index,
+                        &mut prim_info,
+                        prim_data.common.flags,
+                        local_prim_rect,
+                        prim_spatial_node_index,
+                        pic_coverage_rect,
+                        frame_context,
+                        &image_dependencies,
+                        &prim_data.kind.yuv_key,
+                        resource_cache,
+                        composite_state,
+                        gpu_cache,
+                        prim_data.kind.image_rendering,
+                        prim_data.kind.color_depth,
+                        prim_data.kind.color_space.with_range(prim_data.kind.color_range),
+                        prim_data.kind.format,
+                    );
+                }
+
+                // Store on the YUV primitive instance whether this is a promoted surface.
+                // This is used by the batching code to determine whether to draw the
+                // image to the content tiles, or just a transparent z-write.
+                *is_compositor_surface = promote_to_surface;
+
+                if promote_to_surface {
+                    prim_instance.vis.state = VisibilityState::Culled;
+                    return;
+                } else {
+                    prim_info.images.extend(
+                        prim_data.kind.yuv_key.iter().map(|key| {
+                            ImageDependency {
+                                key: *key,
+                                generation: resource_cache.get_image_generation(*key),
+                            }
+                        })
+                    );
+                }
+            }
+            PrimitiveInstanceKind::ImageBorder { data_handle, .. } => {
+                let border_data = &data_stores.image_border[data_handle].kind;
+                prim_info.images.push(ImageDependency {
+                    key: border_data.request.key,
+                    generation: resource_cache.get_image_generation(border_data.request.key),
+                });
+            }
+            PrimitiveInstanceKind::Clear { .. } => {
+                backdrop_candidate = Some(BackdropInfo {
+                    opaque_rect: pic_coverage_rect,
+                    spanning_opaque_color: None,
+                    kind: Some(BackdropKind::Clear),
+                    backdrop_rect: pic_coverage_rect,        
+                });
+            }
+            PrimitiveInstanceKind::LinearGradient { data_handle, .. }
+            | PrimitiveInstanceKind::CachedLinearGradient { data_handle, .. } => {
+                let gradient_data = &data_stores.linear_grad[data_handle];
+                if gradient_data.stops_opacity.is_opaque
+                    && gradient_data.tile_spacing == LayoutSize::zero()
+                {
+                    backdrop_candidate = Some(BackdropInfo {
+                        opaque_rect: pic_coverage_rect,
+                        spanning_opaque_color: None,
+                        kind: None,
+                        backdrop_rect: PictureRect::zero(),
+                    });
+                }
+            }
+            PrimitiveInstanceKind::ConicGradient { data_handle, .. } => {
+                let gradient_data = &data_stores.conic_grad[data_handle];
+                if gradient_data.stops_opacity.is_opaque
+                    && gradient_data.tile_spacing == LayoutSize::zero()
+                {
+                    backdrop_candidate = Some(BackdropInfo {
+                        opaque_rect: pic_coverage_rect,
+                        spanning_opaque_color: None,
+                        kind: None,
+                        backdrop_rect: PictureRect::zero(),
+                    });
+                }
+            }
+            PrimitiveInstanceKind::RadialGradient { data_handle, .. } => {
+                let gradient_data = &data_stores.radial_grad[data_handle];
+                if gradient_data.stops_opacity.is_opaque
+                    && gradient_data.tile_spacing == LayoutSize::zero()
+                {
+                    backdrop_candidate = Some(BackdropInfo {
+                        opaque_rect: pic_coverage_rect,
+                        spanning_opaque_color: None,
+                        kind: None,
+                        backdrop_rect: PictureRect::zero(),
+                    });
+                }
+            }
+            PrimitiveInstanceKind::BackdropCapture { .. } => {}
+            PrimitiveInstanceKind::BackdropRender { pic_index, .. } => {
+                // If the area that the backdrop covers in the space of the surface it draws on
+                // is empty, skip any sub-graph processing. This is not just a performance win,
+                // it also ensures that we don't do a deferred dirty test that invalidates a tile
+                // even if the tile isn't actually dirty, which can cause panics later in the
+                // WR pipeline.
+                if !pic_coverage_rect.is_empty() {
+                    // Mark that we need the sub-graph this render depends on so that
+                    // we don't skip it during the prepare pass
+                    scratch.required_sub_graphs.insert(pic_index);
+
+                    // If this is a sub-graph, register the bounds on any affected tiles
+                    // so we know how much to expand the content tile by.
+
+                    // Implicitly, we know that any slice with a sub-graph disables compositor
+                    // surface promotion, so sub_slice_index will always be 0.
+                    debug_assert_eq!(sub_slice_index, 0);
+                    let sub_slice = &mut self.sub_slices[sub_slice_index];
+
+                    let mut surface_info = Vec::new();
+                    for (pic_index, surface_index) in surface_stack.iter().rev() {
+                        let pic = &pictures[pic_index.0];
+                        surface_info.push((pic.composite_mode.as_ref().unwrap().clone(), *surface_index));
+                    }
+
+                    for y in p0.y .. p1.y {
+                        for x in p0.x .. p1.x {
+                            let key = TileOffset::new(x, y);
+                            let tile = sub_slice.tiles.get_mut(&key).expect("bug: no tile");
+                            tile.sub_graphs.push((pic_coverage_rect, surface_info.clone()));
+                        }
+                    }
+
+                    // For backdrop-filter, we need to check if any of the dirty rects
+                    // in tiles that are affected by the filter primitive are dirty.
+                    self.deferred_dirty_tests.push(DeferredDirtyTest {
+                        tile_rect: TileRect::new(p0, p1),
+                        prim_rect: pic_coverage_rect,
+                    });
+                }
+            }
+            PrimitiveInstanceKind::LineDecoration { .. } |
+            PrimitiveInstanceKind::NormalBorder { .. } |
+            PrimitiveInstanceKind::TextRun { .. } => {
+                // These don't contribute dependencies
+            }
+        };
+        
+        // Calculate the screen rect in local space. When we calculate backdrops, we
+        // care only that they cover the visible rect, and don't have any overlapping
+        // prims in the visible rect. 
+        let visible_local_rect = self.local_rect.intersection(&self.screen_rect_in_pic_space).unwrap_or_default();
+        if pic_coverage_rect.intersects(&visible_local_rect) {
+            self.found_prims_after_backdrop = true;
+        }
+
+        // If this primitive considers itself a backdrop candidate, apply further
+        // checks to see if it matches all conditions to be a backdrop.
+        let mut vis_flags = PrimitiveVisibilityFlags::empty();
+        let sub_slice = &mut self.sub_slices[sub_slice_index];
+        if let Some(mut backdrop_candidate) = backdrop_candidate {
+            // Update whether the surface that this primitive exists on
+            // can be considered opaque. Any backdrop kind other than
+            // a clear primitive (e.g. color, gradient, image) can be
+            // considered.
+            match backdrop_candidate.kind {
+                Some(BackdropKind::Color { .. }) | None => {
+                    let surface = &mut surfaces[prim_surface_index.0];
+
+                    let is_same_coord_system = frame_context.spatial_tree.is_matching_coord_system(
+                        prim_spatial_node_index,
+                        surface.surface_spatial_node_index,
+                    );
+
+                    // To be an opaque backdrop, it must:
+                    // - Be the same coordinate system (axis-aligned)
+                    // - Have no clip mask
+                    // - Have a rect that covers the surface local rect
+                    if is_same_coord_system &&
+                       !prim_clip_chain.needs_mask &&
+                       prim_clip_chain.pic_coverage_rect.contains_box(&surface.unclipped_local_rect)
+                    {
+                        // Note that we use `prim_clip_chain.pic_coverage_rect` here rather
+                        // than `backdrop_candidate.opaque_rect`. The former is in the
+                        // local space of the surface, the latter is in the local space
+                        // of the top level tile-cache.
+                        surface.is_opaque = true;
+                    }
+                }
+                Some(BackdropKind::Clear) => {}
+            }
+
+            let is_suitable_backdrop = match backdrop_candidate.kind {
+                Some(BackdropKind::Clear) => {
+                    // Clear prims are special - they always end up in their own slice,
+                    // and always set the backdrop. In future, we hope to completely
+                    // remove clear prims, since they don't integrate with the compositing
+                    // system cleanly.
+                    true
+                }
+                Some(BackdropKind::Color { .. }) | None => {
+                    // Check a number of conditions to see if we can consider this
+                    // primitive as an opaque backdrop rect. Several of these are conservative
+                    // checks and could be relaxed in future. However, these checks
+                    // are quick and capture the common cases of background rects and images.
+                    // Specifically, we currently require:
+                    //  - The primitive is on the main picture cache surface.
+                    //  - Same coord system as picture cache (ensures rects are axis-aligned).
+                    //  - No clip masks exist.
+                    let same_coord_system = frame_context.spatial_tree.is_matching_coord_system(
+                        prim_spatial_node_index,
+                        self.spatial_node_index,
+                    );
+
+                    same_coord_system && on_picture_surface
+                }
+            };
+
+            if sub_slice_index == 0 &&
+               is_suitable_backdrop &&
+               sub_slice.compositor_surfaces.is_empty() {
+
+                // If the backdrop candidate has a clip-mask, try to extract an opaque inner
+                // rect that is safe to use for subpixel rendering
+                if prim_clip_chain.needs_mask {
+                    backdrop_candidate.opaque_rect = clip_store
+                        .get_inner_rect_for_clip_chain(
+                            prim_clip_chain,
+                            &data_stores.clip,
+                            frame_context.spatial_tree,
+                        )
+                        .unwrap_or(PictureRect::zero());
+                }
+                
+                // We set the backdrop opaque_rect here, indicating the coverage area, which
+                // is useful for calculate_subpixel_mode. We will only set the backdrop kind
+                // if it covers the visible rect.
+                if backdrop_candidate.opaque_rect.contains_box(&self.backdrop.opaque_rect) {
+                    self.backdrop.opaque_rect = backdrop_candidate.opaque_rect;
+                }
+
+                if let Some(kind) = backdrop_candidate.kind {
+                    if backdrop_candidate.opaque_rect.contains_box(&visible_local_rect) {
+                        self.found_prims_after_backdrop = false;
+                        self.backdrop.kind = Some(kind);
+                        self.backdrop.backdrop_rect = backdrop_candidate.opaque_rect;
+                        
+                        // If we have a color backdrop that spans the entire local rect, mark
+                        // the visibility flags of the primitive so it is skipped during batching
+                        // (and also clears any previous primitives). Additionally, update our
+                        // background color to match the backdrop color, which will ensure that
+                        // our tiles are cleared to this color.
+                        if let BackdropKind::Color { color } = kind {
+                            if backdrop_candidate.opaque_rect.contains_box(&self.local_rect) {
+                                vis_flags |= PrimitiveVisibilityFlags::IS_BACKDROP;
+                                self.backdrop.spanning_opaque_color = Some(color);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        // Record any new spatial nodes in the used list.
+        for spatial_node_index in &prim_info.spatial_nodes {
+            self.spatial_node_comparer.register_used_transform(
+                *spatial_node_index,
+                self.frame_id,
+                frame_context.spatial_tree,
+            );
+        }
+
+        // Truncate the lengths of dependency arrays to the max size we can handle.
+        // Any arrays this size or longer will invalidate every frame.
+        prim_info.clips.truncate(MAX_PRIM_SUB_DEPS);
+        prim_info.opacity_bindings.truncate(MAX_PRIM_SUB_DEPS);
+        prim_info.spatial_nodes.truncate(MAX_PRIM_SUB_DEPS);
+        prim_info.images.truncate(MAX_PRIM_SUB_DEPS);
+
+        // Normalize the tile coordinates before adding to tile dependencies.
+        // For each affected tile, mark any of the primitive dependencies.
+        for y in p0.y .. p1.y {
+            for x in p0.x .. p1.x {
+                // TODO(gw): Convert to 2d array temporarily to avoid hash lookups per-tile?
+                let key = TileOffset::new(x, y);
+                let tile = sub_slice.tiles.get_mut(&key).expect("bug: no tile");
+
+                tile.add_prim_dependency(&prim_info);
+            }
+        }
+
+        prim_instance.vis.state = VisibilityState::Visible {
+            vis_flags,
+            tile_rect: TileRect::new(p0, p1),
+            sub_slice_index: SubSliceIndex::new(sub_slice_index),
+        };
+    }
+
+    /// Dump the state of this picture cache through a `PrintTree`.
+    ///
+    /// The output is one level for the slice holding a `background_color`
+    /// item, followed by one nested level per sub-slice in which every tile
+    /// inside the current tile bounds prints itself.
+    fn print(&self) {
+        // TODO(gw): This initial implementation is very basic - just printing
+        //           the picture cache state to stdout. In future, we can
+        //           make this dump each frame to a file, and produce a report
+        //           stating which frames had invalidations. This will allow
+        //           diff'ing the invalidation states in a visual tool.
+        let mut pt = PrintTree::new("Picture Cache");
+        pt.new_level(format!("Slice {:?}", self.slice));
+        pt.add_item(format!("background_color: {:?}", self.background_color));
+
+        for (index, sub_slice) in self.sub_slices.iter().enumerate() {
+            pt.new_level(format!("SubSlice {:?}", index));
+
+            // Visit the tile grid row by row. Every offset inside the tile
+            // bounds is expected to have a tile in this sub-slice.
+            for y in self.tile_bounds_p0.y .. self.tile_bounds_p1.y {
+                for x in self.tile_bounds_p0.x .. self.tile_bounds_p1.x {
+                    sub_slice.tiles[&TileOffset::new(x, y)].print(&mut pt);
+                }
+            }
+
+            pt.end_level();
+        }
+
+        // Close the "Slice" level opened above.
+        pt.end_level();
+    }
+
+    /// Decide how subpixel AA may be used on this tile cache.
+    ///
+    /// Returns `Allow` when the background color is fully opaque or the opaque
+    /// backdrop rect contains the whole local rect, `Deny` when there is no
+    /// opaque backdrop rect at all, and otherwise `Conditional` restricted to
+    /// the opaque backdrop rect.
+    fn calculate_subpixel_mode(&self) -> SubpixelMode {
+        let opaque_rect = &self.backdrop.opaque_rect;
+
+        let background_is_opaque = match self.background_color {
+            Some(color) => color.a >= 1.0,
+            None => false,
+        };
+
+        if background_is_opaque {
+            // The overall tile cache is known opaque, so subpixel AA is allowed
+            // everywhere.
+            SubpixelMode::Allow
+        } else if opaque_rect.is_empty() {
+            // No valid opaque backdrop was found, so subpixel AA is not allowed.
+            SubpixelMode::Deny
+        } else if opaque_rect.contains_box(&self.local_rect) {
+            // The opaque backdrop covers the entire tile cache surface, so the
+            // per-text-run tests during primitive preparation can be skipped.
+            SubpixelMode::Allow
+        } else {
+            // None of the simple cases match: subpixel AA is only supported
+            // inside the opaque backdrop rect.
+            // TODO(gw): In future, it may make sense to have > 1 inclusion rect,
+            //           but this handles the common cases.
+            // TODO(gw): If a text run gets animated such that it's moving in a way that is
+            //           sometimes intersecting with the video rect, this can result in subpixel
+            //           AA flicking on/off for that text run. It's probably very rare, but
+            //           something we should handle in future.
+            SubpixelMode::Conditional {
+                allowed_rect: *opaque_rect,
+            }
+        }
+    }
+
+    /// Apply any updates after prim dependency updates. This applies
+    /// any late tile invalidations, and sets up the dirty rect and
+    /// set of tile blits.
+    ///
+    /// In order, this method: resets the dirty region and recomputes the
+    /// subpixel mode, registers the compositor transform, frees external native
+    /// surfaces that were not used this frame, updates the dirty / valid rects
+    /// of every tile, runs the deferred dirty tests, assigns z-ids and runs the
+    /// per-tile post update, and finally registers opaque compositor surfaces
+    /// and the opaque backdrop as occluders.
+    ///
+    /// Panics if called while a surface traversal is still in progress, or if
+    /// an occluder rect cannot be mapped to world space.
+    pub fn post_update(
+        &mut self,
+        frame_context: &FrameVisibilityContext,
+        frame_state: &mut FrameVisibilityState,
+    ) {
+        // All surface pushes must have been matched by pops by this point.
+        assert!(self.current_surface_traversal_depth == 0);
+
+        self.dirty_region.reset(self.spatial_node_index);
+        self.subpixel_mode = self.calculate_subpixel_mode();
+
+        self.transform_index = frame_state.composite_state.register_transform(
+            self.local_to_surface,
+            // TODO(gw): Once we support scaling of picture cache tiles during compositing,
+            //           that transform gets plugged in here!
+            self.surface_to_device,
+        );
+
+        // Mapper from this tile cache's picture space to world space, used at
+        // the end of this method when registering occluders.
+        let map_pic_to_world = SpaceMapper::new_with_target(
+            frame_context.root_spatial_node_index,
+            self.spatial_node_index,
+            frame_context.global_screen_world_rect,
+            frame_context.spatial_tree,
+        );
+
+        // A simple GC of the native external surface cache, to remove and free any
+        // surfaces that were not referenced during the update_prim_dependencies pass.
+        self.external_native_surface_cache.retain(|_, surface| {
+            if !surface.used_this_frame {
+                // If we removed an external surface, we need to mark the dirty rects as
+                // invalid so a full composite occurs on the next frame.
+                frame_state.composite_state.dirty_rects_are_valid = false;
+
+                frame_state.resource_cache.destroy_compositor_surface(surface.native_surface_id);
+            }
+
+            surface.used_this_frame
+        });
+
+        // NOTE(review): this mapper is built from the same four arguments as
+        // `map_pic_to_world` above; it is moved into the tile update context.
+        let pic_to_world_mapper = SpaceMapper::new_with_target(
+            frame_context.root_spatial_node_index,
+            self.spatial_node_index,
+            frame_context.global_screen_world_rect,
+            frame_context.spatial_tree,
+        );
+
+        // Read-only inputs shared by every tile's dirty / valid rect update.
+        let ctx = TileUpdateDirtyContext {
+            pic_to_world_mapper,
+            global_device_pixel_scale: frame_context.global_device_pixel_scale,
+            opacity_bindings: &self.opacity_bindings,
+            color_bindings: &self.color_bindings,
+            local_rect: self.local_rect,
+            invalidate_all: self.invalidate_all_tiles,
+        };
+
+        // Mutable state shared by every tile's dirty / valid rect update.
+        let mut state = TileUpdateDirtyState {
+            resource_cache: frame_state.resource_cache,
+            composite_state: frame_state.composite_state,
+            compare_cache: &mut self.compare_cache,
+            spatial_node_comparer: &mut self.spatial_node_comparer,
+        };
+
+        // Step through each tile and invalidate if the dependencies have changed. Determine
+        // the current opacity setting and whether it's changed.
+        for sub_slice in &mut self.sub_slices {
+            for tile in sub_slice.tiles.values_mut() {
+                tile.update_dirty_and_valid_rects(&ctx, &mut state, frame_context);
+            }
+        }
+
+        // Process any deferred dirty checks
+        // NOTE(review): `drain(..)` empties `deferred_dirty_tests` while the first
+        // sub-slice is being handled, so the iterations for any later sub-slices
+        // appear to see no tests. Confirm that this is intended.
+        for sub_slice in &mut self.sub_slices {
+            for dirty_test in self.deferred_dirty_tests.drain(..) {
+                // Calculate the total dirty rect from all tiles that this primitive affects
+                let mut total_dirty_rect = PictureRect::zero();
+
+                for y in dirty_test.tile_rect.min.y .. dirty_test.tile_rect.max.y {
+                    for x in dirty_test.tile_rect.min.x .. dirty_test.tile_rect.max.x {
+                        let key = TileOffset::new(x, y);
+                        let tile = sub_slice.tiles.get_mut(&key).expect("bug: no tile");
+                        total_dirty_rect = total_dirty_rect.union(&tile.local_dirty_rect);
+                    }
+                }
+
+                // If that dirty rect intersects with the local rect of the primitive
+                // being checked, invalidate that region in all of the affected tiles.
+                // TODO(gw): This is somewhat conservative, we could be more clever
+                //           here and avoid invalidating every tile when this changes.
+                //           We could also store the dirty rect only when the prim
+                //           is encountered, so that we don't invalidate if something
+                //           *after* the query in the rendering order affects invalidation.
+                if total_dirty_rect.intersects(&dirty_test.prim_rect) {
+                    for y in dirty_test.tile_rect.min.y .. dirty_test.tile_rect.max.y {
+                        for x in dirty_test.tile_rect.min.x .. dirty_test.tile_rect.max.x {
+                            let key = TileOffset::new(x, y);
+                            let tile = sub_slice.tiles.get_mut(&key).expect("bug: no tile");
+                            tile.invalidate(
+                                Some(dirty_test.prim_rect),
+                                InvalidationReason::SurfaceContentChanged,
+                            );
+                        }
+                    }
+                }
+            }
+        }
+
+        // Context for the per-tile post update. `backdrop` and `z_id` are
+        // placeholders here and are filled in per sub-slice in the loop below.
+        let mut ctx = TilePostUpdateContext {
+            local_clip_rect: self.local_clip_rect,
+            backdrop: None,
+            current_tile_size: self.current_tile_size,
+            z_id: ZBufferId::invalid(),
+        };
+
+        let mut state = TilePostUpdateState {
+            resource_cache: frame_state.resource_cache,
+            composite_state: frame_state.composite_state,
+        };
+
+        // Sub-slices are visited in reverse index order, so z-ids are handed out
+        // to the last sub-slice first and sub-slice 0 is handled last.
+        for (i, sub_slice) in self.sub_slices.iter_mut().enumerate().rev() {
+            // The backdrop is only relevant for the first sub-slice
+            if i == 0 {
+                ctx.backdrop = Some(self.backdrop);
+            }
+
+            // Within a sub-slice, its compositor surfaces get their z-ids (also
+            // in reverse order) before the sub-slice's own tiles do.
+            for compositor_surface in sub_slice.compositor_surfaces.iter_mut().rev() {
+                compositor_surface.descriptor.z_id = state.composite_state.z_generator.next();
+            }
+
+            ctx.z_id = state.composite_state.z_generator.next();
+
+            for tile in sub_slice.tiles.values_mut() {
+                tile.post_update(&ctx, &mut state, frame_context);
+            }
+        }
+
+        // Register any opaque external compositor surfaces as potential occluders. This
+        // is especially useful when viewing video in full-screen mode, as it is
+        // able to occlude every background tile (avoiding allocation, rasterization
+        // and compositing).
+
+        for sub_slice in &self.sub_slices {
+            for compositor_surface in &sub_slice.compositor_surfaces {
+                if compositor_surface.is_opaque {
+                    // Restrict the surface rect to its own clip rect and to the
+                    // clip rect of this tile cache; skip it if nothing remains.
+                    let local_surface_rect = compositor_surface
+                        .descriptor
+                        .local_rect
+                        .intersection(&compositor_surface.descriptor.local_clip_rect)
+                        .and_then(|r| {
+                            r.intersection(&self.local_clip_rect)
+                        });
+
+                    if let Some(local_surface_rect) = local_surface_rect {
+                        let world_surface_rect = map_pic_to_world
+                            .map(&local_surface_rect)
+                            .expect("bug: unable to map external surface to world space");
+
+                        frame_state.composite_state.register_occluder(
+                            compositor_surface.descriptor.z_id,
+                            world_surface_rect,
+                        );
+                    }
+                }
+            }
+        }
+
+        // Register the opaque region of this tile cache as an occluder, which
+        // is used later in the frame to occlude other tiles.
+        if !self.backdrop.opaque_rect.is_empty() {
+            // A z-id is consumed here even if the clipped backdrop rect below
+            // turns out to be empty.
+            let z_id_backdrop = frame_state.composite_state.z_generator.next();
+
+            let backdrop_rect = self.backdrop.opaque_rect
+                .intersection(&self.local_rect)
+                .and_then(|r| {
+                    r.intersection(&self.local_clip_rect)
+                });
+
+            if let Some(backdrop_rect) = backdrop_rect {
+                let world_backdrop_rect = map_pic_to_world
+                    .map(&backdrop_rect)
+                    .expect("bug: unable to map backdrop to world space");
+
+                // Since we register the entire backdrop rect, use the opaque z-id for the
+                // picture cache slice.
+                // NOTE(review): the id passed below is the freshly generated
+                // `z_id_backdrop`, not a z-id assigned to the tiles above; confirm
+                // that the comment still describes the intent.
+                frame_state.composite_state.register_occluder(
+                    z_id_backdrop,
+                    world_backdrop_rect,
+                );
+            }
+        }
+    }
+}
+
+/// Scratch storage for picture processing, holding a stack of surface
+/// indices whose allocation can be reused.
+///
+/// `Default` yields a buffer with an empty surface stack.
+#[derive(Default)]
+pub struct PictureScratchBuffer {
+    /// Stack of surface indices; emptied at the start of every frame.
+    surface_stack: Vec<SurfaceIndex>,
+}
+
+impl PictureScratchBuffer {
+    /// Empty the surface stack in preparation for a new frame. The vector's
+    /// allocation is kept.
+    pub fn begin_frame(&mut self) {
+        self.surface_stack.clear();
+    }
+
+    /// Hand the surface stack to `recycler` via `Recycler::recycle_vec`.
+    // NOTE(review): presumably this lets the recycler shrink or replace an
+    // oversized allocation - confirm against `Recycler`.
+    pub fn recycle(&mut self, recycler: &mut Recycler) {
+        recycler.recycle_vec(&mut self.surface_stack);
+    }
+}
+
+/// Index of a surface. Elsewhere in this file the wrapped value is used to
+/// index directly into a surfaces list (e.g. `surfaces[index.0]`).
+#[derive(Debug, Copy, Clone, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct SurfaceIndex(pub usize);
+
+/// Information about an offscreen surface. For now,
+/// it contains information about the size and coordinate
+/// system of the surface. In the future, it will contain
+/// information about the contents of the surface, which
+/// will allow surfaces to be cached / retained between
+/// frames and display lists.
+pub struct SurfaceInfo {
+    /// A local rect defining the size of this surface, in the
+    /// coordinate system of the surface itself. This contains
+    /// the unclipped bounding rect of child primitives.
+    pub unclipped_local_rect: PictureRect,
+    /// The local space coverage of child primitives after they
+    /// are clipped to their owning clip-chain.
+    pub clipped_local_rect: PictureRect,
+    /// If true, we know this surface is completely opaque
+    pub is_opaque: bool,
+    /// The (conservative) valid part of this surface rect. Used
+    /// to reduce the size of render target allocation.
+    pub clipping_rect: PictureRect,
+    /// Helper structs for mapping local rects in different
+    /// coordinate systems into the surface coordinates.
+    pub map_local_to_surface: SpaceMapper<LayoutPixel, PicturePixel>,
+    /// The rasterization root for this surface. When it differs from
+    /// `surface_spatial_node_index`, rects are mapped through world space
+    /// before being converted to device pixels.
+    pub raster_spatial_node_index: SpatialNodeIndex,
+    /// The positioning node for the surface itself; the surface's local
+    /// space is defined relative to this node.
+    pub surface_spatial_node_index: SpatialNodeIndex,
+    /// The device pixel ratio specific to this surface.
+    pub device_pixel_scale: DevicePixelScale,
+    /// The scale factors of the surface to world transform.
+    pub world_scale_factors: (f32, f32),
+    /// Local scale factors surface to raster transform
+    pub local_scale: (f32, f32),
+    /// If true, allow snapping on this and child surfaces
+    pub allow_snapping: bool,
+}
+
+impl SurfaceInfo {
+    /// Create the description of a new surface.
+    ///
+    /// The local-to-surface mapper is bounded by `world_rect` unmapped into the
+    /// space of `surface_spatial_node_index`; if that unmapping fails the
+    /// maximal picture rect is used instead. All rects start out as zero rects
+    /// and the surface starts out as not opaque.
+    pub fn new(
+        surface_spatial_node_index: SpatialNodeIndex,
+        raster_spatial_node_index: SpatialNodeIndex,
+        world_rect: WorldRect,
+        spatial_tree: &SpatialTree,
+        device_pixel_scale: DevicePixelScale,
+        world_scale_factors: (f32, f32),
+        local_scale: (f32, f32),
+        allow_snapping: bool,
+    ) -> Self {
+        let map_surface_to_world = SpaceMapper::new_with_target(
+            spatial_tree.root_reference_frame_index(),
+            surface_spatial_node_index,
+            world_rect,
+            spatial_tree,
+        );
+
+        // Bring the world bounds back into the surface's own space so that
+        // they can bound the local-to-surface mapper.
+        let pic_bounds = map_surface_to_world
+            .unmap(&map_surface_to_world.bounds)
+            .unwrap_or_else(PictureRect::max_rect);
+
+        let map_local_to_surface = SpaceMapper::new(
+            surface_spatial_node_index,
+            pic_bounds,
+        );
+
+        SurfaceInfo {
+            unclipped_local_rect: PictureRect::zero(),
+            clipped_local_rect: PictureRect::zero(),
+            is_opaque: false,
+            clipping_rect: PictureRect::zero(),
+            map_local_to_surface,
+            raster_spatial_node_index,
+            surface_spatial_node_index,
+            device_pixel_scale,
+            world_scale_factors,
+            local_scale,
+            allow_snapping,
+        }
+    }
+
+    /// Clamps the blur radius depending on scale factors.
+    ///
+    /// Returns the `(x, y)` radii unchanged when the larger of the two, after
+    /// applying the local and world scale factors, is within
+    /// `MAX_BLUR_RADIUS`. Otherwise both radii are scaled down by the same
+    /// factor so that the larger scaled radius equals `MAX_BLUR_RADIUS`.
+    pub fn clamp_blur_radius(
+        &self,
+        x_blur_radius: f32,
+        y_blur_radius: f32,
+    ) -> (f32, f32) {
+        // Clamping must occur after scale factors are applied, but scale factors are not applied
+        // until later on. To clamp the blur radius, we first apply the scale factors and then clamp
+        // and finally revert the scale factors.
+
+        let sx_blur_radius = x_blur_radius * self.local_scale.0;
+        let sy_blur_radius = y_blur_radius * self.local_scale.1;
+
+        let largest_scaled_blur_radius = f32::max(
+            sx_blur_radius * self.world_scale_factors.0,
+            sy_blur_radius * self.world_scale_factors.1,
+        );
+
+        if largest_scaled_blur_radius > MAX_BLUR_RADIUS {
+            let sf = MAX_BLUR_RADIUS / largest_scaled_blur_radius;
+            (x_blur_radius * sf, y_blur_radius * sf)
+        } else {
+            // Return the original blur radius to avoid any rounding errors
+            (x_blur_radius, y_blur_radius)
+        }
+    }
+
+    /// Transform a rect in the surface's local space to a device rect.
+    ///
+    /// When the raster root differs from the surface's own spatial node the
+    /// rect is mapped to world space first (the device pixel scale must be 1.0
+    /// in that case); otherwise the rect is reinterpreted directly. The result
+    /// is then multiplied by the device pixel scale and is not rounded.
+    ///
+    /// Panics if the rect cannot be mapped to world space.
+    pub fn map_to_device_rect(
+        &self,
+        local_rect: &PictureRect,
+        spatial_tree: &SpatialTree,
+    ) -> DeviceRect {
+        let raster_rect = if self.raster_spatial_node_index != self.surface_spatial_node_index {
+            assert_eq!(self.device_pixel_scale.0, 1.0);
+
+            let local_to_world = SpaceMapper::new_with_target(
+                spatial_tree.root_reference_frame_index(),
+                self.surface_spatial_node_index,
+                WorldRect::max_rect(),
+                spatial_tree,
+            );
+
+            local_to_world
+                .map(local_rect)
+                .expect("bug: unable to map surface rect to world space")
+        } else {
+            local_rect.cast_unit()
+        };
+
+        raster_rect * self.device_pixel_scale
+    }
+
+    /// Clip and transform a local rect to a device rect suitable for allocating
+    /// a child off-screen surface of this surface (e.g. for clip-masks)
+    ///
+    /// Returns `None` when `local_rect` does not intersect the surface's
+    /// clipping rect. Otherwise the clipped rect goes through
+    /// `map_to_device_rect` and is rounded out.
+    pub fn get_surface_rect(
+        &self,
+        local_rect: &PictureRect,
+        spatial_tree: &SpatialTree,
+    ) -> Option<DeviceRect> {
+        let clipped_rect = local_rect.intersection(&self.clipping_rect)?;
+
+        Some(self.map_to_device_rect(&clipped_rect, spatial_tree).round_out())
+    }
+}
+
+/// Information from `get_surface_rects` about the allocated size, UV sampling
+/// parameters etc for an off-screen surface
+// NOTE(review): `get_surface_rects` is not visible from this part of the file;
+// the field notes below are inferred from names and types - confirm there.
+struct SurfaceAllocInfo {
+    /// Size of the render task, in integer device units.
+    task_size: DeviceIntSize,
+    /// Presumably set when drawing must be restricted with a scissor rect.
+    needs_scissor_rect: bool,
+    /// Surface rect in device space - presumably after clipping.
+    clipped: DeviceRect,
+    /// Surface rect in device space - presumably before clipping.
+    unclipped: DeviceRect,
+    /// Picture-space rect - presumably the counterpart of `clipped`.
+    clipped_local: PictureRect,
+    /// UV rect kind used when sampling the surface.
+    uv_rect_kind: UvRectKind,
+}
+
+/// Pairs a picture's composite mode with the index of the surface
+/// descriptor that belongs to it.
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+pub struct RasterConfig {
+    /// How this picture should be composited into
+    /// the parent surface.
+    // TODO(gw): We should remove this and just use what is in PicturePrimitive
+    pub composite_mode: PictureCompositeMode,
+    /// Index to the surface descriptor for this
+    /// picture.
+    pub surface_index: SurfaceIndex,
+}
+
+bitflags! {
+    /// Reasons (combinable as bit flags) for a picture to be given a backing surface.
+    #[cfg_attr(feature = "capture", derive(Serialize))]
+    pub struct BlitReason: u32 {
+        /// A child uses mix-blend-mode and therefore needs isolation.
+        const ISOLATE = 1 << 0;
+        /// A clip node is present that _might_ need a surface.
+        const CLIP = 1 << 1;
+        /// Preserve-3D needs a surface so that plane-splitting can be done.
+        const PRESERVE3D = 1 << 2;
+        /// A reused backdrop that needs a surface.
+        const BACKDROP = 1 << 3;
+    }
+}
+
+/// Specifies how this Picture should be composited
+/// onto the target it belongs to.
+// NOTE(review): the reason for allowing dead code is not visible here -
+// presumably some variants are not constructed in every configuration.
+#[allow(dead_code)]
+#[derive(Debug, Clone)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+pub enum PictureCompositeMode {
+    /// Apply CSS mix-blend-mode effect.
+    MixBlend(MixBlendMode),
+    /// Apply a CSS filter (except component transfer).
+    Filter(Filter),
+    /// Apply a component transfer filter.
+    ComponentTransferFilter(FilterDataHandle),
+    /// Draw to intermediate surface, copy straight across. This
+    /// is used for CSS isolation, and plane splitting. The payload
+    /// records why the surface is needed.
+    Blit(BlitReason),
+    /// Used to cache a picture as a series of tiles.
+    TileCache {
+        /// Identifies the slice that this tile cache belongs to.
+        slice_id: SliceId,
+    },
+    /// Apply an SVG filter, described as a list of filter primitives plus
+    /// a list of associated filter data.
+    SvgFilter(Vec<FilterPrimitive>, Vec<SFilterData>),
+    /// A surface that is used as an input to another primitive
+    IntermediateSurface,
+}
+
+impl PictureCompositeMode {
+    /// Compute the local rect covered by this composite mode.
+    ///
+    /// The starting rect is `sub_rect` when given, otherwise the surface's
+    /// clipped local rect. Blur (when `should_inflate` is set) and drop-shadow
+    /// filters inflate it by the clamped blur radius times `BLUR_SAMPLE_SCALE`.
+    /// SVG filters return the union of the starting rect with the output rect
+    /// of every filter primitive. All other modes return the starting rect
+    /// unchanged.
+    pub fn get_rect(
+        &self,
+        surface: &SurfaceInfo,
+        sub_rect: Option<LayoutRect>,
+    ) -> LayoutRect {
+        let surface_rect = sub_rect.unwrap_or_else(|| surface.clipped_local_rect.cast_unit());
+
+        match self {
+            PictureCompositeMode::Filter(Filter::Blur { width, height, should_inflate }) => {
+                if !*should_inflate {
+                    return surface_rect;
+                }
+
+                let (radius_x, radius_y) = surface.clamp_blur_radius(*width, *height);
+
+                surface_rect.inflate(
+                    radius_x.ceil() * BLUR_SAMPLE_SCALE,
+                    radius_y.ceil() * BLUR_SAMPLE_SCALE,
+                )
+            }
+            PictureCompositeMode::Filter(Filter::DropShadows(ref shadows)) => {
+                // Only the largest blur radius of all shadows matters here.
+                let widest_blur = shadows
+                    .iter()
+                    .fold(0.0, |widest, shadow| f32::max(widest, shadow.blur_radius));
+
+                let (clamped_x, clamped_y) = surface.clamp_blur_radius(widest_blur, widest_blur);
+
+                surface_rect.inflate(
+                    clamped_x * BLUR_SAMPLE_SCALE,
+                    clamped_y * BLUR_SAMPLE_SCALE,
+                )
+            }
+            PictureCompositeMode::SvgFilter(primitives, _) => {
+                // Output rect of each primitive so far, indexed by primitive
+                // position. An input that `to_index` does not resolve to an
+                // earlier primitive falls back to the surface rect.
+                let mut outputs = Vec::with_capacity(primitives.len());
+                let mut total_rect = surface_rect;
+
+                for (cur_index, primitive) in primitives.iter().enumerate() {
+                    let output = match primitive.kind {
+                        FilterPrimitiveKind::Blur(ref blur) => {
+                            let source = blur.input.to_index(cur_index).map(|index| outputs[index]).unwrap_or(surface_rect);
+                            source.inflate(
+                                blur.width.round() * BLUR_SAMPLE_SCALE,
+                                blur.height.round() * BLUR_SAMPLE_SCALE,
+                            )
+                        }
+                        FilterPrimitiveKind::DropShadow(ref drop_shadow) => {
+                            let source = drop_shadow.input.to_index(cur_index).map(|index| outputs[index]).unwrap_or(surface_rect);
+                            let inflation = drop_shadow.shadow.blur_radius.ceil() * BLUR_SAMPLE_SCALE;
+                            let inflated = source.inflate(inflation, inflation);
+                            source.union(&inflated.translate(drop_shadow.shadow.offset * Scale::new(1.0)))
+                        }
+                        FilterPrimitiveKind::Blend(ref blend) => {
+                            let first = blend.input1.to_index(cur_index).map(|index| outputs[index]).unwrap_or(surface_rect);
+                            let second = blend.input2.to_index(cur_index).map(|index| outputs[index]).unwrap_or(surface_rect);
+                            first.union(&second)
+                        }
+                        FilterPrimitiveKind::Composite(ref composite) => {
+                            let first = composite.input1.to_index(cur_index).map(|index| outputs[index]).unwrap_or(surface_rect);
+                            let second = composite.input2.to_index(cur_index).map(|index| outputs[index]).unwrap_or(surface_rect);
+                            first.union(&second)
+                        }
+                        FilterPrimitiveKind::Identity(ref identity) => {
+                            identity.input.to_index(cur_index).map(|index| outputs[index]).unwrap_or(surface_rect)
+                        }
+                        FilterPrimitiveKind::Opacity(ref opacity) => {
+                            opacity.input.to_index(cur_index).map(|index| outputs[index]).unwrap_or(surface_rect)
+                        }
+                        FilterPrimitiveKind::ColorMatrix(ref color_matrix) => {
+                            color_matrix.input.to_index(cur_index).map(|index| outputs[index]).unwrap_or(surface_rect)
+                        }
+                        FilterPrimitiveKind::ComponentTransfer(ref transfer) => {
+                            transfer.input.to_index(cur_index).map(|index| outputs[index]).unwrap_or(surface_rect)
+                        }
+                        FilterPrimitiveKind::Offset(ref offset) => {
+                            let source = offset.input.to_index(cur_index).map(|index| outputs[index]).unwrap_or(surface_rect);
+                            source.translate(offset.offset * Scale::new(1.0))
+                        }
+                        FilterPrimitiveKind::Flood(..) => surface_rect,
+                    };
+
+                    outputs.push(output);
+                    total_rect = total_rect.union(&output);
+                }
+
+                total_rect
+            }
+            _ => surface_rect,
+        }
+    }
+
+    pub fn get_coverage( // Returns the rect covered by the output of this composite mode.
+        &self,
+        surface: &SurfaceInfo,
+        sub_rect: Option<LayoutRect>, // If Some, used as the base rect instead of the surface's clipped local rect.
+    ) -> LayoutRect {
+        let surface_rect = match sub_rect {
+            Some(sub_rect) => sub_rect,
+            None => surface.clipped_local_rect.cast_unit(),
+        };
+        // Expand the base rect according to the composite mode; modes not listed below return it unchanged.
+        match self {
+            PictureCompositeMode::Filter(Filter::Blur { width, height, should_inflate }) => {
+                if *should_inflate {
+                    let (width_factor, height_factor) = surface.clamp_blur_radius(*width, *height);
+                    // Inflate by the clamped radii, rounded up and scaled by BLUR_SAMPLE_SCALE.
+                    surface_rect.inflate(
+                        width_factor.ceil() * BLUR_SAMPLE_SCALE,
+                        height_factor.ceil() * BLUR_SAMPLE_SCALE,
+                    )
+                } else {
+                    surface_rect
+                }
+            }
+            PictureCompositeMode::Filter(Filter::DropShadows(ref shadows)) => {
+                let mut rect = surface_rect;
+                // Accumulate the union of the base rect and the footprint of every shadow.
+                for shadow in shadows {
+                    let (blur_radius_x, blur_radius_y) = surface.clamp_blur_radius(
+                        shadow.blur_radius,
+                        shadow.blur_radius,
+                    );
+                    let blur_inflation_x = blur_radius_x * BLUR_SAMPLE_SCALE;
+                    let blur_inflation_y = blur_radius_y * BLUR_SAMPLE_SCALE;
+                    // NOTE(review): unlike the Blur case above, the radii are not rounded up here - confirm intended.
+                    let shadow_rect = surface_rect
+                        .translate(shadow.offset)
+                        .inflate(blur_inflation_x, blur_inflation_y);
+                    rect = rect.union(&shadow_rect);
+                }
+
+                rect
+            }
+            PictureCompositeMode::SvgFilter(primitives, _) => {
+                let mut result_rect = surface_rect;
+                let mut output_rects = Vec::with_capacity(primitives.len());
+                // output_rects[i] holds the output of primitive i; an input for which to_index returns None uses the base rect.
+                for (cur_index, primitive) in primitives.iter().enumerate() {
+                    let output_rect = match primitive.kind {
+                        FilterPrimitiveKind::Blur(ref primitive) => {
+                            let input = primitive.input.to_index(cur_index).map(|index| output_rects[index]).unwrap_or(surface_rect);
+                            let width_factor = primitive.width.round() * BLUR_SAMPLE_SCALE;
+                            let height_factor = primitive.height.round() * BLUR_SAMPLE_SCALE;
+
+                            input.inflate(width_factor, height_factor)
+                        }
+                        FilterPrimitiveKind::DropShadow(ref primitive) => {
+                            let inflation_factor = primitive.shadow.blur_radius.ceil() * BLUR_SAMPLE_SCALE;
+                            let input = primitive.input.to_index(cur_index).map(|index| output_rects[index]).unwrap_or(surface_rect);
+                            let shadow_rect = input.inflate(inflation_factor, inflation_factor);
+                            input.union(&shadow_rect.translate(primitive.shadow.offset * Scale::new(1.0))) // Input plus its offset, inflated shadow.
+                        }
+                        FilterPrimitiveKind::Blend(ref primitive) => {
+                            primitive.input1.to_index(cur_index).map(|index| output_rects[index]).unwrap_or(surface_rect)
+                                .union(&primitive.input2.to_index(cur_index).map(|index| output_rects[index]).unwrap_or(surface_rect))
+                        }
+                        FilterPrimitiveKind::Composite(ref primitive) => {
+                            primitive.input1.to_index(cur_index).map(|index| output_rects[index]).unwrap_or(surface_rect)
+                                .union(&primitive.input2.to_index(cur_index).map(|index| output_rects[index]).unwrap_or(surface_rect))
+                        }
+                        FilterPrimitiveKind::Identity(ref primitive) =>
+                            primitive.input.to_index(cur_index).map(|index| output_rects[index]).unwrap_or(surface_rect),
+                        FilterPrimitiveKind::Opacity(ref primitive) =>
+                            primitive.input.to_index(cur_index).map(|index| output_rects[index]).unwrap_or(surface_rect),
+                        FilterPrimitiveKind::ColorMatrix(ref primitive) =>
+                            primitive.input.to_index(cur_index).map(|index| output_rects[index]).unwrap_or(surface_rect),
+                        FilterPrimitiveKind::ComponentTransfer(ref primitive) =>
+                            primitive.input.to_index(cur_index).map(|index| output_rects[index]).unwrap_or(surface_rect),
+                        FilterPrimitiveKind::Offset(ref primitive) => {
+                            let input_rect = primitive.input.to_index(cur_index).map(|index| output_rects[index]).unwrap_or(surface_rect);
+                            input_rect.translate(primitive.offset * Scale::new(1.0))
+                        },
+
+                        FilterPrimitiveKind::Flood(..) => surface_rect,
+                    };
+                    output_rects.push(output_rect);
+                    result_rect = result_rect.union(&output_rect); // The result covers the base rect and every intermediate output.
+                }
+                result_rect
+            }
+            _ => {
+                surface_rect
+            }
+        }
+    }
+}
+
+/// Describes the place of a picture relative to a 3D context.
+#[derive(Clone, Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+pub enum Picture3DContext<C> { // `C` is the type of the per-child data stored in `root_data`.
+    /// The picture is not part of a 3D context sub-hierarchy.
+    Out,
+    /// The picture is part of a 3D context.
+    In {
+        /// Additional per-child data, for the case where this picture is the root of a 3D hierarchy.
+        root_data: Option<Vec<C>>,
+        /// The spatial node index of an "ancestor" element, i.e. one
+        /// that establishes the transformed element's containing block.
+        ///
+        /// See the CSS spec draft for more details:
+        /// https://drafts.csswg.org/css-transforms-2/#accumulated-3d-transformation-matrix-computation
+        ancestor_index: SpatialNodeIndex,
+        /// Index in the built scene's array of plane splitters.
+        plane_splitter_index: PlaneSplitterIndex,
+    },
+}
+
+/// Information about a child of a preserve-3D hierarchy that has been
+/// plane-split and ordered according to the view direction.
+#[derive(Clone, Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+pub struct OrderedPictureChild {
+    pub anchor: PlaneSplitAnchor, // NOTE(review): presumably identifies what the split polygon came from - confirm.
+    pub gpu_address: GpuCacheAddress, // NOTE(review): presumably where this child's data lives in the GPU cache - confirm.
+}
+
+bitflags! {
+    /// A set of flags describing the properties and state of a primitive cluster.
+    #[cfg_attr(feature = "capture", derive(Serialize))]
+    pub struct ClusterFlags: u32 {
+        /// Whether this cluster is visible when the position node is a backface.
+        const IS_BACKFACE_VISIBLE = 1;
+        /// This flag is set during the first pass picture traversal, depending on whether
+        /// the cluster is visible or not. It's read during the second pass when primitives
+        /// consult their owning clusters to see if the primitive itself is visible.
+        const IS_VISIBLE = 2;
+    }
+}
+
+/// Descriptor for a cluster of primitives. For now, this is quite basic but will be
+/// extended to handle more spatial clustering of primitives.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+pub struct PrimitiveCluster {
+    /// The positioning node for this cluster.
+    pub spatial_node_index: SpatialNodeIndex,
+    /// The bounding rect of the cluster, in the local space of the spatial node.
+    /// This is used to quickly determine the overall bounding rect for a picture
+    /// during the first picture traversal, which is needed for local scale
+    /// determination, and render task size calculations.
+    bounding_rect: LayoutRect, // Grown in add_instance as the union of the culling rects passed in.
+    /// A part of the cluster that we know to be opaque, if any. Does not always
+    /// describe the entire opaque region, but all content within that rect must
+    /// be opaque.
+    pub opaque_rect: LayoutRect,
+    /// The range of primitive instance indices associated with this cluster.
+    pub prim_range: Range<usize>,
+    /// Various flags / state for this cluster.
+    pub flags: ClusterFlags,
+}
+
+impl PrimitiveCluster {
+    /// Construct a new, empty primitive cluster for a given positioning node.
+    fn new(
+        spatial_node_index: SpatialNodeIndex,
+        flags: ClusterFlags,
+        first_instance_index: usize, // Both ends of the initial (empty) prim_range.
+    ) -> Self {
+        PrimitiveCluster {
+            bounding_rect: LayoutRect::zero(),
+            opaque_rect: LayoutRect::zero(),
+            spatial_node_index,
+            flags,
+            prim_range: first_instance_index..first_instance_index
+        }
+    }
+
+    /// Return true if an instance with the given params can be appended to this cluster.
+    pub fn is_compatible(
+        &self,
+        spatial_node_index: SpatialNodeIndex,
+        flags: ClusterFlags,
+        instance_index: usize,
+    ) -> bool {
+        self.flags == flags &&
+        self.spatial_node_index == spatial_node_index &&
+        instance_index == self.prim_range.end // The new instance must directly follow the current range.
+    }
+
+    pub fn prim_range(&self) -> Range<usize> { // Returns a copy of the instance index range.
+        self.prim_range.clone()
+    }
+
+    /// Add a primitive instance to the end of this cluster, growing its bounding rect.
+    fn add_instance(
+        &mut self,
+        culling_rect: &LayoutRect,
+        instance_index: usize,
+    ) {
+        debug_assert_eq!(instance_index, self.prim_range.end); // Instances can only be appended contiguously.
+        self.bounding_rect = self.bounding_rect.union(culling_rect);
+        self.prim_range.end += 1;
+    }
+}
+
+/// The primitive instances added to a picture, grouped into clusters.
+/// The indices of primitives that are themselves pictures are
+/// also kept in a separate list, for a fast initial traversal of the
+/// picture tree without walking the instance list.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+pub struct PrimitiveList {
+    /// List of primitives grouped into clusters.
+    pub clusters: Vec<PrimitiveCluster>,
+    pub child_pictures: Vec<PictureIndex>, // Index of every picture primitive passed to add_prim.
+    /// The number of preferred compositor surfaces that were found when
+    /// adding prims to this list.
+    pub compositor_surface_count: usize,
+}
+
+impl PrimitiveList {
+    /// Construct an empty primitive list. This is
+    /// just used during the take_context / restore_context
+    /// borrow check dance, which will be removed as the
+    /// picture traversal pass is completed.
+    pub fn empty() -> Self {
+        PrimitiveList {
+            clusters: Vec::new(),
+            child_pictures: Vec::new(),
+            compositor_surface_count: 0,
+        }
+    }
+
+    pub fn merge(&mut self, other: PrimitiveList) { // Appends the contents of `other` to this list, consuming it.
+        self.clusters.extend(other.clusters);
+        self.child_pictures.extend(other.child_pictures);
+        self.compositor_surface_count += other.compositor_surface_count;
+    }
+
+    /// Add a primitive instance to the end of the list
+    pub fn add_prim(
+        &mut self,
+        prim_instance: PrimitiveInstance,
+        prim_rect: LayoutRect,
+        spatial_node_index: SpatialNodeIndex,
+        prim_flags: PrimitiveFlags,
+        prim_instances: &mut Vec<PrimitiveInstance>, // The instance is pushed here; its index is what the cluster records.
+        clip_tree_builder: &ClipTreeBuilder,
+    ) {
+        let mut flags = ClusterFlags::empty();
+
+        // Picture primitives are also recorded in `child_pictures`, to make it faster
+        // to iterate all pictures in a given primitive list.
+        match prim_instance.kind {
+            PrimitiveInstanceKind::Picture { pic_index, .. } => {
+                self.child_pictures.push(pic_index);
+            }
+            _ => {}
+        }
+
+        if prim_flags.contains(PrimitiveFlags::IS_BACKFACE_VISIBLE) {
+            flags.insert(ClusterFlags::IS_BACKFACE_VISIBLE);
+        }
+
+        if prim_flags.contains(PrimitiveFlags::PREFER_COMPOSITOR_SURFACE) {
+            self.compositor_surface_count += 1;
+        }
+        // The culling rect is the primitive rect clipped by its clip leaf's local clip rect (zero if disjoint).
+        let clip_leaf = clip_tree_builder.get_leaf(prim_instance.clip_leaf_id);
+        let culling_rect = clip_leaf.local_clip_rect
+            .intersection(&prim_rect)
+            .unwrap_or_else(LayoutRect::zero);
+
+        let instance_index = prim_instances.len();
+        prim_instances.push(prim_instance);
+        // Fast path: append to the last cluster when it is compatible with this instance.
+        if let Some(cluster) = self.clusters.last_mut() {
+            if cluster.is_compatible(spatial_node_index, flags, instance_index) {
+                cluster.add_instance(&culling_rect, instance_index);
+                return;
+            }
+        }
+
+        // When `clusters` is full, grow it in coarse steps: up to 16, then up to 128, then by twice its length.
+        let clusters_len = self.clusters.len();
+        if clusters_len == self.clusters.capacity() {
+            let next_alloc = match clusters_len {
+                1 ..= 15 => 16 - clusters_len,
+                16 ..= 127 => 128 - clusters_len,
+                _ => clusters_len * 2, // Also taken for an empty vec, where this reserves nothing.
+            };
+
+            self.clusters.reserve(next_alloc);
+        }
+        // Otherwise start a new cluster that contains just this instance.
+        let mut cluster = PrimitiveCluster::new(
+            spatial_node_index,
+            flags,
+            instance_index,
+        );
+
+        cluster.add_instance(&culling_rect, instance_index);
+        self.clusters.push(cluster);
+    }
+
+    /// Returns true if there are no clusters (and thus primitives)
+    pub fn is_empty(&self) -> bool {
+        self.clusters.is_empty()
+    }
+}
+
+bitflags! {
+    #[cfg_attr(feature = "capture", derive(Serialize))]
+    /// Flags describing properties of a given PicturePrimitive
+    pub struct PictureFlags : u8 {
+        /// This picture is a resolve target (it doesn't actually render content itself,
+        /// but will have content copied into it)
+        const IS_RESOLVE_TARGET = 1 << 0;
+        /// This picture establishes a sub-graph, which affects how SurfaceBuilder will
+        /// set up dependencies in the render task graph
+        const IS_SUB_GRAPH = 1 << 1;
+        /// If set, this picture should not apply snapping via changing the raster root
+        const DISABLE_SNAPPING = 1 << 2;
+    }
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+pub struct PicturePrimitive {
+    /// List of primitives, and associated info for this picture.
+    pub prim_list: PrimitiveList,
+
+    /// If false and transform ends up showing the back of the picture,
+    /// it will be considered invisible.
+    pub is_backface_visible: bool,
+
+    pub primary_render_task_id: Option<RenderTaskId>, // Reset to None at the start of take_context.
+    /// If a mix-blend-mode, contains the render task for
+    /// the readback of the framebuffer that we use to sample
+    /// from in the mix-blend-mode shader.
+    /// For drop-shadow filter, this will store the original
+    /// picture task which would be rendered on screen after
+    /// blur pass.
+    pub secondary_render_task_id: Option<RenderTaskId>,
+    /// How this picture should be composited.
+    /// If None, don't composite - just draw directly on parent surface.
+    pub composite_mode: Option<PictureCompositeMode>,
+
+    pub raster_config: Option<RasterConfig>, // Initialized to None by new_image; when None, take_context uses the parent surface.
+    pub context_3d: Picture3DContext<OrderedPictureChild>, // The place of this picture in a 3D context.
+
+    /// Optional cache handles for storing extra data
+    /// in the GPU cache, depending on the type of
+    /// picture.
+    pub extra_gpu_data_handles: SmallVec<[GpuCacheHandle; 1]>,
+
+    /// The spatial node index of this picture when it is
+    /// composited into the parent picture.
+    pub spatial_node_index: SpatialNodeIndex,
+
+    /// Store the state of the previous local rect
+    /// for this picture. We need this in order to know when
+    /// to invalidate segments / drop-shadow gpu cache handles.
+    pub prev_local_rect: LayoutRect,
+
+    /// If false, this picture needs to (re)build segments
+    /// if it supports segment rendering. This can occur
+    /// if the local rect of the picture changes due to
+    /// transform animation and/or scrolling.
+    pub segments_are_valid: bool,
+
+    /// Set to true if we know for sure the picture is fully opaque.
+    pub is_opaque: bool,
+
+    /// Requested raster space for this picture
+    pub raster_space: RasterSpace,
+
+    /// Flags for this picture primitive
+    pub flags: PictureFlags,
+}
+
+impl PicturePrimitive {
+    pub fn print<T: PrintTreePrinter>( // Writes this picture and, recursively, its child pictures to `pt`.
+        &self,
+        pictures: &[Self], // Indexed by `PictureIndex.0` to look up the child pictures.
+        self_index: PictureIndex, // Debug-formatted as the title of the level opened for this picture.
+        pt: &mut T,
+    ) {
+        pt.new_level(format!("{:?}", self_index));
+        pt.add_item(format!("cluster_count: {:?}", self.prim_list.clusters.len()));
+        pt.add_item(format!("spatial_node_index: {:?}", self.spatial_node_index));
+        pt.add_item(format!("raster_config: {:?}", self.raster_config));
+        pt.add_item(format!("composite_mode: {:?}", self.composite_mode));
+        pt.add_item(format!("flags: {:?}", self.flags));
+        // Recurse into every child picture recorded in the primitive list.
+        for child_pic_index in &self.prim_list.child_pictures {
+            pictures[child_pic_index.0].print(pictures, *child_pic_index, pt);
+        }
+
+        pt.end_level();
+    }
+
+    /// Returns true if this picture supports segmented rendering (only Blit surfaces whose reason is exactly CLIP).
+    pub fn can_use_segments(&self) -> bool {
+        match self.raster_config {
+            // TODO(gw): Support brush segment rendering for filter and mix-blend
+            //           shaders. It's possible this already works, but I'm just
+            //           applying this optimization to Blit mode for now.
+            Some(RasterConfig { composite_mode: PictureCompositeMode::MixBlend(..), .. }) |
+            Some(RasterConfig { composite_mode: PictureCompositeMode::Filter(..), .. }) |
+            Some(RasterConfig { composite_mode: PictureCompositeMode::ComponentTransferFilter(..), .. }) |
+            Some(RasterConfig { composite_mode: PictureCompositeMode::TileCache { .. }, .. }) |
+            Some(RasterConfig { composite_mode: PictureCompositeMode::SvgFilter(..), .. }) |
+            Some(RasterConfig { composite_mode: PictureCompositeMode::IntermediateSurface, .. }) |
+            None => { // A picture without a raster config never uses segments.
+                false
+            }
+            Some(RasterConfig { composite_mode: PictureCompositeMode::Blit(reason), ..}) => {
+                reason == BlitReason::CLIP // Exact equality: any additional blit reason disables segments.
+            }
+        }
+    }
+
+    fn resolve_scene_properties(&mut self, properties: &SceneProperties) { // Refreshes the value of an opacity filter from its binding.
+        match self.composite_mode {
+            Some(PictureCompositeMode::Filter(ref mut filter)) => {
+                match *filter {
+                    Filter::Opacity(ref binding, ref mut value) => {
+                        *value = properties.resolve_float(binding); // Store the resolved float next to the binding.
+                    }
+                    _ => {} // Other filters are left untouched.
+                }
+            }
+            _ => {} // Non-filter composite modes (or none) have nothing to resolve here.
+        }
+    }
+
+    pub fn is_visible( // Returns false if the filter is not visible or the picture shows a hidden backface.
+        &self,
+        spatial_tree: &SpatialTree,
+    ) -> bool {
+        if let Some(PictureCompositeMode::Filter(ref filter)) = self.composite_mode {
+            if !filter.is_visible() {
+                return false;
+            }
+        }
+        // The backface test below only applies when the picture's backface is flagged as not visible.
+        // For out-of-preserve-3d pictures, the backface visibility is determined by
+        // the local transform only.
+        // Note: we aren't taking the transform relative to the parent picture,
+        // since picture tree can be more dense than the corresponding spatial tree.
+        if !self.is_backface_visible {
+            if let Picture3DContext::Out = self.context_3d {
+                match spatial_tree.get_local_visible_face(self.spatial_node_index) {
+                    VisibleFace::Front => {}
+                    VisibleFace::Back => return false,
+                }
+            }
+        }
+
+        true
+    }
+
+    pub fn new_image( // Builds a picture from the given parts; all per-frame state starts out empty.
+        composite_mode: Option<PictureCompositeMode>,
+        context_3d: Picture3DContext<OrderedPictureChild>,
+        prim_flags: PrimitiveFlags, // Only IS_BACKFACE_VISIBLE is read from these flags.
+        prim_list: PrimitiveList,
+        spatial_node_index: SpatialNodeIndex,
+        raster_space: RasterSpace,
+        flags: PictureFlags,
+    ) -> Self {
+        PicturePrimitive {
+            prim_list,
+            primary_render_task_id: None,
+            secondary_render_task_id: None,
+            composite_mode,
+            raster_config: None, // No raster config is assigned at construction time.
+            context_3d,
+            extra_gpu_data_handles: SmallVec::new(),
+            is_backface_visible: prim_flags.contains(PrimitiveFlags::IS_BACKFACE_VISIBLE),
+            spatial_node_index,
+            prev_local_rect: LayoutRect::zero(),
+            segments_are_valid: false, // Segments start out invalid.
+            is_opaque: false,
+            raster_space,
+            flags,
+        }
+    }
+
+    pub fn take_context(
+        &mut self,
+        pic_index: PictureIndex,
+        parent_surface_index: Option<SurfaceIndex>,
+        parent_subpixel_mode: SubpixelMode,
+        frame_state: &mut FrameBuildingState,
+        frame_context: &FrameBuildingContext,
+        scratch: &mut PrimitiveScratchBuffer,
+        tile_caches: &mut FastHashMap<SliceId, Box<TileCacheInstance>>,
+    ) -> Option<(PictureContext, PictureState, PrimitiveList)> {
+        self.primary_render_task_id = None;
+        self.secondary_render_task_id = None;
+
+        if !self.is_visible(frame_context.spatial_tree) {
+            return None;
+        }
+
+        profile_scope!("take_context");
+
+        let surface_index = match self.raster_config {
+            Some(ref raster_config) => raster_config.surface_index,
+            None => parent_surface_index.expect("bug: no parent"),
+        };
+        let surface_spatial_node_index = frame_state.surfaces[surface_index.0].surface_spatial_node_index;
+
+        let map_pic_to_world = SpaceMapper::new_with_target(
+            frame_context.root_spatial_node_index,
+            surface_spatial_node_index,
+            frame_context.global_screen_world_rect,
+            frame_context.spatial_tree,
+        );
+
+        let pic_bounds = map_pic_to_world
+            .unmap(&map_pic_to_world.bounds)
+            .unwrap_or_else(PictureRect::max_rect);
+
+        let map_local_to_pic = SpaceMapper::new(
+            surface_spatial_node_index,
+            pic_bounds,
+        );
+
+        match self.raster_config {
+            Some(RasterConfig { surface_index, composite_mode: PictureCompositeMode::TileCache { slice_id }, .. }) => {
+                let tile_cache = tile_caches.get_mut(&slice_id).unwrap();
+                let mut debug_info = SliceDebugInfo::new();
+                let mut surface_render_tasks = FastHashMap::default();
+                let mut surface_dirty_rects = Vec::new();
+                let mut surface_local_dirty_rect = PictureRect::zero();
+                let device_pixel_scale = frame_state
+                    .surfaces[surface_index.0]
+                    .device_pixel_scale;
+                let mut at_least_one_tile_visible = false;
+
+                // Get the overall world space rect of the picture cache. Used to clip
+                // the tile rects below for occlusion testing to the relevant area.
+                let world_clip_rect = map_pic_to_world
+                    .map(&tile_cache.local_clip_rect)
+                    .expect("bug: unable to map clip rect")
+                    .round();
+                let device_clip_rect = (world_clip_rect * frame_context.global_device_pixel_scale).round();
+
+                for (sub_slice_index, sub_slice) in tile_cache.sub_slices.iter_mut().enumerate() {
+                    for tile in sub_slice.tiles.values_mut() {
+                        if tile.is_visible {
+                            // Get the world space rect that this tile will actually occupy on screen
+                            let world_draw_rect = world_clip_rect.intersection(&tile.world_valid_rect);
+
+                            // If that draw rect is occluded by some set of tiles in front of it,
+                            // then mark it as not visible and skip drawing. When it's not occluded
+                            // it will fail this test, and get rasterized by the render task setup
+                            // code below.
+                            match world_draw_rect {
+                                Some(world_draw_rect) => {
+                                    // Only check for occlusion on visible tiles that are fixed position.
+                                    if tile_cache.spatial_node_index == frame_context.root_spatial_node_index &&
+                                       frame_state.composite_state.occluders.is_tile_occluded(tile.z_id, world_draw_rect) {
+                                        // If this tile has an allocated native surface, free it, since it's completely
+                                        // occluded. We will need to re-allocate this surface if it becomes visible,
+                                        // but that's likely to be rare (e.g. when there is no content display list
+                                        // for a frame or two during a tab switch).
+                                        let surface = tile.surface.as_mut().expect("no tile surface set!");
+
+                                        if let TileSurface::Texture { descriptor: SurfaceTextureDescriptor::Native { id, .. }, .. } = surface {
+                                            if let Some(id) = id.take() {
+                                                frame_state.resource_cache.destroy_compositor_tile(id);
+                                            }
+                                        }
+
+                                        tile.is_visible = false;
+
+                                        if frame_context.fb_config.testing {
+                                            debug_info.tiles.insert(
+                                                tile.tile_offset,
+                                                TileDebugInfo::Occluded,
+                                            );
+                                        }
+
+                                        continue;
+                                    }
+                                }
+                                None => {
+                                    tile.is_visible = false;
+                                }
+                            }
+                        }
+
+                        // If we get here, we want to ensure that the surface remains valid in the texture
+                        // cache, _even if_ it's not visible due to clipping or being scrolled off-screen.
+                        // This ensures that we retain valid tiles that are off-screen, but still in the
+                        // display port of this tile cache instance.
+                        if let Some(TileSurface::Texture { descriptor, .. }) = tile.surface.as_ref() {
+                            if let SurfaceTextureDescriptor::TextureCache { handle: Some(handle), .. } = descriptor {
+                                frame_state.resource_cache
+                                    .picture_textures.request(handle, frame_state.gpu_cache);
+                            }
+                        }
+
+                        // If the tile has been found to be off-screen / clipped, skip any further processing.
+                        if !tile.is_visible {
+                            if frame_context.fb_config.testing {
+                                debug_info.tiles.insert(
+                                    tile.tile_offset,
+                                    TileDebugInfo::Culled,
+                                );
+                            }
+
+                            continue;
+                        }
+
+                        at_least_one_tile_visible = true;
+
+                        if frame_context.debug_flags.contains(DebugFlags::PICTURE_CACHING_DBG) {
+                            tile.root.draw_debug_rects(
+                                &map_pic_to_world,
+                                tile.is_opaque,
+                                tile.current_descriptor.local_valid_rect,
+                                scratch,
+                                frame_context.global_device_pixel_scale,
+                            );
+
+                            let label_offset = DeviceVector2D::new(
+                                20.0 + sub_slice_index as f32 * 20.0,
+                                30.0 + sub_slice_index as f32 * 20.0,
+                            );
+                            let tile_device_rect = tile.world_tile_rect * frame_context.global_device_pixel_scale;
+                            if tile_device_rect.height() >= label_offset.y {
+                                let surface = tile.surface.as_ref().expect("no tile surface set!");
+
+                                scratch.push_debug_string(
+                                    tile_device_rect.min + label_offset,
+                                    debug_colors::RED,
+                                    format!("{:?}: s={} is_opaque={} surface={} sub={}",
+                                            tile.id,
+                                            tile_cache.slice,
+                                            tile.is_opaque,
+                                            surface.kind(),
+                                            sub_slice_index,
+                                    ),
+                                );
+                            }
+                        }
+
+                        if let TileSurface::Texture { descriptor, .. } = tile.surface.as_mut().unwrap() {
+                            match descriptor {
+                                SurfaceTextureDescriptor::TextureCache { ref handle, .. } => {
+                                    let exists = handle.as_ref().map_or(false,
+                                        |handle| frame_state.resource_cache.picture_textures.entry_exists(handle)
+                                    );
+                                    // Invalidate if the backing texture was evicted.
+                                    if exists {
+                                        // Request the backing texture so it won't get evicted this frame.
+                                        // We specifically want to mark the tile texture as used, even
+                                        // if it's detected not visible below and skipped. This is because
+                                        // we maintain the set of tiles we care about based on visibility
+                                        // during pre_update. If a tile still exists after that, we are
+                                        // assuming that it's either visible or we want to retain it for
+                                        // a while in case it gets scrolled back onto screen soon.
+                                        // TODO(gw): Consider switching to manual eviction policy?
+                                        frame_state.resource_cache
+                                            .picture_textures
+                                            .request(handle.as_ref().unwrap(), frame_state.gpu_cache);
+                                    } else {
+                                        // If the texture was evicted on a previous frame, we need to assume
+                                        // that the entire tile rect is dirty.
+                                        tile.invalidate(None, InvalidationReason::NoTexture);
+                                    }
+                                }
+                                SurfaceTextureDescriptor::Native { id, .. } => {
+                                    if id.is_none() {
+                                        // There is no current surface allocation, so ensure the entire tile is invalidated
+                                        tile.invalidate(None, InvalidationReason::NoSurface);
+                                    }
+                                }
+                            }
+                        }
+
+                        // Ensure that the dirty rect doesn't extend outside the local valid rect.
+                        tile.local_dirty_rect = tile.local_dirty_rect
+                            .intersection(&tile.current_descriptor.local_valid_rect)
+                            .unwrap_or_else(PictureRect::zero);
+
+                        surface_local_dirty_rect = surface_local_dirty_rect.union(&tile.local_dirty_rect);
+
+                        // Update the world/device dirty rect
+                        let world_dirty_rect = map_pic_to_world.map(&tile.local_dirty_rect).expect("bug");
+
+                        let device_rect = (tile.world_tile_rect * frame_context.global_device_pixel_scale).round();
+                        tile.device_dirty_rect = (world_dirty_rect * frame_context.global_device_pixel_scale)
+                            .round_out()
+                            .intersection(&device_rect)
+                            .unwrap_or_else(DeviceRect::zero);
+
+                        if tile.is_valid {
+                            if frame_context.fb_config.testing {
+                                debug_info.tiles.insert(
+                                    tile.tile_offset,
+                                    TileDebugInfo::Valid,
+                                );
+                            }
+                        } else {
+                            // Add this dirty rect to the dirty region tracker. This must be done outside the if statement below,
+                            // so that we include in the dirty region tiles that are handled by a background color only (no
+                            // surface allocation).
+                            tile_cache.dirty_region.add_dirty_region(
+                                tile.local_dirty_rect,
+                                frame_context.spatial_tree,
+                            );
+
+                            // Ensure that this texture is allocated.
+                            if let TileSurface::Texture { ref mut descriptor } = tile.surface.as_mut().unwrap() {
+                                match descriptor {
+                                    SurfaceTextureDescriptor::TextureCache { ref mut handle } => {
+
+                                        frame_state.resource_cache.picture_textures.update(
+                                            tile_cache.current_tile_size,
+                                            handle,
+                                            frame_state.gpu_cache,
+                                            &mut frame_state.resource_cache.texture_cache.next_id,
+                                            &mut frame_state.resource_cache.texture_cache.pending_updates,
+                                        );
+                                    }
+                                    SurfaceTextureDescriptor::Native { id } => {
+                                        if id.is_none() {
+                                            // Allocate a native surface id if we're in native compositing mode,
+                                            // and we don't have a surface yet (due to first frame, or destruction
+                                            // due to tile size changing etc).
+                                            if sub_slice.native_surface.is_none() {
+                                                let opaque = frame_state
+                                                    .resource_cache
+                                                    .create_compositor_surface(
+                                                        tile_cache.virtual_offset,
+                                                        tile_cache.current_tile_size,
+                                                        true,
+                                                    );
+
+                                                let alpha = frame_state
+                                                    .resource_cache
+                                                    .create_compositor_surface(
+                                                        tile_cache.virtual_offset,
+                                                        tile_cache.current_tile_size,
+                                                        false,
+                                                    );
+
+                                                sub_slice.native_surface = Some(NativeSurface {
+                                                    opaque,
+                                                    alpha,
+                                                });
+                                            }
+
+                                            // Create the tile identifier and allocate it.
+                                            let surface_id = if tile.is_opaque {
+                                                sub_slice.native_surface.as_ref().unwrap().opaque
+                                            } else {
+                                                sub_slice.native_surface.as_ref().unwrap().alpha
+                                            };
+
+                                            let tile_id = NativeTileId {
+                                                surface_id,
+                                                x: tile.tile_offset.x,
+                                                y: tile.tile_offset.y,
+                                            };
+
+                                            frame_state.resource_cache.create_compositor_tile(tile_id);
+
+                                            *id = Some(tile_id);
+                                        }
+                                    }
+                                }
+
+                                // The cast_unit() here is because the `content_origin` is expected to be in
+                                // device pixels, however we're establishing raster roots for picture cache
+                                // tiles meaning the `content_origin` needs to be in the local space of that root.
+                                // TODO(gw): `content_origin` should actually be in RasterPixels to be consistent
+                                //           with both local / screen raster modes, but this involves a lot of
+                                //           changes to render task and picture code.
+                                let content_origin_f = tile.local_tile_rect.min.cast_unit() * device_pixel_scale;
+                                let content_origin = content_origin_f.round();
+                                // TODO: these asserts used to have a threshold of 0.01 but failed intermittently in the
+                                // gfx/layers/apz/test/mochitest/test_group_double_tap_zoom-2.html test on Android.
+                                // Moving the rectangles in the space mapping conversion code to the Box2D representation
+                                // made the failure happen more often.
+                                debug_assert!((content_origin_f.x - content_origin.x).abs() < 0.15);
+                                debug_assert!((content_origin_f.y - content_origin.y).abs() < 0.15);
+
+                                let surface = descriptor.resolve(
+                                    frame_state.resource_cache,
+                                    tile_cache.current_tile_size,
+                                );
+
+                                let scissor_rect = frame_state.composite_state.get_surface_rect(
+                                    &tile.local_dirty_rect,
+                                    &tile.local_tile_rect,
+                                    tile_cache.transform_index,
+                                ).to_i32();
+
+                                let valid_rect = frame_state.composite_state.get_surface_rect(
+                                    &tile.current_descriptor.local_valid_rect,
+                                    &tile.local_tile_rect,
+                                    tile_cache.transform_index,
+                                ).to_i32();
+
+                                let composite_task_size = tile_cache.current_tile_size;
+
+                                let tile_key = TileKey {
+                                    sub_slice_index: SubSliceIndex::new(sub_slice_index),
+                                    tile_offset: tile.tile_offset,
+                                };
+
+                                let mut clear_color = ColorF::TRANSPARENT;
+
+                                if SubSliceIndex::new(sub_slice_index).is_primary() {
+                                    if let Some(background_color) = tile_cache.background_color {
+                                        clear_color = background_color;
+                                    }
+                                    
+                                    // If this picture cache has a spanning_opaque_color, we will use
+                                    // that as the clear color. The primitive that was detected as a
+                                    // spanning primitive will have been set with IS_BACKDROP, causing
+                                    // it to be skipped and removing everything added prior to it
+                                    // during batching.
+                                    if let Some(color) = tile_cache.backdrop.spanning_opaque_color {
+                                        clear_color = color;
+                                    }
+                                }
+
+                                let cmd_buffer_index = frame_state.cmd_buffers.create_cmd_buffer();
+
+                                // TODO(gw): As a performance optimization, we could skip the resolve picture
+                                //           if the dirty rect is the same as the resolve rect (probably quite
+                                //           common for effects that scroll underneath a backdrop-filter, for example).
+                                let use_tile_composite = !tile.sub_graphs.is_empty();
+
+                                if use_tile_composite {
+                                    let mut local_content_rect = tile.local_dirty_rect;
+
+                                    for (sub_graph_rect, surface_stack) in &tile.sub_graphs {
+                                        if let Some(dirty_sub_graph_rect) = sub_graph_rect.intersection(&tile.local_dirty_rect) {
+                                            for (composite_mode, surface_index) in surface_stack {
+                                                let surface = &frame_state.surfaces[surface_index.0];
+
+                                                let rect = composite_mode.get_coverage(
+                                                    surface,
+                                                    Some(dirty_sub_graph_rect.cast_unit()),
+                                                ).cast_unit();
+
+                                                local_content_rect = local_content_rect.union(&rect);
+                                            }
+                                        }
+                                    }
+
+                                    // We know that we'll never need to sample > 300 device pixels outside the tile
+                                    // for blurring, so clamp the content rect here so that we don't try to allocate
+                                    // a really large surface in the case of a drop-shadow with large offset.
+                                    let max_content_rect = (tile.local_dirty_rect.cast_unit() * device_pixel_scale)
+                                        .inflate(
+                                            MAX_BLUR_RADIUS * BLUR_SAMPLE_SCALE,
+                                            MAX_BLUR_RADIUS * BLUR_SAMPLE_SCALE,
+                                        )
+                                        .round_out()
+                                        .to_i32();
+
+                                    let content_device_rect = (local_content_rect.cast_unit() * device_pixel_scale)
+                                        .round_out()
+                                        .to_i32();
+
+                                    let content_device_rect = content_device_rect
+                                        .intersection(&max_content_rect)
+                                        .expect("bug: no intersection with tile dirty rect");
+
+                                    let content_task_size = content_device_rect.size();
+                                    let normalized_content_rect = content_task_size.into();
+
+                                    let inner_offset = content_origin + scissor_rect.min.to_vector().to_f32();
+                                    let outer_offset = content_device_rect.min.to_f32();
+                                    let sub_rect_offset = (inner_offset - outer_offset).round().to_i32();
+
+                                    let render_task_id = frame_state.rg_builder.add().init(
+                                        RenderTask::new_dynamic(
+                                            content_task_size,
+                                            RenderTaskKind::new_picture(
+                                                content_task_size,
+                                                true,
+                                                content_device_rect.min.to_f32(),
+                                                surface_spatial_node_index,
+                                                // raster == surface implicitly for picture cache tiles
+                                                surface_spatial_node_index,
+                                                device_pixel_scale,
+                                                Some(normalized_content_rect),
+                                                None,
+                                                Some(clear_color),
+                                                cmd_buffer_index,
+                                                false,
+                                            )
+                                        ),
+                                    );
+
+                                    let composite_task_id = frame_state.rg_builder.add().init(
+                                        RenderTask::new(
+                                            RenderTaskLocation::Static {
+                                                surface: StaticRenderTaskSurface::PictureCache {
+                                                    surface,
+                                                },
+                                                rect: composite_task_size.into(),
+                                            },
+                                            RenderTaskKind::new_tile_composite(
+                                                sub_rect_offset,
+                                                scissor_rect,
+                                                valid_rect,
+                                                clear_color,
+                                            ),
+                                        ),
+                                    );
+
+                                    surface_render_tasks.insert(
+                                        tile_key,
+                                        SurfaceTileDescriptor {
+                                            current_task_id: render_task_id,
+                                            composite_task_id: Some(composite_task_id),
+                                        },
+                                    );
+                                } else {
+                                    let render_task_id = frame_state.rg_builder.add().init(
+                                        RenderTask::new(
+                                            RenderTaskLocation::Static {
+                                                surface: StaticRenderTaskSurface::PictureCache {
+                                                    surface,
+                                                },
+                                                rect: composite_task_size.into(),
+                                            },
+                                            RenderTaskKind::new_picture(
+                                                composite_task_size,
+                                                true,
+                                                content_origin,
+                                                surface_spatial_node_index,
+                                                // raster == surface implicitly for picture cache tiles
+                                                surface_spatial_node_index,
+                                                device_pixel_scale,
+                                                Some(scissor_rect),
+                                                Some(valid_rect),
+                                                Some(clear_color),
+                                                cmd_buffer_index,
+                                                false,
+                                            )
+                                        ),
+                                    );
+
+                                    surface_render_tasks.insert(
+                                        tile_key,
+                                        SurfaceTileDescriptor {
+                                            current_task_id: render_task_id,
+                                            composite_task_id: None,
+                                        },
+                                    );
+                                }
+
+                                surface_dirty_rects.push(tile.local_dirty_rect);
+                            }
+
+                            if frame_context.fb_config.testing {
+                                debug_info.tiles.insert(
+                                    tile.tile_offset,
+                                    TileDebugInfo::Dirty(DirtyTileDebugInfo {
+                                        local_valid_rect: tile.current_descriptor.local_valid_rect,
+                                        local_dirty_rect: tile.local_dirty_rect,
+                                    }),
+                                );
+                            }
+                        }
+
+                        let surface = tile.surface.as_ref().expect("no tile surface set!");
+
+                        let descriptor = CompositeTileDescriptor {
+                            surface_kind: surface.into(),
+                            tile_id: tile.id,
+                        };
+
+                        let (surface, is_opaque) = match surface {
+                            TileSurface::Color { color } => {
+                                (CompositeTileSurface::Color { color: *color }, true)
+                            }
+                            TileSurface::Clear => {
+                                // Clear tiles are rendered with blend mode pre-multiply-dest-out.
+                                (CompositeTileSurface::Clear, false)
+                            }
+                            TileSurface::Texture { descriptor, .. } => {
+                                let surface = descriptor.resolve(frame_state.resource_cache, tile_cache.current_tile_size);
+                                (
+                                    CompositeTileSurface::Texture { surface },
+                                    tile.is_opaque
+                                )
+                            }
+                        };
+
+                        if is_opaque {
+                            sub_slice.opaque_tile_descriptors.push(descriptor);
+                        } else {
+                            sub_slice.alpha_tile_descriptors.push(descriptor);
+                        }
+
+                        let composite_tile = CompositeTile {
+                            kind: tile_kind(&surface, is_opaque),
+                            surface,
+                            local_rect: tile.local_tile_rect,
+                            local_valid_rect: tile.current_descriptor.local_valid_rect,
+                            local_dirty_rect: tile.local_dirty_rect,
+                            device_clip_rect,
+                            z_id: tile.z_id,
+                            transform_index: tile_cache.transform_index,
+                        };
+
+                        sub_slice.composite_tiles.push(composite_tile);
+
+                        // Now that the tile is valid, reset the dirty rect.
+                        tile.local_dirty_rect = PictureRect::zero();
+                        tile.is_valid = true;
+                    }
+
+                    // Sort the tile descriptor lists, since iterating values in the tile_cache.tiles
+                    // hashmap doesn't provide any ordering guarantees, but we want to detect the
+                    // composite descriptor as equal if the tiles list is the same, regardless of
+                    // ordering.
+                    sub_slice.opaque_tile_descriptors.sort_by_key(|desc| desc.tile_id);
+                    sub_slice.alpha_tile_descriptors.sort_by_key(|desc| desc.tile_id);
+                }
+
+                // Check to see if we should add backdrops as native surfaces.
+                let backdrop_rect = tile_cache.backdrop.backdrop_rect
+                    .intersection(&tile_cache.local_rect)
+                    .and_then(|r| {
+                        r.intersection(&tile_cache.local_clip_rect)
+                });
+
+                let mut backdrop_in_use_and_visible = false;
+                if let Some(backdrop_rect) = backdrop_rect {
+                    let supports_surface_for_backdrop = match frame_state.composite_state.compositor_kind {
+                        CompositorKind::Draw { .. } => {
+                            false
+                        }
+                        CompositorKind::Native { capabilities, .. } => {
+                            capabilities.supports_surface_for_backdrop
+                        }
+                    };
+                    if supports_surface_for_backdrop && !tile_cache.found_prims_after_backdrop && at_least_one_tile_visible {
+                        if let Some(BackdropKind::Color { color }) = tile_cache.backdrop.kind {
+                            backdrop_in_use_and_visible = true;
+
+                            // We're going to let the compositor handle the backdrop as a native surface.
+                            // Hide all of our sub_slice tiles so they aren't also trying to draw it.
+                            for sub_slice in &mut tile_cache.sub_slices {
+                                for tile in sub_slice.tiles.values_mut() {
+                                    tile.is_visible = false;
+                                }
+                            }
+
+                            // Destroy our backdrop surface if it doesn't match the new color.
+                            // TODO: This is a performance hit for animated color backdrops.
+                            if let Some(backdrop_surface) = &tile_cache.backdrop_surface {
+                                if backdrop_surface.color != color {
+                                    frame_state.resource_cache.destroy_compositor_surface(backdrop_surface.id);
+                                    tile_cache.backdrop_surface = None;
+                                }
+                            }
+
+                            // Calculate the device_rect for the backdrop, which is just the backdrop_rect
+                            // converted into world space and scaled to device pixels.
+                            let world_backdrop_rect = map_pic_to_world.map(&backdrop_rect).expect("bug: unable to map backdrop rect");
+                            let device_rect = (world_backdrop_rect * frame_context.global_device_pixel_scale).round();
+
+                            // If we already have a backdrop surface, update the device rect. Otherwise, create
+                            // a backdrop surface.
+                            if let Some(backdrop_surface) = &mut tile_cache.backdrop_surface {
+                                backdrop_surface.device_rect = device_rect;
+                            } else {
+                                // Create native compositor surface with color for the backdrop and store the id.
+                                tile_cache.backdrop_surface = Some(BackdropSurface {
+                                    id: frame_state.resource_cache.create_compositor_backdrop_surface(color),
+                                    color,
+                                    device_rect,
+                                });
+                            }
+                        }
+                    }
+                }
+
+                if !backdrop_in_use_and_visible {
+                    if let Some(backdrop_surface) = &tile_cache.backdrop_surface {
+                        // We've already allocated a backdrop surface, but we're not using it.
+                        // Tell the compositor to get rid of it.
+                        frame_state.resource_cache.destroy_compositor_surface(backdrop_surface.id);
+                        tile_cache.backdrop_surface = None;
+                    }
+                }
+
+                // If invalidation debugging is enabled, dump the picture cache state to a tree printer.
+                if frame_context.debug_flags.contains(DebugFlags::INVALIDATION_DBG) {
+                    tile_cache.print();
+                }
+
+                // If testing mode is enabled, write some information about the current state
+                // of this picture cache (made available in RenderResults).
+                if frame_context.fb_config.testing {
+                    frame_state.composite_state
+                        .picture_cache_debug
+                        .slices
+                        .insert(
+                            tile_cache.slice,
+                            debug_info,
+                        );
+                }
+
+                let descriptor = SurfaceDescriptor::new_tiled(
+                    surface_render_tasks,
+                    surface_dirty_rects,
+                );
+
+                frame_state.surface_builder.push_surface(
+                    surface_index,
+                    false,
+                    surface_local_dirty_rect,
+                    descriptor,
+                    frame_state.surfaces,
+                    frame_state.rg_builder,
+                );
+            }
+            Some(ref mut raster_config) => {
+                let pic_rect = frame_state
+                    .surfaces[raster_config.surface_index.0]
+                    .clipped_local_rect;
+
+                let parent_surface_index = parent_surface_index.expect("bug: no parent for child surface");
+
+                // Layout space for the picture is picture space from the
+                // perspective of its child primitives.
+                let local_rect = pic_rect * Scale::new(1.0);
+
+                // If the precise rect changed since last frame, we need to invalidate
+                // any segments and gpu cache handles for drop-shadows.
+                // TODO(gw): Requiring storage of the `prev_precise_local_rect` here
+                //           is a total hack. It's required because `prev_precise_local_rect`
+                //           gets written to twice (during initial vis pass and also during
+                //           prepare pass). The proper longer term fix for this is to make
+                //           use of the conservative picture rect for segmenting (which should
+                //           be done during scene building).
+                if local_rect != self.prev_local_rect {
+                    match raster_config.composite_mode {
+                        PictureCompositeMode::Filter(Filter::DropShadows(..)) => {
+                            for handle in &self.extra_gpu_data_handles {
+                                frame_state.gpu_cache.invalidate(handle);
+                            }
+                        }
+                        _ => {}
+                    }
+                    // Invalidate any segments built for this picture, since the local
+                    // rect has changed.
+                    self.segments_are_valid = false;
+                    self.prev_local_rect = local_rect;
+                }
+
+                let max_surface_size = frame_context
+                    .fb_config
+                    .max_surface_override
+                    .unwrap_or(MAX_SURFACE_SIZE) as f32;
+
+                let surface_rects = match get_surface_rects(
+                    raster_config.surface_index,
+                    &raster_config.composite_mode,
+                    parent_surface_index,
+                    &mut frame_state.surfaces,
+                    frame_context.spatial_tree,
+                    max_surface_size,
+                ) {
+                    Some(rects) => rects,
+                    None => return None,
+                };
+
+                let (raster_spatial_node_index, device_pixel_scale) = {
+                    let surface = &frame_state.surfaces[surface_index.0];
+                    (surface.raster_spatial_node_index, surface.device_pixel_scale)
+                };
+                let can_use_shared_surface = !self.flags.contains(PictureFlags::IS_RESOLVE_TARGET);
+
+                let primary_render_task_id;
+                let surface_descriptor;
+                match raster_config.composite_mode {
+                    PictureCompositeMode::TileCache { .. } => {
+                        unreachable!("handled above");
+                    }
+                    PictureCompositeMode::Filter(Filter::Blur { width, height, .. }) => {
+                        let surface = &frame_state.surfaces[raster_config.surface_index.0];
+                        let (width, height) = surface.clamp_blur_radius(width, height);
+
+                        let width_std_deviation = width * surface.local_scale.0 * device_pixel_scale.0;
+                        let height_std_deviation = height * surface.local_scale.1 * device_pixel_scale.0;
+                        let blur_std_deviation = DeviceSize::new(
+                            width_std_deviation,
+                            height_std_deviation,
+                        );
+
+                        let original_size = surface_rects.clipped.size();
+
+                        // Adjust the size to avoid introducing sampling errors during the down-scaling passes.
+                        // What would be even better is to rasterize the picture at the down-scaled size
+                        // directly.
+                        let adjusted_size = BlurTask::adjusted_blur_source_size(
+                            original_size,
+                            blur_std_deviation,
+                        );
+
+                        let cmd_buffer_index = frame_state.cmd_buffers.create_cmd_buffer();
+
+                        // Since we (may have) adjusted the render task size for downscaling accuracy
+                        // above, recalculate the uv rect for tasks that may sample from this blur output
+                        let uv_rect_kind = calculate_uv_rect_kind(
+                            DeviceRect::from_origin_and_size(surface_rects.clipped.min, adjusted_size.to_f32()),
+                            surface_rects.unclipped,
+                        );
+
+                        let picture_task_id = frame_state.rg_builder.add().init(
+                            RenderTask::new_dynamic(
+                                adjusted_size,
+                                RenderTaskKind::new_picture(
+                                    adjusted_size,
+                                    surface_rects.needs_scissor_rect,
+                                    surface_rects.clipped.min,
+                                    surface_spatial_node_index,
+                                    raster_spatial_node_index,
+                                    device_pixel_scale,
+                                    None,
+                                    None,
+                                    None,
+                                    cmd_buffer_index,
+                                    can_use_shared_surface,
+                                )
+                            ).with_uv_rect_kind(uv_rect_kind)
+                        );
+
+                        let blur_render_task_id = RenderTask::new_blur(
+                            blur_std_deviation,
+                            picture_task_id,
+                            frame_state.rg_builder,
+                            RenderTargetKind::Color,
+                            None,
+                            original_size.to_i32(),
+                        );
+
+                        primary_render_task_id = blur_render_task_id;
+
+                        surface_descriptor = SurfaceDescriptor::new_chained(
+                            picture_task_id,
+                            blur_render_task_id,
+                            surface_rects.clipped_local,
+                        );
+                    }
+                    PictureCompositeMode::Filter(Filter::DropShadows(ref shadows)) => {
+                        let surface = &frame_state.surfaces[raster_config.surface_index.0];
+
+                        let device_rect = surface_rects.clipped;
+
+                        let cmd_buffer_index = frame_state.cmd_buffers.create_cmd_buffer();
+
+                        let picture_task_id = frame_state.rg_builder.add().init(
+                            RenderTask::new_dynamic(
+                                surface_rects.task_size,
+                                RenderTaskKind::new_picture(
+                                    surface_rects.task_size,
+                                    surface_rects.needs_scissor_rect,
+                                    device_rect.min,
+                                    surface_spatial_node_index,
+                                    raster_spatial_node_index,
+                                    device_pixel_scale,
+                                    None,
+                                    None,
+                                    None,
+                                    cmd_buffer_index,
+                                    can_use_shared_surface,
+                                ),
+                            ).with_uv_rect_kind(surface_rects.uv_rect_kind)
+                        );
+
+                        let mut blur_tasks = BlurTaskCache::default();
+
+                        self.extra_gpu_data_handles.resize(shadows.len(), GpuCacheHandle::new());
+
+                        let mut blur_render_task_id = picture_task_id;
+                        for shadow in shadows {
+                            let (blur_radius_x, blur_radius_y) = surface.clamp_blur_radius(
+                                shadow.blur_radius,
+                                shadow.blur_radius,
+                            );
+
+                            blur_render_task_id = RenderTask::new_blur(
+                                DeviceSize::new(
+                                    blur_radius_x * surface.local_scale.0 * device_pixel_scale.0,
+                                    blur_radius_y * surface.local_scale.1 * device_pixel_scale.0,
+                                ),
+                                picture_task_id,
+                                frame_state.rg_builder,
+                                RenderTargetKind::Color,
+                                Some(&mut blur_tasks),
+                                device_rect.size().to_i32(),
+                            );
+                        }
+
+                        // Add this content picture as a dependency of the parent surface, to
+                        // ensure it isn't free'd after the shadow uses it as an input.
+                        frame_state.surface_builder.add_picture_render_task(picture_task_id);
+
+                        primary_render_task_id = blur_render_task_id;
+                        self.secondary_render_task_id = Some(picture_task_id);
+
+                        surface_descriptor = SurfaceDescriptor::new_chained(
+                            picture_task_id,
+                            blur_render_task_id,
+                            surface_rects.clipped_local,
+                        );
+                    }
+                    PictureCompositeMode::MixBlend(mode) if BlendMode::from_mix_blend_mode(
+                        mode,
+                        frame_context.fb_config.gpu_supports_advanced_blend,
+                        frame_context.fb_config.advanced_blend_is_coherent,
+                        frame_context.fb_config.dual_source_blending_is_supported,
+                    ).is_none() => {
+                        let parent_surface = &frame_state.surfaces[parent_surface_index.0];
+
+                        // Create a space mapper that will allow mapping from the local rect
+                        // of the mix-blend primitive into the space of the surface that we
+                        // need to read back from. Note that we use the parent's raster spatial
+                        // node here, so that we are in the correct device space of the parent
+                        // surface, whether it establishes a raster root or not.
+                        let map_pic_to_parent = SpaceMapper::new_with_target(
+                            parent_surface.surface_spatial_node_index,
+                            surface_spatial_node_index,
+                            parent_surface.clipping_rect,
+                            frame_context.spatial_tree,
+                        );
+                        let pic_in_raster_space = map_pic_to_parent
+                            .map(&pic_rect)
+                            .expect("bug: unable to map mix-blend content into parent");
+
+                        // Apply device pixel ratio for parent surface to get into device
+                        // pixels for that surface.
+                        let backdrop_rect = pic_in_raster_space;
+                        let parent_surface_rect = parent_surface.clipping_rect;
+
+                        // If there is no available parent surface to read back from (for example, if
+                        // the parent surface is affected by a clip that doesn't affect the child
+                        // surface), then create a dummy 16x16 readback. In future, we could alter
+                        // the composite mode of this primitive to skip the mix-blend, but for simplicity
+                        // we just create a dummy readback for now.
+
+                        let readback_task_id = match backdrop_rect.intersection(&parent_surface_rect) {
+                            Some(available_rect) => {
+                                // Calculate the UV coords necessary for the shader to sample
+                                // from the primitive rect within the readback region. This is
+                                // 0..1 for aligned surfaces, but doing it this way allows
+                                // accurate sampling if the primitive bounds have fractional values.
+
+                                let backdrop_rect = parent_surface.map_to_device_rect(
+                                    &backdrop_rect,
+                                    frame_context.spatial_tree,
+                                );
+
+                                let available_rect = parent_surface.map_to_device_rect(
+                                    &available_rect,
+                                    frame_context.spatial_tree,
+                                ).round_out();
+
+                                let backdrop_uv = calculate_uv_rect_kind(
+                                    available_rect,
+                                    backdrop_rect,
+                                );
+
+                                frame_state.rg_builder.add().init(
+                                    RenderTask::new_dynamic(
+                                        available_rect.size().to_i32(),
+                                        RenderTaskKind::new_readback(Some(available_rect.min)),
+                                    ).with_uv_rect_kind(backdrop_uv)
+                                )
+                            }
+                            None => {
+                                frame_state.rg_builder.add().init(
+                                    RenderTask::new_dynamic(
+                                        DeviceIntSize::new(16, 16),
+                                        RenderTaskKind::new_readback(None),
+                                    )
+                                )
+                            }
+                        };
+
+                        frame_state.surface_builder.add_child_render_task(
+                            readback_task_id,
+                            frame_state.rg_builder,
+                        );
+
+                        self.secondary_render_task_id = Some(readback_task_id);
+
+                        let task_size = surface_rects.clipped.size().to_i32();
+
+                        let cmd_buffer_index = frame_state.cmd_buffers.create_cmd_buffer();
+
+                        let render_task_id = frame_state.rg_builder.add().init(
+                            RenderTask::new_dynamic(
+                                task_size,
+                                RenderTaskKind::new_picture(
+                                    task_size,
+                                    surface_rects.needs_scissor_rect,
+                                    surface_rects.clipped.min,
+                                    surface_spatial_node_index,
+                                    raster_spatial_node_index,
+                                    device_pixel_scale,
+                                    None,
+                                    None,
+                                    None,
+                                    cmd_buffer_index,
+                                    can_use_shared_surface,
+                                )
+                            ).with_uv_rect_kind(surface_rects.uv_rect_kind)
+                        );
+
+                        primary_render_task_id = render_task_id;
+
+                        surface_descriptor = SurfaceDescriptor::new_simple(
+                            render_task_id,
+                            surface_rects.clipped_local,
+                        );
+                    }
+                    PictureCompositeMode::Filter(..) => {
+                        let cmd_buffer_index = frame_state.cmd_buffers.create_cmd_buffer();
+
+                        let render_task_id = frame_state.rg_builder.add().init(
+                            RenderTask::new_dynamic(
+                                surface_rects.task_size,
+                                RenderTaskKind::new_picture(
+                                    surface_rects.task_size,
+                                    surface_rects.needs_scissor_rect,
+                                    surface_rects.clipped.min,
+                                    surface_spatial_node_index,
+                                    raster_spatial_node_index,
+                                    device_pixel_scale,
+                                    None,
+                                    None,
+                                    None,
+                                    cmd_buffer_index,
+                                    can_use_shared_surface,
+                                )
+                            ).with_uv_rect_kind(surface_rects.uv_rect_kind)
+                        );
+
+                        primary_render_task_id = render_task_id;
+
+                        surface_descriptor = SurfaceDescriptor::new_simple(
+                            render_task_id,
+                            surface_rects.clipped_local,
+                        );
+                    }
+                    PictureCompositeMode::ComponentTransferFilter(..) => {
+                        let cmd_buffer_index = frame_state.cmd_buffers.create_cmd_buffer();
+
+                        let render_task_id = frame_state.rg_builder.add().init(
+                            RenderTask::new_dynamic(
+                                surface_rects.task_size,
+                                RenderTaskKind::new_picture(
+                                    surface_rects.task_size,
+                                    surface_rects.needs_scissor_rect,
+                                    surface_rects.clipped.min,
+                                    surface_spatial_node_index,
+                                    raster_spatial_node_index,
+                                    device_pixel_scale,
+                                    None,
+                                    None,
+                                    None,
+                                    cmd_buffer_index,
+                                    can_use_shared_surface,
+                                )
+                            ).with_uv_rect_kind(surface_rects.uv_rect_kind)
+                        );
+
+                        primary_render_task_id = render_task_id;
+
+                        surface_descriptor = SurfaceDescriptor::new_simple(
+                            render_task_id,
+                            surface_rects.clipped_local,
+                        );
+                    }
+                    PictureCompositeMode::MixBlend(..) |
+                    PictureCompositeMode::Blit(_) => {
+                        let cmd_buffer_index = frame_state.cmd_buffers.create_cmd_buffer();
+
+                        let render_task_id = frame_state.rg_builder.add().init(
+                            RenderTask::new_dynamic(
+                                surface_rects.task_size,
+                                RenderTaskKind::new_picture(
+                                    surface_rects.task_size,
+                                    surface_rects.needs_scissor_rect,
+                                    surface_rects.clipped.min,
+                                    surface_spatial_node_index,
+                                    raster_spatial_node_index,
+                                    device_pixel_scale,
+                                    None,
+                                    None,
+                                    None,
+                                    cmd_buffer_index,
+                                    can_use_shared_surface,
+                                )
+                            ).with_uv_rect_kind(surface_rects.uv_rect_kind)
+                        );
+
+                        primary_render_task_id = render_task_id;
+
+                        surface_descriptor = SurfaceDescriptor::new_simple(
+                            render_task_id,
+                            surface_rects.clipped_local,
+                        );
+                    }
+                    PictureCompositeMode::IntermediateSurface => {
+                        if !scratch.required_sub_graphs.contains(&pic_index) {
+                            return None;
+                        }
+
+                        // TODO(gw): Remove all the mostly duplicated code in each of these
+                        //           match cases (they used to be quite different).
+                        let cmd_buffer_index = frame_state.cmd_buffers.create_cmd_buffer();
+
+                        let render_task_id = frame_state.rg_builder.add().init(
+                            RenderTask::new_dynamic(
+                                surface_rects.task_size,
+                                RenderTaskKind::new_picture(
+                                    surface_rects.task_size,
+                                    surface_rects.needs_scissor_rect,
+                                    surface_rects.clipped.min,
+                                    surface_spatial_node_index,
+                                    raster_spatial_node_index,
+                                    device_pixel_scale,
+                                    None,
+                                    None,
+                                    None,
+                                    cmd_buffer_index,
+                                    can_use_shared_surface,
+                                )
+                            ).with_uv_rect_kind(surface_rects.uv_rect_kind)
+                        );
+
+                        primary_render_task_id = render_task_id;
+
+                        surface_descriptor = SurfaceDescriptor::new_simple(
+                            render_task_id,
+                            surface_rects.clipped_local,
+                        );
+                    }
+                    PictureCompositeMode::SvgFilter(ref primitives, ref filter_datas) => {
+                        let cmd_buffer_index = frame_state.cmd_buffers.create_cmd_buffer();
+
+                        let picture_task_id = frame_state.rg_builder.add().init(
+                            RenderTask::new_dynamic(
+                                surface_rects.task_size,
+                                RenderTaskKind::new_picture(
+                                    surface_rects.task_size,
+                                    surface_rects.needs_scissor_rect,
+                                    surface_rects.clipped.min,
+                                    surface_spatial_node_index,
+                                    raster_spatial_node_index,
+                                    device_pixel_scale,
+                                    None,
+                                    None,
+                                    None,
+                                    cmd_buffer_index,
+                                    can_use_shared_surface,
+                                )
+                            ).with_uv_rect_kind(surface_rects.uv_rect_kind)
+                        );
+
+                        let filter_task_id = RenderTask::new_svg_filter(
+                            primitives,
+                            filter_datas,
+                            frame_state.rg_builder,
+                            surface_rects.clipped.size().to_i32(),
+                            surface_rects.uv_rect_kind,
+                            picture_task_id,
+                            device_pixel_scale,
+                        );
+
+                        primary_render_task_id = filter_task_id;
+
+                        surface_descriptor = SurfaceDescriptor::new_chained(
+                            picture_task_id,
+                            filter_task_id,
+                            surface_rects.clipped_local,
+                        );
+                    }
+                }
+
+                let is_sub_graph = self.flags.contains(PictureFlags::IS_SUB_GRAPH);
+
+                frame_state.surface_builder.push_surface(
+                    raster_config.surface_index,
+                    is_sub_graph,
+                    surface_rects.clipped_local,
+                    surface_descriptor,
+                    frame_state.surfaces,
+                    frame_state.rg_builder,
+                );
+
+                self.primary_render_task_id = Some(primary_render_task_id);
+            }
+            None => {}
+        };
+
+        let state = PictureState {
+            map_local_to_pic,
+            map_pic_to_world,
+        };
+
+        let mut dirty_region_count = 0;
+
+        // If this is a picture cache, push the dirty region to ensure any
+        // child primitives are culled and clipped to the dirty rect(s).
+        if let Some(RasterConfig { composite_mode: PictureCompositeMode::TileCache { slice_id }, .. }) = self.raster_config {
+            let dirty_region = tile_caches[&slice_id].dirty_region.clone();
+            frame_state.push_dirty_region(dirty_region);
+            dirty_region_count += 1;
+        }
+
+        // Disallow subpixel AA if an intermediate surface is needed.
+        // TODO(lsalzman): allow overriding parent if intermediate surface is opaque
+        let subpixel_mode = match self.raster_config {
+            Some(RasterConfig { ref composite_mode, .. }) => {
+                let subpixel_mode = match composite_mode {
+                    PictureCompositeMode::TileCache { slice_id } => {
+                        tile_caches[&slice_id].subpixel_mode
+                    }
+                    PictureCompositeMode::Blit(..) |
+                    PictureCompositeMode::ComponentTransferFilter(..) |
+                    PictureCompositeMode::Filter(..) |
+                    PictureCompositeMode::MixBlend(..) |
+                    PictureCompositeMode::IntermediateSurface |
+                    PictureCompositeMode::SvgFilter(..) => {
+                        // TODO(gw): We can take advantage of the same logic that
+                        //           exists in the opaque rect detection for tile
+                        //           caches, to allow subpixel text on other surfaces
+                        //           that can be detected as opaque.
+                        SubpixelMode::Deny
+                    }
+                };
+
+                subpixel_mode
+            }
+            None => {
+                SubpixelMode::Allow
+            }
+        };
+
+        // Still disable subpixel AA if parent forbids it
+        let subpixel_mode = match (parent_subpixel_mode, subpixel_mode) {
+            (SubpixelMode::Allow, SubpixelMode::Allow) => {
+                // Both parent and this surface unconditionally allow subpixel AA
+                SubpixelMode::Allow
+            }
+            (SubpixelMode::Allow, SubpixelMode::Conditional { allowed_rect }) => {
+                // Parent allows, but we are conditional subpixel AA
+                SubpixelMode::Conditional {
+                    allowed_rect,
+                }
+            }
+            (SubpixelMode::Conditional { allowed_rect }, SubpixelMode::Allow) => {
+                // Propagate conditional subpixel mode to child pictures that allow subpixel AA
+                SubpixelMode::Conditional {
+                    allowed_rect,
+                }
+            }
+            (SubpixelMode::Conditional { .. }, SubpixelMode::Conditional { ..}) => {
+                unreachable!("bug: only top level picture caches have conditional subpixel");
+            }
+            (SubpixelMode::Deny, _) | (_, SubpixelMode::Deny) => {
+                // Either parent or this surface explicitly deny subpixel, these take precedence
+                SubpixelMode::Deny
+            }
+        };
+
+        let context = PictureContext {
+            pic_index,
+            raster_spatial_node_index: frame_state.surfaces[surface_index.0].raster_spatial_node_index,
+            surface_spatial_node_index,
+            surface_index,
+            dirty_region_count,
+            subpixel_mode,
+        };
+
+        let prim_list = mem::replace(&mut self.prim_list, PrimitiveList::empty());
+
+        Some((context, state, prim_list))
+    }
+
+    /// Tears down the frame-building state described by `context` and hands
+    /// `prim_list` back to this picture.
+    ///
+    /// In order, this:
+    /// * pops one dirty region per `context.dirty_region_count`,
+    /// * pops the surface from the surface builder if this picture has a
+    ///   raster config,
+    /// * if this picture's 3D context carries root data, resolves the split
+    ///   planes and pushes a primitive command for every ordered child,
+    /// * stores `prim_list` in `self.prim_list`.
+    pub fn restore_context(
+        &mut self,
+        pic_index: PictureIndex,
+        prim_list: PrimitiveList,
+        context: PictureContext,
+        prim_instances: &[PrimitiveInstance],
+        frame_context: &FrameBuildingContext,
+        frame_state: &mut FrameBuildingState,
+    ) {
+        // Balance every dirty region that was pushed for this picture.
+        for _ in 0 .. context.dirty_region_count {
+            frame_state.pop_dirty_region();
+        }
+
+        // A surface only needs popping when a raster config is present.
+        if self.raster_config.is_some() {
+            frame_state.surface_builder.pop_surface(
+                pic_index,
+                frame_state.rg_builder,
+                frame_state.cmd_buffers,
+            );
+        }
+
+        if let Picture3DContext::In { root_data: Some(ref mut ordered_children), plane_splitter_index, .. } = self.context_3d {
+            // Resolve split planes via BSP; this refills `ordered_children`.
+            PicturePrimitive::resolve_split_planes(
+                &mut frame_state.plane_splitters[plane_splitter_index.0],
+                ordered_children,
+                &mut frame_state.gpu_cache,
+                &frame_context.spatial_tree,
+            );
+
+            // Emit one complex primitive command per resolved child into the
+            // relevant command buffers.
+            for ordered_child in ordered_children.iter() {
+                let instance_index = ordered_child.anchor.instance_index;
+                let child_vis = &prim_instances[instance_index.0 as usize].vis;
+                let prim_cmd = PrimitiveCommand::complex(
+                    instance_index,
+                    ordered_child.gpu_address,
+                );
+
+                frame_state.surface_builder.push_prim(
+                    &prim_cmd,
+                    ordered_child.anchor.spatial_node_index,
+                    child_vis,
+                    frame_state.cmd_buffers,
+                );
+            }
+        }
+
+        self.prim_list = prim_list;
+    }
+
+    /// Registers a primitive instance with the given plane splitter.
+    ///
+    /// The primitive's local rect is intersected with its combined local clip
+    /// rect, converted into polygon(s) according to the kind of world
+    /// transform of `prim_spatial_node_index`, and every resulting polygon is
+    /// added to `splitter` tagged with `plane_split_anchor`.
+    ///
+    /// Returns `false` when the local rect and the clip rect do not intersect
+    /// (nothing is added), and `true` in every other case - including when
+    /// clipping the transformed polygon fails or yields no polygons.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `Polygon::from_transformed_rect_with_inverse` does not yield
+    /// a polygon in the unit-scale `ScaleOffset` case.
+    pub fn add_split_plane(
+        splitter: &mut PlaneSplitter,
+        spatial_tree: &SpatialTree,
+        prim_spatial_node_index: SpatialNodeIndex,
+        original_local_rect: LayoutRect,
+        combined_local_clip_rect: &LayoutRect,
+        world_rect: WorldRect,
+        plane_split_anchor: PlaneSplitAnchor,
+    ) -> bool {
+        let world_mapping = spatial_tree.get_world_transform(prim_spatial_node_index);
+        let world_matrix = world_mapping.clone().into_transform().cast().to_untyped();
+
+        // The local clip rect has to be applied here, before splitting: split
+        // composites are drawn as polygons rather than rectangles, so the
+        // vertex shader cannot apply it. Interpolation still works correctly
+        // because the UVs are determined by a bilerp with a factor taken from
+        // the original local rect.
+        let clipped_local_rect = original_local_rect.intersection(combined_local_clip_rect);
+        let local_rect = if let Some(rect) = clipped_local_rect {
+            rect.cast()
+        } else {
+            return false;
+        };
+        let world_rect = world_rect.cast();
+
+        match world_mapping {
+            CoordinateSpaceMapping::Local => {
+                // No transform to apply: the local rect is used directly.
+                splitter.add(Polygon::from_rect(
+                    local_rect.to_rect() * Scale::new(1.0),
+                    plane_split_anchor,
+                ));
+            }
+            CoordinateSpaceMapping::ScaleOffset(scale_offset) if scale_offset.scale == Vector2D::new(1.0, 1.0) => {
+                // Unit scale: build the polygon from the forward matrix and
+                // the inverse of the scale/offset.
+                let inverse_matrix = scale_offset.inverse().to_transform().cast();
+                splitter.add(
+                    Polygon::from_transformed_rect_with_inverse(
+                        local_rect.to_rect().to_untyped(),
+                        &world_matrix,
+                        &inverse_matrix,
+                        plane_split_anchor,
+                    ).unwrap()
+                );
+            }
+            CoordinateSpaceMapping::ScaleOffset(_) |
+            CoordinateSpaceMapping::Transform(_) => {
+                // General case: transform the polygon and clip it against the
+                // world rect; a clipping error adds nothing to the splitter.
+                let mut clipper = Clipper::new();
+                let clip_result = clipper.clip_transformed(
+                    Polygon::from_rect(
+                        local_rect.to_rect().to_untyped(),
+                        plane_split_anchor,
+                    ),
+                    &world_matrix,
+                    Some(world_rect.to_rect().to_untyped()),
+                );
+                if let Ok(clipped_polygons) = clip_result {
+                    for polygon in clipped_polygons {
+                        splitter.add(polygon);
+                    }
+                }
+            }
+        }
+
+        true
+    }
+
+    /// Rebuilds `ordered` from the polygons accumulated in `splitter`, in the
+    /// order produced by sorting them along the Z axis.
+    ///
+    /// For each sorted polygon, its four points are mapped through the inverse
+    /// of the world transform of the polygon's spatial node, the resulting
+    /// x/y coordinates are pushed to `gpu_cache` as two per-frame blocks, and
+    /// an `OrderedPictureChild` with the polygon's anchor and the GPU address
+    /// is appended. Polygons whose world transform has no inverse, or for
+    /// which any point cannot be transformed, are skipped.
+    fn resolve_split_planes(
+        splitter: &mut PlaneSplitter,
+        ordered: &mut Vec<OrderedPictureChild>,
+        gpu_cache: &mut GpuCache,
+        spatial_tree: &SpatialTree,
+    ) {
+        ordered.clear();
+
+        // The Z axis points at the screen and `sort` is ascending, which
+        // gives the back-to-front order needed for rendering.
+        let sorted_polygons = splitter.sort(vec3(0.0, 0.0, 1.0));
+        ordered.reserve(sorted_polygons.len());
+
+        for polygon in sorted_polygons {
+            let world_mapping = spatial_tree
+                .get_world_transform(polygon.anchor.spatial_node_index);
+            let world_to_local = match world_mapping.inverse() {
+                Some(inverse) => inverse.into_transform(),
+                // Not logged: doing so would be a bit too verbose.
+                None => continue,
+            };
+
+            // Maps one polygon point back into local space, if possible.
+            let to_local = |index: usize| {
+                world_to_local.transform_point3d(polygon.points[index].cast_unit().to_f32())
+            };
+
+            // Drop this plane from drawing if any point is un-transformable.
+            let (p0, p1, p2, p3) = match (to_local(0), to_local(1), to_local(2), to_local(3)) {
+                (Some(p0), Some(p1), Some(p2), Some(p3)) => (p0, p1, p2, p3),
+                _ => continue,
+            };
+
+            let gpu_blocks = [
+                [p0.x, p0.y, p1.x, p1.y].into(),
+                [p2.x, p2.y, p3.x, p3.y].into(),
+            ];
+            let gpu_handle = gpu_cache.push_per_frame_blocks(&gpu_blocks);
+            let gpu_address = gpu_cache.get_address(&gpu_handle);
+
+            ordered.push(OrderedPictureChild {
+                anchor: polygon.anchor,
+                gpu_address,
+            });
+        }
+    }
+
+    /// Pre-update step run on this picture as part of the frame build.
+    ///
+    /// Currently this only resolves the picture's scene (animation)
+    /// properties against the scene properties held by `frame_context`.
+    pub fn pre_update(
+        &mut self,
+        frame_context: &FrameBuildingContext,
+    ) {
+        // Resolve animation properties from the frame's scene properties.
+        let scene_properties = frame_context.scene_properties;
+        self.resolve_scene_properties(scene_properties);
+    }
+
+    /// Called during initial picture traversal, before we know the
+    /// bounding rect of children. It is possible to determine the
+    /// surface / raster config now though.
+    ///
+    /// `raster_config` is always cleared first. If this picture has a composite
+    /// mode, a new `SurfaceInfo` is appended to `surfaces`, `raster_config` is
+    /// set to reference it, and the new surface index is returned. Otherwise
+    /// `None` is returned and `raster_config` stays cleared.
+    ///
+    /// Panics if the composite mode is a tile cache whose `slice_id` has no
+    /// entry in `tile_caches`, or if `parent_surface_index` is out of bounds
+    /// for `surfaces`.
+    pub fn assign_surface(
+        &mut self,
+        frame_context: &FrameBuildingContext,
+        parent_surface_index: Option<SurfaceIndex>,
+        tile_caches: &mut FastHashMap<SliceId, Box<TileCacheInstance>>,
+        surfaces: &mut Vec<SurfaceInfo>,
+    ) -> Option<SurfaceIndex> {
+        // Reset raster config in case we early out below.
+        self.raster_config = None;
+
+        // Pictures without a composite mode do not establish a surface.
+        let composite_mode = match self.composite_mode {
+            Some(ref composite_mode) => composite_mode,
+            None => return None,
+        };
+
+        let surface_spatial_node_index = self.spatial_node_index;
+
+        // This surface is scaled based on the scale factors of the transform from
+        // its own spatial node to the parent surface (or to the root reference
+        // frame when there is no parent). This scaling helps ensure that the
+        // content in this surface does not become blurry or pixelated when
+        // composited in the parent surface.
+        let world_scale_factors = match parent_surface_index {
+            Some(parent_surface_index) => {
+                let parent_surface = &surfaces[parent_surface_index.0];
+
+                let local_to_surface = frame_context
+                    .spatial_tree
+                    .get_relative_transform(
+                        surface_spatial_node_index,
+                        parent_surface.surface_spatial_node_index,
+                    );
+
+                // Since we can't determine reasonable scale factors for transforms
+                // with perspective, just use a scale of (1,1) for now, which is
+                // what Gecko does when it chooses to supply a scale factor anyway.
+                // In future, we might be able to improve the quality here by taking
+                // into account the screen rect after clipping, but for now this gives
+                // better results than just taking the matrix scale factors.
+                let (scale_x, scale_y) = if local_to_surface.is_perspective() {
+                    (1.0, 1.0)
+                } else {
+                    local_to_surface.scale_factors()
+                };
+
+                // Accumulate with the scale already applied by the parent surface.
+                (
+                    scale_x * parent_surface.world_scale_factors.0,
+                    scale_y * parent_surface.world_scale_factors.1,
+                )
+            }
+            None => {
+                frame_context
+                    .spatial_tree
+                    .get_relative_transform(
+                        surface_spatial_node_index,
+                        frame_context.spatial_tree.root_reference_frame_index(),
+                    )
+                    .scale_factors()
+            }
+        };
+
+        // TODO(gw): For now, we disable snapping on any sub-graph, as that implies
+        //           that the spatial / raster node must be the same as the parent
+        //           surface. In future, we may be able to support snapping in these
+        //           cases (if it's even useful?) or perhaps add a ENABLE_SNAPPING
+        //           picture flag, if the IS_SUB_GRAPH is ever useful in a different
+        //           context.
+        let allow_snapping = !self.flags.contains(PictureFlags::DISABLE_SNAPPING);
+
+        // Select the device pixel scale, raster spatial node and scale factors for
+        // the new surface, depending on the kind of composite mode.
+        let (device_pixel_scale, raster_spatial_node_index, local_scale, world_scale_factors) = match composite_mode {
+            PictureCompositeMode::TileCache { slice_id } => {
+                let tile_cache = tile_caches.get_mut(&slice_id).unwrap();
+
+                // We only update the raster scale if we're in high quality zoom mode, or there is no
+                // pinch-zoom active. This means that in low quality pinch-zoom, we retain the initial
+                // scale factor until the zoom ends, then select a high quality zoom factor for the next
+                // frame to be drawn.
+                let update_raster_scale =
+                    !frame_context.fb_config.low_quality_pinch_zoom ||
+                    !frame_context.spatial_tree.get_spatial_node(tile_cache.spatial_node_index).is_ancestor_or_self_zooming;
+
+                if update_raster_scale {
+                    // Get the complete scale-offset from local space to device space
+                    let local_to_device = get_relative_scale_offset(
+                        tile_cache.spatial_node_index,
+                        frame_context.root_spatial_node_index,
+                        frame_context.spatial_tree,
+                    );
+
+                    tile_cache.current_raster_scale = local_to_device.scale.x;
+                }
+
+                // Bounds applied to the selected scaling factor.
+                let (min_scale, max_scale) = if frame_context.fb_config.low_quality_pinch_zoom {
+                    // Force the scale for this tile cache to be the currently selected
+                    // local raster scale, so we don't need to rasterize tiles during
+                    // the pinch-zoom.
+                    (tile_cache.current_raster_scale, tile_cache.current_raster_scale)
+                } else {
+                    // We may need to minify when zooming out picture cache tiles,
+                    // so there is no lower bound (and effectively no upper bound).
+                    (0.0, 1.0e32)
+                };
+
+                // Pick the largest scale factor of the transform for the scaling factor.
+                let scaling_factor = world_scale_factors.0.max(world_scale_factors.1).max(min_scale).min(max_scale);
+
+                (Scale::new(scaling_factor), surface_spatial_node_index, (1.0, 1.0), world_scale_factors)
+            }
+            _ => {
+                let surface_spatial_node = frame_context.spatial_tree.get_spatial_node(surface_spatial_node_index);
+
+                let enable_snapping =
+                    allow_snapping &&
+                    surface_spatial_node.coordinate_system_id == CoordinateSystemId::root() &&
+                    surface_spatial_node.snapping_transform.is_some();
+
+                if enable_snapping {
+                    // Rasterize in the root reference frame at a device scale of 1, and
+                    // carry the local -> raster scale separately.
+                    let raster_spatial_node_index = frame_context.spatial_tree.root_reference_frame_index();
+
+                    let local_scale = frame_context
+                        .spatial_tree
+                        .get_relative_transform(
+                            self.spatial_node_index,
+                            raster_spatial_node_index,
+                        )
+                        .scale_factors();
+
+                    (Scale::new(1.0), raster_spatial_node_index, local_scale, (1.0, 1.0))
+                } else {
+                    // If client supplied a specific local scale, use that instead of
+                    // estimating from parent transform
+                    let world_scale_factors = match self.raster_space {
+                        RasterSpace::Screen => world_scale_factors,
+                        RasterSpace::Local(scale) => (scale, scale),
+                    };
+
+                    let device_pixel_scale = Scale::new(world_scale_factors.0.max(world_scale_factors.1));
+
+                    (device_pixel_scale, surface_spatial_node_index, (1.0, 1.0), world_scale_factors)
+                }
+            }
+        };
+
+        let surface = SurfaceInfo::new(
+            surface_spatial_node_index,
+            raster_spatial_node_index,
+            frame_context.global_screen_world_rect,
+            &frame_context.spatial_tree,
+            device_pixel_scale,
+            world_scale_factors,
+            local_scale,
+            allow_snapping,
+        );
+
+        // The new surface is appended, so its index is the current length.
+        let surface_index = SurfaceIndex(surfaces.len());
+        surfaces.push(surface);
+
+        self.raster_config = Some(RasterConfig {
+            composite_mode: composite_mode.clone(),
+            surface_index,
+        });
+
+        Some(surface_index)
+    }
+
+    /// Runs after child pictures have been updated during the initial picture
+    /// traversal.
+    ///
+    /// Every cluster that survives the backface and invertibility checks is
+    /// flagged visible and its bounding rect is unioned into the unclipped local
+    /// rect of `surface_index`. If this picture has a raster config and a parent
+    /// surface, the coverage rect of its surface is then mapped into the parent
+    /// surface and unioned into the parent's unclipped local rect, so the parent
+    /// bounds include any dynamic picture bounds.
+    pub fn propagate_bounding_rect(
+        &mut self,
+        surface_index: SurfaceIndex,
+        parent_surface_index: Option<SurfaceIndex>,
+        surfaces: &mut [SurfaceInfo],
+        frame_context: &FrameBuildingContext,
+    ) {
+        let surface = &mut surfaces[surface_index.0];
+
+        for cluster in &mut self.prim_list.clusters {
+            // Visibility is recomputed from scratch for each cluster.
+            cluster.flags.remove(ClusterFlags::IS_VISIBLE);
+
+            // A cluster whose backface is not visible is culled when it faces away.
+            // For in-preserve-3d primitives and pictures, the backface visibility
+            // is evaluated relative to the containing block.
+            let is_backface_culled =
+                !cluster.flags.contains(ClusterFlags::IS_BACKFACE_VISIBLE) &&
+                match self.context_3d {
+                    Picture3DContext::In { ancestor_index, .. } => {
+                        let mut visible_face = VisibleFace::Front;
+                        frame_context.spatial_tree.get_relative_transform_with_face(
+                            cluster.spatial_node_index,
+                            ancestor_index,
+                            Some(&mut visible_face),
+                        );
+                        visible_face == VisibleFace::Back
+                    }
+                    _ => false,
+                };
+            if is_backface_culled {
+                continue;
+            }
+
+            // A cluster that can't be transformed contributes nothing.
+            let is_invertible = frame_context
+                .spatial_tree
+                .get_spatial_node(cluster.spatial_node_index)
+                .invertible;
+            if !is_invertible {
+                continue;
+            }
+
+            // Point the surface mapper at this cluster's spatial node so the
+            // cluster rect can be expressed in the space of the surface.
+            surface.map_local_to_surface.set_target_spatial_node(
+                cluster.spatial_node_index,
+                frame_context.spatial_tree,
+            );
+
+            // The cluster passed the invertible and backface checks, so it is
+            // visible even if the mapping below produces no rect.
+            cluster.flags.insert(ClusterFlags::IS_VISIBLE);
+
+            match surface.map_local_to_surface.map(&cluster.bounding_rect) {
+                Some(cluster_rect) => {
+                    surface.unclipped_local_rect = surface.unclipped_local_rect.union(&cluster_rect);
+                }
+                None => {}
+            }
+        }
+
+        // Only pictures that establish a surface and have a parent surface need
+        // to propagate anything further.
+        let raster_config = match self.raster_config {
+            Some(ref mut raster_config) => raster_config,
+            None => return,
+        };
+        let parent_surface_index = match parent_surface_index {
+            Some(parent_surface_index) => parent_surface_index,
+            None => return,
+        };
+
+        // This surface's static rect is now known; ask the composite mode for
+        // the coverage it produces from that rect.
+        let surface_rect = raster_config.composite_mode.get_coverage(
+            surface,
+            Some(surface.unclipped_local_rect.cast_unit()),
+        );
+
+        let parent_surface = &mut surfaces[parent_surface_index.0];
+        parent_surface.map_local_to_surface.set_target_spatial_node(
+            self.spatial_node_index,
+            frame_context.spatial_tree,
+        );
+
+        // Map the coverage rect into the parent surface and grow the parent's
+        // unclipped rect to include it.
+        let mapped_rect = parent_surface
+            .map_local_to_surface
+            .map(&surface_rect);
+        if let Some(parent_surface_rect) = mapped_rect {
+            parent_surface.unclipped_local_rect =
+                parent_surface.unclipped_local_rect.union(&parent_surface_rect);
+        }
+    }
+
+    /// Writes the extra GPU cache data that this picture's composite mode needs.
+    ///
+    /// * Drop shadows: one GPU cache entry per shadow (colors, shadow rect size,
+    ///   and the shadow rect as segment data).
+    /// * Color matrix: five blocks of four values each taken from the matrix.
+    /// * Flood: the flood color.
+    /// * Component transfer: delegates to the filter data's own `update`.
+    ///
+    /// All other composite modes, and pictures without a raster config, need no
+    /// work here. The function currently returns `true` on every path.
+    pub fn prepare_for_render(
+        &mut self,
+        frame_state: &mut FrameBuildingState,
+        data_stores: &mut DataStores,
+    ) -> bool {
+        let raster_config = match self.raster_config {
+            Some(ref mut raster_config) => raster_config,
+            None => return true,
+        };
+
+        // TODO(gw): Almost all of the Picture types below use extra_gpu_cache_data
+        //           to store the same type of data. The exception is the filter
+        //           with a ColorMatrix, which stores the color matrix here. It's
+        //           probably worth tidying this code up to be a bit more consistent.
+        //           Perhaps store the color matrix after the common data, even though
+        //           it's not used by that shader.
+
+        match raster_config.composite_mode {
+            PictureCompositeMode::Filter(Filter::DropShadows(ref shadows)) => {
+                // One handle per shadow.
+                self.extra_gpu_data_handles.resize(shadows.len(), GpuCacheHandle::new());
+
+                let handles = self.extra_gpu_data_handles.iter_mut();
+                for (shadow, extra_handle) in shadows.iter().zip(handles) {
+                    // `None` means nothing needs to be written for this handle.
+                    let mut request = match frame_state.gpu_cache.request(extra_handle) {
+                        Some(request) => request,
+                        None => continue,
+                    };
+
+                    let surface = &frame_state.surfaces[raster_config.surface_index.0];
+                    let prim_rect = surface.clipped_local_rect.cast_unit();
+
+                    // Basic brush primitive header is (see end of prepare_prim_for_render_inner in prim_store.rs)
+                    //  [brush specific data]
+                    //  [segment_rect, segment data]
+                    let (blur_inflation_x, blur_inflation_y) = surface.clamp_blur_radius(
+                        shadow.blur_radius,
+                        shadow.blur_radius,
+                    );
+
+                    // The shadow rect is the primitive rect grown by the blur and
+                    // moved by the shadow offset.
+                    let shadow_rect = prim_rect
+                        .inflate(
+                            blur_inflation_x * BLUR_SAMPLE_SCALE,
+                            blur_inflation_y * BLUR_SAMPLE_SCALE,
+                        )
+                        .translate(shadow.offset);
+
+                    // ImageBrush colors
+                    request.push(shadow.color.premultiplied());
+                    request.push(PremultipliedColorF::WHITE);
+                    request.push([
+                        shadow_rect.width(),
+                        shadow_rect.height(),
+                        0.0,
+                        0.0,
+                    ]);
+
+                    // segment rect / extra data
+                    request.push(shadow_rect);
+                    request.push([0.0, 0.0, 0.0, 0.0]);
+                }
+            }
+            PictureCompositeMode::Filter(Filter::ColorMatrix(ref m)) => {
+                // A single handle holds the whole matrix.
+                if self.extra_gpu_data_handles.is_empty() {
+                    self.extra_gpu_data_handles.push(GpuCacheHandle::new());
+                }
+                if let Some(mut request) = frame_state.gpu_cache.request(&mut self.extra_gpu_data_handles[0]) {
+                    for row in 0..5 {
+                        let base = row * 4;
+                        request.push([m[base], m[base + 1], m[base + 2], m[base + 3]]);
+                    }
+                }
+            }
+            PictureCompositeMode::Filter(Filter::Flood(ref color)) => {
+                // A single handle holds the flood color.
+                if self.extra_gpu_data_handles.is_empty() {
+                    self.extra_gpu_data_handles.push(GpuCacheHandle::new());
+                }
+                if let Some(mut request) = frame_state.gpu_cache.request(&mut self.extra_gpu_data_handles[0]) {
+                    request.push(color.to_array());
+                }
+            }
+            PictureCompositeMode::ComponentTransferFilter(handle) => {
+                let filter_data = &mut data_stores.filter_data[handle];
+                filter_data.update(frame_state);
+            }
+            // Nothing extra to upload for these modes (this includes blur and
+            // every filter not handled above).
+            PictureCompositeMode::Filter(_) |
+            PictureCompositeMode::TileCache { .. } |
+            PictureCompositeMode::MixBlend(..) |
+            PictureCompositeMode::Blit(_) |
+            PictureCompositeMode::IntermediateSurface |
+            PictureCompositeMode::SvgFilter(..) => {}
+        }
+
+        true
+    }
+}
+
+/// Build a `TransformKey` from the relative transform between
+/// `spatial_node_index` and `cache_spatial_node_index` in `spatial_tree`.
+fn get_transform_key(
+    spatial_node_index: SpatialNodeIndex,
+    cache_spatial_node_index: SpatialNodeIndex,
+    spatial_tree: &SpatialTree,
+) -> TransformKey {
+    let relative_transform = spatial_tree.get_relative_transform(
+        spatial_node_index,
+        cache_spatial_node_index,
+    );
+
+    relative_transform.into()
+}
+
+/// A key for storing primitive comparison results during tile dependency tests.
+///
+/// The key pairs two primitive dependency indices and derives `Eq` / `Hash`,
+/// so it can be used as a hash map or hash set key.
+#[derive(Debug, Copy, Clone, Eq, Hash, PartialEq)]
+struct PrimitiveComparisonKey {
+    // NOTE(review): presumably an index into the previous frame's primitive
+    // dependencies -- confirm against the code that builds these keys.
+    prev_index: PrimitiveDependencyIndex,
+    // NOTE(review): presumably an index into the current frame's primitive
+    // dependencies -- confirm against the code that builds these keys.
+    curr_index: PrimitiveDependencyIndex,
+}
+
+/// Information stored an image dependency
+#[derive(Debug, Copy, Clone, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ImageDependency {
+    pub key: ImageKey,
+    pub generation: ImageGeneration,
+}
+
+impl ImageDependency {
+    /// A placeholder dependency built from the dummy image key and the
+    /// invalid image generation.
+    pub const INVALID: Self = Self {
+        generation: ImageGeneration::INVALID,
+        key: ImageKey::DUMMY,
+    };
+}
+
+/// In some cases, we need to know the dirty rect of all tiles in order
+/// to correctly invalidate a primitive. This records the inputs for such a
+/// test so that it can be evaluated later.
+// NOTE(review): where and when these deferred tests are evaluated is not
+// visible in this part of the file.
+#[derive(Debug)]
+struct DeferredDirtyTest {
+    /// The tile rect that the primitive being checked affects
+    tile_rect: TileRect,
+    /// The picture-cache local rect of the primitive being checked
+    prim_rect: PictureRect,
+}
+
+/// A helper struct to compare a primitive and all its sub-dependencies.
+///
+/// Each `*_comparer` walks one dependency list of the previous and current
+/// tile descriptors in step; the remaining fields are the external state
+/// consulted while deciding whether a dependency has changed.
+struct PrimitiveComparer<'a> {
+    /// Compares the clip dependencies (`clips`) of the two descriptors.
+    clip_comparer: CompareHelper<'a, ItemUid>,
+    /// Compares the transform dependencies (`transforms`) of the two descriptors.
+    transform_comparer: CompareHelper<'a, SpatialNodeKey>,
+    /// Compares the image dependencies (`images`) of the two descriptors.
+    image_comparer: CompareHelper<'a, ImageDependency>,
+    /// Compares the opacity bindings (`opacity_bindings`) of the two descriptors.
+    opacity_comparer: CompareHelper<'a, OpacityBinding>,
+    /// Compares the color bindings (`color_bindings`) of the two descriptors.
+    color_comparer: CompareHelper<'a, ColorBinding>,
+    /// Queried for the current generation of each image dependency.
+    resource_cache: &'a ResourceCache,
+    /// Decides whether two spatial node keys have equivalent transforms.
+    spatial_node_comparer: &'a mut SpatialNodeComparer,
+    /// Per-binding info used to detect changed opacity bindings.
+    opacity_bindings: &'a FastHashMap<PropertyBindingId, OpacityBindingInfo>,
+    /// Per-binding info used to detect changed color bindings.
+    color_bindings: &'a FastHashMap<PropertyBindingId, ColorBindingInfo>,
+}
+
+impl<'a> PrimitiveComparer<'a> {
+    /// Create a comparer over the dependency lists of the `prev` and `curr`
+    /// tile descriptors, holding on to the external state needed to decide
+    /// whether individual dependencies have changed.
+    fn new(
+        prev: &'a TileDescriptor,
+        curr: &'a TileDescriptor,
+        resource_cache: &'a ResourceCache,
+        spatial_node_comparer: &'a mut SpatialNodeComparer,
+        opacity_bindings: &'a FastHashMap<PropertyBindingId, OpacityBindingInfo>,
+        color_bindings: &'a FastHashMap<PropertyBindingId, ColorBindingInfo>,
+    ) -> Self {
+        PrimitiveComparer {
+            clip_comparer: CompareHelper::new(&prev.clips, &curr.clips),
+            transform_comparer: CompareHelper::new(&prev.transforms, &curr.transforms),
+            image_comparer: CompareHelper::new(&prev.images, &curr.images),
+            opacity_comparer: CompareHelper::new(&prev.opacity_bindings, &curr.opacity_bindings),
+            color_comparer: CompareHelper::new(&prev.color_bindings, &curr.color_bindings),
+            resource_cache,
+            spatial_node_comparer,
+            opacity_bindings,
+            color_bindings,
+        }
+    }
+
+    /// Reset every per-dependency comparer.
+    fn reset(&mut self) {
+        self.clip_comparer.reset();
+        self.transform_comparer.reset();
+        self.image_comparer.reset();
+        self.opacity_comparer.reset();
+        self.color_comparer.reset();
+    }
+
+    /// Advance the "previous" side of every comparer by the dependency counts
+    /// recorded on `prim`.
+    fn advance_prev(&mut self, prim: &PrimitiveDescriptor) {
+        self.clip_comparer.advance_prev(prim.clip_dep_count);
+        self.transform_comparer.advance_prev(prim.transform_dep_count);
+        self.image_comparer.advance_prev(prim.image_dep_count);
+        self.opacity_comparer.advance_prev(prim.opacity_binding_dep_count);
+        self.color_comparer.advance_prev(prim.color_binding_dep_count);
+    }
+
+    /// Advance the "current" side of every comparer by the dependency counts
+    /// recorded on `prim`.
+    fn advance_curr(&mut self, prim: &PrimitiveDescriptor) {
+        self.clip_comparer.advance_curr(prim.clip_dep_count);
+        self.transform_comparer.advance_curr(prim.transform_dep_count);
+        self.image_comparer.advance_curr(prim.image_dep_count);
+        self.opacity_comparer.advance_curr(prim.opacity_binding_dep_count);
+        self.color_comparer.advance_curr(prim.color_binding_dep_count);
+    }
+
+    /// Compare two primitive descriptors and their dependencies.
+    ///
+    /// The checks run in a fixed order (descriptor, clips, transforms, images,
+    /// opacity bindings, color bindings) and the result names the first one
+    /// that differs, or `Equal` if none do.
+    fn compare_prim(
+        &mut self,
+        prev: &PrimitiveDescriptor,
+        curr: &PrimitiveDescriptor,
+    ) -> PrimitiveCompareResult {
+        // The descriptors themselves must match before any dependency is examined.
+        if prev != curr {
+            return PrimitiveCompareResult::Descriptor;
+        }
+
+        // Clips: a plain equality test on each pair.
+        let clips_match = self.clip_comparer.is_same(
+            prev.clip_dep_count,
+            curr.clip_dep_count,
+            |prev_clip, curr_clip| prev_clip == curr_clip,
+        );
+        if !clips_match {
+            return PrimitiveCompareResult::Clip;
+        }
+
+        // Transforms: equivalence is decided by the spatial node comparer. It is
+        // borrowed into a local so the closure does not have to capture `self`.
+        let spatial_node_comparer = &mut self.spatial_node_comparer;
+        let transforms_match = self.transform_comparer.is_same(
+            prev.transform_dep_count,
+            curr.transform_dep_count,
+            |prev_key, curr_key| {
+                spatial_node_comparer.are_transforms_equivalent(prev_key, curr_key)
+            },
+        );
+        if !transforms_match {
+            return PrimitiveCompareResult::Transform;
+        }
+
+        // Images: the dependencies must be equal, and the resource cache must
+        // still report the recorded generation for the image.
+        let resource_cache = self.resource_cache;
+        let images_match = self.image_comparer.is_same(
+            prev.image_dep_count,
+            curr.image_dep_count,
+            |prev_image, curr_image| {
+                prev_image == curr_image &&
+                resource_cache.get_image_generation(curr_image.key) == curr_image.generation
+            },
+        );
+        if !images_match {
+            return PrimitiveCompareResult::Image;
+        }
+
+        // Opacity bindings: equal, and for bound values the binding must be
+        // known and not flagged as changed.
+        let opacity_bindings = self.opacity_bindings;
+        let opacities_match = self.opacity_comparer.is_same(
+            prev.opacity_binding_dep_count,
+            curr.opacity_binding_dep_count,
+            |prev_binding, curr_binding| {
+                prev_binding == curr_binding &&
+                match curr_binding {
+                    OpacityBinding::Binding(id) => {
+                        opacity_bindings
+                            .get(id)
+                            .map_or(false, |info| !info.changed)
+                    }
+                    _ => true,
+                }
+            },
+        );
+        if !opacities_match {
+            return PrimitiveCompareResult::OpacityBinding;
+        }
+
+        // Color bindings: same rule as for opacity bindings.
+        let color_bindings = self.color_bindings;
+        let colors_match = self.color_comparer.is_same(
+            prev.color_binding_dep_count,
+            curr.color_binding_dep_count,
+            |prev_binding, curr_binding| {
+                prev_binding == curr_binding &&
+                match curr_binding {
+                    ColorBinding::Binding(id) => {
+                        color_bindings
+                            .get(id)
+                            .map_or(false, |info| !info.changed)
+                    }
+                    _ => true,
+                }
+            },
+        );
+        if !colors_match {
+            return PrimitiveCompareResult::ColorBinding;
+        }
+
+        PrimitiveCompareResult::Equal
+    }
+}
+
+/// Details for a node in a quadtree that tracks dirty rects for a tile.
+///
+/// A node is either a `Leaf`, which owns the per-frame dependency index
+/// buffers and dirty history, or an internal `Node`, which only owns children.
+/// All leaf fields are skipped when serializing for capture / replay.
+#[cfg_attr(any(feature="capture",feature="replay"), derive(Clone))]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum TileNodeKind {
+    /// A node at the edge of the tree, tracking primitive dependencies.
+    Leaf {
+        /// The index buffer of primitives that affected this tile previous frame
+        #[cfg_attr(any(feature = "capture", feature = "replay"), serde(skip))]
+        prev_indices: Vec<PrimitiveDependencyIndex>,
+        /// The index buffer of primitives that affect this tile on this frame
+        #[cfg_attr(any(feature = "capture", feature = "replay"), serde(skip))]
+        curr_indices: Vec<PrimitiveDependencyIndex>,
+        /// A bitset of which of the last 64 frames have been dirty for this leaf.
+        /// The lowest bit is the most recent frame; the set is shifted left by
+        /// one each time the node is cleared for a new frame.
+        #[cfg_attr(any(feature = "capture", feature = "replay"), serde(skip))]
+        dirty_tracker: u64,
+        /// The number of frames since this node split or merged.
+        #[cfg_attr(any(feature = "capture", feature = "replay"), serde(skip))]
+        frames_since_modified: usize,
+    },
+    /// An internal node that has been split into child nodes.
+    Node {
+        /// The four children of this node
+        children: Vec<TileNode>,
+    },
+}
+
+/// The kind of modification that a tile wants to do
+#[derive(Copy, Clone, PartialEq, Debug)]
+enum TileModification {
+    /// Split a leaf node into child nodes.
+    Split,
+    /// Merge the children of a node back into a single leaf.
+    Merge,
+}
+
+/// A node in the dirty rect tracking quadtree.
+///
+/// The rect starts out as zero for a freshly created leaf and is assigned
+/// each time the node is cleared for a new frame.
+#[cfg_attr(any(feature="capture",feature="replay"), derive(Clone))]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct TileNode {
+    /// Leaf or internal node
+    pub kind: TileNodeKind,
+    /// Rect of this node in the same space as the tile cache picture
+    pub rect: PictureBox2D,
+}
+
+impl TileNode {
+    /// Create a leaf node that starts with `curr_indices` as its current
+    /// dependency index buffer, an empty previous buffer, a clean dirty
+    /// history and a zero rect.
+    fn new_leaf(curr_indices: Vec<PrimitiveDependencyIndex>) -> Self {
+        let kind = TileNodeKind::Leaf {
+            prev_indices: Vec::new(),
+            curr_indices,
+            dirty_tracker: 0,
+            frames_since_modified: 0,
+        };
+
+        TileNode {
+            kind,
+            rect: PictureBox2D::zero(),
+        }
+    }
+
+    /// Push debug rects for this node and, recursively, all of its children.
+    ///
+    /// A leaf is drawn red if it was dirty on the most recent frame, otherwise
+    /// green when `is_opaque` is set and yellow when it is not. Only the part
+    /// of the leaf inside `local_valid_rect` is drawn.
+    fn draw_debug_rects(
+        &self,
+        pic_to_world_mapper: &SpaceMapper<PicturePixel, WorldPixel>,
+        is_opaque: bool,
+        local_valid_rect: PictureRect,
+        scratch: &mut PrimitiveScratchBuffer,
+        global_device_pixel_scale: DevicePixelScale,
+    ) {
+        match self.kind {
+            TileNodeKind::Node { ref children, .. } => {
+                for child in children {
+                    child.draw_debug_rects(
+                        pic_to_world_mapper,
+                        is_opaque,
+                        local_valid_rect,
+                        scratch,
+                        global_device_pixel_scale,
+                    );
+                }
+            }
+            TileNodeKind::Leaf { dirty_tracker, .. } => {
+                // Nothing to draw if the leaf lies outside the valid rect.
+                let local_rect = match local_valid_rect.intersection(&self.rect) {
+                    Some(local_rect) => local_rect,
+                    None => return,
+                };
+
+                // Bit 0 of the tracker is the most recent frame.
+                let was_dirty = (dirty_tracker & 1) != 0;
+                let color = match (was_dirty, is_opaque) {
+                    (true, _) => debug_colors::RED,
+                    (false, true) => debug_colors::GREEN,
+                    (false, false) => debug_colors::YELLOW,
+                };
+                let outer_color = color.scale_alpha(0.3);
+                let inner_color = outer_color.scale_alpha(0.5);
+
+                let world_rect = pic_to_world_mapper
+                    .map(&local_rect)
+                    .unwrap();
+                let device_rect = world_rect * global_device_pixel_scale;
+
+                // Shrink the rect slightly so neighbouring nodes stay distinguishable.
+                scratch.push_debug_rect(
+                    device_rect.inflate(-3.0, -3.0),
+                    outer_color,
+                    inner_color
+                );
+            }
+        }
+    }
+
+    /// Split `rect` at its center into four quadrants, written to `result` in
+    /// the order: min/min corner, max-x/min-y, min-x/max-y, max/max corner.
+    fn get_child_rects(
+        rect: &PictureBox2D,
+        result: &mut [PictureBox2D; 4],
+    ) {
+        let min = rect.min;
+        let max = rect.max;
+        let center = min + rect.size() * 0.5;
+
+        result[0] = PictureBox2D::new(min, center);
+        result[1] = PictureBox2D::new(
+            PicturePoint::new(center.x, min.y),
+            PicturePoint::new(max.x, center.y),
+        );
+        result[2] = PictureBox2D::new(
+            PicturePoint::new(min.x, center.y),
+            PicturePoint::new(center.x, max.y),
+        );
+        result[3] = PictureBox2D::new(center, max);
+    }
+
+    /// Called during pre_update to start a new frame for this node: stores
+    /// `rect`, and for leaves rotates the current dependencies into the
+    /// previous slot, ages the dirty history and bumps the frame counter.
+    /// Internal nodes recurse into their children with the matching quadrant.
+    fn clear(
+        &mut self,
+        rect: PictureBox2D,
+    ) {
+        self.rect = rect;
+
+        match &mut self.kind {
+            TileNodeKind::Leaf { prev_indices, curr_indices, dirty_tracker, frames_since_modified } => {
+                // What was current last frame becomes the previous frame's
+                // dependencies; the current buffer starts out empty.
+                mem::swap(prev_indices, curr_indices);
+                curr_indices.clear();
+                // Another frame has passed in the dirty bit tracker.
+                *dirty_tracker <<= 1;
+                *frames_since_modified += 1;
+            }
+            TileNodeKind::Node { children, .. } => {
+                let mut child_rects = [PictureBox2D::zero(); 4];
+                TileNode::get_child_rects(&rect, &mut child_rects);
+                assert_eq!(child_rects.len(), children.len());
+
+                for (child, &child_rect) in children.iter_mut().zip(child_rects.iter()) {
+                    child.clear(child_rect);
+                }
+            }
+        }
+    }
+
+    /// Record the primitive dependency `index` on this node. A leaf appends it
+    /// to its current index buffer; an internal node forwards it to every
+    /// child whose rect intersects `prim_rect`.
+    fn add_prim(
+        &mut self,
+        index: PrimitiveDependencyIndex,
+        prim_rect: &PictureBox2D,
+    ) {
+        match &mut self.kind {
+            TileNodeKind::Leaf { curr_indices, .. } => curr_indices.push(index),
+            TileNodeKind::Node { children, .. } => {
+                let touched_children = children
+                    .iter_mut()
+                    .filter(|child| child.rect.intersects(prim_rect));
+
+                for child in touched_children {
+                    child.add_prim(index, prim_rect);
+                }
+            }
+        }
+    }
+
+    /// Apply a merge or split operation to this tile if its recent invalidation history warrants it, otherwise recurse into any children
+    fn maybe_merge_or_split(
+        &mut self,
+        level: i32,
+        curr_prims: &[PrimitiveDescriptor],
+        max_split_levels: i32,
+    ) {
+        // Determine if this tile wants to split or merge
+        let mut tile_mod = None;
+
+        fn get_dirty_frames(
+            dirty_tracker: u64,
+            frames_since_modified: usize,
+        ) -> Option<u32> {
+            // Only consider splitting or merging once more than 64 frames have passed since we last changed
+            if frames_since_modified > 64 {
+                // Each bit in the tracker is a frame that was recently invalidated
+                Some(dirty_tracker.count_ones())
+            } else {
+                None
+            }
+        }
+
+        match self.kind {
+            TileNodeKind::Leaf { dirty_tracker, frames_since_modified, .. } => {
+                // Only consider splitting if the tree isn't too deep.
+                if level < max_split_levels {
+                    if let Some(dirty_frames) = get_dirty_frames(dirty_tracker, frames_since_modified) {
+                        // If the tile has invalidated > 50% of the recent number of frames (more than 32 of the 64 tracker bits), split.
+                        if dirty_frames > 32 {
+                            tile_mod = Some(TileModification::Split);
+                        }
+                    }
+                }
+            }
+            TileNodeKind::Node { ref children, .. } => {
+                // There's two conditions that cause a node to merge its children:
+                // (1) If _all_ the child nodes are constantly invalidating, then we are wasting
+                //     CPU time tracking dependencies for each child, so merge them.
+                // (2) If _none_ of the child nodes are recently invalid, then the page content
+                //     has probably changed, and we no longer need to track fine grained dependencies here.
+                // Children that are not leaves, or that fit neither case, count towards neither total.
+                let mut static_count = 0;
+                let mut changing_count = 0;
+
+                for child in children {
+                    // Only consider merging nodes at the edge of the tree.
+                    if let TileNodeKind::Leaf { dirty_tracker, frames_since_modified, .. } = child.kind {
+                        if let Some(dirty_frames) = get_dirty_frames(dirty_tracker, frames_since_modified) {
+                            if dirty_frames == 0 {
+                                // Hasn't been invalidated for some time
+                                static_count += 1;
+                            } else if dirty_frames == 64 {
+                                // Is constantly being invalidated
+                                changing_count += 1;
+                            }
+                        }
+                    }
+
+                    // Only merge if all the child tiles are in agreement. Otherwise, we have some
+                    // that are invalidating / static, and it's worthwhile tracking dependencies for
+                    // them individually.
+                    if static_count == 4 || changing_count == 4 {
+                        tile_mod = Some(TileModification::Merge);
+                    }
+                }
+            }
+        }
+
+        match tile_mod {
+            Some(TileModification::Split) => {
+                // To split a node, take the current dependency index buffer for this node, and
+                // split it into child index buffers.
+                let curr_indices = match self.kind {
+                    TileNodeKind::Node { .. } => {
+                        unreachable!("bug - only leaves can split");
+                    }
+                    TileNodeKind::Leaf { ref mut curr_indices, .. } => {
+                        curr_indices.take()
+                    }
+                };
+
+                let mut child_rects = [PictureBox2D::zero(); 4];
+                TileNode::get_child_rects(&self.rect, &mut child_rects);
+
+                let mut child_indices = [
+                    Vec::new(),
+                    Vec::new(),
+                    Vec::new(),
+                    Vec::new(),
+                ];
+
+                // Step through the index buffer, and add primitives to each of the children
+                // that they intersect. A primitive may end up in more than one child.
+                for index in curr_indices {
+                    let prim = &curr_prims[index.0 as usize];
+                    for (child_rect, indices) in child_rects.iter().zip(child_indices.iter_mut()) {
+                        if prim.prim_clip_box.intersects(child_rect) {
+                            indices.push(index);
+                        }
+                    }
+                }
+
+                // Create the child nodes and switch from leaf -> node.
+                let children = child_indices
+                    .iter_mut()
+                    .map(|i| TileNode::new_leaf(mem::replace(i, Vec::new())))
+                    .collect();
+
+                self.kind = TileNodeKind::Node {
+                    children,
+                };
+            }
+            Some(TileModification::Merge) => {
+                // Construct a merged index buffer by collecting the dependency index buffers
+                // from each child, and merging them into a de-duplicated index buffer.
+                let merged_indices = match self.kind {
+                    TileNodeKind::Node { ref mut children, .. } => {
+                        let mut merged_indices = Vec::new();
+
+                        for child in children.iter() {
+                            let child_indices = match child.kind {
+                                TileNodeKind::Leaf { ref curr_indices, .. } => {
+                                    curr_indices
+                                }
+                                TileNodeKind::Node { .. } => {
+                                    unreachable!("bug: child is not a leaf");
+                                }
+                            };
+                            merged_indices.extend_from_slice(child_indices);
+                        }
+
+                        merged_indices.sort();
+                        merged_indices.dedup();
+
+                        merged_indices
+                    }
+                    TileNodeKind::Leaf { .. } => {
+                        unreachable!("bug - trying to merge a leaf");
+                    }
+                };
+
+                // Switch from a node to a leaf, with the combined index buffer and a reset invalidation history
+                self.kind = TileNodeKind::Leaf {
+                    prev_indices: Vec::new(),
+                    curr_indices: merged_indices,
+                    dirty_tracker: 0,
+                    frames_since_modified: 0,
+                };
+            }
+            None => {
+                // If this node didn't merge / split, then recurse into children
+                // to see if they want to split / merge.
+                if let TileNodeKind::Node { ref mut children, .. } = self.kind {
+                    for child in children.iter_mut() {
+                        child.maybe_merge_or_split(
+                            level+1,
+                            curr_prims,
+                            max_split_levels,
+                        );
+                    }
+                }
+            }
+        }
+    }
+
+    /// Update the dirty state of this node (recursing into children), building the overall dirty rect and recording the first invalidation reason
+    fn update_dirty_rects(
+        &mut self,
+        prev_prims: &[PrimitiveDescriptor],
+        curr_prims: &[PrimitiveDescriptor],
+        prim_comparer: &mut PrimitiveComparer,
+        dirty_rect: &mut PictureBox2D,
+        compare_cache: &mut FastHashMap<PrimitiveComparisonKey, PrimitiveCompareResult>,
+        invalidation_reason: &mut Option<InvalidationReason>,
+        frame_context: &FrameVisibilityContext,
+    ) {
+        match self.kind {
+            TileNodeKind::Node { ref mut children, .. } => {
+                for child in children.iter_mut() {
+                    child.update_dirty_rects(
+                        prev_prims,
+                        curr_prims,
+                        prim_comparer,
+                        dirty_rect,
+                        compare_cache,
+                        invalidation_reason,
+                        frame_context,
+                    );
+                }
+            }
+            TileNodeKind::Leaf { ref prev_indices, ref curr_indices, ref mut dirty_tracker, .. } => {
+                // If the index buffers are of different length, they must be different
+                if prev_indices.len() == curr_indices.len() {
+                    let mut prev_i0 = 0;
+                    let mut prev_i1 = 0;
+                    prim_comparer.reset();
+
+                    // Walk each index buffer, comparing primitives
+                    for (prev_index, curr_index) in prev_indices.iter().zip(curr_indices.iter()) {
+                        let i0 = prev_index.0 as usize;
+                        let i1 = curr_index.0 as usize;
+
+                        // Advance the dependency arrays for each primitive (this handles
+                        // prims that may be skipped by these index buffers).
+                        for i in prev_i0 .. i0 {
+                            prim_comparer.advance_prev(&prev_prims[i]);
+                        }
+                        for i in prev_i1 .. i1 {
+                            prim_comparer.advance_curr(&curr_prims[i]);
+                        }
+
+                        // Compare the primitives, caching the result in a hash map
+                        // to save comparisons in other tree nodes.
+                        let key = PrimitiveComparisonKey {
+                            prev_index: *prev_index,
+                            curr_index: *curr_index,
+                        };
+
+                        let prim_compare_result = *compare_cache
+                            .entry(key)
+                            .or_insert_with(|| {
+                                let prev = &prev_prims[i0];
+                                let curr = &curr_prims[i1];
+                                prim_comparer.compare_prim(prev, curr)
+                            });
+
+                        // If not the same, mark this node as dirty, grow the dirty rect and stop at the first difference
+                        if prim_compare_result != PrimitiveCompareResult::Equal {
+                            if invalidation_reason.is_none() {
+                                *invalidation_reason = Some(InvalidationReason::Content);
+                            }
+                            *dirty_rect = self.rect.union(dirty_rect);
+                            *dirty_tracker = *dirty_tracker | 1;
+                            break;
+                        }
+                        // The next iteration resumes advancing the dependency arrays from these indices.
+                        prev_i0 = i0;
+                        prev_i1 = i1;
+                    }
+                } else {
+                    if invalidation_reason.is_none() {
+                        *invalidation_reason = Some(InvalidationReason::PrimCount);
+                    }
+                    *dirty_rect = self.rect.union(dirty_rect);
+                    *dirty_tracker = *dirty_tracker | 1;
+                }
+            }
+        }
+    }
+}
+
+impl CompositeState {
+    /// Helper that destroys any allocated native surfaces belonging to the given tiles.
+    pub fn destroy_native_tiles<'a, I: Iterator<Item = &'a mut Box<Tile>>>(
+        &mut self,
+        tiles_iter: I,
+        resource_cache: &mut ResourceCache,
+    ) {
+        // For simple composite mode, the texture cache handle will expire and be
+        // collected by the texture cache. Only native compositor mode needs an
+        // explicit callback to the client to destroy each surface.
+        match self.compositor_kind {
+            CompositorKind::Native { .. } => {}
+            _ => return,
+        }
+        for tile in tiles_iter {
+            // Display port tiles may be created without ever coming on screen, in
+            // which case no native surface was allocated and there is no id to take.
+            if let Some(TileSurface::Texture { descriptor: SurfaceTextureDescriptor::Native { id, .. }, .. }) = &mut tile.surface {
+                if let Some(allocated_id) = id.take() {
+                    resource_cache.destroy_compositor_tile(allocated_id);
+                }
+            }
+        }
+    }
+}
+
+/// Returns the scale / offset relating the child spatial node to the parent
+/// spatial node, with the offset snapped to whole numbers.
+fn get_relative_scale_offset(
+    child_spatial_node_index: SpatialNodeIndex,
+    parent_spatial_node_index: SpatialNodeIndex,
+    spatial_tree: &SpatialTree,
+) -> ScaleOffset {
+    let mut relative = match spatial_tree.get_relative_transform(
+        child_spatial_node_index,
+        parent_spatial_node_index,
+    ) {
+        CoordinateSpaceMapping::Local => ScaleOffset::identity(),
+        CoordinateSpaceMapping::ScaleOffset(existing) => existing,
+        CoordinateSpaceMapping::Transform(matrix) => {
+            ScaleOffset::from_transform(&matrix)
+                .expect("bug: pictures caches don't support complex transforms")
+        }
+    };
+
+    // Compositors expect things to be aligned on device pixels. Higher level logic
+    // ensures that, but floating point inaccuracy can leave small differences; drop them.
+    relative.offset = relative.offset.round();
+    relative
+}
+
+/// Express `p` relative to `clipped`: (0, 0) at its min corner and (1, 1) at its max corner.
+pub fn calculate_screen_uv(
+    p: DevicePoint,
+    clipped: DeviceRect,
+) -> DeviceHomogeneousVector {
+    // TODO(gw): Switch to a simple mix, no bilerp / homogeneous vec needed anymore
+    let u = (p.x - clipped.min.x) / (clipped.max.x - clipped.min.x);
+    let v = (p.y - clipped.min.y) / (clipped.max.y - clipped.min.y);
+
+    // The last two components are always 0 and 1.
+    DeviceHomogeneousVector::new(u, v, 0.0, 1.0)
+}
+
+/// Compute the rects and render task size needed to allocate a picture surface.
+///
+/// Returns `None` if the surface is completely clipped out, or if the task would
+/// be zero sized. When the task would exceed `max_surface_size`, the surface is
+/// switched to rasterize in its own space at a reduced `device_pixel_scale`.
+fn get_surface_rects(
+    surface_index: SurfaceIndex,
+    composite_mode: &PictureCompositeMode,
+    parent_surface_index: SurfaceIndex,
+    surfaces: &mut [SurfaceInfo],
+    spatial_tree: &SpatialTree,
+    max_surface_size: f32,
+) -> Option<SurfaceAllocInfo> {
+    let parent_surface = &surfaces[parent_surface_index.0];
+
+    let local_to_parent = SpaceMapper::new_with_target(
+        parent_surface.surface_spatial_node_index,
+        surfaces[surface_index.0].surface_spatial_node_index,
+        parent_surface.clipping_rect,
+        spatial_tree,
+    );
+
+    // Unmap the parent's clipping rect to get the local clip rect, falling back
+    // to an unbounded rect when the unmapping fails.
+    let local_clip_rect = local_to_parent
+        .unmap(&parent_surface.clipping_rect)
+        .unwrap_or(PictureRect::max_rect())
+        .cast_unit();
+
+    let surface = &mut surfaces[surface_index.0];
+
+    let (clipped_local, unclipped_local) = match composite_mode {
+        PictureCompositeMode::Filter(Filter::DropShadows(ref shadows)) => {
+            let local_prim_rect = surface.clipped_local_rect;
+
+            // Nothing to allocate if the content itself is clipped out.
+            let mut required_local_rect = local_prim_rect.intersection(&local_clip_rect)?;
+
+            for shadow in shadows {
+                let (blur_radius_x, blur_radius_y) = surface.clamp_blur_radius(
+                    shadow.blur_radius,
+                    shadow.blur_radius,
+                );
+                let blur_inflation_x = blur_radius_x * BLUR_SAMPLE_SCALE;
+                let blur_inflation_y = blur_radius_y * BLUR_SAMPLE_SCALE;
+
+                let local_shadow_rect = local_prim_rect
+                    .translate(shadow.offset.cast_unit());
+
+                // Grow the required rect by the source area of each visible shadow:
+                // the clipped shadow, inflated for blur, moved back by the offset.
+                if let Some(clipped_shadow_rect) = local_clip_rect.intersection(&local_shadow_rect) {
+                    let required_shadow_rect = clipped_shadow_rect.inflate(blur_inflation_x, blur_inflation_y);
+
+                    let local_clipped_shadow_rect = required_shadow_rect.translate(-shadow.offset.cast_unit());
+
+                    required_local_rect = required_local_rect.union(&local_clipped_shadow_rect);
+                }
+            }
+
+            let unclipped = composite_mode.get_rect(surface, None);
+            let clipped = required_local_rect.intersection(&unclipped.cast_unit())?;
+
+            (clipped, unclipped)
+        }
+        _ => {
+            // All other modes work on rects normalized to the surface origin.
+            let surface_origin = surface.clipped_local_rect.min.to_vector().cast_unit();
+
+            let normalized_prim_rect = composite_mode
+                .get_rect(surface, None)
+                .translate(-surface_origin);
+
+            let normalized_clip_rect = local_clip_rect
+                .cast_unit()
+                .translate(-surface_origin);
+
+            let norm_clipped_rect = normalized_prim_rect.intersection(&normalized_clip_rect)?;
+
+            // Let the composite mode adjust the clipped rect, then make sure the
+            // result never extends beyond the unclipped rect.
+            let norm_clipped_rect = composite_mode.get_rect(surface, Some(norm_clipped_rect));
+            let norm_clipped_rect = norm_clipped_rect.intersection(&normalized_prim_rect)?;
+
+            let unclipped = normalized_prim_rect.translate(surface_origin);
+            let clipped = norm_clipped_rect.translate(surface_origin);
+
+            (clipped.cast_unit(), unclipped.cast_unit())
+        }
+    };
+
+    let (mut clipped, mut unclipped) = if surface.raster_spatial_node_index != surface.surface_spatial_node_index {
+        assert_eq!(surface.device_pixel_scale.0, 1.0);
+
+        let local_to_world = SpaceMapper::new_with_target(
+            spatial_tree.root_reference_frame_index(),
+            surface.surface_spatial_node_index,
+            WorldRect::max_rect(),
+            spatial_tree,
+        );
+
+        let clipped = (local_to_world.map(&clipped_local.cast_unit()).unwrap() * surface.device_pixel_scale).round_out();
+        let unclipped = local_to_world.map(&unclipped_local).unwrap() * surface.device_pixel_scale;
+
+        (clipped, unclipped)
+    } else {
+        let clipped = (clipped_local.cast_unit() * surface.device_pixel_scale).round_out();
+        let unclipped = unclipped_local.cast_unit() * surface.device_pixel_scale;
+
+        (clipped, unclipped)
+    };
+
+    let task_size_f = clipped.size();
+
+    // Too large for a single task: rasterize in the surface's own space, with a
+    // scale derived from the largest local dimension and the maximum surface size.
+    if task_size_f.width > max_surface_size || task_size_f.height > max_surface_size {
+        let max_dimension = clipped_local.width().max(clipped_local.height()).ceil();
+
+        surface.raster_spatial_node_index = surface.surface_spatial_node_index;
+        surface.device_pixel_scale = Scale::new(max_surface_size / max_dimension);
+
+        clipped = (clipped_local.cast_unit() * surface.device_pixel_scale).round();
+        unclipped = unclipped_local.cast_unit() * surface.device_pixel_scale;
+    }
+
+    let task_size = clipped.size().to_i32();
+    debug_assert!(task_size.width <= max_surface_size as i32);
+    debug_assert!(task_size.height <= max_surface_size as i32);
+
+    let uv_rect_kind = calculate_uv_rect_kind(
+        clipped,
+        unclipped,
+    );
+
+    // If the task size is zero sized, skip creation and drawing of it
+    if task_size.width == 0 || task_size.height == 0 {
+        return None;
+    }
+
+    // If the final clipped surface rect is not the same or larger as the unclipped
+    // local rect of the surface, we need to enable scissor rect (which disables
+    // merging batches between this and other render tasks allocated to the same
+    // render target). This is conservative - we could do better in future by
+    // distinguishing between clips that affect the surface itself vs. clips on
+    // child primitives that don't affect this.
+    let needs_scissor_rect = !clipped_local.contains_box(&surface.unclipped_local_rect);
+
+    Some(SurfaceAllocInfo {
+        task_size,
+        needs_scissor_rect,
+        clipped,
+        unclipped,
+        clipped_local,
+        uv_rect_kind,
+    })
+}
+
+/// Build the UV quad for a surface from its clipped and unclipped device rects.
+///
+/// Each corner of `unclipped` is converted to a UV coordinate relative to
+/// `clipped` via `calculate_screen_uv`, so a corner on the min corner of
+/// `clipped` maps to (0, 0) and one on its max corner maps to (1, 1). Corners
+/// outside of `clipped` produce values outside of that range.
+fn calculate_uv_rect_kind(
+    clipped: DeviceRect,
+    unclipped: DeviceRect,
+) -> UvRectKind {
+    // All four corners are measured against the same clipped rect, so share
+    // the mapping between them.
+    let to_uv = |corner: DevicePoint| {
+        calculate_screen_uv(corner, clipped)
+    };
+
+    // The four corners of the unclipped rect, cast to the unit expected by
+    // the mapping above.
+    let top_left = unclipped.top_left().cast_unit();
+    let top_right = unclipped.top_right().cast_unit();
+    let bottom_left = unclipped.bottom_left().cast_unit();
+    let bottom_right = unclipped.bottom_right().cast_unit();
+
+    // Map each corner and assemble the quad.
+    UvRectKind::Quad {
+        top_left: to_uv(top_left),
+        top_right: to_uv(top_right),
+        bottom_left: to_uv(bottom_left),
+        bottom_right: to_uv(bottom_right),
+    }
+}
+
+#[test]
+fn test_large_surface_scale_1() {
+    use crate::spatial_tree::{SceneSpatialTree, SpatialTree};
+
+    let mut cst = SceneSpatialTree::new();
+    let root_reference_frame_index = cst.root_reference_frame_index();
+
+    let mut spatial_tree = SpatialTree::new();
+    spatial_tree.apply_updates(cst.end_frame_and_get_pending_updates());
+    spatial_tree.update_tree(&SceneProperties::new());
+
+    let map_local_to_surface = SpaceMapper::new_with_target(
+        root_reference_frame_index,
+        root_reference_frame_index,
+        PictureRect::max_rect(),
+        &spatial_tree,
+    );
+    // Surface 0 acts as the unbounded parent; surface 1 is the surface under test.
+    let mut surfaces = vec![
+        SurfaceInfo {
+            unclipped_local_rect: PictureRect::max_rect(),
+            clipped_local_rect: PictureRect::max_rect(),
+            is_opaque: true,
+            clipping_rect: PictureRect::max_rect(),
+            map_local_to_surface: map_local_to_surface.clone(),
+            raster_spatial_node_index: root_reference_frame_index,
+            surface_spatial_node_index: root_reference_frame_index,
+            device_pixel_scale: DevicePixelScale::new(1.0),
+            world_scale_factors: (1.0, 1.0),
+            local_scale: (1.0, 1.0),
+            allow_snapping: true,
+        },
+        SurfaceInfo {
+            unclipped_local_rect: PictureRect::new(
+                PicturePoint::new(52.76350021362305, 0.0),
+                PicturePoint::new(159.6738739013672, 35.0),
+            ),
+            clipped_local_rect: PictureRect::max_rect(),
+            is_opaque: true,
+            clipping_rect: PictureRect::max_rect(),
+            map_local_to_surface,
+            raster_spatial_node_index: root_reference_frame_index,
+            surface_spatial_node_index: root_reference_frame_index,
+            device_pixel_scale: DevicePixelScale::new(43.82798767089844),
+            world_scale_factors: (1.0, 1.0),
+            local_scale: (1.0, 1.0),
+            allow_snapping: true,
+        },
+    ];
+    // The result is ignored: this only checks that the call completes without panicking.
+    get_surface_rects(
+        SurfaceIndex(1),
+        &PictureCompositeMode::Blit(BlitReason::ISOLATE),
+        SurfaceIndex(0),
+        &mut surfaces,
+        &spatial_tree,
+        MAX_SURFACE_SIZE as f32,
+    );
+}
diff --git a/gfx/wr/webrender/src/picture_graph.rs b/gfx/wr/webrender/src/picture_graph.rs
new file mode 100644
index 0000000000..1446784ca6
--- /dev/null
+++ b/gfx/wr/webrender/src/picture_graph.rs
@@ -0,0 +1,212 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use crate::frame_builder::FrameBuildingContext;
+use crate::internal_types::FastHashMap;
+use crate::prim_store::PictureIndex;
+use crate::picture::{PicturePrimitive, SurfaceIndex, SurfaceInfo};
+use crate::picture::{TileCacheInstance, SliceId};
+use smallvec::SmallVec;
+
+#[derive(Debug)]
+pub struct PictureInfo {
+    pub update_pass: Option<usize>, // pass this picture is updated in; `None` until one is assigned
+    pub surface_index: Option<SurfaceIndex>, // filled in by `PictureGraph::assign_surfaces`
+    pub parent: Option<PictureIndex>, // picture this one was last reached from; `None` for roots
+}
+
+/// A graph of picture dependencies, allowing updates to be processed without recursion
+/// by building a list of passes.
+pub struct PictureGraph {
+    roots: Vec<PictureIndex>, // starting points, registered via `add_root`
+    pic_info: Vec<PictureInfo>, // per-picture state, indexed by the picture's index
+    update_passes: Vec<Vec<PictureIndex>>, // pictures grouped by their assigned update pass
+}
+
+impl PictureGraph {
+    /// Create an empty graph, with no roots and no update passes
+    pub fn new() -> Self {
+        Self {
+            roots: Vec::new(),
+            pic_info: Vec::new(),
+            update_passes: Vec::new(),
+        }
+    }
+
+    /// Add a root picture to the graph
+    /// (roots are the starting points used by `build_update_passes`)
+    pub fn add_root(
+        &mut self,
+        pic_index: PictureIndex,
+    ) {
+        self.roots.push(pic_index);
+    }
+
+    /// Build a list of update passes based on the dependencies between pictures
+    ///
+    /// Any per-picture info and passes from a previous call are discarded.
+    pub fn build_update_passes(
+        &mut self,
+        pictures: &mut [PicturePrimitive],
+        frame_context: &FrameBuildingContext
+    ) {
+        // Reset the per-picture info, so every picture starts out with no pass,
+        // parent or surface assigned.
+        self.pic_info.clear();
+        self.pic_info.resize_with(pictures.len(), || PictureInfo {
+            update_pass: None,
+            parent: None,
+            surface_index: None,
+        });
+
+        // Walk the graph from each root, recording the highest pass index used.
+        let mut max_pass_index = 0;
+
+        for &root_index in &self.roots {
+            assign_update_pass(
+                root_index,
+                None,
+                0,
+                pictures,
+                &mut self.pic_info,
+                &mut max_pass_index,
+                frame_context,
+            );
+        }
+
+        // Pass indices start at zero, so there is one more pass than the highest index.
+        self.update_passes.clear();
+        self.update_passes.resize_with(max_pass_index + 1, Vec::new);
+
+        // Pictures that were not assigned a pass are left out of the pass lists.
+        for (pic_index, info) in self.pic_info.iter().enumerate() {
+            if let Some(update_pass) = info.update_pass {
+                self.update_passes[update_pass].push(PictureIndex(pic_index));
+            }
+        }
+    }
+
+    /// Assign surfaces and scale factors to each picture (root -> leaf ordered pass)
+    pub fn assign_surfaces(
+        &mut self,
+        pictures: &mut [PicturePrimitive],
+        surfaces: &mut Vec<SurfaceInfo>,
+        tile_caches: &mut FastHashMap<SliceId, Box<TileCacheInstance>>,
+        frame_context: &FrameBuildingContext,
+    ) {
+        for pass in &self.update_passes {
+            for pic_index in pass {
+                // Can unwrap here as by the time we have a parent that parent's
+                // surface must have been assigned.
+                let parent_surface_index = self.pic_info[pic_index.0]
+                    .parent
+                    .map(|parent| self.pic_info[parent.0].surface_index.unwrap());
+
+                let own_surface_index = pictures[pic_index.0].assign_surface(
+                    frame_context,
+                    parent_surface_index,
+                    tile_caches,
+                    surfaces,
+                );
+
+                // A picture without a surface of its own is recorded against the
+                // surface of its parent. This panics if the picture has neither a
+                // surface of its own nor a parent.
+                let surface_index = own_surface_index
+                    .unwrap_or_else(|| parent_surface_index.unwrap());
+
+                self.pic_info[pic_index.0].surface_index = Some(surface_index);
+            }
+        }
+    }
+
+    /// Propagate bounding rects from pictures to parents (leaf -> root ordered pass)
+    pub fn propagate_bounding_rects(
+        &mut self,
+        pictures: &mut [PicturePrimitive],
+        surfaces: &mut [SurfaceInfo],
+        frame_context: &FrameBuildingContext,
+    ) {
+        // Walk the passes back to front, i.e. in the reverse of assignment order.
+        for pass in self.update_passes.iter().rev() {
+            for pic_index in pass {
+                let info = &self.pic_info[pic_index.0];
+
+                let surface_index = info
+                    .surface_index
+                    .expect("bug: no surface assigned during propagate_bounding_rects");
+
+                // Can unwrap here as by the time we have a parent that parent's
+                // surface must have been assigned.
+                let parent_surface_index = info
+                    .parent
+                    .map(|parent| self.pic_info[parent.0].surface_index.unwrap());
+
+                pictures[pic_index.0].propagate_bounding_rect(
+                    surface_index,
+                    parent_surface_index,
+                    surfaces,
+                    frame_context,
+                );
+            }
+        }
+    }
+}
+
+/// Recursive function that assigns pictures to the earliest pass possible that they
+/// can be processed in, while maintaining dependency ordering.
+fn assign_update_pass(
+    pic_index: PictureIndex,
+    parent_pic_index: Option<PictureIndex>,
+    pass: usize,
+    pictures: &mut [PicturePrimitive],
+    pic_info: &mut [PictureInfo],
+    max_pass_index: &mut usize,
+    frame_context: &FrameBuildingContext
+) {
+    let pic = &mut pictures[pic_index.0];
+    let info = &mut pic_info[pic_index.0];
+
+    // The parent is recorded on every visit, including visits that bail out
+    // early further down.
+    info.parent = parent_pic_index;
+
+    // Run pre-update to resolve animation properties etc
+    pic.pre_update(frame_context);
+
+    let can_be_drawn = match info.update_pass {
+        // No point in recursing into paths in the graph if this picture already
+        // has been set to update after this pass.
+        Some(update_pass) if update_pass > pass => return,
+        // Already in the graph at this pass or an earlier one: (re)assign it below.
+        Some(_) => true,
+        // Check if this picture can be dropped from the graph we're building this frame
+        None => pic.is_visible(frame_context.spatial_tree),
+    };
+
+    if !can_be_drawn {
+        return;
+    }
+
+    info.update_pass = Some(pass);
+    *max_pass_index = pass.max(*max_pass_index);
+
+    // Take a copy of the child list, so that the borrow of this picture ends before
+    // `pictures` is handed on to the recursive calls.
+    let child_pictures: SmallVec<[PictureIndex; 8]> =
+        SmallVec::from_slice(&pic.prim_list.child_pictures);
+
+    // Children are always placed one pass after their parent.
+    for child_pic_index in child_pictures {
+        assign_update_pass(
+            child_pic_index,
+            Some(pic_index),
+            pass + 1,
+            pictures,
+            pic_info,
+            max_pass_index,
+            frame_context,
+        );
+    }
+}
diff --git a/gfx/wr/webrender/src/picture_textures.rs b/gfx/wr/webrender/src/picture_textures.rs
new file mode 100644
index 0000000000..4345951bb9
--- /dev/null
+++ b/gfx/wr/webrender/src/picture_textures.rs
@@ -0,0 +1,382 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use std::mem;
+use smallvec::SmallVec;
+use api::{ImageFormat, ImageBufferKind, DebugFlags};
+use api::units::*;
+use crate::device::TextureFilter;
+use crate::internal_types::{
+    CacheTextureId, TextureUpdateList, Swizzle, TextureCacheAllocInfo, TextureCacheCategory,
+    TextureSource, FrameStamp, FrameId,
+};
+use crate::profiler::{self, TransactionProfile};
+use crate::gpu_types::{ImageSource, UvRectKind};
+use crate::gpu_cache::{GpuCache, GpuCacheHandle};
+use crate::freelist::{FreeList, FreeListHandle, WeakFreeListHandle};
+
+
+/// Variant-less marker type used as the type tag of the picture cache entry
+/// `FreeList` and of the handles that point into it.
+#[derive(Debug, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum PictureCacheEntryMarker {}
+
+// Registers the marker with `malloc_size_of` as contributing zero bytes
+// (going by the macro's name).
+malloc_size_of::malloc_size_of_is_0!(PictureCacheEntryMarker);
+
+/// Weak handle to a `PictureCacheEntry`; this is the handle type returned by
+/// `PictureTextures::get_or_allocate_tile`.
+pub type PictureCacheTextureHandle = WeakFreeListHandle<PictureCacheEntryMarker>;
+
+use std::cmp;
+
+/// Stores information related to a single picture cache tile: its size, when
+/// it was last requested, its GPU cache handle and the texture that backs it.
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct PictureCacheEntry {
+    /// Size of the requested tile.
+    pub size: DeviceIntSize,
+    /// The last frame this item was requested for rendering. Refreshed by
+    /// `PictureTextures::request` and compared against the current frame by
+    /// `PictureTextures::expire_old_tiles`.
+    // TODO(gw): This stamp is only used for picture cache tiles, and some checks
+    //           in the glyph cache eviction code. We could probably remove it
+    //           entirely in future (or move to EntryDetails::Picture).
+    pub last_access: FrameStamp,
+    /// Handle to the resource rect in the GPU cache.
+    pub uv_rect_handle: GpuCacheHandle,
+    /// Texture filter, copied from the owning `PictureTextures` when the
+    /// entry is created.
+    pub filter: TextureFilter,
+    /// The actual device texture ID this is part of.
+    pub texture_id: CacheTextureId,
+}
+
+impl PictureCacheEntry {
+    /// Writes this tile's UV rect into the GPU cache, if the cache asks for it.
+    ///
+    /// The rect starts at the origin and spans `self.size`. Nothing is written
+    /// when `gpu_cache.request` returns `None` for `uv_rect_handle`.
+    fn update_gpu_cache(&mut self, gpu_cache: &mut GpuCache) {
+        let mut request = match gpu_cache.request(&mut self.uv_rect_handle) {
+            Some(request) => request,
+            None => return,
+        };
+
+        let top_left = DeviceIntPoint::zero();
+        let bottom_right = top_left + self.size;
+
+        ImageSource {
+            p0: top_left.to_f32(),
+            p1: bottom_right.to_f32(),
+            uv_rect_kind: UvRectKind::Rect,
+            user_data: [0.0; 4],
+        }
+        .write_gpu_blocks(&mut request);
+    }
+}
+
+/// A single texture in the picture cache pool, able to back one tile at a time.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct PictureTexture {
+    /// Id of the device texture.
+    texture_id: CacheTextureId,
+    /// Size the texture was allocated with; a free texture is only reused for
+    /// a tile of exactly this size.
+    size: DeviceIntSize,
+    /// True while a cache entry is using this texture.
+    is_allocated: bool,
+    /// `FrameId::INVALID` while the texture is allocated; set to the current
+    /// frame when the tile is freed, and used by `PictureTextures::gc` to
+    /// order free textures.
+    last_frame_used: FrameId,
+}
+
+/// The pool of textures used to hold picture cache tiles, together with the
+/// cache entries that map tile handles onto those textures.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct PictureTextures {
+    /// Current list of textures in the pool (both in-use and free ones)
+    textures: Vec<PictureTexture>,
+    /// Default tile size for content tiles
+    default_tile_size: DeviceIntSize,
+    /// Number of currently allocated textures in the pool
+    allocated_texture_count: usize,
+    /// Texture filter to use for picture cache textures
+    filter: TextureFilter,
+
+    /// Debug flags, replaced wholesale by `set_debug_flags` and consulted when
+    /// a tile is freed.
+    debug_flags: DebugFlags,
+
+    /// Cache of picture cache entries.
+    cache_entries: FreeList<PictureCacheEntry, PictureCacheEntryMarker>,
+    /// Strong handles for the `cache_entries` FreeList.
+    cache_handles: Vec<FreeListHandle<PictureCacheEntryMarker>>,
+
+    /// Stamp of the current frame, set by `begin_frame`. Starts out as
+    /// `FrameStamp::INVALID`.
+    now: FrameStamp,
+}
+
+impl PictureTextures {
+    /// Creates an empty pool that uses `filter` for its textures.
+    ///
+    /// The pool starts with no textures, no cache entries, no debug flags and
+    /// an invalid frame stamp (until `begin_frame` is called).
+    pub fn new(
+        default_tile_size: DeviceIntSize,
+        filter: TextureFilter,
+    ) -> Self {
+        Self {
+            now: FrameStamp::INVALID,
+            debug_flags: DebugFlags::empty(),
+            filter,
+            default_tile_size,
+            allocated_texture_count: 0,
+            textures: Vec::new(),
+            cache_entries: FreeList::new(),
+            cache_handles: Vec::new(),
+        }
+    }
+
+    /// Starts a new frame: records `stamp` as the current frame stamp, then
+    /// expires tiles that were not requested recently. Any updates produced
+    /// while expiring are appended to `pending_updates`.
+    pub fn begin_frame(&mut self, stamp: FrameStamp, pending_updates: &mut TextureUpdateList) {
+        self.now = stamp;
+
+        // Expire picture cache tiles that haven't been referenced in the last frame.
+        // The picture cache code manually keeps tiles alive by calling `request` on
+        // them if it wants to retain a tile that is currently not visible.
+        self.expire_old_tiles(pending_updates);
+    }
+
+    /// Returns the default tile size this pool was created with.
+    pub fn default_tile_size(&self) -> DeviceIntSize {
+        self.default_tile_size
+    }
+
+    /// Makes sure `handle` refers to a live tile, then refreshes that tile's
+    /// GPU cache data.
+    ///
+    /// If `handle` is `None`, or refers to an entry that no longer exists, a
+    /// tile of `tile_size` is obtained from `get_or_allocate_tile` and `handle`
+    /// is overwritten with the new handle. `next_texture_id` and
+    /// `pending_updates` are only forwarded to that call.
+    pub fn update(
+        &mut self,
+        tile_size: DeviceIntSize,
+        handle: &mut Option<PictureCacheTextureHandle>,
+        gpu_cache: &mut GpuCache,
+        next_texture_id: &mut CacheTextureId,
+        pending_updates: &mut TextureUpdateList,
+    ) {
+        debug_assert!(self.now.is_valid());
+        debug_assert!(tile_size.width > 0 && tile_size.height > 0);
+
+        // A missing handle and a handle whose entry has been evicted both
+        // require a fresh tile.
+        let entry_is_missing = match *handle {
+            Some(ref existing) => !self.entry_exists(existing),
+            None => true,
+        };
+
+        if entry_is_missing {
+            *handle = Some(self.get_or_allocate_tile(
+                tile_size,
+                next_texture_id,
+                pending_updates,
+            ));
+        }
+
+        match *handle {
+            Some(ref live_handle) => {
+                // Upload the resource rect and texture array layer.
+                self.cache_entries
+                    .get_opt_mut(live_handle)
+                    .expect("BUG: handle must be valid now")
+                    .update_gpu_cache(gpu_cache);
+            }
+            None => panic!("The handle should be valid picture cache handle now"),
+        }
+    }
+
+    /// Creates a cache entry for a tile of `tile_size` and returns a weak
+    /// handle to it.
+    ///
+    /// A free pool texture of exactly `tile_size` is reused when one exists.
+    /// Otherwise a new texture id is taken from `next_texture_id` (which is
+    /// then incremented), an allocation command is pushed onto
+    /// `pending_updates`, and the texture is added to the pool.
+    pub fn get_or_allocate_tile(
+        &mut self,
+        tile_size: DeviceIntSize,
+        next_texture_id: &mut CacheTextureId,
+        pending_updates: &mut TextureUpdateList,
+    ) -> PictureCacheTextureHandle {
+        self.allocated_texture_count += 1;
+
+        // Look for a target that's not currently in use and matches the size.
+        let reusable = self.textures
+            .iter_mut()
+            .find(|texture| !texture.is_allocated && texture.size == tile_size);
+
+        let texture_id = match reusable {
+            Some(texture) => {
+                // Mark it as in use, and reset the last_frame_used that GC relies on.
+                texture.is_allocated = true;
+                texture.last_frame_used = FrameId::INVALID;
+                texture.texture_id
+            }
+            None => {
+                // Need to create a new render target and add it to the pool
+                let new_id = *next_texture_id;
+                next_texture_id.0 += 1;
+
+                // Push a command to allocate device storage of the right size / format.
+                pending_updates.push_alloc(
+                    new_id,
+                    TextureCacheAllocInfo {
+                        target: ImageBufferKind::Texture2D,
+                        width: tile_size.width,
+                        height: tile_size.height,
+                        format: ImageFormat::RGBA8,
+                        filter: self.filter,
+                        is_shared_cache: false,
+                        has_depth: true,
+                        category: TextureCacheCategory::PictureTile,
+                    },
+                );
+
+                self.textures.push(PictureTexture {
+                    texture_id: new_id,
+                    is_allocated: true,
+                    size: tile_size,
+                    last_frame_used: FrameId::INVALID,
+                });
+
+                new_id
+            }
+        };
+
+        // Add the cache entry to the cache_entries FreeList. The strong handle
+        // is kept by the pool; the caller only receives the weak handle.
+        let strong_handle = self.cache_entries.insert(PictureCacheEntry {
+            size: tile_size,
+            last_access: self.now,
+            uv_rect_handle: GpuCacheHandle::new(),
+            filter: self.filter,
+            texture_id,
+        });
+        let weak_handle = strong_handle.weak();
+        self.cache_handles.push(strong_handle);
+
+        weak_handle
+    }
+
+    /// Marks the pool texture `id` as free so that it can be reused, and
+    /// records `current_frame_id` as the frame it was last used in.
+    ///
+    /// When both `TEXTURE_CACHE_DBG` and `TEXTURE_CACHE_DBG_CLEAR_EVICTED` are
+    /// set, a debug clear covering the whole texture is pushed onto
+    /// `pending_updates`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `id` is not in the pool, if the texture is not currently
+    /// allocated, or if its `last_frame_used` is not `FrameId::INVALID`.
+    pub fn free_tile(
+        &mut self,
+        id: CacheTextureId,
+        current_frame_id: FrameId,
+        pending_updates: &mut TextureUpdateList,
+    ) {
+        self.allocated_texture_count -= 1;
+
+        let released = self.textures
+            .iter_mut()
+            .find(|candidate| candidate.texture_id == id)
+            .expect("bug: invalid texture id");
+
+        assert!(released.is_allocated);
+        released.is_allocated = false;
+
+        assert_eq!(released.last_frame_used, FrameId::INVALID);
+        released.last_frame_used = current_frame_id;
+
+        let clear_evicted_flags =
+            DebugFlags::TEXTURE_CACHE_DBG |
+            DebugFlags::TEXTURE_CACHE_DBG_CLEAR_EVICTED;
+
+        if !self.debug_flags.contains(clear_evicted_flags) {
+            return;
+        }
+
+        let size = released.size;
+        pending_updates.push_debug_clear(
+            id,
+            DeviceIntPoint::zero(),
+            size.width,
+            size.height,
+        );
+    }
+
+    /// Marks the tile behind `handle` as used in the current frame.
+    ///
+    /// Returns `false` when the entry exists: its `last_access` is set to the
+    /// current frame stamp and its GPU cache data is refreshed. Returns `true`
+    /// when `handle` no longer refers to an entry.
+    pub fn request(&mut self, handle: &PictureCacheTextureHandle, gpu_cache: &mut GpuCache) -> bool {
+        match self.cache_entries.get_opt_mut(handle) {
+            Some(entry) => {
+                // If an image is requested that is already in the cache,
+                // refresh the GPU cache data associated with this item.
+                entry.last_access = self.now;
+                entry.update_gpu_cache(gpu_cache);
+                false
+            }
+            None => true,
+        }
+    }
+
+    /// Returns the texture source of the tile behind `handle`, using the
+    /// default swizzle.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `handle` does not refer to a live entry. In debug builds it
+    /// also asserts that the entry was accessed in the current frame.
+    pub fn get_texture_source(&self, handle: &PictureCacheTextureHandle) -> TextureSource {
+        let tile = self.cache_entries
+            .get_opt(handle)
+            .expect("BUG: was dropped from cache or not updated!");
+        debug_assert_eq!(tile.last_access, self.now);
+
+        let texture_id = tile.texture_id;
+        TextureSource::TextureCache(texture_id, Swizzle::default())
+    }
+
+    /// Evicts every picture cache tile that wasn't requested in the previous
+    /// frame, returning its texture to the pool via `free_tile`.
+    ///
+    /// The picture cache code keeps a currently invisible tile alive by
+    /// calling `request` on it.
+    pub fn expire_old_tiles(&mut self, pending_updates: &mut TextureUpdateList) {
+        // Walk the handles back to front so that `swap_remove` only ever moves
+        // a handle that has already been examined.
+        let mut index = self.cache_handles.len();
+        while index > 0 {
+            index -= 1;
+
+            let last_access = self.cache_entries
+                .get(&self.cache_handles[index])
+                .last_access;
+
+            // This function is called at the beginning of the frame,
+            // so we don't yet know which picture cache tiles will be
+            // requested this frame. Therefore only evict picture cache
+            // tiles which weren't requested in the *previous* frame.
+            if last_access.frame_id() < self.now.frame_id() - 1 {
+                let strong_handle = self.cache_handles.swap_remove(index);
+                let evicted = self.cache_entries.free(strong_handle);
+                self.free_tile(evicted.texture_id, self.now.frame_id(), pending_updates);
+            }
+        }
+    }
+
+    /// Drops every cache entry and every texture in the pool.
+    ///
+    /// Each entry's tile is first released through `free_tile`; then a free
+    /// command is pushed onto `pending_updates` for every pool texture, which
+    /// leaves the pool empty.
+    pub fn clear(&mut self, pending_updates: &mut TextureUpdateList) {
+        let strong_handles = mem::take(&mut self.cache_handles);
+        for strong_handle in strong_handles {
+            let texture_id = self.cache_entries.free(strong_handle).texture_id;
+            self.free_tile(texture_id, self.now.frame_id(), pending_updates);
+        }
+
+        self.textures
+            .drain(..)
+            .for_each(|texture| pending_updates.push_free(texture.texture_id));
+    }
+
+    /// Reports the number of textures currently in the pool (allocated and
+    /// free) under the `PICTURE_TILES` profiler counter.
+    pub fn update_profile(&self, profile: &mut TransactionProfile) {
+        profile.set(profiler::PICTURE_TILES, self.textures.len());
+    }
+
+    /// Simple garbage collect of picture cache tiles.
+    ///
+    /// Does nothing unless the number of free textures exceeds 25% (rounded
+    /// up) of the allocated count. Otherwise the free textures with the most
+    /// recent `last_frame_used` are kept up to that allowance, and a free
+    /// command is pushed onto `pending_updates` for each of the rest.
+    pub fn gc(
+        &mut self,
+        pending_updates: &mut TextureUpdateList,
+    ) {
+        // Allow the picture cache pool to keep 25% of the current allocated tile count
+        // as free textures to be reused. This ensures the allowed tile count is appropriate
+        // based on current window size.
+        let free_texture_count = self.textures.len() - self.allocated_texture_count;
+        let allowed_retained_count = (self.allocated_texture_count as f32 * 0.25).ceil() as usize;
+
+        if free_texture_count <= allowed_retained_count {
+            return;
+        }
+
+        // Order the pool from most to least recently used, so that the oldest
+        // free textures are the ones that fall outside the allowance.
+        self.textures.sort_unstable_by_key(|texture| cmp::Reverse(texture.last_frame_used));
+
+        // We can't just use retain() because `PictureTexture` requires manual cleanup.
+        let mut in_use = SmallVec::<[PictureTexture; 32]>::new();
+        let mut kept_free = SmallVec::<[PictureTexture; 32]>::new();
+
+        for texture in self.textures.drain(..) {
+            if texture.is_allocated {
+                // Allocated targets can't be collected
+                in_use.push(texture);
+                continue;
+            }
+
+            if kept_free.len() < allowed_retained_count {
+                // Retain the most recently used targets up to the allowed count
+                kept_free.push(texture);
+                continue;
+            }
+
+            // The rest of the targets get freed
+            assert_ne!(texture.last_frame_used, FrameId::INVALID);
+            pending_updates.push_free(texture.texture_id);
+        }
+
+        self.textures.extend(kept_free);
+        self.textures.extend(in_use);
+    }
+
+    /// Returns true if `handle` still refers to an entry in the cache.
+    pub fn entry_exists(&self, handle: &PictureCacheTextureHandle) -> bool {
+        self.cache_entries.get_opt(handle).is_some()
+    }
+
+    /// Replaces the stored debug flags with `flags`.
+    pub fn set_debug_flags(&mut self, flags: DebugFlags) {
+        self.debug_flags = flags;
+    }
+
+    /// Returns the texture filter used for picture cache textures.
+    /// Only compiled when the `replay` feature is enabled.
+    #[cfg(feature = "replay")]
+    pub fn filter(&self) -> TextureFilter {
+        self.filter
+    }
+}
diff --git a/gfx/wr/webrender/src/prepare.rs b/gfx/wr/webrender/src/prepare.rs
new file mode 100644
index 0000000000..cca3005023
--- /dev/null
+++ b/gfx/wr/webrender/src/prepare.rs
@@ -0,0 +1,1470 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! # Prepare pass
+//!
+//! TODO: document this!
+
+use std::cmp;
+use api::{PremultipliedColorF, PropertyBinding};
+use api::{BoxShadowClipMode, BorderStyle, ClipMode};
+use api::units::*;
+use euclid::Scale;
+use smallvec::SmallVec;
+use crate::command_buffer::PrimitiveCommand;
+use crate::image_tiling::{self, Repetition};
+use crate::border::{get_max_scale_for_border, build_border_instances};
+use crate::clip::{ClipStore};
+use crate::spatial_tree::{SpatialNodeIndex, SpatialTree};
+use crate::clip::{ClipDataStore, ClipNodeFlags, ClipChainInstance, ClipItemKind};
+use crate::frame_builder::{FrameBuildingContext, FrameBuildingState, PictureContext, PictureState};
+use crate::gpu_cache::{GpuCacheHandle, GpuDataRequest};
+use crate::gpu_types::{BrushFlags};
+use crate::internal_types::{FastHashMap, PlaneSplitAnchor};
+use crate::picture::{PicturePrimitive, SliceId, ClusterFlags};
+use crate::picture::{PrimitiveList, PrimitiveCluster, SurfaceIndex, TileCacheInstance, SubpixelMode, Picture3DContext};
+use crate::prim_store::line_dec::MAX_LINE_DECORATION_RESOLUTION;
+use crate::prim_store::*;
+use crate::prim_store::gradient::GradientGpuBlockBuilder;
+use crate::render_backend::DataStores;
+use crate::render_task_graph::RenderTaskId;
+use crate::render_task_cache::RenderTaskCacheKeyKind;
+use crate::render_task_cache::{RenderTaskCacheKey, to_cache_size, RenderTaskParent};
+use crate::render_task::{RenderTaskKind, RenderTask};
+use crate::segment::SegmentBuilder;
+use crate::util::{clamp_to_scale_factor, pack_as_float};
+use crate::visibility::{compute_conservative_visible_rect, PrimitiveVisibility, VisibilityState};
+
+
+const MAX_MASK_SIZE: f32 = 4096.0;
+
+const MIN_BRUSH_SPLIT_AREA: f32 = 128.0 * 128.0;
+
+
+/// Prepares every primitive in the visible clusters of `prim_list` for rendering.
+///
+/// Clusters without `ClusterFlags::IS_VISIBLE` are skipped entirely. For each
+/// remaining primitive that the surface builder reports as visible and in the
+/// dirty region, `prepare_prim_for_render` is called; when it yields a command,
+/// the command is pushed to the surface builder and
+/// `frame_state.num_visible_primitives` is incremented. Every other primitive
+/// in the cluster has its visibility cleared.
+pub fn prepare_primitives(
+    store: &mut PrimitiveStore,
+    prim_list: &mut PrimitiveList,
+    pic_context: &PictureContext,
+    pic_state: &mut PictureState,
+    frame_context: &FrameBuildingContext,
+    frame_state: &mut FrameBuildingState,
+    data_stores: &mut DataStores,
+    scratch: &mut PrimitiveScratchBuffer,
+    tile_caches: &mut FastHashMap<SliceId, Box<TileCacheInstance>>,
+    prim_instances: &mut Vec<PrimitiveInstance>,
+) {
+    profile_scope!("prepare_primitives");
+    for cluster in &mut prim_list.clusters {
+        if !cluster.flags.contains(ClusterFlags::IS_VISIBLE) {
+            continue;
+        }
+        profile_scope!("cluster");
+        pic_state.map_local_to_pic.set_target_spatial_node(
+            cluster.spatial_node_index,
+            frame_context.spatial_tree,
+        );
+
+        for prim_instance_index in cluster.prim_range() {
+            let is_candidate = frame_state
+                .surface_builder
+                .is_prim_visible_and_in_dirty_region(&prim_instances[prim_instance_index].vis);
+
+            // Only candidates are prepared; everything else is treated like a
+            // primitive whose preparation produced no command.
+            let prepared = if is_candidate {
+                let plane_split_anchor = PlaneSplitAnchor::new(
+                    cluster.spatial_node_index,
+                    PrimitiveInstanceIndex(prim_instance_index as u32),
+                );
+
+                prepare_prim_for_render(
+                    store,
+                    prim_instance_index,
+                    cluster,
+                    pic_context,
+                    pic_state,
+                    frame_context,
+                    frame_state,
+                    plane_split_anchor,
+                    data_stores,
+                    scratch,
+                    tile_caches,
+                    prim_instances,
+                )
+            } else {
+                None
+            };
+
+            match prepared {
+                Some(ref prim_cmd) => {
+                    frame_state.surface_builder.push_prim(
+                        prim_cmd,
+                        cluster.spatial_node_index,
+                        &prim_instances[prim_instance_index].vis,
+                        frame_state.cmd_buffers,
+                    );
+
+                    frame_state.num_visible_primitives += 1;
+                }
+                None => {
+                    // TODO(gw): Technically no need to clear visibility here, since from this point it
+                    //           only matters if it got added to a command buffer. Kept here for now to
+                    //           make debugging simpler, but perhaps we can remove / tidy this up.
+                    prim_instances[prim_instance_index].clear_visibility();
+                }
+            }
+        }
+    }
+}
+
+/// Prepares the primitive instance at `prim_instance_index` for rendering and
+/// returns the command to draw it, or `None` if it should not be drawn.
+///
+/// For a picture primitive, the picture's children are prepared first by
+/// recursing through `prepare_primitives`. `None` is returned when the
+/// picture's `take_context` yields nothing, or when `update_clip_task` reports
+/// failure for a primitive that is not a pass-through picture. Otherwise the
+/// result of `prepare_interned_prim_for_render` is returned.
+fn prepare_prim_for_render(
+    store: &mut PrimitiveStore,
+    prim_instance_index: usize,
+    cluster: &mut PrimitiveCluster,
+    pic_context: &PictureContext,
+    pic_state: &mut PictureState,
+    frame_context: &FrameBuildingContext,
+    frame_state: &mut FrameBuildingState,
+    plane_split_anchor: PlaneSplitAnchor,
+    data_stores: &mut DataStores,
+    scratch: &mut PrimitiveScratchBuffer,
+    tile_caches: &mut FastHashMap<SliceId, Box<TileCacheInstance>>,
+    prim_instances: &mut Vec<PrimitiveInstance>,
+) -> Option<PrimitiveCommand> {
+    profile_scope!("prepare_prim_for_render");
+
+    // If we have dependencies, we need to prepare them first, in order
+    // to know the actual rect of this primitive.
+    // For example, scrolling may affect the location of an item in
+    // local space, which may force us to render this item on a larger
+    // picture target, if being composited.
+    let mut is_passthrough = false;
+    if let PrimitiveInstanceKind::Picture { pic_index, .. } = prim_instances[prim_instance_index].kind {
+        let pic = &mut store.pictures[pic_index.0];
+
+        // TODO(gw): Plan to remove pictures with no composite mode, so that we don't need
+        //           to special case for pass through pictures.
+        is_passthrough = pic.composite_mode.is_none();
+
+        // `take_context` hands back the picture's primitive list along with the
+        // context / state for its children, so that `store` can be passed to
+        // the recursive call below.
+        match pic.take_context(
+            pic_index,
+            Some(pic_context.surface_index),
+            pic_context.subpixel_mode,
+            frame_state,
+            frame_context,
+            scratch,
+            tile_caches,
+        ) {
+            Some((pic_context_for_children, mut pic_state_for_children, mut prim_list)) => {
+                prepare_primitives(
+                    store,
+                    &mut prim_list,
+                    &pic_context_for_children,
+                    &mut pic_state_for_children,
+                    frame_context,
+                    frame_state,
+                    data_stores,
+                    scratch,
+                    tile_caches,
+                    prim_instances,
+                );
+
+                // Restore the dependencies (borrow check dance)
+                store.pictures[pic_context_for_children.pic_index.0]
+                    .restore_context(
+                        pic_context_for_children.pic_index,
+                        prim_list,
+                        pic_context_for_children,
+                        prim_instances,
+                        frame_context,
+                        frame_state,
+                    );
+            }
+            None => {
+                // No context for this picture: nothing to draw for it.
+                return None;
+            }
+        }
+    }
+
+    let prim_instance = &mut prim_instances[prim_instance_index];
+
+    // Pass-through pictures skip the clip task update.
+    if !is_passthrough {
+        let prim_rect = data_stores.get_local_prim_rect(
+            prim_instance,
+            &store.pictures,
+            frame_state.surfaces,
+        );
+
+        if !update_clip_task(
+            prim_instance,
+            &prim_rect.min,
+            cluster.spatial_node_index,
+            pic_context.raster_spatial_node_index,
+            pic_context,
+            pic_state,
+            frame_context,
+            frame_state,
+            store,
+            data_stores,
+            scratch,
+        ) {
+            return None;
+        }
+    }
+
+    Some(prepare_interned_prim_for_render(
+        store,
+        PrimitiveInstanceIndex(prim_instance_index as u32),
+        prim_instance,
+        cluster,
+        plane_split_anchor,
+        pic_context,
+        frame_context,
+        frame_state,
+        data_stores,
+        scratch,
+    ))
+}
+
+/// Prepare an interned primitive for rendering, by requesting
+/// resources, render tasks etc. This is equivalent to the
+/// prepare_prim_for_render_inner call for old style primitives.
+fn prepare_interned_prim_for_render(
+    store: &mut PrimitiveStore,
+    prim_instance_index: PrimitiveInstanceIndex,
+    prim_instance: &mut PrimitiveInstance,
+    cluster: &mut PrimitiveCluster,
+    plane_split_anchor: PlaneSplitAnchor,
+    pic_context: &PictureContext,
+    frame_context: &FrameBuildingContext,
+    frame_state: &mut FrameBuildingState,
+    data_stores: &mut DataStores,
+    scratch: &mut PrimitiveScratchBuffer,
+) -> PrimitiveCommand {
+    let prim_spatial_node_index = cluster.spatial_node_index;
+    let device_pixel_scale = frame_state.surfaces[pic_context.surface_index.0].device_pixel_scale;
+    let mut prim_cmd = None;
+
+    match &mut prim_instance.kind {
+        PrimitiveInstanceKind::LineDecoration { data_handle, ref mut render_task, .. } => {
+            profile_scope!("LineDecoration");
+            let prim_data = &mut data_stores.line_decoration[*data_handle];
+            let common_data = &mut prim_data.common;
+            let line_dec_data = &mut prim_data.kind;
+
+            // Update the template this instance references, which may refresh the GPU
+            // cache with any shared template data.
+            line_dec_data.update(common_data, frame_state);
+
+            // Work out the device pixel size to be used to cache this line decoration.
+
+            // If we have a cache key, it's a wavy / dashed / dotted line. Otherwise, it's
+            // a simple solid line.
+            if let Some(cache_key) = line_dec_data.cache_key.as_ref() {
+                // TODO(gw): These scale factors don't do a great job if the world transform
+                //           contains perspective
+                let scale = frame_context
+                    .spatial_tree
+                    .get_world_transform(prim_spatial_node_index)
+                    .scale_factors();
+
+                // Scale factors are normalized to a power of 2 to reduce the number of
+                // resolution changes.
+                // For frames with a changing scale transform round scale factors up to
+                // nearest power-of-2 boundary so that we don't keep having to redraw
+                // the content as it scales up and down. Rounding up to nearest
+                // power-of-2 boundary ensures we never scale up, only down --- avoiding
+                // jaggies. It also ensures we never scale down by more than a factor of
+                // 2, avoiding bad downscaling quality.
+                let scale_width = clamp_to_scale_factor(scale.0, false);
+                let scale_height = clamp_to_scale_factor(scale.1, false);
+                // Pick the maximum dimension as scale
+                let world_scale = LayoutToWorldScale::new(scale_width.max(scale_height));
+
+                let scale_factor = world_scale * Scale::new(1.0);
+                let mut task_size = (LayoutSize::from_au(cache_key.size) * scale_factor).ceil().to_i32();
+                if task_size.width > MAX_LINE_DECORATION_RESOLUTION as i32 ||
+                   task_size.height > MAX_LINE_DECORATION_RESOLUTION as i32 {
+                     let max_extent = cmp::max(task_size.width, task_size.height);
+                     let task_scale_factor = Scale::new(MAX_LINE_DECORATION_RESOLUTION as f32 / max_extent as f32);
+                     task_size = (LayoutSize::from_au(cache_key.size) * scale_factor * task_scale_factor)
+                                    .ceil().to_i32();
+                }
+
+                // It's plausible, due to float accuracy issues that the line decoration may be considered
+                // visible even if the scale factors are ~0. However, the render task allocation below requires
+                // that the size of the task is > 0. To work around this, ensure that the task size is at least
+                // 1x1 pixels
+                task_size.width = task_size.width.max(1);
+                task_size.height = task_size.height.max(1);
+
+                // Request a pre-rendered image task.
+                // TODO(gw): This match is a bit untidy, but it should disappear completely
+                //           once the prepare_prims and batching are unified. When that
+                //           happens, we can use the cache handle immediately, and not need
+                //           to temporarily store it in the primitive instance.
+                *render_task = Some(frame_state.resource_cache.request_render_task(
+                    RenderTaskCacheKey {
+                        size: task_size,
+                        kind: RenderTaskCacheKeyKind::LineDecoration(cache_key.clone()),
+                    },
+                    frame_state.gpu_cache,
+                    frame_state.frame_gpu_data,
+                    frame_state.rg_builder,
+                    None,
+                    false,
+                    RenderTaskParent::Surface(pic_context.surface_index),
+                    &mut frame_state.surface_builder,
+                    |rg_builder, _| {
+                        rg_builder.add().init(RenderTask::new_dynamic(
+                            task_size,
+                            RenderTaskKind::new_line_decoration(
+                                cache_key.style,
+                                cache_key.orientation,
+                                cache_key.wavy_line_thickness.to_f32_px(),
+                                LayoutSize::from_au(cache_key.size),
+                            ),
+                        ))
+                    }
+                ));
+            }
+        }
+        PrimitiveInstanceKind::TextRun { run_index, data_handle, .. } => {
+            profile_scope!("TextRun");
+            let prim_data = &mut data_stores.text_run[*data_handle];
+            let run = &mut store.text_runs[*run_index];
+
+            prim_data.common.may_need_repetition = false;
+
+            // The glyph transform has to match `glyph_transform` in "ps_text_run" shader.
+            // It's relative to the rasterizing space of a glyph.
+            let transform = frame_context.spatial_tree
+                .get_relative_transform(
+                    prim_spatial_node_index,
+                    pic_context.raster_spatial_node_index,
+                )
+                .into_fast_transform();
+            let prim_offset = prim_data.common.prim_rect.min.to_vector() - run.reference_frame_relative_offset;
+
+            let surface = &frame_state.surfaces[pic_context.surface_index.0];
+
+            // If subpixel AA is disabled due to the backing surface the glyphs
+            // are being drawn onto, disable it (unless we are using the
+            // special subpixel mode that estimates background color).
+            let allow_subpixel = match prim_instance.vis.state {
+                VisibilityState::Culled |
+                VisibilityState::Unset |
+                VisibilityState::PassThrough => {
+                    panic!("bug: invalid visibility state");
+                }
+                VisibilityState::Visible { sub_slice_index, .. } => {
+                    // For now, we only allow subpixel AA on primary sub-slices. In future we
+                    // may support other sub-slices if we find content that does this.
+                    if sub_slice_index.is_primary() {
+                        match pic_context.subpixel_mode {
+                            SubpixelMode::Allow => true,
+                            SubpixelMode::Deny => false,
+                            SubpixelMode::Conditional { allowed_rect } => {
+                                // Conditional mode allows subpixel AA to be enabled for this
+                                // text run, so long as it's inside the allowed rect.
+                                allowed_rect.contains_box(&prim_instance.vis.clip_chain.pic_coverage_rect)
+                            }
+                        }
+                    } else {
+                        false
+                    }
+                }
+            };
+
+            run.request_resources(
+                prim_offset,
+                &prim_data.font,
+                &prim_data.glyphs,
+                &transform.to_transform().with_destination::<_>(),
+                surface,
+                prim_spatial_node_index,
+                allow_subpixel,
+                frame_context.fb_config.low_quality_pinch_zoom,
+                frame_state.resource_cache,
+                frame_state.gpu_cache,
+                frame_context.spatial_tree,
+                scratch,
+            );
+
+            // Update the template this instance references, which may refresh the GPU
+            // cache with any shared template data.
+            prim_data.update(frame_state);
+        }
+        PrimitiveInstanceKind::Clear { data_handle, .. } => {
+            profile_scope!("Clear");
+            let prim_data = &mut data_stores.prim[*data_handle];
+
+            prim_data.common.may_need_repetition = false;
+
+            // Update the template this instance references, which may refresh the GPU
+            // cache with any shared template data.
+            prim_data.update(frame_state, frame_context.scene_properties);
+        }
+        PrimitiveInstanceKind::NormalBorder { data_handle, ref mut render_task_ids, .. } => {
+            profile_scope!("NormalBorder");
+            let prim_data = &mut data_stores.normal_border[*data_handle];
+            let common_data = &mut prim_data.common;
+            let border_data = &mut prim_data.kind;
+
+            common_data.may_need_repetition =
+                matches!(border_data.border.top.style, BorderStyle::Dotted | BorderStyle::Dashed) ||
+                matches!(border_data.border.right.style, BorderStyle::Dotted | BorderStyle::Dashed) ||
+                matches!(border_data.border.bottom.style, BorderStyle::Dotted | BorderStyle::Dashed) ||
+                matches!(border_data.border.left.style, BorderStyle::Dotted | BorderStyle::Dashed);
+
+
+            // Update the template this instance references, which may refresh the GPU
+            // cache with any shared template data.
+            border_data.update(common_data, frame_state);
+
+            // TODO(gw): For now, the scale factors to rasterize borders at are
+            //           based on the true world transform of the primitive. When
+            //           raster roots with local scale are supported in future,
+            //           that will need to be accounted for here.
+            let scale = frame_context
+                .spatial_tree
+                .get_world_transform(prim_spatial_node_index)
+                .scale_factors();
+
+            // Scale factors are normalized to a power of 2 to reduce the number of
+            // resolution changes.
+            // For frames with a changing scale transform round scale factors up to
+            // nearest power-of-2 boundary so that we don't keep having to redraw
+            // the content as it scales up and down. Rounding up to nearest
+            // power-of-2 boundary ensures we never scale up, only down --- avoiding
+            // jaggies. It also ensures we never scale down by more than a factor of
+            // 2, avoiding bad downscaling quality.
+            let scale_width = clamp_to_scale_factor(scale.0, false);
+            let scale_height = clamp_to_scale_factor(scale.1, false);
+            // Pick the maximum dimension as scale
+            let world_scale = LayoutToWorldScale::new(scale_width.max(scale_height));
+            let mut scale = world_scale * device_pixel_scale;
+            let max_scale = get_max_scale_for_border(border_data);
+            scale.0 = scale.0.min(max_scale.0);
+
+            // For each edge and corner, request the render task by content key
+            // from the render task cache. This ensures that the render task for
+            // this segment will be available for batching later in the frame.
+            let mut handles: SmallVec<[RenderTaskId; 8]> = SmallVec::new();
+
+            for segment in &border_data.border_segments {
+                // Update the cache key device size based on requested scale.
+                let cache_size = to_cache_size(segment.local_task_size, &mut scale);
+                let cache_key = RenderTaskCacheKey {
+                    kind: RenderTaskCacheKeyKind::BorderSegment(segment.cache_key.clone()),
+                    size: cache_size,
+                };
+
+                handles.push(frame_state.resource_cache.request_render_task(
+                    cache_key,
+                    frame_state.gpu_cache,
+                    frame_state.frame_gpu_data,
+                    frame_state.rg_builder,
+                    None,
+                    false,          // TODO(gw): We don't calculate opacity for borders yet!
+                    RenderTaskParent::Surface(pic_context.surface_index),
+                    &mut frame_state.surface_builder,
+                    |rg_builder, _| {
+                        rg_builder.add().init(RenderTask::new_dynamic(
+                            cache_size,
+                            RenderTaskKind::new_border_segment(
+                                build_border_instances(
+                                    &segment.cache_key,
+                                    cache_size,
+                                    &border_data.border,
+                                    scale,
+                                )
+                            ),
+                        ))
+                    }
+                ));
+            }
+
+            *render_task_ids = scratch
+                .border_cache_handles
+                .extend(handles);
+        }
+        PrimitiveInstanceKind::ImageBorder { data_handle, .. } => {
+            profile_scope!("ImageBorder");
+            let prim_data = &mut data_stores.image_border[*data_handle];
+
+            // TODO: get access to the ninepatch and to check whether we need support
+            // for repetitions in the shader.
+
+            // Update the template this instance references, which may refresh the GPU
+            // cache with any shared template data.
+            prim_data.kind.update(
+                &mut prim_data.common,
+                frame_state
+            );
+        }
+        PrimitiveInstanceKind::Rectangle { data_handle, segment_instance_index, color_binding_index, .. } => {
+            profile_scope!("Rectangle");
+            let prim_data = &mut data_stores.prim[*data_handle];
+            prim_data.common.may_need_repetition = false;
+
+            if *color_binding_index != ColorBindingIndex::INVALID {
+                match store.color_bindings[*color_binding_index] {
+                    PropertyBinding::Binding(..) => {
+                        // We explicitly invalidate the gpu cache
+                        // if the color is animating.
+                        let gpu_cache_handle =
+                            if *segment_instance_index == SegmentInstanceIndex::INVALID {
+                                None
+                            } else if *segment_instance_index == SegmentInstanceIndex::UNUSED {
+                                Some(&prim_data.common.gpu_cache_handle)
+                            } else {
+                                Some(&scratch.segment_instances[*segment_instance_index].gpu_cache_handle)
+                            };
+                        if let Some(gpu_cache_handle) = gpu_cache_handle {
+                            frame_state.gpu_cache.invalidate(gpu_cache_handle);
+                        }
+                    }
+                    PropertyBinding::Value(..) => {},
+                }
+            }
+
+            // Update the template this instance references, which may refresh the GPU
+            // cache with any shared template data.
+            prim_data.update(
+                frame_state,
+                frame_context.scene_properties,
+            );
+
+            write_segment(
+                *segment_instance_index,
+                frame_state,
+                &mut scratch.segments,
+                &mut scratch.segment_instances,
+                |request| {
+                    prim_data.kind.write_prim_gpu_blocks(
+                        request,
+                        frame_context.scene_properties,
+                    );
+                }
+            );
+        }
+        PrimitiveInstanceKind::YuvImage { data_handle, segment_instance_index, .. } => {
+            profile_scope!("YuvImage");
+            let prim_data = &mut data_stores.yuv_image[*data_handle];
+            let common_data = &mut prim_data.common;
+            let yuv_image_data = &mut prim_data.kind;
+
+            common_data.may_need_repetition = false;
+
+            // Update the template this instance references, which may refresh the GPU
+            // cache with any shared template data.
+            yuv_image_data.update(common_data, frame_state);
+
+            write_segment(
+                *segment_instance_index,
+                frame_state,
+                &mut scratch.segments,
+                &mut scratch.segment_instances,
+                |request| {
+                    yuv_image_data.write_prim_gpu_blocks(request);
+                }
+            );
+        }
+        PrimitiveInstanceKind::Image { data_handle, image_instance_index, .. } => {
+            profile_scope!("Image");
+
+            let prim_data = &mut data_stores.image[*data_handle];
+            let common_data = &mut prim_data.common;
+            let image_data = &mut prim_data.kind;
+            let image_instance = &mut store.images[*image_instance_index];
+
+            // Update the template this instance references, which may refresh the GPU
+            // cache with any shared template data.
+            image_data.update(
+                common_data,
+                image_instance,
+                pic_context.surface_index,
+                prim_spatial_node_index,
+                frame_state,
+                frame_context,
+                &mut prim_instance.vis,
+            );
+
+            write_segment(
+                image_instance.segment_instance_index,
+                frame_state,
+                &mut scratch.segments,
+                &mut scratch.segment_instances,
+                |request| {
+                    image_data.write_prim_gpu_blocks(request);
+                },
+            );
+        }
+        PrimitiveInstanceKind::LinearGradient { data_handle, ref mut visible_tiles_range, .. } => {
+            profile_scope!("LinearGradient");
+            let prim_data = &mut data_stores.linear_grad[*data_handle];
+
+            // Update the template this instance references, which may refresh the GPU
+            // cache with any shared template data.
+            prim_data.update(frame_state, pic_context.surface_index);
+
+            if prim_data.stretch_size.width >= prim_data.common.prim_rect.width() &&
+                prim_data.stretch_size.height >= prim_data.common.prim_rect.height() {
+
+                prim_data.common.may_need_repetition = false;
+            }
+
+            if prim_data.tile_spacing != LayoutSize::zero() {
+                // We are performing the decomposition on the CPU here, no need to
+                // have it in the shader.
+                prim_data.common.may_need_repetition = false;
+
+                *visible_tiles_range = decompose_repeated_gradient(
+                    &prim_instance.vis,
+                    &prim_data.common.prim_rect,
+                    prim_spatial_node_index,
+                    &prim_data.stretch_size,
+                    &prim_data.tile_spacing,
+                    frame_state,
+                    &mut scratch.gradient_tiles,
+                    &frame_context.spatial_tree,
+                    Some(&mut |_, mut request| {
+                        request.push([
+                            prim_data.start_point.x,
+                            prim_data.start_point.y,
+                            prim_data.end_point.x,
+                            prim_data.end_point.y,
+                        ]);
+                        request.push([
+                            pack_as_float(prim_data.extend_mode as u32),
+                            prim_data.stretch_size.width,
+                            prim_data.stretch_size.height,
+                            0.0,
+                        ]);
+                    }),
+                );
+
+                if visible_tiles_range.is_empty() {
+                    prim_instance.clear_visibility();
+                }
+            }
+
+            let stops_address = GradientGpuBlockBuilder::build(
+                prim_data.reverse_stops,
+                frame_state.frame_gpu_data,
+                &prim_data.stops,
+            );
+
+            // TODO(gw): Consider whether it's worth doing segment building
+            //           for gradient primitives.
+
+            prim_cmd = Some(PrimitiveCommand::instance(prim_instance_index, stops_address));
+        }
+        PrimitiveInstanceKind::CachedLinearGradient { data_handle, ref mut visible_tiles_range, .. } => {
+            profile_scope!("CachedLinearGradient");
+            let prim_data = &mut data_stores.linear_grad[*data_handle];
+            prim_data.common.may_need_repetition = prim_data.stretch_size.width < prim_data.common.prim_rect.width()
+                || prim_data.stretch_size.height < prim_data.common.prim_rect.height();
+
+            // Update the template this instance references, which may refresh the GPU
+            // cache with any shared template data.
+            prim_data.update(frame_state, pic_context.surface_index);
+
+            if prim_data.tile_spacing != LayoutSize::zero() {
+                prim_data.common.may_need_repetition = false;
+
+                *visible_tiles_range = decompose_repeated_gradient(
+                    &prim_instance.vis,
+                    &prim_data.common.prim_rect,
+                    prim_spatial_node_index,
+                    &prim_data.stretch_size,
+                    &prim_data.tile_spacing,
+                    frame_state,
+                    &mut scratch.gradient_tiles,
+                    &frame_context.spatial_tree,
+                    None,
+                );
+
+                if visible_tiles_range.is_empty() {
+                    prim_instance.clear_visibility();
+                }
+            }
+        }
+        PrimitiveInstanceKind::RadialGradient { data_handle, ref mut visible_tiles_range, .. } => {
+            profile_scope!("RadialGradient");
+            let prim_data = &mut data_stores.radial_grad[*data_handle];
+
+            prim_data.common.may_need_repetition = prim_data.stretch_size.width < prim_data.common.prim_rect.width()
+                || prim_data.stretch_size.height < prim_data.common.prim_rect.height();
+
+            // Update the template this instance references, which may refresh the GPU
+            // cache with any shared template data.
+            prim_data.update(frame_state, pic_context.surface_index);
+
+            if prim_data.tile_spacing != LayoutSize::zero() {
+                prim_data.common.may_need_repetition = false;
+
+                *visible_tiles_range = decompose_repeated_gradient(
+                    &prim_instance.vis,
+                    &prim_data.common.prim_rect,
+                    prim_spatial_node_index,
+                    &prim_data.stretch_size,
+                    &prim_data.tile_spacing,
+                    frame_state,
+                    &mut scratch.gradient_tiles,
+                    &frame_context.spatial_tree,
+                    None,
+                );
+
+                if visible_tiles_range.is_empty() {
+                    prim_instance.clear_visibility();
+                }
+            }
+
+            // TODO(gw): Consider whether it's worth doing segment building
+            //           for gradient primitives.
+        }
+        PrimitiveInstanceKind::ConicGradient { data_handle, ref mut visible_tiles_range, .. } => {
+            profile_scope!("ConicGradient");
+            let prim_data = &mut data_stores.conic_grad[*data_handle];
+
+            prim_data.common.may_need_repetition = prim_data.stretch_size.width < prim_data.common.prim_rect.width()
+                || prim_data.stretch_size.height < prim_data.common.prim_rect.height();
+
+            // Update the template this instance references, which may refresh the GPU
+            // cache with any shared template data.
+            prim_data.update(frame_state, pic_context.surface_index);
+
+            if prim_data.tile_spacing != LayoutSize::zero() {
+                prim_data.common.may_need_repetition = false;
+
+                *visible_tiles_range = decompose_repeated_gradient(
+                    &prim_instance.vis,
+                    &prim_data.common.prim_rect,
+                    prim_spatial_node_index,
+                    &prim_data.stretch_size,
+                    &prim_data.tile_spacing,
+                    frame_state,
+                    &mut scratch.gradient_tiles,
+                    &frame_context.spatial_tree,
+                    None,
+                );
+
+                if visible_tiles_range.is_empty() {
+                    prim_instance.clear_visibility();
+                }
+            }
+
+            // TODO(gw): Consider whether it's worth doing segment building
+            //           for gradient primitives.
+        }
+        PrimitiveInstanceKind::Picture { pic_index, segment_instance_index, .. } => {
+            profile_scope!("Picture");
+            let pic = &mut store.pictures[pic_index.0];
+
+            if pic.prepare_for_render(
+                frame_state,
+                data_stores,
+            ) {
+                if let Picture3DContext::In { root_data: None, plane_splitter_index, .. } = pic.context_3d {
+                    let dirty_rect = frame_state.current_dirty_region().combined;
+                    let splitter = &mut frame_state.plane_splitters[plane_splitter_index.0];
+                    let surface_index = pic.raster_config.as_ref().unwrap().surface_index;
+                    let surface = &frame_state.surfaces[surface_index.0];
+                    let local_prim_rect = surface.clipped_local_rect.cast_unit();
+
+                    PicturePrimitive::add_split_plane(
+                        splitter,
+                        frame_context.spatial_tree,
+                        prim_spatial_node_index,
+                        local_prim_rect,
+                        &prim_instance.vis.clip_chain.local_clip_rect,
+                        dirty_rect,
+                        plane_split_anchor,
+                    );
+                }
+
+                // If this picture uses segments, ensure the GPU cache is
+                // up to date with segment local rects.
+                // TODO(gw): This entire match statement above can now be
+                //           refactored into prepare_interned_prim_for_render.
+                if pic.can_use_segments() {
+                    write_segment(
+                        *segment_instance_index,
+                        frame_state,
+                        &mut scratch.segments,
+                        &mut scratch.segment_instances,
+                        |request| {
+                            request.push(PremultipliedColorF::WHITE);
+                            request.push(PremultipliedColorF::WHITE);
+                            request.push([
+                                -1.0,       // -ve means use prim rect for stretch size
+                                0.0,
+                                0.0,
+                                0.0,
+                            ]);
+                        }
+                    );
+                }
+            } else {
+                prim_instance.clear_visibility();
+            }
+        }
+        PrimitiveInstanceKind::BackdropCapture { .. } => {
+            // Register the owner picture of this backdrop primitive as the
+            // target for resolve of the sub-graph
+            frame_state.surface_builder.register_resolve_source();
+        }
+        PrimitiveInstanceKind::BackdropRender { pic_index, .. } => {
+            match frame_state.surface_builder.sub_graph_output_map.get(pic_index).cloned() {
+                Some(sub_graph_output_id) => {
+                    frame_state.surface_builder.add_child_render_task(
+                        sub_graph_output_id,
+                        frame_state.rg_builder,
+                    );
+                }
+                None => {
+                    // Backdrop capture was found not visible, didn't produce a sub-graph
+                    // so we can just skip drawing
+                    prim_instance.clear_visibility();
+                }
+            }
+        }
+    }
+
+    prim_cmd.unwrap_or(PrimitiveCommand::simple(prim_instance_index))
+}
+
+
+/// Writes the GPU cache data for a segmented primitive.
+///
+/// When `segment_instance_index` is `UNUSED` this is a no-op. Otherwise a GPU
+/// cache request is made for the segment instance's handle; if the cache hands
+/// back a request, `f` is invoked first so the caller can push its own blocks,
+/// and then every segment in the instance's range is written with its local
+/// rect and a zeroed extra-data block.
+///
+/// Passing `SegmentInstanceIndex::INVALID` is a caller error (checked in debug
+/// builds only).
+fn write_segment<F>(
+    segment_instance_index: SegmentInstanceIndex,
+    frame_state: &mut FrameBuildingState,
+    segments: &mut SegmentStorage,
+    segment_instances: &mut SegmentInstanceStorage,
+    f: F,
+) where F: Fn(&mut GpuDataRequest) {
+    debug_assert_ne!(segment_instance_index, SegmentInstanceIndex::INVALID);
+
+    // Primitives that opted out of segment rendering have nothing to write.
+    if segment_instance_index == SegmentInstanceIndex::UNUSED {
+        return;
+    }
+
+    let instance = &mut segment_instances[segment_instance_index];
+
+    // No request is handed back when the cache needs no new data for this handle.
+    let mut request = match frame_state.gpu_cache.request(&mut instance.gpu_cache_handle) {
+        Some(request) => request,
+        None => return,
+    };
+
+    let instance_segments = &segments[instance.segments_range];
+
+    // Caller-provided blocks come first, followed by the per-segment data.
+    f(&mut request);
+
+    for segment in instance_segments {
+        request.write_segment(segment.local_rect, [0.0; 4]);
+    }
+}
+
+/// Splits a repeated gradient primitive into individual visible tiles.
+///
+/// One `VisibleGradientTile` is pushed into `gradient_tiles` for each repetition
+/// reported by `image_tiling::repetitions` over the conservative visible rect,
+/// using `stretch_size + tile_spacing` as the stride. If `callback` is provided,
+/// it is invoked for every tile whose freshly created GPU cache handle yields a
+/// request, so the caller can write per-tile GPU data.
+///
+/// Returns the range of tiles that were added. The range is empty when the
+/// primitive rect does not intersect its local clip rect, or when there are no
+/// repetitions.
+fn decompose_repeated_gradient(
+    prim_vis: &PrimitiveVisibility,
+    prim_local_rect: &LayoutRect,
+    prim_spatial_node_index: SpatialNodeIndex,
+    stretch_size: &LayoutSize,
+    tile_spacing: &LayoutSize,
+    frame_state: &mut FrameBuildingState,
+    gradient_tiles: &mut GradientTileStorage,
+    spatial_tree: &SpatialTree,
+    mut callback: Option<&mut dyn FnMut(&LayoutRect, GpuDataRequest)>,
+) -> GradientTileRange {
+    let tile_range = gradient_tiles.open_range();
+
+    // Decomposition can yield tiles that stick out of the original primitive
+    // rect, so every tile is clipped against the primitive rect intersected
+    // with the existing local clip rect.
+    let clip_chain = &prim_vis.clip_chain;
+    let tightened_clip = clip_chain.local_clip_rect.intersection(prim_local_rect);
+
+    if let Some(tight_clip_rect) = tightened_clip {
+        let dirty_world_rect = frame_state.current_dirty_region().combined;
+        let visible_rect = compute_conservative_visible_rect(
+            clip_chain,
+            dirty_world_rect,
+            prim_spatial_node_index,
+            spatial_tree,
+        );
+
+        // Distance between the origins of two consecutive tiles.
+        let stride = *stretch_size + *tile_spacing;
+        let repetitions = image_tiling::repetitions(prim_local_rect, &visible_rect, stride);
+
+        gradient_tiles.reserve(repetitions.num_repetitions());
+
+        for repetition in repetitions {
+            let mut handle = GpuCacheHandle::new();
+            let tile_rect = LayoutRect::from_origin_and_size(repetition.origin, *stretch_size);
+
+            // Only touch the GPU cache when the caller wants to write tile data.
+            if let Some(write_tile) = &mut callback {
+                if let Some(request) = frame_state.gpu_cache.request(&mut handle) {
+                    write_tile(&tile_rect, request);
+                }
+            }
+
+            let tile = VisibleGradientTile {
+                local_rect: tile_rect,
+                local_clip_rect: tight_clip_rect,
+                handle,
+            };
+            gradient_tiles.push(tile);
+        }
+    }
+
+    // If no tiles were produced at this point, an earlier stage could probably
+    // have done a better job at culling this primitive.
+    gradient_tiles.close_range(tile_range)
+}
+
+
+/// Tries to set up per-segment clip masks for a brush primitive.
+///
+/// The segment list is looked up according to the primitive kind: from the
+/// segment instance storage for images, pictures, YUV images and rectangles,
+/// and from the interned template (`brush_segments`) for borders and gradients.
+///
+/// Returns `None` when the primitive kind never uses brush segments (text runs,
+/// clears, line decorations, backdrop capture/render), when its segment
+/// instance index is `UNUSED` (or `INVALID` for pictures), or when the resolved
+/// segment list is empty.
+///
+/// Otherwise one `ClipMaskKind` per segment is appended to
+/// `clip_mask_instances`, and the returned `ClipTaskIndex` is the position of
+/// the first of those entries.
+fn update_clip_task_for_brush(
+    instance: &PrimitiveInstance,
+    prim_origin: &LayoutPoint,
+    prim_spatial_node_index: SpatialNodeIndex,
+    root_spatial_node_index: SpatialNodeIndex,
+    pic_context: &PictureContext,
+    pic_state: &mut PictureState,
+    frame_context: &FrameBuildingContext,
+    frame_state: &mut FrameBuildingState,
+    prim_store: &PrimitiveStore,
+    data_stores: &mut DataStores,
+    segments_store: &mut SegmentStorage,
+    segment_instances_store: &mut SegmentInstanceStorage,
+    clip_mask_instances: &mut Vec<ClipMaskKind>,
+    device_pixel_scale: DevicePixelScale,
+) -> Option<ClipTaskIndex> {
+    let segments = match instance.kind {
+        PrimitiveInstanceKind::TextRun { .. } |
+        PrimitiveInstanceKind::Clear { .. } |
+        PrimitiveInstanceKind::LineDecoration { .. } |
+        PrimitiveInstanceKind::BackdropCapture { .. } |
+        PrimitiveInstanceKind::BackdropRender { .. } => {
+            return None;
+        }
+        PrimitiveInstanceKind::Image { image_instance_index, .. } => {
+            let segment_instance_index = prim_store
+                .images[image_instance_index]
+                .segment_instance_index;
+
+            if segment_instance_index == SegmentInstanceIndex::UNUSED {
+                return None;
+            }
+
+            let segment_instance = &segment_instances_store[segment_instance_index];
+
+            &segments_store[segment_instance.segments_range]
+        }
+        PrimitiveInstanceKind::Picture { segment_instance_index, .. } => {
+            // Pictures may not support segment rendering at all (INVALID)
+            // or support segment rendering but choose not to due to size
+            // or some other factor (UNUSED).
+            if segment_instance_index == SegmentInstanceIndex::UNUSED ||
+               segment_instance_index == SegmentInstanceIndex::INVALID {
+                return None;
+            }
+
+            let segment_instance = &segment_instances_store[segment_instance_index];
+            &segments_store[segment_instance.segments_range]
+        }
+        PrimitiveInstanceKind::YuvImage { segment_instance_index, .. } |
+        PrimitiveInstanceKind::Rectangle { segment_instance_index, .. } => {
+            debug_assert!(segment_instance_index != SegmentInstanceIndex::INVALID);
+
+            if segment_instance_index == SegmentInstanceIndex::UNUSED {
+                return None;
+            }
+
+            let segment_instance = &segment_instances_store[segment_instance_index];
+
+            &segments_store[segment_instance.segments_range]
+        }
+        PrimitiveInstanceKind::ImageBorder { data_handle, .. } => {
+            let border_data = &data_stores.image_border[data_handle].kind;
+
+            // TODO: This is quite messy - once we remove legacy primitives we
+            //       can change this to be a tuple match on (instance, template)
+            border_data.brush_segments.as_slice()
+        }
+        PrimitiveInstanceKind::NormalBorder { data_handle, .. } => {
+            let border_data = &data_stores.normal_border[data_handle].kind;
+
+            // TODO: This is quite messy - once we remove legacy primitives we
+            //       can change this to be a tuple match on (instance, template)
+            border_data.brush_segments.as_slice()
+        }
+        PrimitiveInstanceKind::LinearGradient { data_handle, .. }
+        | PrimitiveInstanceKind::CachedLinearGradient { data_handle, .. } => {
+            let prim_data = &data_stores.linear_grad[data_handle];
+
+            // TODO: This is quite messy - once we remove legacy primitives we
+            //       can change this to be a tuple match on (instance, template)
+            if prim_data.brush_segments.is_empty() {
+                return None;
+            }
+
+            prim_data.brush_segments.as_slice()
+        }
+        PrimitiveInstanceKind::RadialGradient { data_handle, .. } => {
+            let prim_data = &data_stores.radial_grad[data_handle];
+
+            // TODO: This is quite messy - once we remove legacy primitives we
+            //       can change this to be a tuple match on (instance, template)
+            if prim_data.brush_segments.is_empty() {
+                return None;
+            }
+
+            prim_data.brush_segments.as_slice()
+        }
+        PrimitiveInstanceKind::ConicGradient { data_handle, .. } => {
+            let prim_data = &data_stores.conic_grad[data_handle];
+
+            // TODO: This is quite messy - once we remove legacy primitives we
+            //       can change this to be a tuple match on (instance, template)
+            if prim_data.brush_segments.is_empty() {
+                return None;
+            }
+
+            prim_data.brush_segments.as_slice()
+        }
+    };
+
+    // If there are no segments, early out to avoid setting a valid
+    // clip task instance location below.
+    if segments.is_empty() {
+        return None;
+    }
+
+    // Set where in the clip mask instances array the clip mask info
+    // can be found for this primitive. Each segment will push the
+    // clip mask information for itself in update_clip_task below.
+    let clip_task_index = ClipTaskIndex(clip_mask_instances.len() as _);
+
+    // If we only built 1 segment, there is no point in re-running
+    // the clip chain builder. Instead, just use the clip chain
+    // instance that was built for the main primitive. This is a
+    // significant optimization for the common case.
+    if segments.len() == 1 {
+        let clip_mask_kind = update_brush_segment_clip_task(
+            &segments[0],
+            Some(&instance.vis.clip_chain),
+            root_spatial_node_index,
+            pic_context.surface_index,
+            frame_context,
+            frame_state,
+            &mut data_stores.clip,
+            device_pixel_scale,
+        );
+        clip_mask_instances.push(clip_mask_kind);
+    } else {
+        let dirty_world_rect = frame_state.current_dirty_region().combined;
+
+        for segment in segments {
+            // Build a clip chain for the smaller segment rect. This will
+            // often manage to eliminate most/all clips, and sometimes
+            // clip the segment completely.
+            frame_state.clip_store.set_active_clips_from_clip_chain(
+                &instance.vis.clip_chain,
+                prim_spatial_node_index,
+                &frame_context.spatial_tree,
+                &data_stores.clip,
+            );
+
+            // The segment rect is translated by the primitive origin before
+            // the clip chain is built for it.
+            let segment_clip_chain = frame_state
+                .clip_store
+                .build_clip_chain_instance(
+                    segment.local_rect.translate(prim_origin.to_vector()),
+                    &pic_state.map_local_to_pic,
+                    &pic_state.map_pic_to_world,
+                    &frame_context.spatial_tree,
+                    frame_state.gpu_cache,
+                    frame_state.resource_cache,
+                    device_pixel_scale,
+                    &dirty_world_rect,
+                    &mut data_stores.clip,
+                    false,
+                );
+
+            let clip_mask_kind = update_brush_segment_clip_task(
+                &segment,
+                segment_clip_chain.as_ref(),
+                root_spatial_node_index,
+                pic_context.surface_index,
+                frame_context,
+                frame_state,
+                &mut data_stores.clip,
+                device_pixel_scale,
+            );
+            clip_mask_instances.push(clip_mask_kind);
+        }
+    }
+
+    Some(clip_task_index)
+}
+
+/// Sets up the clip mask for a primitive instance and records its location in
+/// `instance.vis.clip_task_index`.
+///
+/// Segments are built first if needed. The segmented brush path
+/// (`update_clip_task_for_brush`) is then tried; when it yields no index and the
+/// primitive's clip chain needs a mask, a single mask render task is created for
+/// the surface rect of the primitive's picture coverage rect, pushed into
+/// `scratch.clip_mask_instances`, and handed to `frame_state.surface_builder` as
+/// a child render task. If neither applies, the index is set to
+/// `ClipTaskIndex::INVALID`.
+///
+/// Returns `false` when a mask is needed but no surface rect could be obtained
+/// for the primitive; in that case `instance.vis.clip_task_index` is left
+/// untouched. Returns `true` in every other case.
+pub fn update_clip_task(
+    instance: &mut PrimitiveInstance,
+    prim_origin: &LayoutPoint,
+    prim_spatial_node_index: SpatialNodeIndex,
+    root_spatial_node_index: SpatialNodeIndex,
+    pic_context: &PictureContext,
+    pic_state: &mut PictureState,
+    frame_context: &FrameBuildingContext,
+    frame_state: &mut FrameBuildingState,
+    prim_store: &mut PrimitiveStore,
+    data_stores: &mut DataStores,
+    scratch: &mut PrimitiveScratchBuffer,
+) -> bool {
+    let device_pixel_scale = frame_state.surfaces[pic_context.surface_index.0].device_pixel_scale;
+
+    build_segments_if_needed(
+        instance,
+        frame_state,
+        prim_store,
+        data_stores,
+        &mut scratch.segments,
+        &mut scratch.segment_instances,
+    );
+
+    // First try to render this primitive's mask using optimized brush rendering.
+    instance.vis.clip_task_index = if let Some(clip_task_index) = update_clip_task_for_brush(
+        instance,
+        prim_origin,
+        prim_spatial_node_index,
+        root_spatial_node_index,
+        pic_context,
+        pic_state,
+        frame_context,
+        frame_state,
+        prim_store,
+        data_stores,
+        &mut scratch.segments,
+        &mut scratch.segment_instances,
+        &mut scratch.clip_mask_instances,
+        device_pixel_scale,
+    ) {
+        clip_task_index
+    } else if instance.vis.clip_chain.needs_mask {
+        // Get a minimal device space rect, clipped to the screen that we
+        // need to allocate for the clip mask, as well as interpolated
+        // snap offsets.
+        let unadjusted_device_rect = match frame_state.surfaces[pic_context.surface_index.0].get_surface_rect(
+            &instance.vis.clip_chain.pic_coverage_rect,
+            frame_context.spatial_tree,
+        ) {
+            Some(rect) => rect,
+            None => return false,
+        };
+
+        // The mask rect and scale may be adjusted to respect the maximum mask
+        // size; the adjusted scale shadows the surface scale from here on.
+        let (device_rect, device_pixel_scale) = adjust_mask_scale_for_max_size(
+            unadjusted_device_rect,
+            device_pixel_scale,
+        );
+        let clip_task_id = RenderTaskKind::new_mask(
+            device_rect,
+            instance.vis.clip_chain.clips_range,
+            root_spatial_node_index,
+            frame_state.clip_store,
+            frame_state.gpu_cache,
+            frame_state.frame_gpu_data,
+            frame_state.resource_cache,
+            frame_state.rg_builder,
+            &mut data_stores.clip,
+            device_pixel_scale,
+            frame_context.fb_config,
+            &mut frame_state.surface_builder,
+        );
+        // Set the global clip mask instance for this primitive. The index is
+        // stored on the instance by the enclosing assignment, so it is not
+        // written separately here.
+        let clip_task_index = ClipTaskIndex(scratch.clip_mask_instances.len() as _);
+        scratch.clip_mask_instances.push(ClipMaskKind::Mask(clip_task_id));
+        frame_state.surface_builder.add_child_render_task(
+            clip_task_id,
+            frame_state.rg_builder,
+        );
+        clip_task_index
+    } else {
+        ClipTaskIndex::INVALID
+    };
+
+    true
+}
+
+/// Decides how a single brush segment is clipped and, when a mask is
+/// required, creates the mask render task for it.
+///
+/// Returns:
+/// * `ClipMaskKind::Clipped` when no clip chain is supplied for the segment,
+///   or no surface rect can be obtained for the chain's coverage rect;
+/// * `ClipMaskKind::None` when the chain needs no mask, or when the segment is
+///   not flagged `may_need_clip_mask` and the chain has no non-local clips;
+/// * `ClipMaskKind::Mask` carrying the id of the newly created task otherwise.
+pub fn update_brush_segment_clip_task(
+    segment: &BrushSegment,
+    clip_chain: Option<&ClipChainInstance>,
+    root_spatial_node_index: SpatialNodeIndex,
+    surface_index: SurfaceIndex,
+    frame_context: &FrameBuildingContext,
+    frame_state: &mut FrameBuildingState,
+    clip_data_store: &mut ClipDataStore,
+    device_pixel_scale: DevicePixelScale,
+) -> ClipMaskKind {
+    let chain = if let Some(chain) = clip_chain {
+        chain
+    } else {
+        return ClipMaskKind::Clipped;
+    };
+
+    // A mask task is only created when the chain asks for one and either the
+    // segment itself or a non-local clip may be affected by it.
+    let mask_required = chain.needs_mask
+        && (segment.may_need_clip_mask || chain.has_non_local_clips);
+    if !mask_required {
+        return ClipMaskKind::None;
+    }
+
+    let surface_rect = frame_state.surfaces[surface_index.0].get_surface_rect(
+        &chain.pic_coverage_rect,
+        frame_context.spatial_tree,
+    );
+    let unscaled_rect = match surface_rect {
+        Some(rect) => rect,
+        None => return ClipMaskKind::Clipped,
+    };
+
+    // Keep the mask within the maximum mask size, scaling it down if needed.
+    let (mask_rect, mask_scale) = adjust_mask_scale_for_max_size(unscaled_rect, device_pixel_scale);
+
+    let mask_task_id = RenderTaskKind::new_mask(
+        mask_rect,
+        chain.clips_range,
+        root_spatial_node_index,
+        frame_state.clip_store,
+        frame_state.gpu_cache,
+        frame_state.frame_gpu_data,
+        frame_state.resource_cache,
+        frame_state.rg_builder,
+        clip_data_store,
+        mask_scale,
+        frame_context.fb_config,
+        &mut frame_state.surface_builder,
+    );
+
+    // Attach the mask task to the current surface builder as a child task.
+    frame_state.surface_builder.add_child_render_task(mask_task_id, frame_state.rg_builder);
+
+    ClipMaskKind::Mask(mask_task_id)
+}
+
+
+/// Describes a primitive to the segment builder: its local rect, its local
+/// clip rect, and every clip of `clip_chain` that can take part in
+/// segmentation.
+///
+/// Returns `false` when the primitive should not be segmented, either because
+/// its area is below `MIN_BRUSH_SPLIT_AREA` or because the chain contains an
+/// image mask (in that case the builder may already have been partially
+/// filled). Returns `true` once the whole description has been written.
+fn write_brush_segment_description(
+    prim_local_rect: LayoutRect,
+    prim_local_clip_rect: LayoutRect,
+    clip_chain: &ClipChainInstance,
+    segment_builder: &mut SegmentBuilder,
+    clip_store: &ClipStore,
+    data_stores: &DataStores,
+) -> bool {
+    // Small brushes are not worth splitting; they are drawn as a single
+    // primitive with a clip mask instead.
+    if prim_local_rect.area() < MIN_BRUSH_SPLIT_AREA {
+        return false;
+    }
+
+    // NOTE: The local clip rect passed to the segment builder must be the unmodified
+    //       local clip rect from the clip leaf, not the local_clip_rect from the
+    //       clip-chain instance. The clip-chain instance may have been reduced by
+    //       clips that are in the same coordinate system, but not the same spatial
+    //       node as the primitive. This can result in the clip for the segment building
+    //       being affected by scrolling clips, which we can't handle (since the segments
+    //       are not invalidated during frame building after being built).
+    segment_builder.initialize(prim_local_rect, None, prim_local_clip_rect);
+
+    // Segment the primitive on all the local-space clip sources that we can.
+    let clips_range = &clip_chain.clips_range;
+    for clip_index in 0 .. clips_range.count {
+        let clip_instance = clip_store.get_instance_from_range(clips_range, clip_index);
+        let clip_node = &data_stores.clip[clip_instance.handle];
+
+        // A clip positioned by another node can move relative to the primitive
+        // while scrolling, which would require resegmenting. Rather than doing
+        // that, only clips sharing the primitive's positioning node are used.
+        // TODO(mrobinson, #2858): It may make sense to include these nodes, resegmenting only
+        // when necessary while scrolling.
+        if !clip_instance.flags.contains(ClipNodeFlags::SAME_SPATIAL_NODE) {
+            continue;
+        }
+
+        match clip_node.item.kind {
+            ClipItemKind::Rectangle { rect, mode } => {
+                segment_builder.push_clip_rect(rect, None, mode);
+            }
+            ClipItemKind::RoundedRectangle { rect, radius, mode } => {
+                segment_builder.push_clip_rect(rect, Some(radius), mode);
+            }
+            ClipItemKind::BoxShadow { ref source } => {
+                // For inset box shadows, pixels inside the shadow region and
+                // beyond the inner rect can be clipped out, as the blur radius
+                // cannot affect them.
+                let inner_clip_mode = match source.clip_mode {
+                    BoxShadowClipMode::Inset => Some(ClipMode::ClipOut),
+                    BoxShadowClipMode::Outset => None,
+                };
+
+                // Register the region in which the box-shadow can influence
+                // the result, so that clip-mask tasks get allocated for those
+                // pixels even if no other clip affects them.
+                let outer_rect = source.prim_shadow_rect;
+                let inner_rect = source.prim_shadow_rect.inflate(
+                    -0.5 * source.original_alloc_size.width,
+                    -0.5 * source.original_alloc_size.height,
+                );
+                segment_builder.push_mask_region(outer_rect, inner_rect, inner_clip_mode);
+            }
+            ClipItemKind::Image { .. } => {
+                // An image mask makes segment building impossible: which parts
+                // of the primitive it affects cannot be known without
+                // inspecting the pixels. Something better could be done in the
+                // future if this ever shows up as a performance issue (for
+                // instance, segmenting on the bounding rect of a non-repeating
+                // image mask).
+                return false;
+            }
+        }
+    }
+
+    true
+}
+
+/// Builds the brush segments for a primitive instance when its segment
+/// instance index is still `SegmentInstanceIndex::INVALID`.
+///
+/// Only rectangles, YUV images, non-tiled images and pictures that report
+/// `can_use_segments()` are considered; every other primitive kind returns
+/// early. Tiled images get their index set to `UNUSED` and also return early.
+///
+/// When segmentation yields at most one segment the index is set to
+/// `SegmentInstanceIndex::UNUSED`. Otherwise the segments are appended to
+/// `segments_store` and a new `SegmentedInstance` referring to them is pushed
+/// to `segment_instances_store`, whose index is stored on the primitive.
+fn build_segments_if_needed(
+    instance: &mut PrimitiveInstance,
+    frame_state: &mut FrameBuildingState,
+    prim_store: &mut PrimitiveStore,
+    data_stores: &DataStores,
+    segments_store: &mut SegmentStorage,
+    segment_instances_store: &mut SegmentInstanceStorage,
+) {
+    let prim_clip_chain = &instance.vis.clip_chain;
+
+    // Usually, the primitive rect can be found from information
+    // in the instance and primitive template.
+    let prim_local_rect = data_stores.get_local_prim_rect(
+        instance,
+        &prim_store.pictures,
+        frame_state.surfaces,
+    );
+
+    // Locate the slot holding this primitive's segment instance index; where
+    // that slot lives depends on the primitive kind.
+    let segment_instance_index = match instance.kind {
+        PrimitiveInstanceKind::Rectangle { ref mut segment_instance_index, .. } |
+        PrimitiveInstanceKind::YuvImage { ref mut segment_instance_index, .. } => {
+            segment_instance_index
+        }
+        PrimitiveInstanceKind::Image { data_handle, image_instance_index, .. } => {
+            let image_data = &data_stores.image[data_handle].kind;
+            let image_instance = &mut prim_store.images[image_instance_index];
+            //Note: tiled images don't support automatic segmentation,
+            // they strictly produce one segment per visible tile instead.
+            if frame_state
+                .resource_cache
+                .get_image_properties(image_data.key)
+                .and_then(|properties| properties.tiling)
+                .is_some()
+            {
+                image_instance.segment_instance_index = SegmentInstanceIndex::UNUSED;
+                return;
+            }
+            &mut image_instance.segment_instance_index
+        }
+        PrimitiveInstanceKind::Picture { ref mut segment_instance_index, pic_index, .. } => {
+            let pic = &mut prim_store.pictures[pic_index.0];
+
+            // If this picture supports segment rendering
+            if pic.can_use_segments() {
+                // If the segments have been invalidated, ensure the current
+                // index of segments is invalid. This ensures that the segment
+                // building logic below will be run.
+                if !pic.segments_are_valid {
+                    *segment_instance_index = SegmentInstanceIndex::INVALID;
+                    pic.segments_are_valid = true;
+                }
+
+                segment_instance_index
+            } else {
+                return;
+            }
+        }
+        PrimitiveInstanceKind::TextRun { .. } |
+        PrimitiveInstanceKind::NormalBorder { .. } |
+        PrimitiveInstanceKind::ImageBorder { .. } |
+        PrimitiveInstanceKind::Clear { .. } |
+        PrimitiveInstanceKind::LinearGradient { .. } |
+        PrimitiveInstanceKind::CachedLinearGradient { .. } |
+        PrimitiveInstanceKind::RadialGradient { .. } |
+        PrimitiveInstanceKind::ConicGradient { .. } |
+        PrimitiveInstanceKind::LineDecoration { .. } |
+        PrimitiveInstanceKind::BackdropCapture { .. } |
+        PrimitiveInstanceKind::BackdropRender { .. } => {
+            // These primitives don't support / need segments.
+            return;
+        }
+    };
+
+    if *segment_instance_index == SegmentInstanceIndex::INVALID {
+        let mut segments: SmallVec<[BrushSegment; 8]> = SmallVec::new();
+        let clip_leaf = frame_state.clip_tree.get_leaf(instance.clip_leaf_id);
+
+        if write_brush_segment_description(
+            prim_local_rect,
+            clip_leaf.local_clip_rect,
+            prim_clip_chain,
+            &mut frame_state.segment_builder,
+            frame_state.clip_store,
+            data_stores,
+        ) {
+            frame_state.segment_builder.build(|segment| {
+                segments.push(
+                    BrushSegment::new(
+                        // Segment rects are stored relative to the primitive's origin.
+                        segment.rect.translate(-prim_local_rect.min.to_vector()),
+                        segment.has_mask,
+                        segment.edge_flags,
+                        [0.0; 4],
+                        BrushFlags::PERSPECTIVE_INTERPOLATION,
+                    ),
+                );
+            });
+        }
+
+        // If only a single segment is produced, there is no benefit to writing
+        // a segment instance array. Instead, just use the main primitive rect
+        // written into the GPU cache.
+        // TODO(gw): This is (sortof) a bandaid - due to a limitation in the current
+        //           brush encoding, we can only support a total of up to 2^16 segments.
+        //           This should be (more than) enough for any real world case, so for
+        //           now we can handle this by skipping cases where we were generating
+        //           segments where there is no benefit. The long term / robust fix
+        //           for this is to move the segment building to be done as a more
+        //           limited nine-patch system during scene building, removing arbitrary
+        //           segmentation during frame-building (see bug #1617491).
+        if segments.len() <= 1 {
+            *segment_instance_index = SegmentInstanceIndex::UNUSED;
+        } else {
+            let segments_range = segments_store.extend(segments);
+
+            let instance = SegmentedInstance {
+                segments_range,
+                gpu_cache_handle: GpuCacheHandle::new(),
+            };
+
+            *segment_instance_index = segment_instances_store.push(instance);
+        };
+    }
+}
+
+/// Ensures that the size of a mask render task stays within `MAX_MASK_SIZE`.
+///
+/// A rect whose width and height do not exceed the limit is returned unchanged
+/// together with the given scale. Otherwise the rect and the device pixel
+/// scale are both multiplied by a common factor derived from the rect's larger
+/// side, and the scaled rect is rounded out.
+fn adjust_mask_scale_for_max_size(device_rect: DeviceRect, device_pixel_scale: DevicePixelScale) -> (DeviceRect, DevicePixelScale) {
+    let too_large = device_rect.width() > MAX_MASK_SIZE || device_rect.height() > MAX_MASK_SIZE;
+    if !too_large {
+        return (device_rect, device_pixel_scale);
+    }
+
+    // round_out will grow by 1 integer pixel if origin is on a
+    // fractional position, so keep that margin for error with -1:
+    let longest_side = f32::max(device_rect.width(), device_rect.height());
+    let shrink = (MAX_MASK_SIZE - 1.0) / longest_side;
+
+    let scaled_pixel_scale = device_pixel_scale * Scale::new(shrink);
+    let scaled_rect = (device_rect.to_f32() * Scale::new(shrink)).round_out();
+
+    (scaled_rect, scaled_pixel_scale)
+}
+
diff --git a/gfx/wr/webrender/src/prim_store/backdrop.rs b/gfx/wr/webrender/src/prim_store/backdrop.rs
new file mode 100644
index 0000000000..7c106e47bc
--- /dev/null
+++ b/gfx/wr/webrender/src/prim_store/backdrop.rs
@@ -0,0 +1,175 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::units::*;
+use crate::intern::{Internable, InternDebug, Handle as InternHandle};
+use crate::internal_types::LayoutPrimitiveInfo;
+use crate::prim_store::{
+    InternablePrimitive, PrimitiveInstanceKind, PrimKey, PrimTemplate,
+    PrimTemplateCommonData, PrimitiveStore, PictureIndex,
+};
+use crate::scene_building::IsVisible;
+
+/// Interned description of a backdrop-capture primitive. The struct has no
+/// fields, so all values compare and hash equal.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Clone, Eq, PartialEq, MallocSizeOf, Hash)]
+pub struct BackdropCapture {
+}
+
+/// Interned description of a backdrop-render primitive. The struct has no
+/// fields, so all values compare and hash equal.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Clone, Eq, PartialEq, MallocSizeOf, Hash)]
+pub struct BackdropRender {
+}
+
+impl From<BackdropCapture> for BackdropCaptureData {
+    /// Converts the interned payload into its template data. Both types are
+    /// field-less, so the input is simply discarded.
+    fn from(_: BackdropCapture) -> Self {
+        Self {}
+    }
+}
+
+impl From<BackdropRender> for BackdropRenderData {
+    /// Converts the interned payload into its template data. Both types are
+    /// field-less, so the input is simply discarded.
+    fn from(_: BackdropRender) -> Self {
+        Self {}
+    }
+}
+
+/// Interning key for backdrop-capture primitives.
+pub type BackdropCaptureKey = PrimKey<BackdropCapture>;
+/// Interning key for backdrop-render primitives.
+pub type BackdropRenderKey = PrimKey<BackdropRender>;
+
+impl BackdropCaptureKey {
+    /// Creates the interning key for a backdrop-capture primitive from its
+    /// layout info (converted into the common key data) and its payload.
+    pub fn new(info: &LayoutPrimitiveInfo, backdrop_capture: BackdropCapture) -> Self {
+        Self {
+            common: info.into(),
+            kind: backdrop_capture,
+        }
+    }
+}
+
+impl BackdropRenderKey {
+    /// Creates the interning key for a backdrop-render primitive from its
+    /// layout info (converted into the common key data) and its payload.
+    pub fn new(info: &LayoutPrimitiveInfo, backdrop_render: BackdropRender) -> Self {
+        Self {
+            common: info.into(),
+            kind: backdrop_render,
+        }
+    }
+}
+
+// Empty impls: neither backdrop key type overrides any `InternDebug` item.
+impl InternDebug for BackdropCaptureKey {}
+impl InternDebug for BackdropRenderKey {}
+
+/// Template payload stored for an interned backdrop capture. It carries no
+/// data of its own.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, MallocSizeOf)]
+pub struct BackdropCaptureData {
+}
+
+/// Template payload stored for an interned backdrop render. It carries no
+/// data of its own.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, MallocSizeOf)]
+pub struct BackdropRenderData {
+}
+
+/// Stored template (common data plus payload) for backdrop captures.
+pub type BackdropCaptureTemplate = PrimTemplate<BackdropCaptureData>;
+/// Stored template (common data plus payload) for backdrop renders.
+pub type BackdropRenderTemplate = PrimTemplate<BackdropRenderData>;
+
+impl From<BackdropCaptureKey> for BackdropCaptureTemplate {
+    /// Builds the stored template from an interning key: the common template
+    /// data is derived from the key's common part and the payload is converted
+    /// into its template form.
+    fn from(key: BackdropCaptureKey) -> Self {
+        Self {
+            common: PrimTemplateCommonData::with_key_common(key.common),
+            kind: key.kind.into(),
+        }
+    }
+}
+
+impl From<BackdropRenderKey> for BackdropRenderTemplate {
+    /// Builds the stored template from an interning key: the common template
+    /// data is derived from the key's common part and the payload is converted
+    /// into its template form.
+    fn from(key: BackdropRenderKey) -> Self {
+        Self {
+            common: PrimTemplateCommonData::with_key_common(key.common),
+            kind: key.kind.into(),
+        }
+    }
+}
+
+/// Intern handle referring to an interned `BackdropCapture`.
+pub type BackdropCaptureDataHandle = InternHandle<BackdropCapture>;
+/// Intern handle referring to an interned `BackdropRender`.
+pub type BackdropRenderDataHandle = InternHandle<BackdropRender>;
+
+/// Interning configuration for backdrop captures: key type, stored template
+/// type, no extra per-intern data, and the profiler counter constant used.
+impl Internable for BackdropCapture {
+    type Key = BackdropCaptureKey;
+    type StoreData = BackdropCaptureTemplate;
+    type InternData = ();
+    const PROFILE_COUNTER: usize = crate::profiler::INTERNED_BACKDROP_CAPTURES;
+}
+
+/// Interning configuration for backdrop renders: key type, stored template
+/// type, no extra per-intern data, and the profiler counter constant used.
+impl Internable for BackdropRender {
+    type Key = BackdropRenderKey;
+    type StoreData = BackdropRenderTemplate;
+    type InternData = ();
+    const PROFILE_COUNTER: usize = crate::profiler::INTERNED_BACKDROP_RENDERS;
+}
+
+impl InternablePrimitive for BackdropCapture {
+    /// Wraps this payload and the given layout info into an interning key.
+    fn into_key(self, info: &LayoutPrimitiveInfo) -> BackdropCaptureKey {
+        BackdropCaptureKey::new(info, self)
+    }
+
+    /// Creates the primitive instance kind for a backdrop capture. Only the
+    /// data handle is used; the key, primitive store and offset are ignored.
+    fn make_instance_kind(
+        _: BackdropCaptureKey,
+        data_handle: BackdropCaptureDataHandle,
+        _: &mut PrimitiveStore,
+        _: LayoutVector2D,
+    ) -> PrimitiveInstanceKind {
+        PrimitiveInstanceKind::BackdropCapture { data_handle }
+    }
+}
+
+impl InternablePrimitive for BackdropRender {
+    /// Wraps this payload and the given layout info into an interning key.
+    fn into_key(self, info: &LayoutPrimitiveInfo) -> BackdropRenderKey {
+        BackdropRenderKey::new(info, self)
+    }
+
+    /// Creates the primitive instance kind for a backdrop render. The picture
+    /// index starts out as `PictureIndex::INVALID`; the key, primitive store
+    /// and offset are ignored.
+    fn make_instance_kind(
+        _: BackdropRenderKey,
+        data_handle: BackdropRenderDataHandle,
+        _: &mut PrimitiveStore,
+        _: LayoutVector2D,
+    ) -> PrimitiveInstanceKind {
+        PrimitiveInstanceKind::BackdropRender {
+            pic_index: PictureIndex::INVALID,
+            data_handle,
+        }
+    }
+}
+
+impl IsVisible for BackdropCapture {
+    /// Backdrop captures are unconditionally reported as visible.
+    fn is_visible(&self) -> bool {
+        true
+    }
+}
+
+impl IsVisible for BackdropRender {
+    /// Backdrop renders are unconditionally reported as visible.
+    fn is_visible(&self) -> bool {
+        true
+    }
+}
diff --git a/gfx/wr/webrender/src/prim_store/borders.rs b/gfx/wr/webrender/src/prim_store/borders.rs
new file mode 100644
index 0000000000..7459dd75e1
--- /dev/null
+++ b/gfx/wr/webrender/src/prim_store/borders.rs
@@ -0,0 +1,387 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{NormalBorder, PremultipliedColorF, Shadow, RasterSpace};
+use api::units::*;
+use crate::border::create_border_segments;
+use crate::border::NormalBorderAu;
+use crate::scene_building::{CreateShadow, IsVisible};
+use crate::frame_builder::{FrameBuildingState};
+use crate::gpu_cache::GpuDataRequest;
+use crate::intern;
+use crate::internal_types::{LayoutPrimitiveInfo, FrameId};
+use crate::prim_store::{
+    BorderSegmentInfo, BrushSegment, NinePatchDescriptor, PrimKey,
+    PrimTemplate, PrimTemplateCommonData,
+    PrimitiveInstanceKind, PrimitiveOpacity,
+    PrimitiveStore, InternablePrimitive,
+};
+use crate::resource_cache::ImageRequest;
+use crate::render_task::RenderTask;
+use crate::render_task_graph::RenderTaskId;
+
+use super::storage;
+
+/// Hashable description of a normal border, used as the payload of its
+/// interning key.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Clone, Eq, MallocSizeOf, PartialEq, Hash)]
+pub struct NormalBorderPrim {
+    // Border description in its `Au` form; converted to `NormalBorder` when
+    // the template is built.
+    pub border: NormalBorderAu,
+    // Side widths in their `Au` form; converted to `LayoutSideOffsets` when
+    // the template is built.
+    pub widths: LayoutSideOffsetsAu,
+}
+
+/// Interning key for normal border primitives.
+pub type NormalBorderKey = PrimKey<NormalBorderPrim>;
+
+impl NormalBorderKey {
+    /// Creates the interning key for a normal border from its layout info
+    /// (converted into the common key data) and its border description.
+    pub fn new(info: &LayoutPrimitiveInfo, normal_border: NormalBorderPrim) -> Self {
+        Self {
+            common: info.into(),
+            kind: normal_border,
+        }
+    }
+}
+
+// Empty impl: no `InternDebug` item is overridden for this key type.
+impl intern::InternDebug for NormalBorderKey {}
+
+/// Template payload stored for an interned normal border.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+pub struct NormalBorderData {
+    // Brush segments produced by `create_border_segments`; written to the GPU
+    // cache after the primitive header blocks.
+    pub brush_segments: Vec<BrushSegment>,
+    // Border segment descriptions produced by `create_border_segments`.
+    pub border_segments: Vec<BorderSegmentInfo>,
+    // The border, normalized against `widths` when the template is built.
+    pub border: NormalBorder,
+    // Side widths in layout units.
+    pub widths: LayoutSideOffsets,
+}
+
+impl NormalBorderData {
+    /// Refreshes the GPU cache entry of this primitive template.
+    ///
+    /// This can run several times per frame, once for every primitive
+    /// reference that points at the interned template. GPU blocks are only
+    /// written when the cache hands out a request for the handle (first use or
+    /// eviction). The template opacity is always set to translucent.
+    pub fn update(
+        &mut self,
+        common: &mut PrimTemplateCommonData,
+        frame_state: &mut FrameBuildingState,
+    ) {
+        if let Some(mut request) = frame_state.gpu_cache.request(&mut common.gpu_cache_handle) {
+            self.write_prim_gpu_blocks(&mut request, common.prim_rect.size());
+            self.write_segment_gpu_blocks(&mut request);
+        }
+
+        common.opacity = PrimitiveOpacity::translucent();
+    }
+
+    /// Writes the primitive header: two white colors followed by a block
+    /// holding the primitive size.
+    fn write_prim_gpu_blocks(&self, request: &mut GpuDataRequest, prim_size: LayoutSize) {
+        // Border primitives currently used for
+        // image borders, and run through the
+        // normal brush_image shader.
+        for _ in 0 .. 2 {
+            request.push(PremultipliedColorF::WHITE);
+        }
+        request.push([prim_size.width, prim_size.height, 0.0, 0.0]);
+    }
+
+    /// Writes one GPU segment entry per brush segment, in order.
+    fn write_segment_gpu_blocks(&self, request: &mut GpuDataRequest) {
+        self.brush_segments.iter().for_each(|segment| {
+            // has to match VECS_PER_SEGMENT
+            request.write_segment(segment.local_rect, segment.extra_data);
+        });
+    }
+}
+
+/// Stored template (common data plus payload) for normal borders.
+pub type NormalBorderTemplate = PrimTemplate<NormalBorderData>;
+
+impl From<NormalBorderKey> for NormalBorderTemplate {
+    /// Builds the stored template from an interning key: converts the border
+    /// and widths out of their `Au` form, normalizes the border against the
+    /// widths, and precomputes the border and brush segments for the
+    /// primitive's size.
+    fn from(key: NormalBorderKey) -> Self {
+        let common = PrimTemplateCommonData::with_key_common(key.common);
+
+        let mut border: NormalBorder = key.kind.border.into();
+        let widths = LayoutSideOffsets::from_au(key.kind.widths);
+
+        // FIXME(emilio): Is this the best place to do this?
+        border.normalize(&widths);
+
+        let (mut brush_segments, mut border_segments) = (Vec::new(), Vec::new());
+        let prim_size = common.prim_rect.size();
+        create_border_segments(
+            prim_size,
+            &border,
+            &widths,
+            &mut border_segments,
+            &mut brush_segments,
+        );
+
+        let kind = NormalBorderData {
+            brush_segments,
+            border_segments,
+            border,
+            widths,
+        };
+
+        Self { common, kind }
+    }
+}
+
+/// Intern handle referring to an interned `NormalBorderPrim`.
+pub type NormalBorderDataHandle = intern::Handle<NormalBorderPrim>;
+
+/// Interning configuration for normal borders: key type, stored template
+/// type, no extra per-intern data, and the profiler counter constant used.
+impl intern::Internable for NormalBorderPrim {
+    type Key = NormalBorderKey;
+    type StoreData = NormalBorderTemplate;
+    type InternData = ();
+    const PROFILE_COUNTER: usize = crate::profiler::INTERNED_NORMAL_BORDERS;
+}
+
+impl InternablePrimitive for NormalBorderPrim {
+    /// Wraps this border description and the layout info into an interning key.
+    fn into_key(self, info: &LayoutPrimitiveInfo) -> NormalBorderKey {
+        NormalBorderKey::new(info, self)
+    }
+
+    /// Creates the primitive instance kind for a normal border. The range of
+    /// render task ids starts out empty; the key, primitive store and offset
+    /// are ignored.
+    fn make_instance_kind(
+        _: NormalBorderKey,
+        data_handle: NormalBorderDataHandle,
+        _: &mut PrimitiveStore,
+        _: LayoutVector2D,
+    ) -> PrimitiveInstanceKind {
+        let render_task_ids = storage::Range::empty();
+        PrimitiveInstanceKind::NormalBorder { data_handle, render_task_ids }
+    }
+}
+
+impl CreateShadow for NormalBorderPrim {
+    /// Produces the shadow version of this border: identical widths, with the
+    /// border recolored to the shadow's color. The remaining two arguments
+    /// are unused.
+    fn create_shadow(&self, shadow: &Shadow, _: bool, _: RasterSpace) -> Self {
+        Self {
+            border: self.border.with_color(shadow.color.into()),
+            widths: self.widths,
+        }
+    }
+}
+
+impl IsVisible for NormalBorderPrim {
+    /// Normal borders are unconditionally reported as visible.
+    fn is_visible(&self) -> bool {
+        true
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// Hashable description of an image border, used as the payload of its
+/// interning key.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Clone, Eq, MallocSizeOf, PartialEq, Hash)]
+pub struct ImageBorder {
+    // The image request issued to the resource cache for the border image.
+    #[ignore_malloc_size_of = "Arc"]
+    pub request: ImageRequest,
+    // Nine-patch description from which the brush segments are created.
+    pub nine_patch: NinePatchDescriptor,
+}
+
+/// Interning key for image border primitives.
+pub type ImageBorderKey = PrimKey<ImageBorder>;
+
+impl ImageBorderKey {
+    /// Creates the interning key for an image border from its layout info
+    /// (converted into the common key data) and its border description.
+    pub fn new(info: &LayoutPrimitiveInfo, image_border: ImageBorder) -> Self {
+        Self {
+            common: info.into(),
+            kind: image_border,
+        }
+    }
+}
+
+// Empty impl: no `InternDebug` item is overridden for this key type.
+impl intern::InternDebug for ImageBorderKey {}
+
+
+/// Template payload stored for an interned image border.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+pub struct ImageBorderData {
+    // The image request issued to the resource cache for the border image.
+    #[ignore_malloc_size_of = "Arc"]
+    pub request: ImageRequest,
+    // Brush segments created from the nine-patch descriptor.
+    pub brush_segments: Vec<BrushSegment>,
+    // Render task providing the source image; `None` until `update` has run.
+    pub src_color: Option<RenderTaskId>,
+    // Frame in which `update` last requested the image; starts out invalid.
+    pub frame_id: FrameId,
+    // Opacity of the image as determined by the last `update`.
+    pub is_opaque: bool,
+}
+
+impl ImageBorderData {
+    /// Updates the GPU cache entry and per-frame image state for this
+    /// primitive template.
+    ///
+    /// This may be called multiple times per frame, by each primitive
+    /// reference that refers to this interned template:
+    /// * GPU blocks are only written when the GPU cache hands out a request
+    ///   for the handle (first use or eviction).
+    /// * The image request, the source render task and the opacity flag are
+    ///   refreshed at most once per frame, guarded by `frame_id`.
+    ///
+    /// The template opacity is then set from `is_opaque`.
+    pub fn update(
+        &mut self,
+        common: &mut PrimTemplateCommonData,
+        frame_state: &mut FrameBuildingState,
+    ) {
+        if let Some(ref mut request) = frame_state.gpu_cache.request(&mut common.gpu_cache_handle) {
+            self.write_prim_gpu_blocks(request, common.prim_rect.size());
+            self.write_segment_gpu_blocks(request);
+        }
+
+        let frame_id = frame_state.rg_builder.frame_id();
+        if self.frame_id != frame_id {
+            self.frame_id = frame_id;
+
+            let size = frame_state.resource_cache.request_image(
+                self.request,
+                frame_state.gpu_cache,
+            );
+
+            let task_id = frame_state.rg_builder.add().init(
+                RenderTask::new_image(size, self.request)
+            );
+
+            self.src_color = Some(task_id);
+
+            let image_properties = frame_state
+                .resource_cache
+                .get_image_properties(self.request.key);
+
+            // When no properties are available the image is treated as opaque.
+            self.is_opaque = image_properties
+                .map(|properties| properties.descriptor.is_opaque())
+                .unwrap_or(true);
+        }
+
+        common.opacity = PrimitiveOpacity { is_opaque: self.is_opaque };
+    }
+
+    /// Writes the primitive header: two white colors followed by a block
+    /// holding the primitive size. The size is taken by value, matching the
+    /// equivalent helper on `NormalBorderData`.
+    fn write_prim_gpu_blocks(
+        &self,
+        request: &mut GpuDataRequest,
+        prim_size: LayoutSize,
+    ) {
+        // Border primitives currently used for
+        // image borders, and run through the
+        // normal brush_image shader.
+        request.push(PremultipliedColorF::WHITE);
+        request.push(PremultipliedColorF::WHITE);
+        request.push([
+            prim_size.width,
+            prim_size.height,
+            0.0,
+            0.0,
+        ]);
+    }
+
+    /// Writes one GPU segment entry per brush segment, in order.
+    fn write_segment_gpu_blocks(
+        &self,
+        request: &mut GpuDataRequest,
+    ) {
+        for segment in &self.brush_segments {
+            // has to match VECS_PER_SEGMENT
+            request.write_segment(
+                segment.local_rect,
+                segment.extra_data,
+            );
+        }
+    }
+}
+
+/// Stored template (common data plus payload) for image borders.
+pub type ImageBorderTemplate = PrimTemplate<ImageBorderData>;
+
+impl From<ImageBorderKey> for ImageBorderTemplate {
+    /// Builds the stored template from an interning key. The brush segments
+    /// are created from the nine-patch descriptor for the primitive's size;
+    /// the per-frame state starts out empty (no source task, invalid frame id,
+    /// not opaque).
+    fn from(key: ImageBorderKey) -> Self {
+        let common = PrimTemplateCommonData::with_key_common(key.common);
+        let ImageBorder { request, nine_patch } = key.kind;
+        let brush_segments = nine_patch.create_segments(common.prim_rect.size());
+
+        let kind = ImageBorderData {
+            request,
+            brush_segments,
+            src_color: None,
+            frame_id: FrameId::INVALID,
+            is_opaque: false,
+        };
+
+        Self { common, kind }
+    }
+}
+
+/// Intern handle referring to an interned `ImageBorder`.
+pub type ImageBorderDataHandle = intern::Handle<ImageBorder>;
+
+/// Interning configuration for image borders: key type, stored template
+/// type, no extra per-intern data, and the profiler counter constant used.
+impl intern::Internable for ImageBorder {
+    type Key = ImageBorderKey;
+    type StoreData = ImageBorderTemplate;
+    type InternData = ();
+    const PROFILE_COUNTER: usize = crate::profiler::INTERNED_IMAGE_BORDERS;
+}
+
+impl InternablePrimitive for ImageBorder {
+    /// Wraps this border description and the layout info into an interning key.
+    fn into_key(self, info: &LayoutPrimitiveInfo) -> ImageBorderKey {
+        ImageBorderKey::new(info, self)
+    }
+
+    /// Creates the primitive instance kind for an image border. Only the data
+    /// handle is used; the key, primitive store and offset are ignored.
+    fn make_instance_kind(
+        _: ImageBorderKey,
+        data_handle: ImageBorderDataHandle,
+        _: &mut PrimitiveStore,
+        _: LayoutVector2D,
+    ) -> PrimitiveInstanceKind {
+        PrimitiveInstanceKind::ImageBorder { data_handle }
+    }
+}
+
+impl IsVisible for ImageBorder {
+    /// Image borders are unconditionally reported as visible.
+    fn is_visible(&self) -> bool {
+        true
+    }
+}
+
+#[test]
+#[cfg(target_pointer_width = "64")]
+fn test_struct_sizes() {
+    use std::mem::size_of;
+    // These structure sizes are critical for performance on a number of talos
+    // stress tests. A failure here on CI means one of two things:
+    // (a) A structure became smaller than it currently is. Great work! Update
+    //     the test expectations and move on.
+    // (b) A structure became larger. This is not necessarily a problem, but
+    //     should only be done with care, and after checking whether talos
+    //     performance regresses badly.
+    assert_eq!(size_of::<NormalBorderPrim>(), 84, "NormalBorderPrim size changed");
+    assert_eq!(size_of::<NormalBorderTemplate>(), 216, "NormalBorderTemplate size changed");
+    assert_eq!(size_of::<NormalBorderKey>(), 104, "NormalBorderKey size changed");
+    assert_eq!(size_of::<ImageBorder>(), 84, "ImageBorder size changed");
+    assert_eq!(size_of::<ImageBorderTemplate>(), 104, "ImageBorderTemplate size changed");
+    assert_eq!(size_of::<ImageBorderKey>(), 104, "ImageBorderKey size changed");
+}
diff --git a/gfx/wr/webrender/src/prim_store/gradient/conic.rs b/gfx/wr/webrender/src/prim_store/gradient/conic.rs
new file mode 100644
index 0000000000..d9c3f5d350
--- /dev/null
+++ b/gfx/wr/webrender/src/prim_store/gradient/conic.rs
@@ -0,0 +1,399 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! Conic gradients
+//!
+//! Specification: https://drafts.csswg.org/css-images-4/#conic-gradients
+//!
+//! Conic gradients are rendered via cached render tasks and composited with the image brush.
+
+use euclid::vec2;
+use api::{ExtendMode, GradientStop, PremultipliedColorF};
+use api::units::*;
+use crate::scene_building::IsVisible;
+use crate::frame_builder::FrameBuildingState;
+use crate::intern::{Internable, InternDebug, Handle as InternHandle};
+use crate::internal_types::LayoutPrimitiveInfo;
+use crate::prim_store::{BrushSegment, GradientTileRange};
+use crate::prim_store::{PrimitiveInstanceKind, PrimitiveOpacity, FloatKey};
+use crate::prim_store::{PrimKeyCommonData, PrimTemplateCommonData, PrimitiveStore};
+use crate::prim_store::{NinePatchDescriptor, PointKey, SizeKey, InternablePrimitive};
+use crate::render_task::{RenderTask, RenderTaskKind};
+use crate::render_task_graph::RenderTaskId;
+use crate::render_task_cache::{RenderTaskCacheKeyKind, RenderTaskCacheKey, RenderTaskParent};
+use crate::renderer::GpuBufferAddress;
+use crate::picture::{SurfaceIndex};
+
+use std::{hash, ops::{Deref, DerefMut}};
+use super::{stops_and_min_alpha, GradientStopKey, GradientGpuBlockBuilder};
+
+/// Hashable conic gradient parameters (angle and offset range), for use during prim interning.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Clone, MallocSizeOf, PartialEq)]
+pub struct ConicGradientParams {
+    pub angle: f32, // in radians
+    pub start_offset: f32, // lower bound of the offset range spanned by the stops
+    pub end_offset: f32, // upper bound of the offset range spanned by the stops
+}
+
+impl Eq for ConicGradientParams {} // `f32` fields rule out `#[derive(Eq)]`; asserted by hand so the params can sit in an `Eq + Hash` key.
+
+impl hash::Hash for ConicGradientParams {
+    fn hash<H: hash::Hasher>(&self, state: &mut H) {
+        // Hash the bit patterns; `+ 0.0` folds -0.0 into +0.0 so `==`-equal values hash alike.
+        [self.angle, self.start_offset, self.end_offset].iter()
+            .for_each(|&value| (value + 0.0).to_bits().hash(state));
+    }
+}
+
+/// Identifying key for a conic gradient.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Clone, Eq, PartialEq, Hash, MallocSizeOf)]
+pub struct ConicGradientKey {
+    pub common: PrimKeyCommonData, // built from the LayoutPrimitiveInfo passed to `new`
+    pub extend_mode: ExtendMode,
+    pub center: PointKey,
+    pub params: ConicGradientParams,
+    pub stretch_size: SizeKey,
+    pub stops: Vec<GradientStopKey>,
+    pub tile_spacing: SizeKey,
+    pub nine_patch: Option<Box<NinePatchDescriptor>>, // when set, the template is split into brush segments
+}
+
+impl ConicGradientKey {
+    pub fn new( // Pairs the common primitive info with the gradient's own fields, moved over unchanged.
+        info: &LayoutPrimitiveInfo,
+        conic_grad: ConicGradient,
+    ) -> Self {
+        ConicGradientKey {
+            common: info.into(),
+            extend_mode: conic_grad.extend_mode,
+            center: conic_grad.center,
+            params: conic_grad.params,
+            stretch_size: conic_grad.stretch_size,
+            stops: conic_grad.stops,
+            tile_spacing: conic_grad.tile_spacing,
+            nine_patch: conic_grad.nine_patch,
+        }
+    }
+}
+
+impl InternDebug for ConicGradientKey {} // Empty impl: relies entirely on the trait's provided defaults.
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+pub struct ConicGradientTemplate { // Interned store data built from a ConicGradientKey.
+    pub common: PrimTemplateCommonData,
+    pub extend_mode: ExtendMode,
+    pub center: DevicePoint,
+    pub params: ConicGradientParams,
+    pub task_size: DeviceIntSize, // stretch size after clamping to the maximum cached resolution
+    pub scale: DeviceVector2D, // per-axis unclamped/clamped size ratio; 1.0 when no clamping happened
+    pub stretch_size: LayoutSize, // never larger than the primitive rect
+    pub tile_spacing: LayoutSize,
+    pub brush_segments: Vec<BrushSegment>, // empty unless the key carried a nine-patch descriptor
+    pub stops_opacity: PrimitiveOpacity, // derived from the minimum alpha of the stops
+    pub stops: Vec<GradientStop>,
+    pub src_color: Option<RenderTaskId>, // None until `update` requests the gradient render task
+}
+
+impl Deref for ConicGradientTemplate { // Lets `common` fields be read directly on the template.
+    type Target = PrimTemplateCommonData;
+    fn deref(&self) -> &Self::Target {
+        &self.common
+    }
+}
+
+impl DerefMut for ConicGradientTemplate { // Lets `common` fields (e.g. `opacity`) be written directly.
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.common
+    }
+}
+
+impl From<ConicGradientKey> for ConicGradientTemplate {
+    fn from(item: ConicGradientKey) -> Self { // Precomputes segments, stops and the render task size.
+        let common = PrimTemplateCommonData::with_key_common(item.common);
+        let mut brush_segments = Vec::new();
+
+        if let Some(ref nine_patch) = item.nine_patch {
+            brush_segments = nine_patch.create_segments(common.prim_rect.size());
+        }
+
+        let (stops, min_alpha) = stops_and_min_alpha(&item.stops);
+
+        // Save opacity of the stops for use in
+        // selecting which pass this gradient
+        // should be drawn in.
+        let stops_opacity = PrimitiveOpacity::from_alpha(min_alpha);
+
+        let mut stretch_size: LayoutSize = item.stretch_size.into(); // clamped to the prim rect below
+        stretch_size.width = stretch_size.width.min(common.prim_rect.width());
+        stretch_size.height = stretch_size.height.min(common.prim_rect.height());
+
+        fn approx_eq(a: f32, b: f32) -> bool { (a - b).abs() < 0.01 }
+
+        // Attempt to detect some of the common configurations with hard gradient stops. Allow
+        // those a higher maximum resolution to avoid the worst cases of aliasing artifacts with
+        // large conic gradients. A better solution would be to go back to rendering very large
+        // conic gradients via a brush shader instead of caching all of them (unclear whether
+        // it is important enough to warrant the better solution).
+        let mut has_hard_stops = false;
+        let mut prev_stop = None;
+        let offset_range = item.params.end_offset - item.params.start_offset;
+        for stop in &stops {
+            if offset_range <= 0.0 { // empty or inverted range: skip hard-stop detection entirely
+                break;
+            }
+            if let Some(prev_offset) = prev_stop {
+                // Check whether two consecutive stops are very close (hard stops).
+                if stop.offset < prev_offset + 0.005 / offset_range {
+                    // a is the angle of the stop normalized into 0-1 space and repeating in the 0-0.25 range.
+                    // If close to 0.0 or 0.25 it means the stop is vertical or horizontal. For those, the lower
+                    // resolution isn't a big issue.
+                    let a = item.params.angle / (2.0 * std::f32::consts::PI)
+                        + item.params.start_offset
+                        + stop.offset / offset_range; // NOTE(review): possibly meant `* offset_range`; identical when the range is 1.0 — confirm
+                    let a = a.rem_euclid(0.25);
+
+                    if !approx_eq(a, 0.0) && !approx_eq(a, 0.25) {
+                        has_hard_stops = true;
+                        break;
+                    }
+                }
+            }
+            prev_stop = Some(stop.offset);
+        }
+
+        let max_size = if has_hard_stops { // maximum render task size per axis
+            2048.0
+        } else {
+            1024.0
+        };
+
+        // Avoid rendering enormous gradients. Conic gradients without hard stops are mostly made of
+        // soft transitions, so it is unlikely that rendering at a higher resolution than 1024 would
+        // produce noticeable differences, especially with 8 bits per channel.
+        let mut task_size: DeviceSize = stretch_size.cast_unit();
+        let mut scale = vec2(1.0, 1.0);
+        if task_size.width > max_size {
+            scale.x = task_size.width / max_size; // remember how much the task was shrunk
+            task_size.width = max_size;
+        }
+        if task_size.height > max_size {
+            scale.y = task_size.height / max_size;
+            task_size.height = max_size;
+        }
+
+        ConicGradientTemplate {
+            common,
+            center: DevicePoint::new(item.center.x, item.center.y),
+            extend_mode: item.extend_mode,
+            params: item.params,
+            stretch_size,
+            task_size: task_size.ceil().to_i32(),
+            scale,
+            tile_spacing: item.tile_spacing.into(),
+            brush_segments,
+            stops_opacity,
+            stops,
+            src_color: None, // filled in by `update`
+        }
+    }
+}
+
+impl ConicGradientTemplate {
+    /// Update the GPU cache for a given primitive template. This may be called multiple
+    /// times per frame, by each primitive reference that refers to this interned
+    /// template. The initial request call to the GPU cache ensures that work is only
+    /// done if the cache entry is invalid (due to first use or eviction).
+    pub fn update(
+        &mut self,
+        frame_state: &mut FrameBuildingState,
+        parent_surface: SurfaceIndex, // the requested render task is parented to this surface
+    ) {
+        if let Some(mut request) =
+            frame_state.gpu_cache.request(&mut self.common.gpu_cache_handle) {
+            // write_prim_gpu_blocks
+            request.push(PremultipliedColorF::WHITE);
+            request.push(PremultipliedColorF::WHITE);
+            request.push([
+                self.stretch_size.width,
+                self.stretch_size.height,
+                0.0,
+                0.0,
+            ]);
+
+            // write_segment_gpu_blocks
+            for segment in &self.brush_segments {
+                // has to match VECS_PER_SEGMENT
+                request.write_segment(
+                    segment.local_rect,
+                    segment.extra_data,
+                );
+            }
+        }
+
+        let cache_key = ConicGradientCacheKey { // mirrors the inputs handed to the gradient task below
+            size: self.task_size,
+            center: PointKey { x: self.center.x, y: self.center.y },
+            scale: PointKey { x: self.scale.x, y: self.scale.y },
+            start_offset: FloatKey(self.params.start_offset),
+            end_offset: FloatKey(self.params.end_offset),
+            angle: FloatKey(self.params.angle),
+            extend_mode: self.extend_mode,
+            stops: self.stops.iter().map(|stop| (*stop).into()).collect(),
+        };
+
+        let task_id = frame_state.resource_cache.request_render_task(
+            RenderTaskCacheKey {
+                size: self.task_size,
+                kind: RenderTaskCacheKeyKind::ConicGradient(cache_key),
+            },
+            frame_state.gpu_cache,
+            frame_state.frame_gpu_data,
+            frame_state.rg_builder,
+            None,
+            false,
+            RenderTaskParent::Surface(parent_surface),
+            &mut frame_state.surface_builder,
+            |rg_builder, gpu_buffer_builder| { // builds the stops and the task when this closure is invoked
+                let stops = GradientGpuBlockBuilder::build(
+                    false,
+                    gpu_buffer_builder,
+                    &self.stops,
+                );
+
+                rg_builder.add().init(RenderTask::new_dynamic(
+                    self.task_size,
+                    RenderTaskKind::ConicGradient(ConicGradientTask {
+                        extend_mode: self.extend_mode,
+                        scale: self.scale,
+                        center: self.center,
+                        params: self.params.clone(),
+                        stops,
+                    }),
+                ))
+            }
+        );
+
+        self.src_color = Some(task_id); // remember which render task holds the gradient
+
+        // Tile spacing is always handled by decomposing into separate draw calls so the
+        // primitive opacity is equivalent to stops opacity. This might change to being
+        // set to non-opaque in the presence of tile spacing if/when tile spacing is handled
+        // in the same way as with the image primitive.
+        self.opacity = self.stops_opacity;
+    }
+}
+
+pub type ConicGradientDataHandle = InternHandle<ConicGradient>; // Intern handle specialised for `ConicGradient`.
+
+#[derive(Debug, MallocSizeOf)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ConicGradient { // Internable conic gradient; `into_key` moves every field into a ConicGradientKey.
+    pub extend_mode: ExtendMode,
+    pub center: PointKey,
+    pub params: ConicGradientParams,
+    pub stretch_size: SizeKey,
+    pub stops: Vec<GradientStopKey>,
+    pub tile_spacing: SizeKey,
+    pub nine_patch: Option<Box<NinePatchDescriptor>>,
+}
+
+impl Internable for ConicGradient { // Associates the key, store data and profiler counter used for interning.
+    type Key = ConicGradientKey;
+    type StoreData = ConicGradientTemplate; // built from the key via `From<ConicGradientKey>`
+    type InternData = (); // unit type: nothing is carried here
+    const PROFILE_COUNTER: usize = crate::profiler::INTERNED_CONIC_GRADIENTS;
+}
+
+impl InternablePrimitive for ConicGradient {
+    fn into_key( // Consumes the gradient and combines it with `info` into its interning key.
+        self,
+        info: &LayoutPrimitiveInfo,
+    ) -> ConicGradientKey {
+        ConicGradientKey::new(info, self)
+    }
+
+    fn make_instance_kind( // Only `data_handle` is used; the other arguments are ignored.
+        _key: ConicGradientKey,
+        data_handle: ConicGradientDataHandle,
+        _prim_store: &mut PrimitiveStore,
+        _reference_frame_relative_offset: LayoutVector2D,
+    ) -> PrimitiveInstanceKind {
+        PrimitiveInstanceKind::ConicGradient {
+            data_handle,
+            visible_tiles_range: GradientTileRange::empty(), // instances start with an empty tile range
+        }
+    }
+}
+
+impl IsVisible for ConicGradient {
+    fn is_visible(&self) -> bool { // Conic gradients are unconditionally reported as visible.
+        true
+    }
+}
+
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ConicGradientTask { // Payload of `RenderTaskKind::ConicGradient`.
+    pub extend_mode: ExtendMode,
+    pub center: DevicePoint,
+    pub scale: DeviceVector2D,
+    pub params: ConicGradientParams,
+    pub stops: GpuBufferAddress, // address returned by `GradientGpuBlockBuilder::build`
+}
+
+impl ConicGradientTask {
+    pub fn to_instance(&self, target_rect: &DeviceIntRect) -> ConicGradientInstance { // Flattens the task into its shader instance for `target_rect`.
+        ConicGradientInstance {
+            task_rect: target_rect.to_f32(),
+            center: self.center,
+            scale: self.scale,
+            start_offset: self.params.start_offset,
+            end_offset: self.params.end_offset,
+            angle: self.params.angle,
+            extend_mode: self.extend_mode as i32, // enum discriminant passed as an integer
+            gradient_stops_address: self.stops.as_int(),
+        }
+    }
+}
+
+/// The per-instance shader input of a conic gradient render task.
+/// Must match the conic gradient instance description in renderer/vertex.rs
+/// (NOTE(review): this comment previously named RADIAL_GRADIENT; confirm the descriptor name).
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[repr(C)]
+#[derive(Clone, Debug)]
+pub struct ConicGradientInstance {
+    pub task_rect: DeviceRect,
+    pub center: DevicePoint,
+    pub scale: DeviceVector2D,
+    pub start_offset: f32,
+    pub end_offset: f32,
+    pub angle: f32,
+    pub extend_mode: i32,
+    pub gradient_stops_address: i32,
+}
+
+#[derive(Clone, Debug, Hash, PartialEq, Eq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ConicGradientCacheKey { // Hashable key identifying a cached conic gradient render task.
+    pub size: DeviceIntSize,
+    pub center: PointKey,
+    pub scale: PointKey,
+    pub start_offset: FloatKey,
+    pub end_offset: FloatKey,
+    pub angle: FloatKey,
+    pub extend_mode: ExtendMode,
+    pub stops: Vec<GradientStopKey>,
+}
+
diff --git a/gfx/wr/webrender/src/prim_store/gradient/linear.rs b/gfx/wr/webrender/src/prim_store/gradient/linear.rs
new file mode 100644
index 0000000000..85da4b670a
--- /dev/null
+++ b/gfx/wr/webrender/src/prim_store/gradient/linear.rs
@@ -0,0 +1,750 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! Linear gradients
+//!
+//! Specification: https://drafts.csswg.org/css-images-4/#linear-gradients
+//!
+//! Linear gradients are rendered via cached render tasks and composited with the image brush.
+
+use euclid::approxeq::ApproxEq;
+use euclid::{point2, vec2, size2};
+use api::{ExtendMode, GradientStop, LineOrientation, PremultipliedColorF, ColorF, ColorU};
+use api::units::*;
+use crate::scene_building::IsVisible;
+use crate::frame_builder::FrameBuildingState;
+use crate::intern::{Internable, InternDebug, Handle as InternHandle};
+use crate::internal_types::LayoutPrimitiveInfo;
+use crate::image_tiling::simplify_repeated_primitive;
+use crate::prim_store::{BrushSegment, GradientTileRange};
+use crate::prim_store::{PrimitiveInstanceKind, PrimitiveOpacity};
+use crate::prim_store::{PrimKeyCommonData, PrimTemplateCommonData, PrimitiveStore};
+use crate::prim_store::{NinePatchDescriptor, PointKey, SizeKey, InternablePrimitive};
+use crate::render_task::{RenderTask, RenderTaskKind};
+use crate::render_task_graph::RenderTaskId;
+use crate::render_task_cache::{RenderTaskCacheKeyKind, RenderTaskCacheKey, RenderTaskParent};
+use crate::renderer::GpuBufferAddress;
+use crate::segment::EdgeAaSegmentMask;
+use crate::picture::{SurfaceIndex};
+use crate::util::pack_as_float;
+use super::{stops_and_min_alpha, GradientStopKey, GradientGpuBlockBuilder, apply_gradient_local_clip};
+use std::ops::{Deref, DerefMut};
+use std::mem::swap;
+
+pub const MAX_CACHED_SIZE: f32 = 1024.0; // Per-axis upper bound on the size of a linear gradient render task.
+
+/// Identifying key for a linear gradient.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Clone, Eq, PartialEq, Hash, MallocSizeOf)]
+pub struct LinearGradientKey {
+    pub common: PrimKeyCommonData, // built from the LayoutPrimitiveInfo passed to `new`
+    pub extend_mode: ExtendMode,
+    pub start_point: PointKey,
+    pub end_point: PointKey,
+    pub stretch_size: SizeKey,
+    pub tile_spacing: SizeKey,
+    pub stops: Vec<GradientStopKey>,
+    pub reverse_stops: bool,
+    pub cached: bool, // true: image brush fed by a cached render task; false: gradient brush
+    pub nine_patch: Option<Box<NinePatchDescriptor>>, // when set, the template is split into brush segments
+    pub edge_aa_mask: EdgeAaSegmentMask, // copied onto the template's common data
+}
+
+impl LinearGradientKey {
+    pub fn new( // Pairs the common primitive info with the gradient's own fields, moved over unchanged.
+        info: &LayoutPrimitiveInfo,
+        linear_grad: LinearGradient,
+    ) -> Self {
+        LinearGradientKey {
+            common: info.into(),
+            extend_mode: linear_grad.extend_mode,
+            start_point: linear_grad.start_point,
+            end_point: linear_grad.end_point,
+            stretch_size: linear_grad.stretch_size,
+            tile_spacing: linear_grad.tile_spacing,
+            stops: linear_grad.stops,
+            reverse_stops: linear_grad.reverse_stops,
+            cached: linear_grad.cached,
+            nine_patch: linear_grad.nine_patch,
+            edge_aa_mask: linear_grad.edge_aa_mask,
+        }
+    }
+}
+
+impl InternDebug for LinearGradientKey {} // Empty impl: relies entirely on the trait's provided defaults.
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, MallocSizeOf)]
+pub struct LinearGradientTemplate { // Interned store data built from a LinearGradientKey.
+    pub common: PrimTemplateCommonData,
+    pub extend_mode: ExtendMode,
+    pub start_point: DevicePoint,
+    pub end_point: DevicePoint,
+    pub task_size: DeviceIntSize, // render task size after the axis-aligned and MAX_CACHED_SIZE reductions
+    pub scale: DeviceVector2D, // per-axis unclamped/clamped size ratio; 1.0 when no clamping happened
+    pub stretch_size: LayoutSize,
+    pub tile_spacing: LayoutSize,
+    pub stops_opacity: PrimitiveOpacity, // derived from the minimum alpha of the stops
+    pub stops: Vec<GradientStop>,
+    pub brush_segments: Vec<BrushSegment>, // empty unless the key carried a nine-patch descriptor
+    pub reverse_stops: bool,
+    pub is_fast_path: bool, // true when the two-stop fast-path render task can be used
+    pub cached: bool, // true: image brush fed by a cached render task; false: gradient brush
+    pub src_color: Option<RenderTaskId>, // initialised to None by `From<LinearGradientKey>`
+}
+
+impl Deref for LinearGradientTemplate { // Lets `common` fields be read directly on the template.
+    type Target = PrimTemplateCommonData;
+    fn deref(&self) -> &Self::Target {
+        &self.common
+    }
+}
+
+impl DerefMut for LinearGradientTemplate { // Lets `common` fields (e.g. `opacity`) be written directly.
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.common
+    }
+}
+
+/// Perform a few optimizations to the gradient that are relevant to scene building.
+///
+/// Returns true if the gradient was decomposed into fast-path primitives, indicating
+/// that we shouldn't emit a regular gradient primitive after this returns.
+pub fn optimize_linear_gradient(
+    prim_rect: &mut LayoutRect,
+    tile_size: &mut LayoutSize,
+    mut tile_spacing: LayoutSize,
+    clip_rect: &LayoutRect,
+    start: &mut LayoutPoint,
+    end: &mut LayoutPoint,
+    extend_mode: ExtendMode,
+    stops: &mut [GradientStopKey],
+    // Callback called for each fast-path segment (rect, start, end, stops, edge AA mask).
+    callback: &mut dyn FnMut(&LayoutRect, LayoutPoint, LayoutPoint, &[GradientStopKey], EdgeAaSegmentMask)
+) -> bool {
+    // First sanitize the gradient parameters. See if we can remove repetitions,
+    // tighten the primitive bounds, etc.
+
+    simplify_repeated_primitive(&tile_size, &mut tile_spacing, prim_rect);
+
+    let vertical = start.x.approx_eq(&end.x);
+    let horizontal = start.y.approx_eq(&end.y);
+
+    let mut horizontally_tiled = prim_rect.width() > tile_size.width;
+    let mut vertically_tiled = prim_rect.height() > tile_size.height;
+
+    // Check whether the tiling is equivalent to stretching on either axis.
+    // Stretching the gradient is more efficient than repeating it.
+    if vertically_tiled && horizontal && tile_spacing.height == 0.0 {
+        tile_size.height = prim_rect.height();
+        vertically_tiled = false;
+    }
+
+    if horizontally_tiled && vertical && tile_spacing.width == 0.0 {
+        tile_size.width = prim_rect.width();
+        horizontally_tiled = false;
+    }
+
+    let offset = apply_gradient_local_clip(
+        prim_rect,
+        &tile_size,
+        &tile_spacing,
+        &clip_rect
+    );
+
+    // The size of gradient render tasks depends on the tile_size. No need to generate
+    // large stretch sizes that will be clipped to the bounds of the primitive.
+    tile_size.width = tile_size.width.min(prim_rect.width());
+    tile_size.height = tile_size.height.min(prim_rect.height());
+
+    *start += offset;
+    *end += offset;
+
+    // Next, in the case of axis-aligned gradients, see if it is worth
+    // decomposing the gradient into multiple gradients with only two
+    // gradient stops per segment to get a faster shader.
+
+    if extend_mode != ExtendMode::Clamp || stops.is_empty() {
+        return false;
+    }
+
+    if !vertical && !horizontal {
+        return false;
+    }
+
+    if vertical && horizontal { // start and end (approximately) coincide: no usable axis
+        return false;
+    }
+
+    if !tile_spacing.is_empty() || vertically_tiled || horizontally_tiled {
+        return false;
+    }
+
+    // If the gradient is small, no need to bother with decomposing it.
+    if (horizontal && tile_size.width < 256.0)
+        || (vertical && tile_size.height < 256.0) {
+
+        return false;
+    }
+
+    // Flip x and y if need be so that we only deal with the horizontal case.
+
+    // From now on don't return false. We are going to modify the caller's
+    // variables and not bother restoring them. If the control flow changes,
+    // make sure to restore &mut parameters to sensible values before
+    // returning false.
+
+    let adjust_rect = &mut |rect: &mut LayoutRect| {
+        if vertical {
+            swap(&mut rect.min.x, &mut rect.min.y);
+            swap(&mut rect.max.x, &mut rect.max.y);
+        }
+    };
+
+    let adjust_size = &mut |size: &mut LayoutSize| {
+        if vertical { swap(&mut size.width, &mut size.height); }
+    };
+
+    let adjust_point = &mut |p: &mut LayoutPoint| {
+        if vertical { swap(&mut p.x, &mut p.y); }
+    };
+
+    let clip_rect = match clip_rect.intersection(prim_rect) {
+        Some(clip) => clip,
+        None => {
+            return false; // still safe: none of the x/y flips below has been applied yet
+        }
+    };
+
+    adjust_rect(prim_rect);
+    adjust_point(start);
+    adjust_point(end);
+    adjust_size(tile_size);
+
+    let length = (end.x - start.x).abs();
+
+    // Decompose the gradient into simple segments. This lets us:
+    // - separate opaque from semi-transparent segments,
+    // - compress long segments into small render tasks,
+    // - make sure hard stops stay so even if the primitive is large.
+
+    let reverse_stops = start.x > end.x;
+
+    // Handle reverse stops so we can assume stops are arranged in increasing x.
+    if reverse_stops {
+        stops.reverse();
+        swap(start, end);
+    }
+
+    // Use fake gradient stops to emulate the potential constant color sections
+    // before and after the gradient endpoints.
+    let mut prev = *stops.first().unwrap(); // cannot panic: empty `stops` returned false above
+    let mut last = *stops.last().unwrap();
+
+    // Set the offsets of the fake stops to position them at the edges of the primitive.
+    prev.offset = -start.x / length;
+    last.offset = (tile_size.width - start.x) / length;
+    if reverse_stops {
+        prev.offset = 1.0 - prev.offset;
+        last.offset = 1.0 - last.offset;
+    }
+
+    let (side_edges, first_edge, last_edge) = if vertical {
+        (
+            EdgeAaSegmentMask::LEFT | EdgeAaSegmentMask::RIGHT,
+            EdgeAaSegmentMask::TOP,
+            EdgeAaSegmentMask::BOTTOM
+        )
+    } else {
+        (
+            EdgeAaSegmentMask::TOP | EdgeAaSegmentMask::BOTTOM,
+            EdgeAaSegmentMask::LEFT,
+            EdgeAaSegmentMask::RIGHT
+        )
+    };
+
+    let mut is_first = true;
+    let last_offset = last.offset;
+    for stop in stops.iter().chain((&[last]).iter()) {
+        let prev_stop = prev;
+        prev = *stop;
+
+        if prev_stop.color.a == 0 && stop.color.a == 0 { // fully transparent at both ends: nothing to emit
+            continue;
+        }
+
+
+        let prev_offset = if reverse_stops { 1.0 - prev_stop.offset } else { prev_stop.offset };
+        let offset = if reverse_stops { 1.0 - stop.offset } else { stop.offset };
+
+        // In layout space, relative to the primitive.
+        let segment_start = start.x + prev_offset * length;
+        let segment_end = start.x + offset * length;
+        let segment_length = segment_end - segment_start;
+
+        if segment_length <= 0.0 { // empty or inverted segment: skip it
+            continue;
+        }
+
+        let mut segment_rect = *prim_rect;
+        segment_rect.min.x += segment_start;
+        segment_rect.max.x = segment_rect.min.x + segment_length;
+
+        let mut start = point2(0.0, 0.0); // per-segment endpoints; these shadow the function parameters
+        let mut end = point2(segment_length, 0.0);
+
+        adjust_point(&mut start); // flip back to the caller's orientation
+        adjust_point(&mut end);
+        adjust_rect(&mut segment_rect);
+
+        let origin_before_clip = segment_rect.min;
+        segment_rect = match segment_rect.intersection(&clip_rect) {
+            Some(rect) => rect,
+            None => {
+                continue;
+            }
+        };
+        let offset = segment_rect.min - origin_before_clip;
+
+        // Account for the clipping since start and end are relative to the origin.
+        start -= offset;
+        end -= offset;
+
+        let mut edge_flags = side_edges;
+        if is_first {
+            edge_flags |= first_edge;
+            is_first = false;
+        }
+        if stop.offset == last_offset {
+            edge_flags |= last_edge;
+        }
+
+        callback(
+            &segment_rect,
+            start,
+            end,
+            &[
+                GradientStopKey { offset: 0.0, .. prev_stop },
+                GradientStopKey { offset: 1.0, .. *stop },
+            ],
+            edge_flags,
+        );
+    }
+
+    true
+}
+
+impl From<LinearGradientKey> for LinearGradientTemplate {
+    fn from(item: LinearGradientKey) -> Self { // Precomputes stops, segments, fast-path eligibility and task size.
+
+        let mut common = PrimTemplateCommonData::with_key_common(item.common);
+        common.edge_aa_mask = item.edge_aa_mask;
+
+        let (mut stops, min_alpha) = stops_and_min_alpha(&item.stops);
+
+        let mut brush_segments = Vec::new();
+
+        if let Some(ref nine_patch) = item.nine_patch {
+            brush_segments = nine_patch.create_segments(common.prim_rect.size());
+        }
+
+        // Save opacity of the stops for use in
+        // selecting which pass this gradient
+        // should be drawn in.
+        let stops_opacity = PrimitiveOpacity::from_alpha(min_alpha);
+
+        let start_point = DevicePoint::new(item.start_point.x, item.start_point.y);
+        let end_point = DevicePoint::new(item.end_point.x, item.end_point.y);
+        let tile_spacing: LayoutSize = item.tile_spacing.into();
+        let stretch_size: LayoutSize = item.stretch_size.into();
+        let mut task_size: DeviceSize = stretch_size.cast_unit();
+
+        let horizontal = start_point.y.approx_eq(&end_point.y);
+        let vertical = start_point.x.approx_eq(&end_point.x);
+
+        if horizontal {
+            // Completely horizontal, we can stretch the gradient vertically.
+            task_size.height = 1.0;
+        }
+
+        if vertical {
+            // Completely vertical, we can stretch the gradient horizontally.
+            task_size.width = 1.0;
+        }
+
+        // See if we can render the gradient using a special fast-path shader.
+        // The fast path only works with two gradient stops.
+        let mut is_fast_path = false;
+        if item.cached && stops.len() == 2 && brush_segments.is_empty() {
+            if horizontal
+                && stretch_size.width >= common.prim_rect.width()
+                && start_point.x.approx_eq(&0.0)
+                && end_point.x.approx_eq(&stretch_size.width) {
+                is_fast_path = true;
+                task_size.width = task_size.width.min(256.0);
+            }
+            if vertical
+                && stretch_size.height >= common.prim_rect.height()
+                && start_point.y.approx_eq(&0.0)
+                && end_point.y.approx_eq(&stretch_size.height) {
+                is_fast_path = true;
+                task_size.height = task_size.height.min(256.0);
+            }
+
+            if stops[0].color == stops[1].color { // constant color: a 1x1 task is enough
+                is_fast_path = true;
+                task_size = size2(1.0, 1.0);
+            }
+
+            if is_fast_path && item.reverse_stops {
+                // The fast path doesn't use the gradient gpu blocks builder so handle
+                // reversed stops here.
+                stops.swap(0, 1);
+            }
+        }
+
+        // Avoid rendering enormous gradients. Linear gradients are mostly made of soft transitions,
+        // so it is unlikely that rendering at a higher resolution than 1024 would produce noticeable
+        // differences, especially with 8 bits per channel.
+
+        let mut scale = vec2(1.0, 1.0);
+
+        if task_size.width > MAX_CACHED_SIZE {
+            scale.x = task_size.width / MAX_CACHED_SIZE; // remember how much the task was shrunk
+            task_size.width = MAX_CACHED_SIZE;
+        }
+
+        if task_size.height > MAX_CACHED_SIZE {
+            scale.y = task_size.height / MAX_CACHED_SIZE;
+            task_size.height = MAX_CACHED_SIZE;
+        }
+
+        LinearGradientTemplate {
+            common,
+            extend_mode: item.extend_mode,
+            start_point,
+            end_point,
+            task_size: task_size.ceil().to_i32(),
+            scale,
+            stretch_size,
+            tile_spacing,
+            stops_opacity,
+            stops,
+            brush_segments,
+            reverse_stops: item.reverse_stops,
+            is_fast_path,
+            cached: item.cached,
+            src_color: None,
+        }
+    }
+}
+
+impl LinearGradientTemplate {
+    /// Update the GPU cache for a given primitive template. This may be called multiple
+    /// times per frame, by each primitive reference that refers to this interned
+    /// template. The initial request call to the GPU cache ensures that work is only
+    /// done if the cache entry is invalid (due to first use or eviction).
+    ///
+    /// The method performs three steps:
+    /// 1. If the GPU cache request is granted, writes the primitive blocks (image
+    ///    brush layout when `cached`, gradient brush layout otherwise) followed by
+    ///    one segment entry per brush segment.
+    /// 2. Sets the primitive opacity from the stops opacity.
+    /// 3. For cached gradients only, requests a render task (fast path or general
+    ///    linear gradient) and stores the returned task id in `src_color`.
+    ///
+    /// * `frame_state` - frame building state providing the GPU cache, resource
+    ///   cache, render graph builder and surface builder used below.
+    /// * `parent_surface` - surface passed as `RenderTaskParent::Surface` to the
+    ///   render task request.
+    pub fn update(
+        &mut self,
+        frame_state: &mut FrameBuildingState,
+        parent_surface: SurfaceIndex,
+    ) {
+        if let Some(mut request) = frame_state.gpu_cache.request(
+            &mut self.common.gpu_cache_handle
+        ) {
+
+            // write_prim_gpu_blocks
+            if self.cached {
+                // We are using the image brush.
+                request.push(PremultipliedColorF::WHITE);
+                request.push(PremultipliedColorF::WHITE);
+                request.push([
+                    self.stretch_size.width,
+                    self.stretch_size.height,
+                    0.0,
+                    0.0,
+                ]);
+            } else {
+                // We are using the gradient brush.
+                request.push([
+                    self.start_point.x,
+                    self.start_point.y,
+                    self.end_point.x,
+                    self.end_point.y,
+                ]);
+                request.push([
+                    pack_as_float(self.extend_mode as u32),
+                    self.stretch_size.width,
+                    self.stretch_size.height,
+                    0.0,
+                ]);
+            }
+
+            // write_segment_gpu_blocks
+            for segment in &self.brush_segments {
+                // has to match VECS_PER_SEGMENT
+                request.write_segment(
+                    segment.local_rect,
+                    segment.extra_data,
+                );
+            }
+        }
+
+        // Tile spacing is always handled by decomposing into separate draw calls so the
+        // primitive opacity is equivalent to stops opacity. This might change to being
+        // set to non-opaque in the presence of tile spacing if/when tile spacing is handled
+        // in the same way as with the image primitive.
+        self.opacity = self.stops_opacity;
+
+        // Only cached gradients are rendered through a render task; the gradient
+        // brush path needs nothing beyond the GPU blocks written above.
+        if !self.cached {
+            return;
+        }
+
+        let task_id = if self.is_fast_path {
+            // The wider axis of the task decides along which axis the two colors
+            // are interpolated.
+            let orientation = if self.task_size.width > self.task_size.height {
+                LineOrientation::Horizontal
+            } else {
+                LineOrientation::Vertical
+            };
+
+            // NOTE(review): indexing assumes the fast path implies at least two
+            // stops; it panics otherwise. Confirm against where `is_fast_path` is set.
+            let gradient = FastLinearGradientTask {
+                color0: self.stops[0].color.into(),
+                color1: self.stops[1].color.into(),
+                orientation,
+            };
+
+            frame_state.resource_cache.request_render_task(
+                RenderTaskCacheKey {
+                    size: self.task_size,
+                    kind: RenderTaskCacheKeyKind::FastLinearGradient(gradient),
+                },
+                frame_state.gpu_cache,
+                frame_state.frame_gpu_data,
+                frame_state.rg_builder,
+                None,
+                false,
+                RenderTaskParent::Surface(parent_surface),
+                &mut frame_state.surface_builder,
+                |rg_builder, _| {
+                    rg_builder.add().init(RenderTask::new_dynamic(
+                        self.task_size,
+                        RenderTaskKind::FastLinearGradient(gradient),
+                    ))
+                }
+            )
+        } else {
+            let cache_key = LinearGradientCacheKey {
+                size: self.task_size,
+                start: PointKey { x: self.start_point.x, y: self.start_point.y },
+                end: PointKey { x: self.end_point.x, y: self.end_point.y },
+                scale: PointKey { x: self.scale.x, y: self.scale.y },
+                extend_mode: self.extend_mode,
+                stops: self.stops.iter().map(|stop| (*stop).into()).collect(),
+                reversed_stops: self.reverse_stops,
+            };
+
+            frame_state.resource_cache.request_render_task(
+                RenderTaskCacheKey {
+                    size: self.task_size,
+                    kind: RenderTaskCacheKeyKind::LinearGradient(cache_key),
+                },
+                frame_state.gpu_cache,
+                frame_state.frame_gpu_data,
+                frame_state.rg_builder,
+                None,
+                false,
+                RenderTaskParent::Surface(parent_surface),
+                &mut frame_state.surface_builder,
+                |rg_builder, gpu_buffer_builder| {
+                    // The stop table is only built when the closure runs, and its
+                    // address is used directly by the new task.
+                    let stops = GradientGpuBlockBuilder::build(
+                        self.reverse_stops,
+                        gpu_buffer_builder,
+                        &self.stops,
+                    );
+
+                    rg_builder.add().init(RenderTask::new_dynamic(
+                        self.task_size,
+                        RenderTaskKind::LinearGradient(LinearGradientTask {
+                            start: self.start_point,
+                            end: self.end_point,
+                            scale: self.scale,
+                            extend_mode: self.extend_mode,
+                            stops,
+                        }),
+                    ))
+                }
+            )
+        };
+
+        self.src_color = Some(task_id);
+    }
+}
+
+/// Intern handle type for `LinearGradient` primitives.
+pub type LinearGradientDataHandle = InternHandle<LinearGradient>;
+
+/// Description of a linear gradient primitive prior to interning. It is
+/// converted into a `LinearGradientKey` by `InternablePrimitive::into_key`.
+#[derive(Debug, MallocSizeOf)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct LinearGradient {
+    pub extend_mode: ExtendMode,
+    pub start_point: PointKey,
+    pub end_point: PointKey,
+    pub stretch_size: SizeKey,
+    pub tile_spacing: SizeKey,
+    pub stops: Vec<GradientStopKey>,
+    pub reverse_stops: bool,
+    pub nine_patch: Option<Box<NinePatchDescriptor>>,
+    // NOTE(review): presumably mirrored into the key's `cached` flag, which
+    // selects the primitive instance kind in `make_instance_kind` — confirm
+    // in `LinearGradientKey::new`.
+    pub cached: bool,
+    pub edge_aa_mask: EdgeAaSegmentMask,
+}
+
+// Interning configuration for linear gradients: the key type, the data stored
+// per interned item (the template), no extra intern data, and the profiler
+// counter associated with this primitive type.
+impl Internable for LinearGradient {
+    type Key = LinearGradientKey;
+    type StoreData = LinearGradientTemplate;
+    type InternData = ();
+    const PROFILE_COUNTER: usize = crate::profiler::INTERNED_LINEAR_GRADIENTS;
+}
+
+impl InternablePrimitive for LinearGradient {
+    /// Consume the gradient description and build its interning key from it
+    /// and the primitive info.
+    fn into_key(
+        self,
+        info: &LayoutPrimitiveInfo,
+    ) -> LinearGradientKey {
+        LinearGradientKey::new(info, self)
+    }
+
+    /// Build the primitive instance kind for an interned linear gradient.
+    /// Keys flagged as `cached` produce `CachedLinearGradient`, all others
+    /// produce `LinearGradient`; both start with an empty visible tile range.
+    fn make_instance_kind(
+        key: LinearGradientKey,
+        data_handle: LinearGradientDataHandle,
+        _prim_store: &mut PrimitiveStore,
+        _reference_frame_relative_offset: LayoutVector2D,
+    ) -> PrimitiveInstanceKind {
+        let visible_tiles_range = GradientTileRange::empty();
+
+        if !key.cached {
+            return PrimitiveInstanceKind::LinearGradient {
+                data_handle,
+                visible_tiles_range,
+            };
+        }
+
+        PrimitiveInstanceKind::CachedLinearGradient {
+            data_handle,
+            visible_tiles_range,
+        }
+    }
+}
+
+impl IsVisible for LinearGradient {
+    /// Linear gradients always report themselves as visible.
+    fn is_visible(&self) -> bool {
+        true
+    }
+}
+
+/// Per-primitive linear gradient data: a list of cached gradient segments and
+/// a range of visible tiles.
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+pub struct LinearGradientPrimitive {
+    pub cache_segments: Vec<CachedGradientSegment>,
+    pub visible_tiles_range: GradientTileRange,
+}
+
+/// Pairs a render task with a rectangle in layout space.
+// NOTE(review): presumably the task renders the gradient content shown in
+// `local_rect` — confirm at the use sites.
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+pub struct CachedGradientSegment {
+    pub render_task: RenderTaskId,
+    pub local_rect: LayoutRect,
+}
+
+
+/// Parameters of a fast-path linear gradient render task: two colors and the
+/// orientation selecting the interpolation axis. The type is hashable and is
+/// also used as the render task cache key (see `FastLinearGradientCacheKey`).
+#[derive(Copy, Clone, Debug, Hash, MallocSizeOf, PartialEq, Eq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct FastLinearGradientTask {
+    pub color0: ColorU,
+    pub color1: ColorU,
+    pub orientation: LineOrientation,
+}
+
+impl FastLinearGradientTask {
+    /// Build the per-instance shader input for this task, targeting
+    /// `target_rect`. Both colors are converted to premultiplied floats, and
+    /// the orientation is encoded as `0.0` (horizontal) or `1.0` (vertical).
+    pub fn to_instance(&self, target_rect: &DeviceIntRect) -> FastLinearGradientInstance {
+        let axis_select = match self.orientation {
+            LineOrientation::Horizontal => 0.0,
+            LineOrientation::Vertical => 1.0,
+        };
+        let color0 = ColorF::from(self.color0).premultiplied();
+        let color1 = ColorF::from(self.color1).premultiplied();
+        let task_rect = target_rect.to_f32();
+
+        FastLinearGradientInstance {
+            task_rect,
+            color0,
+            color1,
+            axis_select,
+        }
+    }
+}
+
+/// The fast-path task description doubles as its own cache key type.
+pub type FastLinearGradientCacheKey = FastLinearGradientTask;
+
+/// The per-instance shader input of a fast-path linear gradient render task.
+///
+/// Must match the FAST_LINEAR_GRADIENT instance description in renderer/vertex.rs.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[repr(C)]
+#[derive(Clone, Debug)]
+pub struct FastLinearGradientInstance {
+    /// Target rectangle of the task, converted to floating point.
+    pub task_rect: DeviceRect,
+    /// First gradient color, premultiplied.
+    pub color0: PremultipliedColorF,
+    /// Second gradient color, premultiplied.
+    pub color1: PremultipliedColorF,
+    /// `0.0` for a horizontal orientation, `1.0` for a vertical one
+    /// (as produced by `FastLinearGradientTask::to_instance`).
+    pub axis_select: f32,
+}
+
+/// Parameters of a (general path) linear gradient render task.
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct LinearGradientTask {
+    pub start: DevicePoint,
+    pub end: DevicePoint,
+    pub scale: DeviceVector2D,
+    pub extend_mode: ExtendMode,
+    /// Address of the gradient stop table, as returned by
+    /// `GradientGpuBlockBuilder::build`.
+    pub stops: GpuBufferAddress,
+}
+
+impl LinearGradientTask {
+    /// Build the per-instance shader input for this task, targeting
+    /// `target_rect`. The extend mode and the stops address are passed to the
+    /// shader as integers.
+    pub fn to_instance(&self, target_rect: &DeviceIntRect) -> LinearGradientInstance {
+        let task_rect = target_rect.to_f32();
+        let extend_mode = self.extend_mode as i32;
+        let gradient_stops_address = self.stops.as_int();
+
+        LinearGradientInstance {
+            task_rect,
+            start: self.start,
+            end: self.end,
+            scale: self.scale,
+            extend_mode,
+            gradient_stops_address,
+        }
+    }
+}
+
+/// The per-instance shader input of a linear gradient render task.
+///
+/// Must match the LINEAR_GRADIENT instance description in renderer/vertex.rs.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[repr(C)]
+#[derive(Clone, Debug)]
+pub struct LinearGradientInstance {
+    /// Target rectangle of the task, converted to floating point.
+    pub task_rect: DeviceRect,
+    pub start: DevicePoint,
+    pub end: DevicePoint,
+    pub scale: DeviceVector2D,
+    /// The `ExtendMode` of the gradient cast to `i32`.
+    pub extend_mode: i32,
+    /// Integer form of the gradient stop table address.
+    pub gradient_stops_address: i32,
+}
+
+/// Hashable description of a general-path linear gradient, used as the render
+/// task cache key. Floating point geometry is stored as `PointKey`s and the
+/// stops as `GradientStopKey`s.
+#[derive(Clone, Debug, Hash, PartialEq, Eq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct LinearGradientCacheKey {
+    pub size: DeviceIntSize,
+    pub start: PointKey,
+    pub end: PointKey,
+    pub scale: PointKey,
+    pub extend_mode: ExtendMode,
+    pub stops: Vec<GradientStopKey>,
+    pub reversed_stops: bool,
+}
diff --git a/gfx/wr/webrender/src/prim_store/gradient/mod.rs b/gfx/wr/webrender/src/prim_store/gradient/mod.rs
new file mode 100644
index 0000000000..d0b922c579
--- /dev/null
+++ b/gfx/wr/webrender/src/prim_store/gradient/mod.rs
@@ -0,0 +1,392 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{ColorF, ColorU, GradientStop, PremultipliedColorF};
+use api::units::{LayoutRect, LayoutSize, LayoutVector2D};
+use crate::renderer::{GpuBufferAddress, GpuBufferBuilder};
+use std::hash;
+
+mod linear;
+mod radial;
+mod conic;
+
+pub use linear::MAX_CACHED_SIZE as LINEAR_MAX_CACHED_SIZE;
+
+pub use linear::*;
+pub use radial::*;
+pub use conic::*;
+
+/// A hashable gradient stop that can be used in primitive keys.
+///
+/// Hashing uses the bit pattern of `offset` (see the manual `Hash` impl),
+/// while equality is the derived field-wise `PartialEq`.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Copy, Clone, MallocSizeOf, PartialEq)]
+pub struct GradientStopKey {
+    /// Position of the stop along the gradient.
+    pub offset: f32,
+    /// Color of the stop, as 8-bit-per-channel `ColorU`.
+    pub color: ColorU,
+}
+
+impl GradientStopKey {
+    /// Create a stop at offset `0.0` whose color has all four channels set
+    /// to zero.
+    pub fn empty() -> Self {
+        let color = ColorU::new(0, 0, 0, 0);
+        Self { offset: 0.0, color }
+    }
+}
+
+/// Convert an API gradient stop into its hashable key form. The offset is
+/// copied unchanged and the color is converted with `Into`.
+///
+/// Implemented as `From` rather than `Into`: the standard library's blanket
+/// impl still provides `Into<GradientStopKey> for GradientStop`, so existing
+/// `.into()` call sites keep working, and `GradientStopKey::from(stop)`
+/// becomes available as well.
+impl From<GradientStop> for GradientStopKey {
+    fn from(stop: GradientStop) -> Self {
+        GradientStopKey {
+            offset: stop.offset,
+            color: stop.color.into(),
+        }
+    }
+}
+
+// Turn the hashable `stop_keys` back into `GradientStop`s (the form the
+// gradient builder consumes) and report the smallest alpha found among the
+// stop colors. The minimum starts at 1.0, so that is the result for an
+// empty slice.
+fn stops_and_min_alpha(stop_keys: &[GradientStopKey]) -> (Vec<GradientStop>, f32) {
+    let mut lowest_alpha: f32 = 1.0;
+    let mut stops = Vec::with_capacity(stop_keys.len());
+
+    for key in stop_keys {
+        let color = ColorF::from(key.color);
+        lowest_alpha = lowest_alpha.min(color.a);
+        stops.push(GradientStop {
+            offset: key.offset,
+            color,
+        });
+    }
+
+    (stops, lowest_alpha)
+}
+
+// `Eq` is asserted manually because `offset` is an `f32`, which only
+// implements `PartialEq`.
+// NOTE(review): this relies on offsets never being NaN (NaN != NaN would
+// break reflexivity) — confirm that upstream validation guarantees it.
+impl Eq for GradientStopKey {}
+
+impl hash::Hash for GradientStopKey {
+    /// Feed the raw bit pattern of the offset, then the color, into the hasher
+    /// (`f32` itself does not implement `Hash`).
+    fn hash<H: hash::Hasher>(&self, state: &mut H) {
+        let Self { offset, color } = self;
+        offset.to_bits().hash(state);
+        color.hash(state);
+    }
+}
+
+// Layout of a gradient data block:
+//   index 0        : first color stop entry
+//   indices 1..129 : the interpolated table (GRADIENT_DATA_TABLE_SIZE entries)
+//   index 129      : last color stop entry
+
+// The gradient entry index for the first color stop
+pub const GRADIENT_DATA_FIRST_STOP: usize = 0;
+// The gradient entry index for the last color stop
+// (evaluates to 129 with the current table size)
+pub const GRADIENT_DATA_LAST_STOP: usize = GRADIENT_DATA_SIZE - 1;
+
+// The start of the gradient data table (evaluates to 1)
+pub const GRADIENT_DATA_TABLE_BEGIN: usize = GRADIENT_DATA_FIRST_STOP + 1;
+// The exclusive bound of the gradient data table (evaluates to 129, i.e. the
+// same index as the last color stop entry)
+pub const GRADIENT_DATA_TABLE_END: usize = GRADIENT_DATA_LAST_STOP;
+// The number of entries in the gradient data table.
+pub const GRADIENT_DATA_TABLE_SIZE: usize = 128;
+
+// The number of entries in a gradient data: GRADIENT_DATA_TABLE_SIZE + first stop entry + last stop entry
+// (evaluates to 130)
+pub const GRADIENT_DATA_SIZE: usize = GRADIENT_DATA_TABLE_SIZE + 2;
+
+/// An entry in a gradient data table representing a segment of the gradient
+/// color space.
+#[derive(Debug, Copy, Clone)]
+#[repr(C)]
+struct GradientDataEntry {
+    /// Color at the start of the segment covered by this entry.
+    start_color: PremultipliedColorF,
+    /// Per-entry color step of the ramp this entry belongs to (a color
+    /// difference, not an absolute end color; see `fill_colors`).
+    end_step: PremultipliedColorF,
+}
+
+impl GradientDataEntry {
+    /// An entry that starts at white and has a transparent (all-zero) step.
+    /// Used as the initial fill value of the gradient table.
+    fn white() -> Self {
+        let start_color = PremultipliedColorF::WHITE;
+        let end_step = PremultipliedColorF::TRANSPARENT;
+        Self { start_color, end_step }
+    }
+}
+
+/// Stateless namespace for the functions that build the gradient stop table
+/// and write it through a `GpuBufferBuilder` (see `build`).
+// TODO(gw): Tidy this up to be a free function / module?
+pub struct GradientGpuBlockBuilder {}
+
+impl GradientGpuBlockBuilder {
+    /// Generate a color ramp filling the indices in [start_idx, end_idx) and interpolating
+    /// from start_color to end_color.
+    ///
+    /// * `start_idx`, `end_idx` - half-open range of `entries` to fill. Callers must
+    ///   ensure `end_idx > start_idx`: the step is computed by dividing by
+    ///   `end_idx - start_idx`.
+    /// * `start_color`, `end_color` - colors at the two ends of the ramp.
+    /// * `entries` - the gradient table being filled.
+    /// * `prev_step` - the step returned for the previously filled range, used to
+    ///   keep steps of adjacent ranges distinguishable.
+    ///
+    /// Returns the step written into the filled entries, to be passed as `prev_step`
+    /// for the next range.
+    fn fill_colors(
+        start_idx: usize,
+        end_idx: usize,
+        start_color: &PremultipliedColorF,
+        end_color: &PremultipliedColorF,
+        entries: &mut [GradientDataEntry; GRADIENT_DATA_SIZE],
+        prev_step: &PremultipliedColorF,
+    ) -> PremultipliedColorF {
+        // Calculate the color difference for individual steps in the ramp.
+        let inv_steps = 1.0 / (end_idx - start_idx) as f32;
+        let mut step = PremultipliedColorF {
+            r: (end_color.r - start_color.r) * inv_steps,
+            g: (end_color.g - start_color.g) * inv_steps,
+            b: (end_color.b - start_color.b) * inv_steps,
+            a: (end_color.a - start_color.a) * inv_steps,
+        };
+        // As a subtle form of compression, we ensure that the step values for
+        // each stop range are the same if and only if they belong to the same
+        // stop range. However, if two different stop ranges have the same step,
+        // we need to modify the steps so they compare unequally between ranges.
+        // This allows to quickly compare if two adjacent stops belong to the
+        // same range by comparing their steps.
+        if step == *prev_step {
+            // Modify the step alpha value as if by nextafter(). The difference
+            // here should be so small as to be unnoticeable, but yet allow it
+            // to compare differently.
+            // A zero alpha step becomes the smallest positive subnormal (bit
+            // pattern 1); any other value has its bit pattern incremented by one.
+            step.a = f32::from_bits(if step.a == 0.0 { 1 } else { step.a.to_bits() + 1 });
+        }
+
+        let mut cur_color = *start_color;
+
+        // Walk the ramp writing the start color and the (shared) step for each entry.
+        for index in start_idx .. end_idx {
+            let entry = &mut entries[index];
+            entry.start_color = cur_color;
+            // Advance to the start color of the next entry by accumulating the step.
+            cur_color.r += step.r;
+            cur_color.g += step.g;
+            cur_color.b += step.b;
+            cur_color.a += step.a;
+            entry.end_step = step;
+        }
+
+        step
+    }
+
+    /// Map a gradient stop offset to an index into the gradient entry table. The offset
+    /// is limited to [0, 1] first, so the result lies in
+    /// [GRADIENT_DATA_TABLE_BEGIN, GRADIENT_DATA_TABLE_END].
+    #[inline]
+    fn get_index(offset: f32) -> usize {
+        let unit_offset = offset.max(0.0).min(1.0);
+        let table_position = unit_offset * GRADIENT_DATA_TABLE_SIZE as f32 +
+            GRADIENT_DATA_TABLE_BEGIN as f32;
+
+        table_position.round() as usize
+    }
+
+    // Build the gradient data from the supplied stops, reversing them if necessary.
+    //
+    // * `reverse_stops` - when true, the table is filled from its end towards its
+    //   beginning and each stop offset is mirrored (`1.0 - offset`).
+    // * `gpu_buffer_builder` - destination the finished table is written to.
+    // * `src_stops` - the gradient stops, in order.
+    //
+    // Returns the address handed back by the buffer writer for the written table
+    // (two blocks per entry, `GRADIENT_DATA_SIZE` entries).
+    pub fn build(
+        reverse_stops: bool,
+        gpu_buffer_builder: &mut GpuBufferBuilder,
+        src_stops: &[GradientStop],
+    ) -> GpuBufferAddress {
+        // Preconditions (should be ensured by DisplayListBuilder):
+        // * we have at least two stops
+        // * first stop has offset 0.0
+        // * last stop has offset 1.0
+        let mut src_stops = src_stops.into_iter();
+        let mut cur_color = match src_stops.next() {
+            Some(stop) => {
+                debug_assert_eq!(stop.offset, 0.0);
+                stop.color.premultiplied()
+            }
+            None => {
+                // No stops at all: log and fall back to black for the first color.
+                error!("Zero gradient stops found!");
+                PremultipliedColorF::BLACK
+            }
+        };
+
+        // A table of gradient entries, with two colors per entry, that specify the start and end color
+        // within the segment of the gradient space represented by that entry. To lookup a gradient result,
+        // first the entry index is calculated to determine which two colors to interpolate between, then
+        // the offset within that entry bucket is used to interpolate between the two colors in that entry.
+        // This layout is motivated by the fact that if one naively tries to store a single color per entry
+        // and interpolate directly between entries, then hard stops will become softened because the end
+        // color of an entry actually differs from the start color of the next entry, even though they fall
+        // at the same edge offset in the gradient space. Instead, the two-color-per-entry layout preserves
+        // hard stops, as the end color for a given entry can differ from the start color for the following
+        // entry.
+        // Colors are stored in RGBA32F format (in the GPU cache). This table requires the gradient color
+        // stops to be normalized to the range [0, 1]. The first and last entries hold the first and last
+        // color stop colors respectively, while the entries in between hold the interpolated color stop
+        // values for the range [0, 1].
+        // As a further optimization, rather than directly storing the end color, the difference of the end
+        // color from the start color is stored instead, so that an entry can be evaluated more cheaply
+        // with start+diff*offset instead of mix(start,end,offset). Further, the color difference in two
+        // adjacent entries will always be the same if they were generated from the same set of stops/run.
+        // To allow fast searching of the table, if two adjacent entries generated from different sets of
+        // stops (a boundary) have the same difference, the floating-point bits of the stop will be nudged
+        // so that they compare differently without perceptibly altering the interpolation result. This way,
+        // one can quickly scan the table and recover runs just by comparing the color differences of the
+        // current and next entry.
+        // For example, a table with 2 inside entries (startR,startG,startB):(diffR,diffG,diffB) might look
+        // like so:
+        //     first           | 0.0              | 0.5              | last
+        //     (0,0,0):(0,0,0) | (1,0,0):(-1,1,0) | (0,0,1):(0,1,-1) | (1,1,1):(0,0,0)
+        //     ^ solid black     ^ red to green     ^ blue to green    ^ solid white
+        // Every entry starts out white, so any part of the table the stops do not
+        // cover stays white (see the "auto-completing to white" errors below).
+        let mut entries = [GradientDataEntry::white(); GRADIENT_DATA_SIZE];
+        // Seed value for the "previous step" comparison done in fill_colors.
+        let mut prev_step = cur_color;
+        if reverse_stops {
+            // Fill in the first entry (for reversed stops) with the first color stop
+            prev_step = GradientGpuBlockBuilder::fill_colors(
+                GRADIENT_DATA_LAST_STOP,
+                GRADIENT_DATA_LAST_STOP + 1,
+                &cur_color,
+                &cur_color,
+                &mut entries,
+                &prev_step,
+            );
+
+            // Fill in the center of the gradient table, generating a color ramp between each consecutive pair
+            // of gradient stops. Each iteration of a loop will fill the indices in [next_idx, cur_idx). The
+            // loop will then fill indices in [GRADIENT_DATA_TABLE_BEGIN, GRADIENT_DATA_TABLE_END).
+            let mut cur_idx = GRADIENT_DATA_TABLE_END;
+            for next in src_stops {
+                let next_color = next.color.premultiplied();
+                let next_idx = Self::get_index(1.0 - next.offset);
+
+                // Stops that map to the same (or a later) index fill nothing;
+                // only the current color is updated.
+                if next_idx < cur_idx {
+                    prev_step = GradientGpuBlockBuilder::fill_colors(
+                        next_idx,
+                        cur_idx,
+                        &next_color,
+                        &cur_color,
+                        &mut entries,
+                        &prev_step,
+                    );
+                    cur_idx = next_idx;
+                }
+
+                cur_color = next_color;
+            }
+            if cur_idx != GRADIENT_DATA_TABLE_BEGIN {
+                error!("Gradient stops abruptly at {}, auto-completing to white", cur_idx);
+            }
+
+            // Fill in the last entry (for reversed stops) with the last color stop
+            GradientGpuBlockBuilder::fill_colors(
+                GRADIENT_DATA_FIRST_STOP,
+                GRADIENT_DATA_FIRST_STOP + 1,
+                &cur_color,
+                &cur_color,
+                &mut entries,
+                &prev_step,
+            );
+        } else {
+            // Fill in the first entry with the first color stop
+            prev_step = GradientGpuBlockBuilder::fill_colors(
+                GRADIENT_DATA_FIRST_STOP,
+                GRADIENT_DATA_FIRST_STOP + 1,
+                &cur_color,
+                &cur_color,
+                &mut entries,
+                &prev_step,
+            );
+
+            // Fill in the center of the gradient table, generating a color ramp between each consecutive pair
+            // of gradient stops. Each iteration of a loop will fill the indices in [cur_idx, next_idx). The
+            // loop will then fill indices in [GRADIENT_DATA_TABLE_BEGIN, GRADIENT_DATA_TABLE_END).
+            let mut cur_idx = GRADIENT_DATA_TABLE_BEGIN;
+            for next in src_stops {
+                let next_color = next.color.premultiplied();
+                let next_idx = Self::get_index(next.offset);
+
+                // Stops that map to the same (or an earlier) index fill nothing;
+                // only the current color is updated.
+                if next_idx > cur_idx {
+                    prev_step = GradientGpuBlockBuilder::fill_colors(
+                        cur_idx,
+                        next_idx,
+                        &cur_color,
+                        &next_color,
+                        &mut entries,
+                        &prev_step,
+                    );
+                    cur_idx = next_idx;
+                }
+
+                cur_color = next_color;
+            }
+            if cur_idx != GRADIENT_DATA_TABLE_END {
+                error!("Gradient stops abruptly at {}, auto-completing to white", cur_idx);
+            }
+
+            // Fill in the last entry with the last color stop
+            GradientGpuBlockBuilder::fill_colors(
+                GRADIENT_DATA_LAST_STOP,
+                GRADIENT_DATA_LAST_STOP + 1,
+                &cur_color,
+                &cur_color,
+                &mut entries,
+                &prev_step,
+            );
+        }
+
+        // Each entry occupies two blocks: its start color followed by its step.
+        let mut writer = gpu_buffer_builder.write_blocks(2 * entries.len());
+
+        for entry in entries {
+            writer.push_one(entry.start_color);
+            writer.push_one(entry.end_step);
+        }
+
+        writer.finish()
+    }
+}
+
+// When a gradient does not repeat along an axis, nothing outside the clip can
+// ever be visible on that axis, so the primitive rect is shrunk to the clip
+// up front. This reduces how much of the gradient is rendered and cached.
+// Each axis is treated independently.
+//
+// Returns the vector from the new primitive origin to the old one, which the
+// caller applies to gradient parameters expressed relative to the primitive
+// origin.
+pub fn apply_gradient_local_clip(
+    prim_rect: &mut LayoutRect,
+    stretch_size: &LayoutSize,
+    tile_spacing: &LayoutSize,
+    clip_rect: &LayoutRect,
+) -> LayoutVector2D {
+    // Extent of the primitive once its far edge is limited by the clip.
+    let clipped_width = prim_rect.max.x.min(clip_rect.max.x) - prim_rect.min.x;
+    let clipped_height = prim_rect.max.y.min(clip_rect.max.y) - prim_rect.min.y;
+
+    // An axis repeats if more than one tile (stretch size plus spacing) fits.
+    let repeats_x = clipped_width > stretch_size.width + tile_spacing.width;
+    let repeats_y = clipped_height > stretch_size.height + tile_spacing.height;
+
+    let mut origin_shift = LayoutVector2D::new(0.0, 0.0);
+
+    if !repeats_x {
+        // Trim the left side, never by more than the primitive's own width.
+        let leading = (clip_rect.min.x - prim_rect.min.x).min(prim_rect.width());
+        if leading > 0.0 {
+            prim_rect.min.x += leading;
+            origin_shift.x = -leading;
+        }
+
+        // Trim the right side.
+        let trailing = prim_rect.max.x - clip_rect.max.x;
+        if trailing > 0.0 {
+            prim_rect.max.x -= trailing;
+        }
+    }
+
+    if !repeats_y {
+        // Trim the top side, never by more than the primitive's own height.
+        let leading = (clip_rect.min.y - prim_rect.min.y).min(prim_rect.height());
+        if leading > 0.0 {
+            prim_rect.min.y += leading;
+            origin_shift.y = -leading;
+        }
+
+        // Trim the bottom side.
+        let trailing = prim_rect.max.y - clip_rect.max.y;
+        if trailing > 0.0 {
+            prim_rect.max.y -= trailing;
+        }
+    }
+
+    origin_shift
+}
+
+#[test]
+#[cfg(target_pointer_width = "64")]
+fn test_struct_sizes() {
+    use std::mem::size_of;
+    // These sizes matter for performance on a number of talos stress tests.
+    // A failure here on CI means one of two things:
+    // (a) A structure got smaller. Great work! Update the expectations below
+    //     and move on.
+    // (b) A structure got larger. That is not necessarily a problem, but it
+    //     should only be done with care, and after checking whether talos
+    //     performance regresses badly.
+    assert_eq!(size_of::<LinearGradient>(), 72, "LinearGradient size changed");
+    assert_eq!(size_of::<LinearGradientTemplate>(), 144, "LinearGradientTemplate size changed");
+    assert_eq!(size_of::<LinearGradientKey>(), 88, "LinearGradientKey size changed");
+
+    assert_eq!(size_of::<RadialGradient>(), 72, "RadialGradient size changed");
+    assert_eq!(size_of::<RadialGradientTemplate>(), 144, "RadialGradientTemplate size changed");
+    assert_eq!(size_of::<RadialGradientKey>(), 96, "RadialGradientKey size changed");
+
+    assert_eq!(size_of::<ConicGradient>(), 72, "ConicGradient size changed");
+    assert_eq!(size_of::<ConicGradientTemplate>(), 144, "ConicGradientTemplate size changed");
+    assert_eq!(size_of::<ConicGradientKey>(), 96, "ConicGradientKey size changed");
+}
diff --git a/gfx/wr/webrender/src/prim_store/gradient/radial.rs b/gfx/wr/webrender/src/prim_store/gradient/radial.rs
new file mode 100644
index 0000000000..f3f20f9a55
--- /dev/null
+++ b/gfx/wr/webrender/src/prim_store/gradient/radial.rs
@@ -0,0 +1,531 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! Radial gradients
+//!
+//! Specification: https://drafts.csswg.org/css-images-4/#radial-gradients
+//!
+//! Radial gradients are rendered via cached render tasks and composited with the image brush.
+
+use euclid::{vec2, size2};
+use api::{ExtendMode, GradientStop, PremultipliedColorF, ColorU};
+use api::units::*;
+use crate::scene_building::IsVisible;
+use crate::frame_builder::FrameBuildingState;
+use crate::intern::{Internable, InternDebug, Handle as InternHandle};
+use crate::internal_types::LayoutPrimitiveInfo;
+use crate::prim_store::{BrushSegment, GradientTileRange, InternablePrimitive};
+use crate::prim_store::{PrimitiveInstanceKind, PrimitiveOpacity};
+use crate::prim_store::{PrimKeyCommonData, PrimTemplateCommonData, PrimitiveStore};
+use crate::prim_store::{NinePatchDescriptor, PointKey, SizeKey, FloatKey};
+use crate::render_task::{RenderTask, RenderTaskKind};
+use crate::render_task_graph::RenderTaskId;
+use crate::render_task_cache::{RenderTaskCacheKeyKind, RenderTaskCacheKey, RenderTaskParent};
+use crate::renderer::GpuBufferAddress;
+use crate::picture::{SurfaceIndex};
+
+use std::{hash, ops::{Deref, DerefMut}};
+use super::{
+    stops_and_min_alpha, GradientStopKey, GradientGpuBlockBuilder,
+    apply_gradient_local_clip,
+};
+
+/// Hashable radial gradient parameters (two radii and an x/y ratio), for use during prim interning.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Clone, MallocSizeOf, PartialEq)] // NOTE(review): `==` is by value, `Hash` by bits: 0.0 == -0.0 yet they hash differently.
+pub struct RadialGradientParams {
+    pub start_radius: f32, // forwarded unchanged to the shader instance
+    pub end_radius: f32, // forwarded unchanged to the shader instance
+    pub ratio_xy: f32, // presumably the x/y radius ratio of the ellipse — TODO confirm
+}
+
+impl Eq for RadialGradientParams {}
+
+impl hash::Hash for RadialGradientParams {
+    /// Feeds the raw bit pattern of each field to `state`, in declaration order.
+    fn hash<H: hash::Hasher>(&self, state: &mut H) {
+        let fields = [self.start_radius, self.end_radius, self.ratio_xy];
+        fields.iter().for_each(|value| value.to_bits().hash(state));
+    }
+}
+
+/// Identifying (hashable) key for a radial gradient, built by `RadialGradientKey::new`.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Clone, Eq, PartialEq, Hash, MallocSizeOf)]
+pub struct RadialGradientKey {
+    pub common: PrimKeyCommonData, // converted from the `LayoutPrimitiveInfo`
+    pub extend_mode: ExtendMode, // this and all fields below are moved over from `RadialGradient`
+    pub center: PointKey,
+    pub params: RadialGradientParams,
+    pub stretch_size: SizeKey,
+    pub stops: Vec<GradientStopKey>,
+    pub tile_spacing: SizeKey,
+    pub nine_patch: Option<Box<NinePatchDescriptor>>, // when set, the template builds its brush segments from it
+}
+
+impl RadialGradientKey {
+    /// Assembles the interning key: the common part is converted from `info`,
+    /// every other field is moved over unchanged from `radial_grad`.
+    pub fn new(
+        info: &LayoutPrimitiveInfo,
+        radial_grad: RadialGradient,
+    ) -> Self {
+        let RadialGradient {
+            extend_mode, center, params, stretch_size, stops, tile_spacing, nine_patch,
+        } = radial_grad;
+        let common = info.into();
+
+        RadialGradientKey {
+            common, extend_mode, center, params, stretch_size, stops, tile_spacing, nine_patch,
+        }
+    }
+}
+
+impl InternDebug for RadialGradientKey {}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+#[derive(Debug)]
+pub struct RadialGradientTemplate { // interned store data, built from a `RadialGradientKey`
+    pub common: PrimTemplateCommonData, // also reachable through Deref/DerefMut
+    pub extend_mode: ExtendMode,
+    pub params: RadialGradientParams,
+    pub center: DevicePoint, // the key's center coordinates, copied as-is
+    pub task_size: DeviceIntSize, // stretch_size capped at 1024 per axis, rounded up to integers
+    pub scale: DeviceVector2D, // per axis: uncapped size / 1024 where the cap applied, else 1.0
+    pub stretch_size: LayoutSize, // the key's stretch size clamped to the primitive rect size
+    pub tile_spacing: LayoutSize,
+    pub brush_segments: Vec<BrushSegment>, // nine-patch segments; empty without a nine-patch
+    pub stops_opacity: PrimitiveOpacity, // derived from the minimum alpha of the stops
+    pub stops: Vec<GradientStop>,
+    pub src_color: Option<RenderTaskId>, // None until update() requests the render task
+}
+
+/// Gives read access to the shared `common` template data through the template itself.
+impl Deref for RadialGradientTemplate {
+    type Target = PrimTemplateCommonData;
+    /// Borrows the embedded `common` data.
+    fn deref(&self) -> &PrimTemplateCommonData { &self.common }
+}
+
+/// Mutable counterpart of the `Deref` impl above.
+impl DerefMut for RadialGradientTemplate {
+    /// Mutably borrows the embedded `common` data.
+    fn deref_mut(&mut self) -> &mut PrimTemplateCommonData { &mut self.common }
+}
+
+impl From<RadialGradientKey> for RadialGradientTemplate {
+    /// Builds the template from an interned key: resolves the nine-patch segments and
+    /// the stops, clamps the stretch size and sizes the cached render task.
+    fn from(item: RadialGradientKey) -> Self {
+        // Avoid rendering enormous gradients. Radial gradients are mostly made of soft
+        // transitions, so it is unlikely that rendering at a higher resolution than 1024
+        // would produce noticeable differences, especially with 8 bits per channel.
+        const MAX_SIZE: f32 = 1024.0;
+
+        let common = PrimTemplateCommonData::with_key_common(item.common);
+
+        let brush_segments = match item.nine_patch {
+            Some(ref nine_patch) => nine_patch.create_segments(common.prim_rect.size()),
+            None => Vec::new(),
+        };
+
+        // The opacity of the stops is saved for use in selecting which pass this
+        // gradient should be drawn in.
+        let (stops, min_alpha) = stops_and_min_alpha(&item.stops);
+        let stops_opacity = PrimitiveOpacity::from_alpha(min_alpha);
+
+        let requested_size: LayoutSize = item.stretch_size.into();
+        let stretch_size = LayoutSize::new(
+            requested_size.width.min(common.prim_rect.width()),
+            requested_size.height.min(common.prim_rect.height()),
+        );
+
+        // Returns the capped task length along one axis together with the factor
+        // by which that axis was shrunk (1.0 when the cap does not apply).
+        let cap_axis = |length: f32| {
+            if length > MAX_SIZE { (MAX_SIZE, length / MAX_SIZE) } else { (length, 1.0) }
+        };
+        let (task_width, scale_x) = cap_axis(stretch_size.width);
+        let (task_height, scale_y) = cap_axis(stretch_size.height);
+
+        RadialGradientTemplate {
+            common,
+            center: DevicePoint::new(item.center.x, item.center.y),
+            extend_mode: item.extend_mode,
+            params: item.params,
+            stretch_size,
+            task_size: DeviceSize::new(task_width, task_height).ceil().to_i32(),
+            scale: vec2(scale_x, scale_y),
+            tile_spacing: item.tile_spacing.into(),
+            brush_segments,
+            stops_opacity,
+            stops,
+            src_color: None,
+        }
+    }
+}
+
+impl RadialGradientTemplate {
+    /// Update the GPU cache for a given primitive template and request the gradient's
+    /// render task. This may be called multiple times per frame, by each primitive
+    /// reference that refers to this interned template. The initial request call to the GPU
+    /// cache ensures the blocks are only written if the entry is invalid (first use or eviction).
+    pub fn update(
+        &mut self,
+        frame_state: &mut FrameBuildingState,
+        parent_surface: SurfaceIndex,
+    ) {
+        if let Some(mut request) =
+            frame_state.gpu_cache.request(&mut self.common.gpu_cache_handle) {
+            // write_prim_gpu_blocks: two white colors, then the stretch size padded with zeros.
+            request.push(PremultipliedColorF::WHITE);
+            request.push(PremultipliedColorF::WHITE);
+            request.push([
+                self.stretch_size.width,
+                self.stretch_size.height,
+                0.0,
+                0.0,
+            ]);
+
+            // write_segment_gpu_blocks: one entry per nine-patch brush segment, if any.
+            for segment in &self.brush_segments {
+                // has to match VECS_PER_SEGMENT
+                request.write_segment(
+                    segment.local_rect,
+                    segment.extra_data,
+                );
+            }
+        }
+
+        let task_size = self.task_size;
+        let cache_key = RadialGradientCacheKey { // identifies the task in the render task cache
+            size: task_size,
+            center: PointKey { x: self.center.x, y: self.center.y },
+            scale: PointKey { x: self.scale.x, y: self.scale.y },
+            start_radius: FloatKey(self.params.start_radius),
+            end_radius: FloatKey(self.params.end_radius),
+            ratio_xy: FloatKey(self.params.ratio_xy),
+            extend_mode: self.extend_mode,
+            stops: self.stops.iter().map(|stop| (*stop).into()).collect(),
+        };
+
+        let task_id = frame_state.resource_cache.request_render_task(
+            RenderTaskCacheKey {
+                size: task_size,
+                kind: RenderTaskCacheKeyKind::RadialGradient(cache_key),
+            },
+            frame_state.gpu_cache,
+            frame_state.frame_gpu_data,
+            frame_state.rg_builder,
+            None,
+            false, // NOTE(review): presumably the "is opaque" flag — confirm against request_render_task
+            RenderTaskParent::Surface(parent_surface),
+            &mut frame_state.surface_builder,
+            |rg_builder, gpu_buffer_builder| { // creates the task; presumably only run on a cache miss — confirm
+                let stops = GradientGpuBlockBuilder::build(
+                    false,
+                    gpu_buffer_builder,
+                    &self.stops,
+                );
+
+                rg_builder.add().init(RenderTask::new_dynamic(
+                    task_size,
+                    RenderTaskKind::RadialGradient(RadialGradientTask {
+                        extend_mode: self.extend_mode,
+                        center: self.center,
+                        scale: self.scale,
+                        params: self.params.clone(),
+                        stops,
+                    }),
+                ))
+            }
+        );
+
+        self.src_color = Some(task_id); // handle to the requested gradient render task
+
+        // Tile spacing is always handled by decomposing into separate draw calls, so the
+        // primitive opacity is equivalent to the stops opacity. This might change to being
+        // set to non-opaque in the presence of tile spacing if/when tile spacing is handled
+        // in the same way as with the image primitive.
+        self.opacity = self.stops_opacity; // `opacity` lives in `common`, reached through DerefMut
+    }
+}
+
+pub type RadialGradientDataHandle = InternHandle<RadialGradient>; // intern handle for `RadialGradient`
+
+#[derive(Debug, MallocSizeOf)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct RadialGradient { // consumed by RadialGradientKey::new(), which moves every field into the key
+    pub extend_mode: ExtendMode,
+    pub center: PointKey,
+    pub params: RadialGradientParams,
+    pub stretch_size: SizeKey,
+    pub stops: Vec<GradientStopKey>,
+    pub tile_spacing: SizeKey,
+    pub nine_patch: Option<Box<NinePatchDescriptor>>,
+}
+
+impl Internable for RadialGradient {
+    type Key = RadialGradientKey; // hashable identity, see RadialGradientKey::new()
+    type StoreData = RadialGradientTemplate; // built from the key via From<RadialGradientKey>
+    type InternData = (); // no additional intern data for this primitive
+    const PROFILE_COUNTER: usize = crate::profiler::INTERNED_RADIAL_GRADIENTS;
+}
+
+impl InternablePrimitive for RadialGradient {
+    /// Consumes the gradient description and wraps it, together with the common
+    /// primitive `info`, into its interning key.
+    fn into_key(self, info: &LayoutPrimitiveInfo) -> RadialGradientKey {
+        RadialGradientKey::new(info, self)
+    }
+
+    /// Creates the primitive instance kind for an interned gradient. Only the
+    /// data handle is used; the other arguments are ignored and the instance
+    /// starts with an empty visible tile range.
+    fn make_instance_kind(
+        _key: RadialGradientKey,
+        data_handle: RadialGradientDataHandle,
+        _prim_store: &mut PrimitiveStore,
+        _reference_frame_relative_offset: LayoutVector2D,
+    ) -> PrimitiveInstanceKind {
+        let visible_tiles_range = GradientTileRange::empty();
+        PrimitiveInstanceKind::RadialGradient { data_handle, visible_tiles_range }
+    }
+}
+
+/// A radial gradient is always reported as visible.
+impl IsVisible for RadialGradient {
+    /// Unconditionally returns `true`.
+    fn is_visible(&self) -> bool { true }
+}
+
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct RadialGradientTask { // payload of `RenderTaskKind::RadialGradient`
+    pub extend_mode: ExtendMode,
+    pub center: DevicePoint,
+    pub scale: DeviceVector2D,
+    pub params: RadialGradientParams,
+    pub stops: GpuBufferAddress, // address returned by GradientGpuBlockBuilder::build()
+}
+
+impl RadialGradientTask {
+    /// Packs this task into the per-instance shader input for `target_rect`.
+    pub fn to_instance(&self, target_rect: &DeviceIntRect) -> RadialGradientInstance {
+        let RadialGradientParams { start_radius, end_radius, ratio_xy } = self.params;
+        RadialGradientInstance {
+            task_rect: target_rect.to_f32(),
+            center: self.center,
+            scale: self.scale,
+            start_radius, end_radius, ratio_xy,
+            extend_mode: self.extend_mode as i32,
+            gradient_stops_address: self.stops.as_int(),
+        }
+    }
+}
+
+/// The per-instance shader input of a radial gradient render task.
+///
+/// Must match the RADIAL_GRADIENT instance description in renderer/vertex.rs.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[repr(C)]
+#[derive(Clone, Debug)]
+pub struct RadialGradientInstance {
+    pub task_rect: DeviceRect, // the task's target rect converted to f32
+    pub center: DevicePoint,
+    pub scale: DeviceVector2D,
+    pub start_radius: f32,
+    pub end_radius: f32,
+    pub ratio_xy: f32,
+    pub extend_mode: i32, // `ExtendMode` cast to i32
+    pub gradient_stops_address: i32, // the stops' `GpuBufferAddress` as an integer
+}
+
+#[derive(Clone, Debug, Hash, PartialEq, Eq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct RadialGradientCacheKey { // wrapped in `RenderTaskCacheKeyKind::RadialGradient` when requesting the task
+    pub size: DeviceIntSize, // the template's task size
+    pub center: PointKey,
+    pub scale: PointKey, // the template's scale vector stored as a PointKey
+    pub start_radius: FloatKey, // float parameters are wrapped in FloatKey
+    pub end_radius: FloatKey,
+    pub ratio_xy: FloatKey,
+    pub extend_mode: ExtendMode,
+    pub stops: Vec<GradientStopKey>, // converted from the template's `GradientStop`s
+}
+
+/// Avoid invoking the radial gradient shader on large areas where the color is
+/// constant.
+///
+/// If the extend mode is set to clamp, the "interesting" part
+/// of the gradient is only in the bounds of the gradient's ellipse, and the rest
+/// is the color of the last gradient stop.
+///
+/// Sometimes we run into radial gradients with a small radius compared to the
+/// primitive bounds, which means a large area of the primitive is a constant color.
+/// This function tries to detect that, potentially shrinks the gradient primitive to only
+/// the useful part and, if needed, calls `solid_parts` with a rect and color for each
+/// solid area that has to be drawn around the gradient where parts of it were removed.
+pub fn optimize_radial_gradient(
+    prim_rect: &mut LayoutRect, // in/out: clipped, then possibly shrunk from its min corner
+    stretch_size: &mut LayoutSize, // in/out: shrunk by the removed margins
+    center: &mut LayoutPoint, // in/out: kept relative to the adjusted prim_rect.min
+    tile_spacing: &mut LayoutSize, // in/out: grows by the removed margins
+    clip_rect: &LayoutRect,
+    radius: LayoutSize,
+    end_offset: f32, // scales `radius` to get the extent of the interesting area
+    extend_mode: ExtendMode,
+    stops: &[GradientStopKey],
+    solid_parts: &mut dyn FnMut(&LayoutRect, ColorU), // receives each solid rect and its color
+) {
+    let offset = apply_gradient_local_clip(
+        prim_rect,
+        stretch_size,
+        tile_spacing,
+        clip_rect
+    );
+
+    *center += offset;
+
+    if extend_mode != ExtendMode::Clamp || stops.is_empty() {
+        return;
+    }
+
+    // Bounding box of the "interesting" part of the gradient, in the same space as prim_rect.
+    let min = prim_rect.min + center.to_vector() - radius.to_vector() * end_offset;
+    let max = prim_rect.min + center.to_vector() + radius.to_vector() * end_offset;
+
+    // The (non-repeated) gradient primitive rect.
+    let gradient_rect = LayoutRect::from_origin_and_size(
+        prim_rect.min,
+        *stretch_size,
+    );
+
+    // How much internal margin there is between the primitive bounds and the gradient's
+    // bounding rect (areas that are a constant color), rounded down to whole units.
+    let mut l = (min.x - gradient_rect.min.x).max(0.0).floor();
+    let mut t = (min.y - gradient_rect.min.y).max(0.0).floor();
+    let mut r = (gradient_rect.max.x - max.x).max(0.0).floor();
+    let mut b = (gradient_rect.max.y - max.y).max(0.0).floor();
+
+    let is_tiled = prim_rect.width() > stretch_size.width + tile_spacing.width
+        || prim_rect.height() > stretch_size.height + tile_spacing.height;
+
+    let bg_color = stops.last().unwrap().color; // cannot panic: empty `stops` returned early above
+
+    if bg_color.a != 0 && is_tiled {
+        // If the primitive has repetitions, it's not enough to insert solid rects around it,
+        // so bail out.
+        return;
+    }
+
+    // If the background is fully transparent, shrinking the primitive bounds as much as possible
+    // is always a win. If the background is not transparent, we have to insert solid rectangles
+    // around the shrunk parts.
+    // If the background is transparent and the primitive is tiled, the optimization may introduce
+    // tile spacing which forces the tiling to be manually decomposed.
+    // Either way, don't bother optimizing unless it saves a significant amount of pixels.
+    if bg_color.a != 0 || (is_tiled && tile_spacing.is_empty()) {
+        let threshold = 128.0;
+        if l < threshold { l = 0.0 }
+        if t < threshold { t = 0.0 }
+        if r < threshold { r = 0.0 }
+        if b < threshold { b = 0.0 }
+    }
+
+    if l + t + r + b == 0.0 {
+        // No adjustment to make.
+        return;
+    }
+
+    // Insert solid rectangles around the gradient, in the places where the primitive will be
+    // shrunk: the four corners first, then the four edges between them.
+    if bg_color.a != 0 {
+        if l != 0.0 && t != 0.0 {
+            let solid_rect = LayoutRect::from_origin_and_size(
+                gradient_rect.min,
+                size2(l, t),
+            );
+            solid_parts(&solid_rect, bg_color);
+        }
+
+        if l != 0.0 && b != 0.0 {
+            let solid_rect = LayoutRect::from_origin_and_size(
+                gradient_rect.bottom_left() - vec2(0.0, b),
+                size2(l, b),
+            );
+            solid_parts(&solid_rect, bg_color);
+        }
+
+        if t != 0.0 && r != 0.0 {
+            let solid_rect = LayoutRect::from_origin_and_size(
+                gradient_rect.top_right() - vec2(r, 0.0),
+                size2(r, t),
+            );
+            solid_parts(&solid_rect, bg_color);
+        }
+
+        if r != 0.0 && b != 0.0 {
+            let solid_rect = LayoutRect::from_origin_and_size(
+                gradient_rect.bottom_right() - vec2(r, b),
+                size2(r, b),
+            );
+            solid_parts(&solid_rect, bg_color);
+        }
+
+        if l != 0.0 {
+            let solid_rect = LayoutRect::from_origin_and_size(
+                gradient_rect.min + vec2(0.0, t),
+                size2(l, gradient_rect.height() - t - b),
+            );
+            solid_parts(&solid_rect, bg_color);
+        }
+
+        if r != 0.0 {
+            let solid_rect = LayoutRect::from_origin_and_size(
+                gradient_rect.top_right() + vec2(-r, t),
+                size2(r, gradient_rect.height() - t - b),
+            );
+            solid_parts(&solid_rect, bg_color);
+        }
+
+        if t != 0.0 {
+            let solid_rect = LayoutRect::from_origin_and_size(
+                gradient_rect.min + vec2(l, 0.0),
+                size2(gradient_rect.width() - l - r, t),
+            );
+            solid_parts(&solid_rect, bg_color);
+        }
+
+        if b != 0.0 {
+            let solid_rect = LayoutRect::from_origin_and_size(
+                gradient_rect.bottom_left() + vec2(l, -b),
+                size2(gradient_rect.width() - l - r, b),
+            );
+            solid_parts(&solid_rect, bg_color);
+        }
+    }
+
+    // Shrink the gradient primitive. The removed margins are added to the tile spacing.
+
+    prim_rect.min.x += l;
+    prim_rect.min.y += t;
+
+    stretch_size.width -= l + r;
+    stretch_size.height -= b + t;
+
+    center.x -= l;
+    center.y -= t;
+
+    tile_spacing.width += l + r;
+    tile_spacing.height += t + b;
+}
diff --git a/gfx/wr/webrender/src/prim_store/image.rs b/gfx/wr/webrender/src/prim_store/image.rs
new file mode 100644
index 0000000000..9e1edc7f41
--- /dev/null
+++ b/gfx/wr/webrender/src/prim_store/image.rs
@@ -0,0 +1,682 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{
+    AlphaType, ColorDepth, ColorF, ColorU, ExternalImageData, ExternalImageType,
+    ImageKey as ApiImageKey, ImageBufferKind, ImageRendering, PremultipliedColorF,
+    RasterSpace, Shadow, YuvColorSpace, ColorRange, YuvFormat,
+};
+use api::units::*;
+use crate::scene_building::{CreateShadow, IsVisible};
+use crate::frame_builder::{FrameBuildingContext, FrameBuildingState};
+use crate::gpu_cache::{GpuCache, GpuDataRequest};
+use crate::intern::{Internable, InternDebug, Handle as InternHandle};
+use crate::internal_types::{LayoutPrimitiveInfo};
+use crate::picture::SurfaceIndex;
+use crate::prim_store::{
+    EdgeAaSegmentMask, PrimitiveInstanceKind,
+    PrimitiveOpacity, PrimKey,
+    PrimTemplate, PrimTemplateCommonData, PrimitiveStore, SegmentInstanceIndex,
+    SizeKey, InternablePrimitive,
+};
+use crate::render_target::RenderTargetKind;
+use crate::render_task_graph::RenderTaskId;
+use crate::render_task::RenderTask;
+use crate::render_task_cache::{
+    RenderTaskCacheKey, RenderTaskCacheKeyKind, RenderTaskParent
+};
+use crate::resource_cache::{ImageRequest, ImageProperties, ResourceCache};
+use crate::util::pack_as_float;
+use crate::visibility::{PrimitiveVisibility, compute_conservative_visible_rect};
+use crate::spatial_tree::SpatialNodeIndex;
+use crate::image_tiling;
+
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct VisibleImageTile { // one visible tile of a tiled image, pushed by ImageData::update()
+    pub src_color: RenderTaskId, // image render task created for this tile's request
+    pub edge_flags: EdgeAaSegmentMask, // the tile's edge flags masked by the repetition's flags
+    pub local_rect: LayoutRect, // the tile rect produced by image_tiling::tiles()
+    pub local_clip_rect: LayoutRect,
+}
+
+/// Key that identifies a unique (partial) image that is being
+/// stored in the render task cache.
+#[derive(Debug, Copy, Clone, Eq, Hash, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ImageCacheKey {
+    pub request: ImageRequest, // the image request the cached task was created for
+    pub texel_rect: Option<DeviceIntRect>, // NOTE(review): the tile-spacing path in update() always passes None
+}
+
+/// Instance specific fields for an image primitive. These are
+/// currently stored in a separate array to avoid bloating the
+/// size of PrimitiveInstance. In the future, we should be able
+/// to remove this and store the information inline, by:
+/// (a) Removing opacity collapse / binding support completely.
+///     Once we have general picture caching, we don't need this.
+/// (b) Changing visible_tiles to use Storage in the primitive
+///     scratch buffer. This will reduce the size of the
+///     visible_tiles field here, and save memory allocation
+///     when image tiling is used. I've left it as a Vec for
+///     now to reduce the number of changes, and because image
+///     tiling is very rare on real pages.
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+pub struct ImageInstance {
+    pub segment_instance_index: SegmentInstanceIndex,
+    pub tight_local_clip_rect: LayoutRect, // tiled path: local clip rect intersected with the prim rect
+    pub visible_tiles: Vec<VisibleImageTile>, // cleared and refilled by ImageData::update() for tiled images
+    pub src_color: Option<RenderTaskId>, // Some(task) for non-tiled images, None when tiled
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Clone, Eq, PartialEq, MallocSizeOf, Hash)]
+pub struct Image { // hashable image description; the `kind` payload of `ImageKey`
+    pub key: ApiImageKey,
+    pub stretch_size: SizeKey, // becomes a LayoutSize in ImageData
+    pub tile_spacing: SizeKey, // becomes a LayoutSize in ImageData
+    pub color: ColorU, // becomes a ColorF in ImageData
+    pub image_rendering: ImageRendering,
+    pub alpha_type: AlphaType,
+}
+
+/// Interning key of an image primitive: the common key data plus the `Image` itself.
+pub type ImageKey = PrimKey<Image>;
+
+impl ImageKey {
+    /// Pairs the common key data derived from `info` with the `image` payload.
+    pub fn new(info: &LayoutPrimitiveInfo, image: Image) -> Self {
+        // `ImageKey` is an alias of `PrimKey<Image>`, so this fills in the two
+        // fields of the generic key.
+        let common = info.into();
+        let kind = image;
+        ImageKey { common, kind }
+    }
+}
+
+impl InternDebug for ImageKey {}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, MallocSizeOf)]
+pub struct ImageData { // runtime form of `Image`, produced by the From<Image> impl
+    pub key: ApiImageKey,
+    pub stretch_size: LayoutSize, // size of one repetition; compared against the prim rect in update()
+    pub tile_spacing: LayoutSize, // non-zero spacing makes update() request a padded cached task
+    pub color: ColorF, // its alpha sets the primitive opacity when the image is opaque
+    pub image_rendering: ImageRendering, // passed along in every ImageRequest
+    pub alpha_type: AlphaType,
+}
+
+impl From<Image> for ImageData {
+    /// Converts the hashable key types (`ColorU`, `SizeKey`) into their runtime forms.
+    fn from(image: Image) -> Self {
+        let Image { key, stretch_size, tile_spacing, color, image_rendering, alpha_type } = image;
+        ImageData {
+            key, image_rendering, alpha_type,
+            color: color.into(),
+            stretch_size: stretch_size.into(),
+            tile_spacing: tile_spacing.into(),
+        }
+    }
+}
+
+impl ImageData {
+    /// Update the GPU cache for a given primitive template. This may be called multiple
+    /// times per frame, by each primitive reference that refers to this interned
+    /// template. The initial request call to the GPU cache ensures that work is only
+    /// done if the cache entry is invalid (due to first use or eviction).
+    pub fn update(
+        &mut self,
+        common: &mut PrimTemplateCommonData,
+        image_instance: &mut ImageInstance,
+        parent_surface: SurfaceIndex,
+        prim_spatial_node_index: SpatialNodeIndex,
+        frame_state: &mut FrameBuildingState,
+        frame_context: &FrameBuildingContext,
+        visibility: &mut PrimitiveVisibility,
+    ) {
+
+        let image_properties = frame_state
+            .resource_cache
+            .get_image_properties(self.key);
+
+        common.opacity = match &image_properties {
+            Some(properties) => {
+                if properties.descriptor.is_opaque() {
+                    PrimitiveOpacity::from_alpha(self.color.a)
+                } else {
+                    PrimitiveOpacity::translucent()
+                }
+            }
+            None => PrimitiveOpacity::opaque(),
+        };
+
+        if self.stretch_size.width >= common.prim_rect.width() &&
+            self.stretch_size.height >= common.prim_rect.height() {
+
+            common.may_need_repetition = false;
+        }
+
+        let request = ImageRequest {
+            key: self.key,
+            rendering: self.image_rendering,
+            tile: None,
+        };
+
+        match image_properties {
+            // Non-tiled (most common) path.
+            Some(ImageProperties { tiling: None, ref descriptor, ref external_image, .. }) => {
+                let mut size = frame_state.resource_cache.request_image(
+                    request,
+                    frame_state.gpu_cache,
+                );
+
+                let orig_task_id = frame_state.rg_builder.add().init(
+                    RenderTask::new_image(size, request)
+                );
+
+                // On some devices we cannot render from an ImageBufferKind::TextureExternal
+                // source using most shaders, so must perform a copy to a regular texture first.
+                let task_id = if frame_context.fb_config.external_images_require_copy
+                    && matches!(
+                        external_image,
+                        Some(ExternalImageData {
+                            image_type: ExternalImageType::TextureHandle(
+                                ImageBufferKind::TextureExternal
+                            ),
+                            ..
+                        })
+                    )
+                {
+                    let target_kind = if descriptor.format.bytes_per_pixel() == 1 {
+                        RenderTargetKind::Alpha
+                    } else {
+                        RenderTargetKind::Color
+                    };
+
+                    let task_id = RenderTask::new_scaling(
+                        orig_task_id,
+                        frame_state.rg_builder,
+                        target_kind,
+                        size
+                    );
+
+                    frame_state.surface_builder.add_child_render_task(
+                        task_id,
+                        frame_state.rg_builder,
+                    );
+
+                    task_id
+                } else {
+                    orig_task_id
+                };
+
+                // Every frame, for cached items, we need to request the render
+                // task cache item. The closure will be invoked on the first
+                // time through, and any time the render task output has been
+                // evicted from the texture cache.
+                if self.tile_spacing == LayoutSize::zero() {
+                    // Most common case.
+                    image_instance.src_color = Some(task_id);
+                } else {
+                    let padding = DeviceIntSideOffsets::new(
+                        0,
+                        (self.tile_spacing.width * size.width as f32 / self.stretch_size.width) as i32,
+                        (self.tile_spacing.height * size.height as f32 / self.stretch_size.height) as i32,
+                        0,
+                    );
+
+                    size.width += padding.horizontal();
+                    size.height += padding.vertical();
+
+                    if padding != DeviceIntSideOffsets::zero() {
+                        common.opacity = PrimitiveOpacity::translucent();
+                    }
+
+                    let image_cache_key = ImageCacheKey {
+                        request,
+                        texel_rect: None,
+                    };
+                    let target_kind = if descriptor.format.bytes_per_pixel() == 1 {
+                        RenderTargetKind::Alpha
+                    } else {
+                        RenderTargetKind::Color
+                    };
+
+                    // Request a pre-rendered image task.
+                    let cached_task_handle = frame_state.resource_cache.request_render_task(
+                        RenderTaskCacheKey {
+                            size,
+                            kind: RenderTaskCacheKeyKind::Image(image_cache_key),
+                        },
+                        frame_state.gpu_cache,
+                        frame_state.frame_gpu_data,
+                        frame_state.rg_builder,
+                        None,
+                        descriptor.is_opaque(),
+                        RenderTaskParent::Surface(parent_surface),
+                        &mut frame_state.surface_builder,
+                        |rg_builder, _| {
+                            // Create a task to blit from the texture cache to
+                            // a normal transient render task surface.
+                            // TODO: figure out if/when we can do a blit instead.
+                            let cache_to_target_task_id = RenderTask::new_scaling_with_padding(
+                                task_id,
+                                rg_builder,
+                                target_kind,
+                                size,
+                                padding,
+                            );
+
+                            // Create a task to blit the rect from the child render
+                            // task above back into the right spot in the persistent
+                            // render target cache.
+                            RenderTask::new_blit(
+                                size,
+                                cache_to_target_task_id,
+                                rg_builder,
+                            )
+                        }
+                    );
+
+                    image_instance.src_color = Some(cached_task_handle);
+                }
+            }
+            // Tiled image path.
+            Some(ImageProperties { tiling: Some(tile_size), visible_rect, .. }) => {
+                // We'll have a source handle per visible tile instead.
+                image_instance.src_color = None;
+
+                image_instance.visible_tiles.clear();
+                // TODO: rename the blob's visible_rect into something that doesn't conflict
+                // with the terminology we use during culling since it's not really the same
+                // thing.
+                let active_rect = visible_rect;
+
+                // Tighten the clip rect because decomposing the repeated image can
+                // produce primitives that are partially covering the original image
+                // rect and we want to clip these extra parts out.
+                let tight_clip_rect = visibility
+                    .clip_chain
+                    .local_clip_rect
+                    .intersection(&common.prim_rect).unwrap();
+                image_instance.tight_local_clip_rect = tight_clip_rect;
+
+                let visible_rect = compute_conservative_visible_rect(
+                    &visibility.clip_chain,
+                    frame_state.current_dirty_region().combined,
+                    prim_spatial_node_index,
+                    frame_context.spatial_tree,
+                );
+
+                let base_edge_flags = edge_flags_for_tile_spacing(&self.tile_spacing);
+
+                let stride = self.stretch_size + self.tile_spacing;
+
+                // We are performing the decomposition on the CPU here, no need to
+                // have it in the shader.
+                common.may_need_repetition = false;
+
+                let repetitions = image_tiling::repetitions(
+                    &common.prim_rect,
+                    &visible_rect,
+                    stride,
+                );
+
+                for image_tiling::Repetition { origin, edge_flags } in repetitions {
+                    let edge_flags = base_edge_flags | edge_flags;
+
+                    let layout_image_rect = LayoutRect::from_origin_and_size(
+                        origin,
+                        self.stretch_size,
+                    );
+
+                    let tiles = image_tiling::tiles(
+                        &layout_image_rect,
+                        &visible_rect,
+                        &active_rect,
+                        tile_size as i32,
+                    );
+
+                    for tile in tiles {
+                        let request = request.with_tile(tile.offset);
+                        let size = frame_state.resource_cache.request_image(
+                            request,
+                            frame_state.gpu_cache,
+                        );
+
+                        let task_id = frame_state.rg_builder.add().init(
+                            RenderTask::new_image(size, request)
+                        );
+
+                        image_instance.visible_tiles.push(VisibleImageTile {
+                            src_color: task_id,
+                            edge_flags: tile.edge_flags & edge_flags,
+                            local_rect: tile.rect,
+                            local_clip_rect: tight_clip_rect,
+                        });
+                    }
+                }
+
+                if image_instance.visible_tiles.is_empty() {
+                    // Mark as invisible
+                    visibility.reset();
+                }
+            }
+            None => {
+                image_instance.src_color = None;
+            }
+        }
+
+        if let Some(mut request) = frame_state.gpu_cache.request(&mut common.gpu_cache_handle) {
+            self.write_prim_gpu_blocks(&mut request);
+        }
+    }
+
+    /// Writes the three GPU blocks describing this image brush: the
+    /// premultiplied primitive color, white, and the repetition stride.
+    pub fn write_prim_gpu_blocks(&self, request: &mut GpuDataRequest) {
+        // Distance between the origins of two consecutive repetitions.
+        let stride_w = self.stretch_size.width + self.tile_spacing.width;
+        let stride_h = self.stretch_size.height + self.tile_spacing.height;
+
+        // NOTE: the number of blocks pushed here has to match
+        // `VECS_PER_SPECIFIC_BRUSH` from `brush_image.glsl` exactly.
+        request.push(self.color.premultiplied());
+        request.push(PremultipliedColorF::WHITE);
+        request.push([stride_w, stride_h, 0.0, 0.0]);
+    }
+}
+
+/// Edge mask implied by the tile spacing: a positive horizontal gap selects
+/// LEFT | RIGHT, a positive vertical gap selects TOP | BOTTOM.
+fn edge_flags_for_tile_spacing(tile_spacing: &LayoutSize) -> EdgeAaSegmentMask {
+    let mut mask = EdgeAaSegmentMask::empty();
+    if tile_spacing.width > 0.0 {
+        mask |= EdgeAaSegmentMask::LEFT | EdgeAaSegmentMask::RIGHT;
+    }
+    if tile_spacing.height > 0.0 {
+        mask |= EdgeAaSegmentMask::TOP | EdgeAaSegmentMask::BOTTOM;
+    }
+    mask
+}
+
+/// Template stored by the interner for image primitives.
+pub type ImageTemplate = PrimTemplate<ImageData>;
+
+impl From<ImageKey> for ImageTemplate {
+    fn from(image: ImageKey) -> Self {
+        let ImageKey { common, kind } = image;
+        ImageTemplate {
+            common: PrimTemplateCommonData::with_key_common(common),
+            kind: kind.into(),
+        }
+    }
+}
+
+pub type ImageDataHandle = InternHandle<Image>; // intern handle for `Image` primitives
+
+impl Internable for Image {
+    type Key = ImageKey; // built by `InternablePrimitive::into_key`
+    type StoreData = ImageTemplate; // converted from the key via `From<ImageKey>`
+    type InternData = (); // unit type: no extra intern data
+    const PROFILE_COUNTER: usize = crate::profiler::INTERNED_IMAGES;
+}
+
+impl InternablePrimitive for Image {
+    /// Pairs this image with its layout info to form the interning key.
+    fn into_key(self, info: &LayoutPrimitiveInfo) -> ImageKey {
+        ImageKey::new(info, self)
+    }
+
+    /// Pushes a fresh `ImageInstance` into `prim_store.images` and returns
+    /// the instance kind that refers to it. The key and the reference frame
+    /// offset are not used for images.
+    fn make_instance_kind(
+        _key: ImageKey,
+        data_handle: ImageDataHandle,
+        prim_store: &mut PrimitiveStore,
+        _reference_frame_relative_offset: LayoutVector2D,
+    ) -> PrimitiveInstanceKind {
+        // TODO(gw): Refactor this to not need a separate image
+        //           instance (see ImageInstance struct).
+        let fresh_instance = ImageInstance {
+            segment_instance_index: SegmentInstanceIndex::INVALID,
+            tight_local_clip_rect: LayoutRect::zero(),
+            visible_tiles: Vec::new(),
+            src_color: None,
+        };
+        PrimitiveInstanceKind::Image {
+            data_handle,
+            image_instance_index: prim_store.images.push(fresh_instance),
+            is_compositor_surface: false,
+        }
+    }
+}
+
+impl CreateShadow for Image {
+    /// Returns a copy of this image that uses the shadow's color. The two
+    /// unnamed arguments are ignored; every other field is carried over
+    /// from `self`.
+    fn create_shadow(&self, shadow: &Shadow, _: bool, _: RasterSpace) -> Self {
+        let shadow_color = shadow.color.into();
+        // Only the color differs from the source image.
+        Image {
+            color: shadow_color,
+            key: self.key,
+            stretch_size: self.stretch_size,
+            tile_spacing: self.tile_spacing,
+            image_rendering: self.image_rendering,
+            alpha_type: self.alpha_type,
+        }
+    }
+}
+
+impl IsVisible for Image {
+    fn is_visible(&self) -> bool {
+        true // images are unconditionally reported as visible
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Clone, Eq, MallocSizeOf, PartialEq, Hash)]
+pub struct YuvImage { // hashable description of a YUV image; the `kind` of a `YuvImageKey`
+    pub color_depth: ColorDepth,
+    pub yuv_key: [ApiImageKey; 3], // one image key per plane; `format` decides how many are used
+    pub format: YuvFormat,
+    pub color_space: YuvColorSpace,
+    pub color_range: ColorRange,
+    pub image_rendering: ImageRendering,
+}
+
+/// Interning key for YUV image primitives.
+pub type YuvImageKey = PrimKey<YuvImage>;
+
+impl YuvImageKey {
+    /// Combines the common key data derived from `info` with the
+    /// YUV-specific description.
+    pub fn new(info: &LayoutPrimitiveInfo, yuv_image: YuvImage) -> Self {
+        let common = info.into();
+        YuvImageKey { common, kind: yuv_image }
+    }
+}
+
+// `InternDebug` is implemented with an empty body, i.e. without
+// providing any item of its own.
+impl InternDebug for YuvImageKey {}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+pub struct YuvImageData { // template-side data for a YUV image, built from `YuvImage`
+    pub color_depth: ColorDepth,
+    pub yuv_key: [ApiImageKey; 3], // one image key per plane
+    pub src_yuv: [Option<RenderTaskId>; 3], // per-plane render task, reset and refilled by `update`
+    pub format: YuvFormat, // `get_plane_num()` gives the number of planes in use
+    pub color_space: YuvColorSpace,
+    pub color_range: ColorRange,
+    pub image_rendering: ImageRendering,
+}
+
+impl From<YuvImage> for YuvImageData {
+    /// Copies the description over; no render task is assigned yet.
+    fn from(image: YuvImage) -> Self {
+        let YuvImage {
+            color_depth, yuv_key, format, color_space, color_range, image_rendering,
+        } = image;
+        // `src_yuv` is the only field without a counterpart in `YuvImage`.
+        YuvImageData {
+            color_depth, yuv_key, format, color_space, color_range, image_rendering,
+            src_yuv: [None, None, None],
+        }
+    }
+}
+
+impl YuvImageData {
+    /// Image request for a single, untiled plane of this YUV image.
+    fn plane_request(&self, channel: usize) -> ImageRequest {
+        ImageRequest {
+            key: self.yuv_key[channel],
+            rendering: self.image_rendering,
+            tile: None,
+        }
+    }
+
+    /// Per-frame refresh of this template. Every plane used by `format` is
+    /// requested from the resource cache and gets a new image render task,
+    /// recorded in `src_yuv`. The GPU blocks are only rewritten when the GPU
+    /// cache hands back a request for `common.gpu_cache_handle` (first use or
+    /// eviction). This may be called multiple times per frame, by each
+    /// primitive reference that refers to this interned template.
+    pub fn update(
+        &mut self,
+        common: &mut PrimTemplateCommonData,
+        frame_state: &mut FrameBuildingState,
+    ) {
+        // Forget the tasks of the previous call before rebuilding them.
+        self.src_yuv = [None, None, None];
+
+        let plane_count = self.format.get_plane_num();
+        debug_assert!(plane_count <= 3);
+        for channel in 0 .. plane_count {
+            let request = self.plane_request(channel);
+            let size = frame_state.resource_cache.request_image(
+                request,
+                frame_state.gpu_cache,
+            );
+
+            let task_id = frame_state.rg_builder.add().init(
+                RenderTask::new_image(size, request)
+            );
+            self.src_yuv[channel] = Some(task_id);
+        }
+
+        if let Some(mut gpu_request) = frame_state.gpu_cache.request(&mut common.gpu_cache_handle) {
+            self.write_prim_gpu_blocks(&mut gpu_request);
+        }
+
+        // YUV images never have transparency
+        common.opacity = PrimitiveOpacity::opaque();
+    }
+
+    /// Requests every plane used by `format` from the resource cache,
+    /// without creating any render task.
+    pub fn request_resources(
+        &mut self,
+        resource_cache: &mut ResourceCache,
+        gpu_cache: &mut GpuCache,
+    ) {
+        let plane_count = self.format.get_plane_num();
+        debug_assert!(plane_count <= 3);
+        for channel in 0 .. plane_count {
+            resource_cache.request_image(self.plane_request(channel), gpu_cache);
+        }
+    }
+
+    /// Pushes one GPU block holding the bit depth, the color space combined
+    /// with the color range, and the format; the last component is zero.
+    pub fn write_prim_gpu_blocks(&self, request: &mut GpuDataRequest) {
+        let ranged_color_space = self.color_space.with_range(self.color_range);
+        let depth = pack_as_float(self.color_depth.bit_depth());
+        let space = pack_as_float(ranged_color_space as u32);
+        let format = pack_as_float(self.format as u32);
+        request.push([depth, space, format, 0.0]);
+    }
+}
+
+/// Template stored by the interner for YUV image primitives.
+pub type YuvImageTemplate = PrimTemplate<YuvImageData>;
+
+impl From<YuvImageKey> for YuvImageTemplate {
+    fn from(image: YuvImageKey) -> Self {
+        let YuvImageKey { common, kind } = image;
+        YuvImageTemplate {
+            common: PrimTemplateCommonData::with_key_common(common),
+            kind: kind.into(),
+        }
+    }
+}
+
+pub type YuvImageDataHandle = InternHandle<YuvImage>; // intern handle for `YuvImage` primitives
+
+impl Internable for YuvImage {
+    type Key = YuvImageKey; // built by `InternablePrimitive::into_key`
+    type StoreData = YuvImageTemplate; // converted from the key via `From<YuvImageKey>`
+    type InternData = (); // unit type: no extra intern data
+    const PROFILE_COUNTER: usize = crate::profiler::INTERNED_YUV_IMAGES;
+}
+
+impl InternablePrimitive for YuvImage {
+    /// Wraps this YUV image and its layout info into an interning key.
+    fn into_key(self, info: &LayoutPrimitiveInfo) -> YuvImageKey {
+        YuvImageKey::new(info, self)
+    }
+
+    /// Builds the instance kind for a YUV image; only the data handle is used.
+    fn make_instance_kind(
+        _key: YuvImageKey,
+        data_handle: YuvImageDataHandle,
+        _prim_store: &mut PrimitiveStore,
+        _reference_frame_relative_offset: LayoutVector2D,
+    ) -> PrimitiveInstanceKind {
+        let segment_instance_index = SegmentInstanceIndex::INVALID;
+        PrimitiveInstanceKind::YuvImage {
+            data_handle,
+            segment_instance_index,
+            is_compositor_surface: false,
+        }
+    }
+}
+
+impl IsVisible for YuvImage {
+    fn is_visible(&self) -> bool {
+        true // YUV images are unconditionally reported as visible
+    }
+}
+
+#[test]
+#[cfg(target_pointer_width = "64")] // the expected sizes below are only checked on 64-bit targets
+fn test_struct_sizes() {
+    use std::mem;
+    // The sizes of these structures are critical for performance on a number of
+    // talos stress tests. If you get a failure here on CI, there are two possibilities:
+    // (a) You made a structure smaller than it currently is. Great work! Update the
+    //     test expectations and move on.
+    // (b) You made a structure larger. This is not necessarily a problem, but should only
+    //     be done with care, and after checking if talos performance regresses badly.
+    assert_eq!(mem::size_of::<Image>(), 32, "Image size changed");
+    assert_eq!(mem::size_of::<ImageTemplate>(), 72, "ImageTemplate size changed");
+    assert_eq!(mem::size_of::<ImageKey>(), 52, "ImageKey size changed");
+    assert_eq!(mem::size_of::<YuvImage>(), 32, "YuvImage size changed");
+    assert_eq!(mem::size_of::<YuvImageTemplate>(), 84, "YuvImageTemplate size changed");
+    assert_eq!(mem::size_of::<YuvImageKey>(), 52, "YuvImageKey size changed");
+}
diff --git a/gfx/wr/webrender/src/prim_store/interned.rs b/gfx/wr/webrender/src/prim_store/interned.rs
new file mode 100644
index 0000000000..f536b50b09
--- /dev/null
+++ b/gfx/wr/webrender/src/prim_store/interned.rs
@@ -0,0 +1,14 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// List of all interned primitive types, to match `enumerate_interners!`.
+
+pub use crate::prim_store::backdrop::{BackdropCapture, BackdropRender};
+pub use crate::prim_store::borders::{ImageBorder, NormalBorderPrim};
+pub use crate::prim_store::image::{Image, YuvImage};
+pub use crate::prim_store::line_dec::{LineDecoration};
+pub use crate::prim_store::gradient::{LinearGradient, RadialGradient, ConicGradient};
+pub use crate::prim_store::picture::Picture;
+pub use crate::prim_store::text_run::TextRun;
+
diff --git a/gfx/wr/webrender/src/prim_store/line_dec.rs b/gfx/wr/webrender/src/prim_store/line_dec.rs
new file mode 100644
index 0000000000..496bab7569
--- /dev/null
+++ b/gfx/wr/webrender/src/prim_store/line_dec.rs
@@ -0,0 +1,257 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{
+    ColorF, ColorU, RasterSpace,
+    LineOrientation, LineStyle, PremultipliedColorF, Shadow,
+};
+use api::units::*;
+use crate::scene_building::{CreateShadow, IsVisible};
+use crate::frame_builder::{FrameBuildingState};
+use crate::gpu_cache::GpuDataRequest;
+use crate::intern;
+use crate::internal_types::LayoutPrimitiveInfo;
+use crate::prim_store::{
+    PrimKey, PrimTemplate, PrimTemplateCommonData,
+    InternablePrimitive, PrimitiveStore,
+};
+use crate::prim_store::PrimitiveInstanceKind;
+
+/// Maximum resolution in device pixels at which line decorations are rasterized.
+pub const MAX_LINE_DECORATION_RESOLUTION: u32 = 4096;
+
+#[derive(Clone, Debug, Hash, MallocSizeOf, PartialEq, Eq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct LineDecorationCacheKey { // held by `LineDecoration::cache_key` when a render task is needed
+    pub style: LineStyle,
+    pub orientation: LineOrientation,
+    pub wavy_line_thickness: Au, // stored in app units (`Au`)
+    pub size: LayoutSizeAu, // stored in app units; converted with `to_f32_px()` for the GPU cache
+}
+
+/// Identifying key for a line decoration.
+#[derive(Clone, Debug, Hash, MallocSizeOf, PartialEq, Eq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct LineDecoration {
+    // `Some(..)` means the line decoration relies on a render
+    // task (e.g. wavy). `None` means a fast path is used that
+    // draws the line decoration as a solid rect instead of
+    // going through a render task.
+    pub cache_key: Option<LineDecorationCacheKey>,
+    pub color: ColorU, // an alpha of zero makes `is_visible()` return false
+}
+
+/// Interning key for line decoration primitives.
+pub type LineDecorationKey = PrimKey<LineDecoration>;
+
+impl LineDecorationKey {
+    /// Combines the common key data derived from `info` with the
+    /// decoration-specific description.
+    pub fn new(info: &LayoutPrimitiveInfo, line_dec: LineDecoration) -> Self {
+        let common = info.into();
+        LineDecorationKey { common, kind: line_dec }
+    }
+}
+
+// `InternDebug` is implemented with an empty body, i.e. without
+// providing any item of its own.
+impl intern::InternDebug for LineDecorationKey {}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+pub struct LineDecorationData { // template-side data, built from a `LineDecorationKey`
+    pub cache_key: Option<LineDecorationCacheKey>, // selects which GPU blocks get written
+    pub color: ColorF, // converted from the key's `ColorU`
+}
+
+impl LineDecorationData {
+    /// Refreshes the GPU cache entry of this template. This may be called
+    /// multiple times per frame, by each primitive reference that refers to
+    /// this interned template; the blocks are only written when the GPU cache
+    /// hands back a request, i.e. when the cache entry is invalid (due to
+    /// first use or eviction).
+    pub fn update(
+        &mut self,
+        common: &mut PrimTemplateCommonData,
+        frame_state: &mut FrameBuildingState,
+    ) {
+        if let Some(mut request) = frame_state.gpu_cache.request(&mut common.gpu_cache_handle) {
+            self.write_prim_gpu_blocks(&mut request);
+        }
+    }
+
+    /// Pushes the GPU blocks for this decoration. With a cache key, three
+    /// blocks are written: the premultiplied color, white, and the cached
+    /// size in pixels. Without one, only the premultiplied color is written.
+    fn write_prim_gpu_blocks(
+        &self,
+        request: &mut GpuDataRequest
+    ) {
+        // Both layouts start with the premultiplied color.
+        request.push(self.color.premultiplied());
+
+        // The remaining two blocks are only written when a cache key is
+        // present (see the `cache_key` field of `LineDecoration`).
+        if let Some(cache_key) = self.cache_key.as_ref() {
+            // The size is stored in app units and pushed as f32 pixels.
+            let size = &cache_key.size;
+            request.push(PremultipliedColorF::WHITE);
+            request.push([size.width.to_f32_px(), size.height.to_f32_px(), 0.0, 0.0]);
+        }
+    }
+}
+
+/// Template stored by the interner for line decorations.
+pub type LineDecorationTemplate = PrimTemplate<LineDecorationData>;
+
+impl From<LineDecorationKey> for LineDecorationTemplate {
+    /// Carries the cache key over unchanged and converts the key's color
+    /// into the template's `ColorF`.
+    fn from(line_dec: LineDecorationKey) -> Self {
+        let common = PrimTemplateCommonData::with_key_common(line_dec.common);
+        let LineDecoration { cache_key, color } = line_dec.kind;
+        let kind = LineDecorationData { cache_key, color: color.into() };
+
+        LineDecorationTemplate { common, kind }
+    }
+}
+
+pub type LineDecorationDataHandle = intern::Handle<LineDecoration>; // intern handle for line decorations
+
+impl intern::Internable for LineDecoration {
+    type Key = LineDecorationKey; // built by `InternablePrimitive::into_key`
+    type StoreData = LineDecorationTemplate; // converted from the key via `From<LineDecorationKey>`
+    type InternData = (); // unit type: no extra intern data
+    const PROFILE_COUNTER: usize = crate::profiler::INTERNED_LINE_DECORATIONS;
+}
+
+impl InternablePrimitive for LineDecoration {
+    /// Wraps this decoration and its layout info into an interning key.
+    fn into_key(self, info: &LayoutPrimitiveInfo) -> LineDecorationKey {
+        LineDecorationKey::new(info, self)
+    }
+
+    /// Builds the instance kind for a line decoration. Only the data handle
+    /// is used; the key, the primitive store and the reference frame offset
+    /// are ignored.
+    fn make_instance_kind(
+        _key: LineDecorationKey,
+        data_handle: LineDecorationDataHandle,
+        _: &mut PrimitiveStore,
+        _reference_frame_relative_offset: LayoutVector2D,
+    ) -> PrimitiveInstanceKind {
+        // A new instance starts out without a render task.
+        let render_task = None;
+        PrimitiveInstanceKind::LineDecoration {
+            data_handle,
+            render_task,
+        }
+    }
+}
+
+impl CreateShadow for LineDecoration {
+    /// Same decoration (cache key cloned), using the shadow's color.
+    fn create_shadow(
+        &self,
+        shadow: &Shadow,
+        _: bool,
+        _: RasterSpace,
+    ) -> Self {
+        let color = shadow.color.into();
+        let cache_key = self.cache_key.clone();
+        LineDecoration { cache_key, color }
+    }
+}
+
+impl IsVisible for LineDecoration {
+    fn is_visible(&self) -> bool {
+        self.color.a > 0 // visible as soon as the alpha channel is non-zero
+    }
+}
+
+/// Compute the size of one tile of the mask used to draw a decorated line.
+///
+/// `rect_size` is the overall size of the line, `orientation` tells which
+/// axis the line runs along, and `style` together with
+/// `wavy_line_thickness` describes the decoration pattern.
+///
+/// `Solid` lines need no mask tile, so `None` is returned for them. Every
+/// other style repeats with its own characteristic period; for those, the
+/// returned `LayoutSize` has the right aspect ratio, at a size that is
+/// convenient for the `cs_line_decoration.glsl` fragment shader. The shader
+/// works in a local coordinate space in which the tile is a rectangle of
+/// the returned size with one corner at the origin.
+///
+/// The result is not necessarily expressed in pixels: device scaling and
+/// other concerns can still affect the actual task size.
+///
+/// `width` and `height` of the result are always horizontal and vertical
+/// respectively, whether `orientation` is `Vertical` or `Horizontal`.
+pub fn get_line_decoration_size(
+    rect_size: &LayoutSize,
+    orientation: LineOrientation,
+    style: LineStyle,
+    wavy_line_thickness: f32,
+) -> Option<LayoutSize> {
+    // Extent of the line rect across the line's direction.
+    let thickness = match orientation {
+        LineOrientation::Horizontal => rect_size.height,
+        LineOrientation::Vertical => rect_size.width,
+    };
+
+    // TODO(gw): The formulae below are based on the existing gecko and line
+    //           shader code. They give reasonable results for most inputs,
+    //           but could definitely do with a detailed pass to get better
+    //           quality on a wider range of inputs!
+    //           See nsCSSRendering::PaintDecorationLine in Gecko.
+
+    // Tile extent along the line (`along`) and across it (`across`).
+    let (along, across) = match style {
+        LineStyle::Solid => return None,
+        LineStyle::Dashed => {
+            // The period is twice the dash length, which is kept in [1, 64].
+            let dash_length = (3.0 * thickness).min(64.0).max(1.0);
+            (2.0 * dash_length, 4.0)
+        }
+        LineStyle::Dotted => {
+            // The period is twice the dot diameter, which is kept in [1, 64].
+            let diameter = thickness.min(64.0).max(1.0);
+            (2.0 * diameter, diameter)
+        }
+        LineStyle::Wavy => {
+            // The approximate period is twice (slope length + flat length).
+            let line_thickness = wavy_line_thickness.max(1.0);
+            let slope_length = thickness - line_thickness;
+            let flat_length = ((line_thickness - 1.0) * 2.0).max(1.0);
+            (2.0 * (slope_length + flat_length), thickness)
+        }
+    };
+
+    // `LayoutSize::new` takes (width, height): the extent along the line is
+    // the width for horizontal lines and the height for vertical ones.
+    let (width, height) = match orientation {
+        LineOrientation::Horizontal => (along, across),
+        LineOrientation::Vertical => (across, along),
+    };
+
+    Some(LayoutSize::new(width, height))
+}
+
+#[test]
+#[cfg(target_pointer_width = "64")] // the expected sizes below are only checked on 64-bit targets
+fn test_struct_sizes() {
+    use std::mem;
+    // The sizes of these structures are critical for performance on a number of
+    // talos stress tests. If you get a failure here on CI, there are two possibilities:
+    // (a) You made a structure smaller than it currently is. Great work! Update the
+    //     test expectations and move on.
+    // (b) You made a structure larger. This is not necessarily a problem, but should only
+    //     be done with care, and after checking if talos performance regresses badly.
+    assert_eq!(mem::size_of::<LineDecoration>(), 20, "LineDecoration size changed");
+    assert_eq!(mem::size_of::<LineDecorationTemplate>(), 60, "LineDecorationTemplate size changed");
+    assert_eq!(mem::size_of::<LineDecorationKey>(), 40, "LineDecorationKey size changed");
+}
diff --git a/gfx/wr/webrender/src/prim_store/mod.rs b/gfx/wr/webrender/src/prim_store/mod.rs
new file mode 100644
index 0000000000..7deecac93c
--- /dev/null
+++ b/gfx/wr/webrender/src/prim_store/mod.rs
@@ -0,0 +1,1448 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{BorderRadius, ClipMode, ColorF, ColorU, RasterSpace};
+use api::{ImageRendering, RepeatMode, PrimitiveFlags};
+use api::{PremultipliedColorF, PropertyBinding, Shadow};
+use api::{PrimitiveKeyKind, FillRule, POLYGON_CLIP_VERTEX_MAX};
+use api::units::*;
+use euclid::{SideOffsets2D, Size2D};
+use malloc_size_of::MallocSizeOf;
+use crate::clip::ClipLeafId;
+use crate::segment::EdgeAaSegmentMask;
+use crate::border::BorderSegmentCacheKey;
+use crate::debug_item::{DebugItem, DebugMessage};
+use crate::debug_colors;
+use crate::scene_building::{CreateShadow, IsVisible};
+use crate::frame_builder::FrameBuildingState;
+use glyph_rasterizer::GlyphKey;
+use crate::gpu_cache::{GpuCacheAddress, GpuCacheHandle, GpuDataRequest};
+use crate::gpu_types::{BrushFlags};
+use crate::intern;
+use crate::picture::PicturePrimitive;
+use crate::render_task_graph::RenderTaskId;
+use crate::resource_cache::ImageProperties;
+use crate::scene::SceneProperties;
+use std::{hash, ops, u32, usize};
+use crate::util::Recycler;
+use crate::internal_types::{FastHashSet, LayoutPrimitiveInfo};
+use crate::visibility::PrimitiveVisibility;
+
+pub mod backdrop;
+pub mod borders;
+pub mod gradient;
+pub mod image;
+pub mod line_dec;
+pub mod picture;
+pub mod text_run;
+pub mod interned;
+
+mod storage;
+
+use backdrop::{BackdropCaptureDataHandle, BackdropRenderDataHandle};
+use borders::{ImageBorderDataHandle, NormalBorderDataHandle};
+use gradient::{LinearGradientPrimitive, LinearGradientDataHandle, RadialGradientDataHandle, ConicGradientDataHandle};
+use image::{ImageDataHandle, ImageInstance, YuvImageDataHandle};
+use line_dec::LineDecorationDataHandle;
+use picture::PictureDataHandle;
+use text_run::{TextRunDataHandle, TextRunPrimitive};
+
+pub const VECS_PER_SEGMENT: usize = 2; // NOTE(review): presumably GPU cache blocks per segment — confirm at use sites
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Copy, Clone, MallocSizeOf)]
+pub struct PrimitiveOpacity { // opacity classification of a primitive
+    pub is_opaque: bool, // true from `opaque()` and from `from_alpha()` when alpha >= 1.0
+}
+
+impl PrimitiveOpacity {
+    /// Opacity value with `is_opaque` set.
+    pub fn opaque() -> Self {
+        Self { is_opaque: true }
+    }
+    /// Opacity value with `is_opaque` cleared.
+    pub fn translucent() -> Self {
+        Self { is_opaque: false }
+    }
+    /// Opaque when `alpha` is at least 1.0, translucent otherwise (NaN included).
+    pub fn from_alpha(alpha: f32) -> Self {
+        let is_opaque = alpha >= 1.0;
+        Self { is_opaque }
+    }
+}
+
+/// For external images, neither the UV coords of the image nor
+/// the image data itself can be known until the render thread
+/// receives the frame and issues callbacks to the client
+/// application. For each visible external image, a
+/// `DeferredResolve` is therefore created and stored in the
+/// frame. This allows the render thread to iterate this list,
+/// update any changed texture data and update the UV rect.
+/// Any filtering is handled externally for NativeTexture
+/// external
+/// images.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct DeferredResolve {
+    pub address: GpuCacheAddress,
+    pub image_properties: ImageProperties,
+    pub rendering: ImageRendering,
+}
+
+#[derive(Debug, Copy, Clone, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+pub struct ClipTaskIndex(pub u32); // newtype over a `u32` index
+
+impl ClipTaskIndex {
+    pub const INVALID: ClipTaskIndex = ClipTaskIndex(0); // note: index 0 is the "invalid" sentinel
+}
+
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, MallocSizeOf, Ord, PartialOrd)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct PictureIndex(pub usize); // newtype over a `usize` index
+
+impl PictureIndex {
+    pub const INVALID: PictureIndex = PictureIndex(!0); // all bits set, i.e. `usize::MAX`
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Copy, Debug, Clone, MallocSizeOf, PartialEq)]
+pub struct RectangleKey { // hashable rectangle; `Eq` and `Hash` are implemented by hand below
+    pub x0: f32, // x of the `min` corner
+    pub y0: f32, // y of the `min` corner
+    pub x1: f32, // x of the `max` corner
+    pub y1: f32, // y of the `max` corner
+}
+
+impl RectangleKey {
+    /// True when the rectangles overlap on both axes; touching edges do not count.
+    pub fn intersects(&self, other: &Self) -> bool {
+        let overlaps_x = self.x0 < other.x1 && other.x0 < self.x1;
+        let overlaps_y = self.y0 < other.y1 && other.y0 < self.y1;
+        overlaps_x && overlaps_y
+    }
+}
+
+impl Eq for RectangleKey {} // declared by hand: `f32` fields rule out deriving `Eq`
+
+impl hash::Hash for RectangleKey {
+    /// Hashes the bit patterns of x0, y0, x1 and y1, in that order.
+    fn hash<H: hash::Hasher>(&self, state: &mut H) {
+        for coord in &[self.x0, self.y0, self.x1, self.y1] {
+            coord.to_bits().hash(state);
+        }
+    }
+}
+
+impl From<RectangleKey> for LayoutRect {
+    /// Rebuilds the layout rect spanned by the key's two corners.
+    fn from(key: RectangleKey) -> LayoutRect {
+        let min = LayoutPoint::new(key.x0, key.y0);
+        let max = LayoutPoint::new(key.x1, key.y1);
+        LayoutRect { min, max }
+    }
+}
+
+impl From<RectangleKey> for WorldRect {
+    /// Rebuilds the world rect spanned by the key's two corners.
+    fn from(key: RectangleKey) -> WorldRect {
+        let min = WorldPoint::new(key.x0, key.y0);
+        let max = WorldPoint::new(key.x1, key.y1);
+        WorldRect { min, max }
+    }
+}
+
+impl From<LayoutRect> for RectangleKey {
+    /// Flattens a layout rect into the coordinates of its `min` and
+    /// `max` corners.
+    fn from(rect: LayoutRect) -> RectangleKey {
+        let (x0, y0) = (rect.min.x, rect.min.y);
+        let (x1, y1) = (rect.max.x, rect.max.y);
+
+        RectangleKey { x0, y0, x1, y1 }
+    }
+}
+
+impl From<PictureRect> for RectangleKey {
+    /// Flattens a picture rect into the coordinates of its `min` and
+    /// `max` corners.
+    fn from(rect: PictureRect) -> RectangleKey {
+        let (x0, y0) = (rect.min.x, rect.min.y);
+        let (x1, y1) = (rect.max.x, rect.max.y);
+
+        RectangleKey { x0, y0, x1, y1 }
+    }
+}
+
+impl From<WorldRect> for RectangleKey {
+    /// Flattens a world rect into the coordinates of its `min` and
+    /// `max` corners.
+    fn from(rect: WorldRect) -> RectangleKey {
+        let (x0, y0) = (rect.min.x, rect.min.y);
+        let (x1, y1) = (rect.max.x, rect.max.y);
+
+        RectangleKey { x0, y0, x1, y1 }
+    }
+}
+
+/// Fixed-size representation of a polygon: the points live in an array
+/// of POLYGON_CLIP_VERTEX_MAX entries. Our initialization method restricts
+/// us to values <= 32; if the constant POLYGON_CLIP_VERTEX_MAX is > 32,
+/// the Rust compiler will complain.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Copy, Debug, Clone, Hash, MallocSizeOf, PartialEq)]
+pub struct PolygonKey {
+    pub point_count: u8, // number of leading entries of `points` filled in by `new`
+    pub points: [PointKey; POLYGON_CLIP_VERTEX_MAX], // entries past `point_count` stay at (0, 0)
+    pub fill_rule: FillRule,
+}
+
+impl PolygonKey {
+    /// Builds a fixed-size key from `points_layout`. At most
+    /// `POLYGON_CLIP_VERTEX_MAX` points are copied; any further input points
+    /// are ignored, and slots without an input point stay at (0, 0).
+    pub fn new(
+        points_layout: &Vec<LayoutPoint>,
+        fill_rule: FillRule,
+    ) -> Self {
+        // A fixed-size array has to be filled from a `Vec`: start from
+        // known-good values, then overwrite one slot per input point for as
+        // long as the iterator provides values.
+        let origin = PointKey { x: 0.0, y: 0.0 };
+        let mut points = [origin; POLYGON_CLIP_VERTEX_MAX];
+
+        let mut point_count: u8 = 0;
+        for (slot, point) in points.iter_mut().zip(points_layout.iter()) {
+            *slot = PointKey::from(*point);
+            point_count += 1;
+        }
+
+        PolygonKey { point_count, points, fill_rule }
+    }
+}
+
+// `Eq` is declared by hand; the derive list above only has `PartialEq`.
+impl Eq for PolygonKey {}
+
+/// A hashable SideOffsets2D that can be used in primitive keys.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Clone, MallocSizeOf, PartialEq)]
+pub struct SideOffsetsKey { // `Eq` and `Hash` are implemented by hand below
+    pub top: f32,
+    pub right: f32,
+    pub bottom: f32,
+    pub left: f32,
+}
+
+impl Eq for SideOffsetsKey {} // declared by hand: `f32` fields rule out deriving `Eq`
+
+impl hash::Hash for SideOffsetsKey {
+    /// Hashes the bit patterns of top, right, bottom and left, in that order.
+    fn hash<H: hash::Hasher>(&self, state: &mut H) {
+        for offset in &[self.top, self.right, self.bottom, self.left] {
+            offset.to_bits().hash(state);
+        }
+    }
+}
+
+impl From<SideOffsetsKey> for LayoutSideOffsets {
+    /// Rebuilds layout side offsets from the key.
+    fn from(key: SideOffsetsKey) -> LayoutSideOffsets {
+        let SideOffsetsKey { top, right, bottom, left } = key;
+
+        // The arguments are passed in top, right, bottom, left order.
+        LayoutSideOffsets::new(top, right, bottom, left)
+    }
+}
+
+impl<U> From<SideOffsets2D<f32, U>> for SideOffsetsKey {
+    /// Copies the four offsets out of `offsets`, whatever its unit type
+    /// `U` is.
+    fn from(offsets: SideOffsets2D<f32, U>) -> SideOffsetsKey {
+        let (top, right) = (offsets.top, offsets.right);
+        let (bottom, left) = (offsets.bottom, offsets.left);
+
+        SideOffsetsKey { top, right, bottom, left }
+    }
+}
+
+/// A hashable size for using as a key during primitive interning.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Copy, Debug, Clone, MallocSizeOf, PartialEq)]
+pub struct SizeKey { // fields are private; convert through the `From` impls below
+    w: f32, // width
+    h: f32, // height
+}
+
+impl Eq for SizeKey {} // declared by hand: `f32` fields rule out deriving `Eq`
+
+impl hash::Hash for SizeKey {
+    /// Hashes the bit patterns of the width, then the height.
+    fn hash<H: hash::Hasher>(&self, state: &mut H) {
+        (self.w.to_bits(), self.h.to_bits()).hash(state);
+    }
+}
+
+impl From<SizeKey> for LayoutSize {
+    fn from(key: SizeKey) -> LayoutSize {
+        let SizeKey { w, h } = key;
+        LayoutSize::new(w, h)
+    }
+}
+
+impl<U> From<Size2D<f32, U>> for SizeKey {
+    /// Keeps the width and height of `size`, whatever its unit type `U` is.
+    fn from(size: Size2D<f32, U>) -> SizeKey {
+        let (w, h) = (size.width, size.height);
+        SizeKey { w, h }
+    }
+}
+
+/// A hashable vec for using as a key during primitive interning.
+///
+/// Equality and hashing are both defined on the raw bit patterns of the
+/// `f32` components, so the `Eq`/`Hash` contracts hold for every value: a
+/// NaN component compares equal to itself, and `0.0` / `-0.0` (which hash
+/// differently) are treated as distinct keys.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Copy, Debug, Clone, MallocSizeOf)]
+pub struct VectorKey {
+    pub x: f32,
+    pub y: f32,
+}
+
+impl PartialEq for VectorKey {
+    /// Bitwise comparison, mirroring exactly what `Hash` feeds to the hasher.
+    fn eq(&self, other: &Self) -> bool {
+        self.x.to_bits() == other.x.to_bits()
+            && self.y.to_bits() == other.y.to_bits()
+    }
+}
+
+impl Eq for VectorKey {}
+
+impl hash::Hash for VectorKey {
+    fn hash<H: hash::Hasher>(&self, state: &mut H) {
+        self.x.to_bits().hash(state);
+        self.y.to_bits().hash(state);
+    }
+}
+
+/// Rebuilds a layout-space vector from the components stored in a key.
+impl From<VectorKey> for LayoutVector2D {
+    fn from(key: VectorKey) -> LayoutVector2D {
+        let VectorKey { x, y } = key;
+        LayoutVector2D::new(x, y)
+    }
+}
+
+/// Rebuilds a world-space vector from the components stored in a key.
+impl From<VectorKey> for WorldVector2D {
+    fn from(key: VectorKey) -> WorldVector2D {
+        let VectorKey { x, y } = key;
+        WorldVector2D::new(x, y)
+    }
+}
+
+/// Captures the components of a layout-space vector as a hashable key.
+impl From<LayoutVector2D> for VectorKey {
+    fn from(vec: LayoutVector2D) -> VectorKey {
+        let (x, y) = (vec.x, vec.y);
+        VectorKey { x, y }
+    }
+}
+
+/// Captures the components of a world-space vector as a hashable key.
+impl From<WorldVector2D> for VectorKey {
+    fn from(vec: WorldVector2D) -> VectorKey {
+        let (x, y) = (vec.x, vec.y);
+        VectorKey { x, y }
+    }
+}
+
+/// A hashable point for using as a key during primitive interning.
+///
+/// Equality and hashing are both defined on the raw bit patterns of the
+/// `f32` coordinates, so the `Eq`/`Hash` contracts hold for every value: a
+/// NaN coordinate compares equal to itself, and `0.0` / `-0.0` (which hash
+/// differently) are treated as distinct keys.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Copy, Clone, MallocSizeOf)]
+pub struct PointKey {
+    pub x: f32,
+    pub y: f32,
+}
+
+impl PartialEq for PointKey {
+    /// Bitwise comparison, mirroring exactly what `Hash` feeds to the hasher.
+    fn eq(&self, other: &Self) -> bool {
+        self.x.to_bits() == other.x.to_bits()
+            && self.y.to_bits() == other.y.to_bits()
+    }
+}
+
+impl Eq for PointKey {}
+
+impl hash::Hash for PointKey {
+    fn hash<H: hash::Hasher>(&self, state: &mut H) {
+        self.x.to_bits().hash(state);
+        self.y.to_bits().hash(state);
+    }
+}
+
+/// Rebuilds a layout-space point from the coordinates stored in a key.
+impl From<PointKey> for LayoutPoint {
+    fn from(key: PointKey) -> LayoutPoint {
+        let PointKey { x, y } = key;
+        LayoutPoint::new(x, y)
+    }
+}
+
+/// Captures the coordinates of a layout-space point as a hashable key.
+impl From<LayoutPoint> for PointKey {
+    fn from(p: LayoutPoint) -> PointKey {
+        let (x, y) = (p.x, p.y);
+        PointKey { x, y }
+    }
+}
+
+/// Captures the coordinates of a picture-space point as a hashable key.
+impl From<PicturePoint> for PointKey {
+    fn from(p: PicturePoint) -> PointKey {
+        let (x, y) = (p.x, p.y);
+        PointKey { x, y }
+    }
+}
+
+/// Captures the coordinates of a world-space point as a hashable key.
+impl From<WorldPoint> for PointKey {
+    fn from(p: WorldPoint) -> PointKey {
+        let (x, y) = (p.x, p.y);
+        PointKey { x, y }
+    }
+}
+
+/// A hashable float for using as a key during primitive interning.
+///
+/// Equality and hashing are both defined on the raw bit pattern of the
+/// wrapped `f32`, so the `Eq`/`Hash` contracts hold for every value: NaN
+/// compares equal to itself, and `0.0` / `-0.0` (which hash differently)
+/// are treated as distinct keys.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Copy, Clone, MallocSizeOf)]
+pub struct FloatKey(f32);
+
+impl PartialEq for FloatKey {
+    /// Bitwise comparison, mirroring exactly what `Hash` feeds to the hasher.
+    fn eq(&self, other: &Self) -> bool {
+        self.0.to_bits() == other.0.to_bits()
+    }
+}
+
+impl Eq for FloatKey {}
+
+impl hash::Hash for FloatKey {
+    fn hash<H: hash::Hasher>(&self, state: &mut H) {
+        self.0.to_bits().hash(state);
+    }
+}
+
+
+/// The part of a primitive key that is shared by every primitive kind:
+/// the primitive flags and the primitive rect in hashable form.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Clone, Eq, MallocSizeOf, PartialEq, Hash)]
+pub struct PrimKeyCommonData {
+    /// Flags copied from the `LayoutPrimitiveInfo` the key was built from.
+    pub flags: PrimitiveFlags,
+    /// The primitive rect, converted into a hashable `RectangleKey`.
+    pub prim_rect: RectangleKey,
+}
+
+/// Extracts the key-relevant parts (flags and rect) of a primitive info.
+impl From<&LayoutPrimitiveInfo> for PrimKeyCommonData {
+    fn from(info: &LayoutPrimitiveInfo) -> Self {
+        let flags = info.flags;
+        let prim_rect = info.rect.into();
+        PrimKeyCommonData { flags, prim_rect }
+    }
+}
+
+/// Generic primitive key: the common key data plus a kind-specific
+/// payload `T`.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Clone, Eq, MallocSizeOf, PartialEq, Hash)]
+pub struct PrimKey<T: MallocSizeOf> {
+    /// Key data shared by all primitive kinds.
+    pub common: PrimKeyCommonData,
+    /// Kind-specific key data.
+    pub kind: T,
+}
+
+/// Interning key for the primitives described by `PrimitiveKeyKind`
+/// (it is the `Key` type of that kind's `Internable` impl).
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Clone, Eq, MallocSizeOf, PartialEq, Hash)]
+pub struct PrimitiveKey {
+    /// Key data shared by all primitive kinds.
+    pub common: PrimKeyCommonData,
+    /// Which primitive this is, with its kind-specific key data.
+    pub kind: PrimitiveKeyKind,
+}
+
+impl PrimitiveKey {
+    /// Builds a key from the layout info of a primitive and its kind.
+    /// The common part of the key is derived from `info`.
+    pub fn new(
+        info: &LayoutPrimitiveInfo,
+        kind: PrimitiveKeyKind,
+    ) -> Self {
+        let common = PrimKeyCommonData::from(info);
+        PrimitiveKey { common, kind }
+    }
+}
+
+// Empty impl: `PrimitiveKey` overrides none of the `InternDebug` items.
+impl intern::InternDebug for PrimitiveKey {}
+
+/// The shared information for a given primitive. This is interned and retained
+/// both across frames and display lists, by comparing the matching PrimitiveKey.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+pub enum PrimitiveTemplateKind {
+    /// A rectangle whose color is either a fixed value or a property
+    /// binding that is resolved through the scene properties.
+    Rectangle {
+        color: PropertyBinding<ColorF>,
+    },
+    /// A clear primitive. Its GPU block is always premultiplied black.
+    Clear,
+}
+
+impl PrimitiveTemplateKind {
+    /// Pushes the GPU data for this template kind onto `request`.
+    ///
+    /// Exactly one block is written: the resolved, premultiplied color for
+    /// a rectangle, or premultiplied black for a clear primitive.
+    pub fn write_prim_gpu_blocks(
+        &self,
+        request: &mut GpuDataRequest,
+        scene_properties: &SceneProperties,
+    ) {
+        match self {
+            PrimitiveTemplateKind::Rectangle { color, .. } => {
+                let resolved = scene_properties.resolve_color(color);
+                request.push(resolved.premultiplied())
+            }
+            PrimitiveTemplateKind::Clear => {
+                // Opaque black with operator dest out
+                request.push(PremultipliedColorF::BLACK);
+            }
+        }
+    }
+}
+
+/// Builds the template data that corresponds to a primitive key kind.
+/// The interner calls this when a key is created for which it does not
+/// yet hold a primitive.
+impl From<PrimitiveKeyKind> for PrimitiveTemplateKind {
+    fn from(kind: PrimitiveKeyKind) -> Self {
+        match kind {
+            PrimitiveKeyKind::Rectangle { color, .. } => {
+                let color = color.into();
+                PrimitiveTemplateKind::Rectangle { color }
+            }
+            PrimitiveKeyKind::Clear => PrimitiveTemplateKind::Clear,
+        }
+    }
+}
+
+/// Template data shared by every primitive kind; created from the common
+/// part of a primitive key by `with_key_common`.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+#[derive(Debug)]
+pub struct PrimTemplateCommonData {
+    /// Primitive flags, copied from the key's common data.
+    pub flags: PrimitiveFlags,
+    /// Initialised to `true` by `with_key_common`.
+    // NOTE(review): presumably cleared by primitives known not to need
+    // repetition -- confirm against the users of this field.
+    pub may_need_repetition: bool,
+    /// The primitive rect, converted back from the key's `RectangleKey`.
+    pub prim_rect: LayoutRect,
+    /// Opacity of the primitive; starts out translucent in
+    /// `with_key_common`.
+    pub opacity: PrimitiveOpacity,
+    /// The GPU cache handle for a primitive template. Since this structure
+    /// is retained across display lists by interning, this GPU cache handle
+    /// also remains valid, which reduces the number of updates to the GPU
+    /// cache when a new display list is processed.
+    pub gpu_cache_handle: GpuCacheHandle,
+    /// Specifies the edges that are *allowed* to have anti-aliasing.
+    /// In other words EdgeAaSegmentFlags::all() does not necessarily mean all edges will
+    /// be anti-aliased, only that they could be.
+    ///
+    /// Use this to force disable anti-aliasing on edges of the primitives.
+    pub edge_aa_mask: EdgeAaSegmentMask,
+}
+
+impl PrimTemplateCommonData {
+    /// Creates the common template data from the common part of a key.
+    ///
+    /// Flags and rect come from `common`; every other field starts at a
+    /// fixed default (repetition allowed, fresh GPU cache handle,
+    /// translucent opacity, anti-aliasing allowed on all edges).
+    pub fn with_key_common(common: PrimKeyCommonData) -> Self {
+        let PrimKeyCommonData { flags, prim_rect } = common;
+
+        Self {
+            flags,
+            may_need_repetition: true,
+            prim_rect: prim_rect.into(),
+            gpu_cache_handle: GpuCacheHandle::new(),
+            opacity: PrimitiveOpacity::translucent(),
+            edge_aa_mask: EdgeAaSegmentMask::all(),
+        }
+    }
+}
+
+/// Generic primitive template: the common template data plus a
+/// kind-specific payload `T`.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+pub struct PrimTemplate<T> {
+    /// Template data shared by all primitive kinds.
+    pub common: PrimTemplateCommonData,
+    /// Kind-specific template data.
+    pub kind: T,
+}
+
+/// Interned template for the primitives described by `PrimitiveKeyKind`
+/// (it is the `StoreData` type of that kind's `Internable` impl).
+/// Dereferences to its `common` data.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+pub struct PrimitiveTemplate {
+    /// Template data shared by all primitive kinds.
+    pub common: PrimTemplateCommonData,
+    /// Rectangle- or clear-specific template data.
+    pub kind: PrimitiveTemplateKind,
+}
+
+/// Lets the common template fields be read directly on the template.
+impl ops::Deref for PrimitiveTemplate {
+    type Target = PrimTemplateCommonData;
+
+    fn deref(&self) -> &PrimTemplateCommonData {
+        let common = &self.common;
+        common
+    }
+}
+
+/// Lets the common template fields be written directly on the template.
+impl ops::DerefMut for PrimitiveTemplate {
+    fn deref_mut(&mut self) -> &mut PrimTemplateCommonData {
+        let common = &mut self.common;
+        common
+    }
+}
+
+/// Builds a template from a key by converting its common and
+/// kind-specific halves separately.
+impl From<PrimitiveKey> for PrimitiveTemplate {
+    fn from(item: PrimitiveKey) -> Self {
+        let PrimitiveKey { common, kind } = item;
+
+        PrimitiveTemplate {
+            common: PrimTemplateCommonData::with_key_common(common),
+            kind: kind.into(),
+        }
+    }
+}
+
+impl PrimitiveTemplate {
+    /// Refreshes the GPU cache entry and the opacity of this template.
+    ///
+    /// This can run several times per frame, once for each primitive
+    /// reference that points at this interned template. GPU blocks are only
+    /// written when the cache request hands back a request object (first
+    /// use or eviction); the opacity is recomputed on every call.
+    pub fn update(
+        &mut self,
+        frame_state: &mut FrameBuildingState,
+        scene_properties: &SceneProperties,
+    ) {
+        let handle = &mut self.common.gpu_cache_handle;
+        if let Some(mut request) = frame_state.gpu_cache.request(handle) {
+            self.kind.write_prim_gpu_blocks(&mut request, scene_properties);
+        }
+
+        // A clear primitive is always translucent; a rectangle takes its
+        // opacity from the alpha of its resolved color.
+        let opacity = match &self.kind {
+            PrimitiveTemplateKind::Rectangle { color, .. } => {
+                let alpha = scene_properties.resolve_color(color).a;
+                PrimitiveOpacity::from_alpha(alpha)
+            }
+            PrimitiveTemplateKind::Clear => PrimitiveOpacity::translucent(),
+        };
+        self.common.opacity = opacity;
+    }
+}
+
+/// Intern handle for rectangle / clear primitives.
+type PrimitiveDataHandle = intern::Handle<PrimitiveKeyKind>;
+
+/// Wires `PrimitiveKeyKind` into the interner: `PrimitiveKey` is the key,
+/// `PrimitiveTemplate` is the stored data, and there is no extra
+/// intern data.
+impl intern::Internable for PrimitiveKeyKind {
+    type Key = PrimitiveKey;
+    type StoreData = PrimitiveTemplate;
+    type InternData = ();
+    // Profiler counter index associated with this interned type.
+    const PROFILE_COUNTER: usize = crate::profiler::INTERNED_PRIMITIVES;
+}
+
+impl InternablePrimitive for PrimitiveKeyKind {
+    /// Wraps this kind in a full key using the given primitive info.
+    fn into_key(
+        self,
+        info: &LayoutPrimitiveInfo,
+    ) -> PrimitiveKey {
+        PrimitiveKey::new(info, self)
+    }
+
+    /// Creates the per-instance data for a primitive of this kind.
+    ///
+    /// A rectangle whose color is a property binding has that binding
+    /// stored in `prim_store.color_bindings`; a rectangle with a fixed
+    /// color gets `ColorBindingIndex::INVALID` instead.
+    fn make_instance_kind(
+        key: PrimitiveKey,
+        data_handle: PrimitiveDataHandle,
+        prim_store: &mut PrimitiveStore,
+        _reference_frame_relative_offset: LayoutVector2D,
+    ) -> PrimitiveInstanceKind {
+        match key.kind {
+            PrimitiveKeyKind::Rectangle { color, .. } => {
+                let color_binding_index = if let PropertyBinding::Binding(..) = color {
+                    prim_store.color_bindings.push(color)
+                } else {
+                    ColorBindingIndex::INVALID
+                };
+
+                PrimitiveInstanceKind::Rectangle {
+                    data_handle,
+                    segment_instance_index: SegmentInstanceIndex::INVALID,
+                    color_binding_index,
+                }
+            }
+            PrimitiveKeyKind::Clear => PrimitiveInstanceKind::Clear { data_handle },
+        }
+    }
+}
+
+/// A tile of a mask image, identified by its tile offset and paired with
+/// a rect in layout space.
+// NOTE(review): presumably only tiles that passed a visibility test are
+// stored as this type -- confirm against the code that creates them.
+#[derive(Debug, MallocSizeOf)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct VisibleMaskImageTile {
+    /// Offset of the tile within the tiled image.
+    pub tile_offset: TileOffset,
+    /// Rect of the tile, in layout space.
+    pub tile_rect: LayoutRect,
+}
+
+/// Per-tile data for a tiled gradient: a GPU cache handle plus the tile's
+/// local rect and local clip rect. Stored in `GradientTileStorage`.
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+pub struct VisibleGradientTile {
+    /// GPU cache handle for this tile's data.
+    pub handle: GpuCacheHandle,
+    /// Rect of the tile, in layout space.
+    pub local_rect: LayoutRect,
+    /// Clip rect of the tile, in layout space.
+    pub local_clip_rect: LayoutRect,
+}
+
+/// Information about how to cache a border segment,
+/// along with the current render task cache entry.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, MallocSizeOf)]
+pub struct BorderSegmentInfo {
+    /// Size of the segment's task, in layout units.
+    pub local_task_size: LayoutSize,
+    /// Key identifying this segment for caching purposes.
+    pub cache_key: BorderSegmentCacheKey,
+}
+
+/// Represents the visibility state of a segment with respect to clip masks.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[derive(Debug, Clone)]
+pub enum ClipMaskKind {
+    /// The segment has a clip mask, specified by the render task.
+    Mask(RenderTaskId),
+    /// The segment has no clip mask.
+    None,
+    /// The segment is made invisible / clipped completely.
+    Clipped,
+}
+
+/// One segment of a segmented brush primitive. Segments are stored in
+/// `SegmentStorage` and referenced through `SegmentsRange`.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Clone, MallocSizeOf)]
+pub struct BrushSegment {
+    /// Rect covered by this segment, in layout space.
+    pub local_rect: LayoutRect,
+    /// Whether this segment might require a clip mask.
+    pub may_need_clip_mask: bool,
+    /// Edge anti-aliasing mask for this segment.
+    pub edge_flags: EdgeAaSegmentMask,
+    /// Four floats of additional per-segment data.
+    // NOTE(review): meaning depends on the brush type -- confirm at use sites.
+    pub extra_data: [f32; 4],
+    /// Brush flags for this segment.
+    pub brush_flags: BrushFlags,
+}
+
+impl BrushSegment {
+    /// Creates a segment from its parts; every argument is stored
+    /// unchanged in the field of the same name.
+    pub fn new(
+        local_rect: LayoutRect,
+        may_need_clip_mask: bool,
+        edge_flags: EdgeAaSegmentMask,
+        extra_data: [f32; 4],
+        brush_flags: BrushFlags,
+    ) -> Self {
+        BrushSegment {
+            local_rect,
+            may_need_clip_mask,
+            edge_flags,
+            extra_data,
+            brush_flags,
+        }
+    }
+}
+
+/// The rect part of `ClipData`: the clip rect plus the clip mode.
+/// `#[repr(C)]` fixes the field order in memory.
+#[derive(Debug, Clone)]
+#[repr(C)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct ClipRect {
+    rect: LayoutRect,
+    // The `ClipMode` discriminant stored as a float; the `ClipData`
+    // constructors write it as `mode as u32 as f32`.
+    mode: f32,
+}
+
+/// One corner of `ClipData`: the rect occupied by the corner plus its
+/// outer and inner radii on each axis. `#[repr(C)]` fixes the field order
+/// in memory.
+#[derive(Debug, Clone)]
+#[repr(C)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct ClipCorner {
+    rect: LayoutRect,
+    outer_radius_x: f32,
+    outer_radius_y: f32,
+    // The `ClipData` constructors in this file always set the inner
+    // radii to 0.0.
+    inner_radius_x: f32,
+    inner_radius_y: f32,
+}
+
+impl ClipCorner {
+    /// Creates a corner whose outer radius is `outer_radius` on both axes
+    /// and whose inner radius is `inner_radius` on both axes.
+    fn uniform(rect: LayoutRect, outer_radius: f32, inner_radius: f32) -> ClipCorner {
+        let (outer_radius_x, outer_radius_y) = (outer_radius, outer_radius);
+        let (inner_radius_x, inner_radius_y) = (inner_radius, inner_radius);
+
+        Self {
+            rect,
+            outer_radius_x,
+            outer_radius_y,
+            inner_radius_x,
+            inner_radius_y,
+        }
+    }
+}
+
+/// Clip description made of the clip rect (with its mode) followed by one
+/// `ClipCorner` per corner. `#[repr(C)]` fixes the field order in memory.
+#[derive(Debug, Clone)]
+#[repr(C)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ClipData {
+    rect: ClipRect,
+    top_left: ClipCorner,
+    top_right: ClipCorner,
+    bottom_left: ClipCorner,
+    bottom_right: ClipCorner,
+}
+
+impl ClipData {
+    /// Builds the clip data for a rounded rect of the given `size` whose
+    /// corners may each have a different (elliptical) radius.
+    ///
+    /// The clip rect starts at the origin. Each corner rect has the size
+    /// of that corner's radius and is flush with the matching corner of
+    /// the clip rect. Inner radii are always zero.
+    pub fn rounded_rect(size: LayoutSize, radii: &BorderRadius, mode: ClipMode) -> ClipData {
+        // TODO(gw): For simplicity, keep most of the clip GPU structs the
+        //           same as they were, even though the origin is now always
+        //           zero, since they are in the clip's local space. In future,
+        //           we could reduce the GPU cache size of ClipData.
+        let rect = LayoutRect::from_size(size);
+
+        // One corner with its top-left at (x, y) and the given radius.
+        let corner = |x: f32, y: f32, radius: LayoutSize| ClipCorner {
+            rect: LayoutRect::from_origin_and_size(
+                LayoutPoint::new(x, y),
+                LayoutSize::new(radius.width, radius.height),
+            ),
+            outer_radius_x: radius.width,
+            outer_radius_y: radius.height,
+            inner_radius_x: 0.0,
+            inner_radius_y: 0.0,
+        };
+
+        ClipData {
+            rect: ClipRect {
+                rect,
+                mode: mode as u32 as f32,
+            },
+            top_left: corner(
+                rect.min.x,
+                rect.min.y,
+                radii.top_left,
+            ),
+            top_right: corner(
+                rect.max.x - radii.top_right.width,
+                rect.min.y,
+                radii.top_right,
+            ),
+            bottom_left: corner(
+                rect.min.x,
+                rect.max.y - radii.bottom_left.height,
+                radii.bottom_left,
+            ),
+            bottom_right: corner(
+                rect.max.x - radii.bottom_right.width,
+                rect.max.y - radii.bottom_right.height,
+                radii.bottom_right,
+            ),
+        }
+    }
+
+    /// Builds the clip data for a rounded rect of the given `size` where
+    /// all four corners share the same circular `radius`.
+    ///
+    /// The layout matches `rounded_rect`: the clip rect starts at the
+    /// origin and each corner rect is a `radius` x `radius` square flush
+    /// with the matching corner. Inner radii are always zero.
+    pub fn uniform(size: LayoutSize, radius: f32, mode: ClipMode) -> ClipData {
+        // TODO(gw): For simplicity, keep most of the clip GPU structs the
+        //           same as they were, even though the origin is now always
+        //           zero, since they are in the clip's local space. In future,
+        //           we could reduce the GPU cache size of ClipData.
+        let rect = LayoutRect::from_size(size);
+
+        // One square corner with its top-left at (x, y).
+        let corner = |x: f32, y: f32| {
+            ClipCorner::uniform(
+                LayoutRect::from_origin_and_size(
+                    LayoutPoint::new(x, y),
+                    LayoutSize::new(radius, radius),
+                ),
+                radius,
+                0.0,
+            )
+        };
+
+        ClipData {
+            rect: ClipRect {
+                rect,
+                mode: mode as u32 as f32,
+            },
+            top_left: corner(rect.min.x, rect.min.y),
+            top_right: corner(rect.max.x - radius, rect.min.y),
+            bottom_left: corner(rect.min.x, rect.max.y - radius),
+            bottom_right: corner(rect.max.x - radius, rect.max.y - radius),
+        }
+    }
+}
+
+/// A hashable descriptor for nine-patches, used by image and
+/// gradient borders.
+#[derive(Debug, Clone, PartialEq, Eq, Hash, MallocSizeOf)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct NinePatchDescriptor {
+    /// Width of the nine-patch source.
+    // NOTE(review): presumably in device pixels, like `slice` -- confirm.
+    pub width: i32,
+    /// Height of the nine-patch source.
+    pub height: i32,
+    /// Slice offsets from each side, in integer device units.
+    pub slice: DeviceIntSideOffsets,
+    /// Fill flag.
+    // NOTE(review): presumably whether the middle patch is drawn -- confirm.
+    pub fill: bool,
+    /// Repeat mode along the horizontal axis.
+    pub repeat_horizontal: RepeatMode,
+    /// Repeat mode along the vertical axis.
+    pub repeat_vertical: RepeatMode,
+    /// Outset on each side, in hashable form.
+    pub outset: SideOffsetsKey,
+    /// Border widths on each side, in hashable form.
+    pub widths: SideOffsetsKey,
+}
+
+impl IsVisible for PrimitiveKeyKind {
+    // Reports whether the primary primitive is visible, so that
+    // non-visible primitives can be trivially rejected.
+    //
+    // A rectangle with a fixed color is visible only when its alpha is
+    // non-zero. Rectangles with a bound color and clear primitives are
+    // always reported as visible.
+    //
+    // TODO(gw): Currently, primitives other than those
+    //           listed here are handled before the
+    //           add_primitive() call. In the future
+    //           we should move the logic for all other
+    //           primitive types to use this.
+    fn is_visible(&self) -> bool {
+        match self {
+            PrimitiveKeyKind::Rectangle {
+                color: PropertyBinding::Value(value),
+                ..
+            } => value.a > 0,
+            PrimitiveKeyKind::Rectangle { .. } | PrimitiveKeyKind::Clear => true,
+        }
+    }
+}
+
+impl CreateShadow for PrimitiveKeyKind {
+    // Produce the variant of this primitive that is drawn as part of
+    // the supplied shadow. For a rectangle that is a rectangle filled
+    // with the shadow's color. Clear primitives cannot appear in a
+    // shadow context, so hitting one here is a bug and panics.
+    fn create_shadow(
+        &self,
+        shadow: &Shadow,
+        _: bool,
+        _: RasterSpace,
+    ) -> PrimitiveKeyKind {
+        match self {
+            PrimitiveKeyKind::Clear => {
+                panic!("bug: this prim is not supported in shadow contexts");
+            }
+            PrimitiveKeyKind::Rectangle { .. } => {
+                let color = PropertyBinding::Value(shadow.color.into());
+                PrimitiveKeyKind::Rectangle { color }
+            }
+        }
+    }
+}
+
+/// The kind of a primitive instance, together with the handles and
+/// indices needed to find the data for that kind. Every variant carries a
+/// `data_handle` to its interned data.
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+pub enum PrimitiveInstanceKind {
+    /// Direct reference to a Picture
+    Picture {
+        /// Handle to the common interned data for this primitive.
+        data_handle: PictureDataHandle,
+        pic_index: PictureIndex,
+        segment_instance_index: SegmentInstanceIndex,
+    },
+    /// A run of glyphs, with associated font parameters.
+    TextRun {
+        /// Handle to the common interned data for this primitive.
+        data_handle: TextRunDataHandle,
+        /// Index to the per instance scratch data for this primitive.
+        run_index: TextRunIndex,
+    },
+    /// A line decoration. cache_handle refers to a cached render
+    /// task handle, if this line decoration is not a simple solid.
+    LineDecoration {
+        /// Handle to the common interned data for this primitive.
+        data_handle: LineDecorationDataHandle,
+        // TODO(gw): For now, we need to store some information in
+        //           the primitive instance that is created during
+        //           prepare_prims and read during the batching pass.
+        //           Once we unify the prepare_prims and batching to
+        //           occur at the same time, we can remove most of
+        //           the things we store here in the instance, and
+        //           use them directly. This will remove cache_handle,
+        //           but also the opacity, clip_task_id etc below.
+        render_task: Option<RenderTaskId>,
+    },
+    /// A normal border, with a range of render task ids.
+    NormalBorder {
+        /// Handle to the common interned data for this primitive.
+        data_handle: NormalBorderDataHandle,
+        render_task_ids: storage::Range<RenderTaskId>,
+    },
+    /// An image border.
+    ImageBorder {
+        /// Handle to the common interned data for this primitive.
+        data_handle: ImageBorderDataHandle,
+    },
+    /// A rectangle, optionally segmented and optionally using a bound
+    /// (animated) color.
+    Rectangle {
+        /// Handle to the common interned data for this primitive.
+        data_handle: PrimitiveDataHandle,
+        segment_instance_index: SegmentInstanceIndex,
+        color_binding_index: ColorBindingIndex,
+    },
+    /// A YUV image.
+    YuvImage {
+        /// Handle to the common interned data for this primitive.
+        data_handle: YuvImageDataHandle,
+        segment_instance_index: SegmentInstanceIndex,
+        is_compositor_surface: bool,
+    },
+    /// An image.
+    Image {
+        /// Handle to the common interned data for this primitive.
+        data_handle: ImageDataHandle,
+        image_instance_index: ImageInstanceIndex,
+        is_compositor_surface: bool,
+    },
+    /// Always rendered directly into the picture. This tends to be
+    /// faster with SWGL.
+    LinearGradient {
+        /// Handle to the common interned data for this primitive.
+        data_handle: LinearGradientDataHandle,
+        visible_tiles_range: GradientTileRange,
+    },
+    /// Always rendered via a cached render task. Usually faster with
+    /// a GPU.
+    CachedLinearGradient {
+        /// Handle to the common interned data for this primitive.
+        data_handle: LinearGradientDataHandle,
+        visible_tiles_range: GradientTileRange,
+    },
+    /// A radial gradient.
+    RadialGradient {
+        /// Handle to the common interned data for this primitive.
+        data_handle: RadialGradientDataHandle,
+        visible_tiles_range: GradientTileRange,
+    },
+    /// A conic gradient.
+    ConicGradient {
+        /// Handle to the common interned data for this primitive.
+        data_handle: ConicGradientDataHandle,
+        visible_tiles_range: GradientTileRange,
+    },
+    /// Clear out a rect, used for special effects.
+    Clear {
+        /// Handle to the common interned data for this primitive.
+        data_handle: PrimitiveDataHandle,
+    },
+    /// Render a portion of a specified backdrop.
+    BackdropCapture {
+        /// Handle to the common interned data for this primitive.
+        data_handle: BackdropCaptureDataHandle,
+    },
+    /// Backdrop render primitive, referencing a picture by index.
+    BackdropRender {
+        /// Handle to the common interned data for this primitive.
+        data_handle: BackdropRenderDataHandle,
+        pic_index: PictureIndex,
+    },
+}
+
+impl PrimitiveInstanceKind {
+    /// Returns the picture index of a `Picture` instance.
+    ///
+    /// # Panics
+    ///
+    /// Panics when called on any other kind of primitive instance.
+    pub fn as_pic(&self) -> PictureIndex {
+        if let PrimitiveInstanceKind::Picture { pic_index, .. } = self {
+            return *pic_index;
+        }
+
+        panic!("bug: as_pic called on a prim that is not a picture")
+    }
+}
+
+/// Newtype around a `u32` index identifying a primitive instance.
+#[derive(Debug, Copy, Clone)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct PrimitiveInstanceIndex(pub u32);
+
+/// A single instance of a primitive: its kind (with references to the
+/// interned data), its clip leaf and its visibility state.
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+pub struct PrimitiveInstance {
+    /// Identifies the kind of primitive this
+    /// instance is, and references to where
+    /// the relevant information for the primitive
+    /// can be found.
+    pub kind: PrimitiveInstanceKind,
+
+    /// All information and state related to clip(s) for this primitive
+    pub clip_leaf_id: ClipLeafId,
+
+    /// Information related to the current visibility state of this
+    /// primitive.
+    // TODO(gw): Currently built each frame, but can be retained.
+    pub vis: PrimitiveVisibility,
+}
+
+impl PrimitiveInstance {
+    /// Creates an instance of the given kind, attached to the given clip
+    /// leaf, with freshly initialised visibility state.
+    pub fn new(
+        kind: PrimitiveInstanceKind,
+        clip_leaf_id: ClipLeafId,
+    ) -> Self {
+        let vis = PrimitiveVisibility::new();
+
+        Self {
+            kind,
+            clip_leaf_id,
+            vis,
+        }
+    }
+
+    /// Resets any pre-frame state for this primitive; currently that is
+    /// just the visibility state.
+    pub fn reset(&mut self) {
+        self.vis.reset();
+    }
+
+    /// Resets the visibility state of this primitive.
+    pub fn clear_visibility(&mut self) {
+        self.vis.reset();
+    }
+
+    /// Returns the uid of the interned data this instance refers to,
+    /// whatever its kind.
+    ///
+    /// Only variants that share a handle type can share an arm, hence
+    /// one arm per handle type below.
+    pub fn uid(&self) -> intern::ItemUid {
+        match &self.kind {
+            PrimitiveInstanceKind::Picture { data_handle, .. } => data_handle.uid(),
+            PrimitiveInstanceKind::TextRun { data_handle, .. } => data_handle.uid(),
+            PrimitiveInstanceKind::LineDecoration { data_handle, .. } => data_handle.uid(),
+            PrimitiveInstanceKind::NormalBorder { data_handle, .. } => data_handle.uid(),
+            PrimitiveInstanceKind::ImageBorder { data_handle, .. } => data_handle.uid(),
+            PrimitiveInstanceKind::Rectangle { data_handle, .. } |
+            PrimitiveInstanceKind::Clear { data_handle, .. } => data_handle.uid(),
+            PrimitiveInstanceKind::YuvImage { data_handle, .. } => data_handle.uid(),
+            PrimitiveInstanceKind::Image { data_handle, .. } => data_handle.uid(),
+            PrimitiveInstanceKind::LinearGradient { data_handle, .. } |
+            PrimitiveInstanceKind::CachedLinearGradient { data_handle, .. } => data_handle.uid(),
+            PrimitiveInstanceKind::RadialGradient { data_handle, .. } => data_handle.uid(),
+            PrimitiveInstanceKind::ConicGradient { data_handle, .. } => data_handle.uid(),
+            PrimitiveInstanceKind::BackdropCapture { data_handle, .. } => data_handle.uid(),
+            PrimitiveInstanceKind::BackdropRender { data_handle, .. } => data_handle.uid(),
+        }
+    }
+}
+
+/// Per-instance segment data: a GPU cache handle plus the range of brush
+/// segments that belongs to the instance. Stored in
+/// `SegmentInstanceStorage`.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[derive(Debug)]
+pub struct SegmentedInstance {
+    /// GPU cache handle for this instance's segment data.
+    pub gpu_cache_handle: GpuCacheHandle,
+    /// Range of `BrushSegment`s owned by this instance.
+    pub segments_range: SegmentsRange,
+}
+
+// Typed aliases over the generic `storage` containers (`Storage`), the
+// indices into them (`Index`) and ranges within them (`Range`), one group
+// per element type.
+pub type GlyphKeyStorage = storage::Storage<GlyphKey>;
+pub type TextRunIndex = storage::Index<TextRunPrimitive>;
+pub type TextRunStorage = storage::Storage<TextRunPrimitive>;
+pub type ColorBindingIndex = storage::Index<PropertyBinding<ColorU>>;
+pub type ColorBindingStorage = storage::Storage<PropertyBinding<ColorU>>;
+pub type BorderHandleStorage = storage::Storage<RenderTaskId>;
+pub type SegmentStorage = storage::Storage<BrushSegment>;
+pub type SegmentsRange = storage::Range<BrushSegment>;
+pub type SegmentInstanceStorage = storage::Storage<SegmentedInstance>;
+pub type SegmentInstanceIndex = storage::Index<SegmentedInstance>;
+pub type ImageInstanceStorage = storage::Storage<ImageInstance>;
+pub type ImageInstanceIndex = storage::Index<ImageInstance>;
+pub type GradientTileStorage = storage::Storage<VisibleGradientTile>;
+pub type GradientTileRange = storage::Range<VisibleGradientTile>;
+pub type LinearGradientStorage = storage::Storage<LinearGradientPrimitive>;
+
+/// Contains various vecs of data that are used only during frame building,
+/// where we want to recycle the memory for each new display list, to avoid
+/// constantly re-allocating and moving memory around. Written during
+/// primitive preparation, and read during batching.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+pub struct PrimitiveScratchBuffer {
+    /// Contains a list of clip mask instance parameters
+    /// per segment generated.
+    pub clip_mask_instances: Vec<ClipMaskKind>,
+
+    /// List of glyph keys that are allocated by each
+    /// text run instance.
+    pub glyph_keys: GlyphKeyStorage,
+
+    /// List of render task handles for border segment instances
+    /// that have been added this frame.
+    pub border_cache_handles: BorderHandleStorage,
+
+    /// A list of brush segments that have been built for this scene.
+    pub segments: SegmentStorage,
+
+    /// A list of segment ranges and GPU cache handles for prim instances
+    /// that have opted into segment building. In future, this should be
+    /// removed in favor of segment building during primitive interning.
+    pub segment_instances: SegmentInstanceStorage,
+
+    /// A list of visible tiles that tiled gradients use to store
+    /// per-tile information.
+    pub gradient_tiles: GradientTileStorage,
+
+    /// List of debug display items for rendering.
+    pub debug_items: Vec<DebugItem>,
+
+    /// List of current debug messages to log on screen. Private: only
+    /// code in this module can touch it directly.
+    messages: Vec<DebugMessage>,
+
+    /// Set of sub-graphs that are required, determined during visibility pass
+    pub required_sub_graphs: FastHashSet<PictureIndex>,
+}
+
+/// An empty scratch buffer: every vec, storage and set starts out empty,
+/// and each storage is created with an initial capacity argument of 0.
+impl Default for PrimitiveScratchBuffer {
+    fn default() -> Self {
+        Self {
+            clip_mask_instances: Vec::new(),
+            glyph_keys: GlyphKeyStorage::new(0),
+            border_cache_handles: BorderHandleStorage::new(0),
+            segments: SegmentStorage::new(0),
+            segment_instances: SegmentInstanceStorage::new(0),
+            gradient_tiles: GradientTileStorage::new(0),
+            debug_items: Vec::new(),
+            messages: Vec::new(),
+            required_sub_graphs: FastHashSet::default(),
+        }
+    }
+}
+
+impl PrimitiveScratchBuffer {
+    /// Passes the scratch containers to `recycler`, one after another.
+    ///
+    /// `messages` and `required_sub_graphs` are not passed to the
+    /// recycler here.
+    pub fn recycle(&mut self, recycler: &mut Recycler) {
+        recycler.recycle_vec(&mut self.clip_mask_instances);
+        self.glyph_keys.recycle(recycler);
+        self.border_cache_handles.recycle(recycler);
+        self.segments.recycle(recycler);
+        self.segment_instances.recycle(recycler);
+        self.gradient_tiles.recycle(recycler);
+        recycler.recycle_vec(&mut self.debug_items);
+    }
+
+    /// Resets the per-frame parts of the scratch buffer.
+    ///
+    /// Glyph keys, segments, segment instances and debug messages are
+    /// left untouched here.
+    pub fn begin_frame(&mut self) {
+        // Per-frame containers that simply start out empty.
+        self.border_cache_handles.clear();
+        self.required_sub_graphs.clear();
+        self.debug_items.clear();
+
+        // TODO(gw): As in the previous code, the gradient tiles store GPU cache
+        //           handles that are cleared (and thus invalidated + re-uploaded)
+        //           every frame. This maintains the existing behavior, but we
+        //           should fix this in the future to retain handles.
+        self.gradient_tiles.clear();
+
+        // Drop last frame's clip mask tasks, then seed the list with one
+        // entry meaning "no clip mask", which sits at the
+        // ClipTaskIndex::INVALID location.
+        let masks = &mut self.clip_mask_instances;
+        masks.clear();
+        masks.push(ClipMaskKind::None);
+    }
+
+    pub fn end_frame(&mut self) {
+        const MSGS_TO_RETAIN: usize = 32;
+        const TIME_TO_RETAIN: u64 = 2000000000;
+        const LINE_HEIGHT: f32 = 20.0;
+        const X0: f32 = 32.0;
+        const Y0: f32 = 32.0;
+        let now = time::precise_time_ns();
+
+        let msgs_to_remove = self.messages.len().max(MSGS_TO_RETAIN) - MSGS_TO_RETAIN;
+        let mut msgs_removed = 0;
+
+        self.messages.retain(|msg| {
+            if msgs_removed < msgs_to_remove {
+                msgs_removed += 1;
+                return false;
+            }
+
+            if msg.timestamp + TIME_TO_RETAIN < now {
+                return false;
+            }
+
+            true
+        });
+
+        let mut y = Y0 + self.messages.len() as f32 * LINE_HEIGHT;
+        let shadow_offset = 1.0;
+
+        for msg in &self.messages {
+            self.debug_items.push(DebugItem::Text {
+                position: DevicePoint::new(X0 + shadow_offset, y + shadow_offset),
+                color: debug_colors::BLACK,
+                msg: msg.msg.clone(),
+            });
+
+            self.debug_items.push(DebugItem::Text {
+                position: DevicePoint::new(X0, y),
+                color: debug_colors::RED,
+                msg: msg.msg.clone(),
+            });
+
+            y -= LINE_HEIGHT;
+        }
+    }
+
+    #[allow(dead_code)]
+    pub fn push_debug_rect(
+        &mut self,
+        rect: DeviceRect,
+        outer_color: ColorF,
+        inner_color: ColorF,
+    ) {
+        self.debug_items.push(DebugItem::Rect {
+            rect,
+            outer_color,
+            inner_color,
+        });
+    }
+
+    #[allow(dead_code)]
+    pub fn push_debug_string(
+        &mut self,
+        position: DevicePoint,
+        color: ColorF,
+        msg: String,
+    ) {
+        self.debug_items.push(DebugItem::Text {
+            position,
+            color,
+            msg,
+        });
+    }
+
+    #[allow(dead_code)]
+    pub fn log(
+        &mut self,
+        msg: String,
+    ) {
+        self.messages.push(DebugMessage {
+            msg,
+            timestamp: time::precise_time_ns(),
+        })
+    }
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Clone, Debug)]
+pub struct PrimitiveStoreStats { // element counts; produced by `PrimitiveStore::get_stats`
+    picture_count: usize, // sizes `PrimitiveStore::pictures` in `PrimitiveStore::new`
+    text_run_count: usize, // sizes `text_runs`
+    image_count: usize, // sizes `images`
+    linear_gradient_count: usize, // sizes `linear_gradients`
+    color_binding_count: usize, // sizes `color_bindings`
+}
+
+impl PrimitiveStoreStats {
+    /// Returns stats in which every count is zero. `PrimitiveStore::new`
+    /// uses these counts as the initial sizes of its containers.
+    pub fn empty() -> Self {
+        const NONE: usize = 0;
+        Self {
+            picture_count: NONE, text_run_count: NONE, image_count: NONE,
+            linear_gradient_count: NONE, color_binding_count: NONE,
+        }
+    }
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+pub struct PrimitiveStore { // per-kind primitive data, pre-sized from `PrimitiveStoreStats`
+    pub pictures: Vec<PicturePrimitive>, // indexed with `PictureIndex.0` (see `print_picture_tree`)
+    pub text_runs: TextRunStorage, // `TextRun::make_instance_kind` pushes its run here
+    pub linear_gradients: LinearGradientStorage,
+
+    /// A list of image instances. These are stored separately as
+    /// storing them inline in the instance makes the structure bigger
+    /// for other types.
+    pub images: ImageInstanceStorage,
+
+    /// Animated color bindings for this primitive.
+    pub color_bindings: ColorBindingStorage,
+}
+
+impl PrimitiveStore {
+    /// Creates an empty store, passing each count in `stats` as the initial size of its container.
+    pub fn new(stats: &PrimitiveStoreStats) -> PrimitiveStore {
+        let pictures = Vec::with_capacity(stats.picture_count);
+        let text_runs = TextRunStorage::new(stats.text_run_count);
+        let images = ImageInstanceStorage::new(stats.image_count);
+        let color_bindings = ColorBindingStorage::new(stats.color_binding_count);
+        let linear_gradients = LinearGradientStorage::new(stats.linear_gradient_count);
+        PrimitiveStore { pictures, text_runs, linear_gradients, images, color_bindings }
+    }
+
+    /// Reports the current length of every container, in the form `new` accepts.
+    pub fn get_stats(&self) -> PrimitiveStoreStats {
+        let (picture_count, text_run_count) = (self.pictures.len(), self.text_runs.len());
+        let (image_count, color_binding_count) = (self.images.len(), self.color_bindings.len());
+        let linear_gradient_count = self.linear_gradients.len();
+        PrimitiveStoreStats {
+            picture_count, text_run_count, image_count, linear_gradient_count, color_binding_count,
+        }
+    }
+
+    /// Calls `print` on the picture at `root` with a fresh `PrintTree` titled "picture tree".
+    #[allow(unused)]
+    pub fn print_picture_tree(&self, root: PictureIndex) {
+        let mut tree = crate::print_tree::PrintTree::new("picture tree");
+        self.pictures[root.0].print(&self.pictures, root, &mut tree);
+    }
+}
+
+/// Trait for primitives that are directly internable.
+/// See `SceneBuilder::add_primitive<P>`.
+pub trait InternablePrimitive: intern::Internable<InternData = ()> + Sized {
+    /// Build a new key from self with `info`, consuming `self`.
+    fn into_key(
+        self,
+        info: &LayoutPrimitiveInfo,
+    ) -> Self::Key;
+    /// Build the `PrimitiveInstanceKind` for an interned primitive from its key and handle.
+    fn make_instance_kind(
+        key: Self::Key,
+        data_handle: intern::Handle<Self>,
+        prim_store: &mut PrimitiveStore,
+        reference_frame_relative_offset: LayoutVector2D,
+    ) -> PrimitiveInstanceKind;
+}
+
+
+#[test]
+#[cfg(target_pointer_width = "64")]
+fn test_struct_sizes() {
+    use std::mem::size_of;
+    // These sizes matter for performance on several talos stress tests. When an
+    // assertion below fails on CI, one of two things happened:
+    // (a) A structure shrank. Great work! Update the expected value and move on.
+    // (b) A structure grew. That is not necessarily a problem, but it should only
+    //     be done with care, and after checking that talos performance does not
+    //     regress badly.
+    assert_eq!(size_of::<PrimitiveInstance>(), 104, "PrimitiveInstance size changed");
+    assert_eq!(size_of::<PrimitiveInstanceKind>(), 24, "PrimitiveInstanceKind size changed");
+    assert_eq!(size_of::<PrimitiveTemplate>(), 56, "PrimitiveTemplate size changed");
+    assert_eq!(size_of::<PrimitiveTemplateKind>(), 28, "PrimitiveTemplateKind size changed");
+    assert_eq!(size_of::<PrimitiveKey>(), 36, "PrimitiveKey size changed");
+    assert_eq!(size_of::<PrimitiveKeyKind>(), 16, "PrimitiveKeyKind size changed");
+}
diff --git a/gfx/wr/webrender/src/prim_store/picture.rs b/gfx/wr/webrender/src/prim_store/picture.rs
new file mode 100644
index 0000000000..c3ec88783a
--- /dev/null
+++ b/gfx/wr/webrender/src/prim_store/picture.rs
@@ -0,0 +1,328 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{
+    ColorU, MixBlendMode, FilterPrimitiveInput, FilterPrimitiveKind, ColorSpace,
+    PropertyBinding, PropertyBindingId, CompositeOperator, RasterSpace,
+};
+use api::units::{Au, LayoutVector2D};
+use crate::scene_building::IsVisible;
+use crate::filterdata::SFilterData;
+use crate::intern::ItemUid;
+use crate::intern::{Internable, InternDebug, Handle as InternHandle};
+use crate::internal_types::{LayoutPrimitiveInfo, Filter};
+use crate::picture::PictureCompositeMode;
+use crate::prim_store::{
+    PrimitiveInstanceKind, PrimitiveStore, VectorKey,
+    InternablePrimitive,
+};
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Clone, MallocSizeOf, PartialEq, Hash, Eq)]
+pub enum CompositeOperatorKey { // key form of `CompositeOperator`, see the `From` impl
+    Over,
+    In,
+    Out,
+    Atop,
+    Xor,
+    Lighter,
+    Arithmetic([Au; 4]), // the four coefficients, each converted with `Au::from_f32_px`
+}
+
+impl From<CompositeOperator> for CompositeOperatorKey {
+    /// Maps every `CompositeOperator` variant onto the key variant of the same
+    /// name.
+    ///
+    /// The four `Arithmetic` coefficients are converted to `Au` with
+    /// `Au::from_f32_px`; the key enum derives `Eq` and `Hash` over them.
+    fn from(operator: CompositeOperator) -> Self {
+        match operator {
+            CompositeOperator::Over => Self::Over,
+            CompositeOperator::In => Self::In,
+            CompositeOperator::Out => Self::Out,
+            CompositeOperator::Atop => Self::Atop,
+            CompositeOperator::Xor => Self::Xor,
+            CompositeOperator::Lighter => Self::Lighter,
+            CompositeOperator::Arithmetic(coeffs) => {
+                let quantize = |i: usize| Au::from_f32_px(coeffs[i]);
+                Self::Arithmetic([quantize(0), quantize(1), quantize(2), quantize(3)])
+            }
+        }
+    }
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Clone, MallocSizeOf, PartialEq, Hash, Eq)]
+pub enum FilterPrimitiveKey { // key form of one SVG filter primitive; the first field is its color space
+    Identity(ColorSpace, FilterPrimitiveInput),
+    Flood(ColorSpace, ColorU),
+    Blend(ColorSpace, MixBlendMode, FilterPrimitiveInput, FilterPrimitiveInput),
+    Blur(ColorSpace, Au, Au, FilterPrimitiveInput), // width, height, input
+    Opacity(ColorSpace, Au, FilterPrimitiveInput),
+    ColorMatrix(ColorSpace, [Au; 20], FilterPrimitiveInput),
+    DropShadow(ColorSpace, (VectorKey, Au, ColorU), FilterPrimitiveInput), // (offset, blur radius, color)
+    ComponentTransfer(ColorSpace, FilterPrimitiveInput, Vec<SFilterData>),
+    Offset(ColorSpace, FilterPrimitiveInput, VectorKey),
+    Composite(ColorSpace, FilterPrimitiveInput, FilterPrimitiveInput, CompositeOperatorKey),
+}
+
+/// Represents a hashable description of how a picture primitive
+/// will be composited into its parent.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Clone, MallocSizeOf, PartialEq, Hash, Eq)]
+pub enum PictureCompositeKey {
+    // No visual compositing effect. Also produced for a missing composite mode.
+    Identity,
+
+    // FilterOp (float parameters are stored as `Au`)
+    Blur(Au, Au, bool), // width, height, should_inflate
+    Brightness(Au),
+    Contrast(Au),
+    Grayscale(Au),
+    HueRotate(Au),
+    Invert(Au),
+    Opacity(Au),
+    OpacityBinding(PropertyBindingId, Au), // binding id, default value
+    Saturate(Au),
+    Sepia(Au),
+    DropShadows(Vec<(VectorKey, Au, ColorU)>), // (offset, blur radius, color) per shadow
+    ColorMatrix([Au; 20]),
+    SrgbToLinear,
+    LinearToSrgb,
+    ComponentTransfer(ItemUid), // uid of the interned filter data handle
+    Flood(ColorU),
+    SvgFilter(Vec<FilterPrimitiveKey>),
+
+    // MixBlendMode (`MixBlendMode::Normal` maps to `Identity` instead)
+    Multiply,
+    Screen,
+    Overlay,
+    Darken,
+    Lighten,
+    ColorDodge,
+    ColorBurn,
+    HardLight,
+    SoftLight,
+    Difference,
+    Exclusion,
+    Hue,
+    Saturation,
+    Color,
+    Luminosity,
+    PlusLighter,
+}
+
+impl From<Option<PictureCompositeMode>> for PictureCompositeKey {
+    /// Derives the hashable composite key for an optional composite mode.
+    /// Floating point parameters are converted to `Au` with `Au::from_f32_px`.
+    /// `None`, `Blit`, `TileCache`, `IntermediateSurface`, `MixBlendMode::Normal` and
+    /// `Filter::Identity` all become `Identity`; `Filter::ComponentTransfer` is unreachable.
+    fn from(mode: Option<PictureCompositeMode>) -> Self {
+        let au = Au::from_f32_px;
+        // Converts a color matrix one coefficient at a time (at most 20 values).
+        let matrix_key = |values: &[f32]| {
+            let mut quantized = [Au(0); 20];
+            for (slot, value) in quantized.iter_mut().zip(values) {
+                *slot = au(*value);
+            }
+            quantized
+        };
+
+        match mode {
+            None |
+            Some(PictureCompositeMode::Blit(_)) |
+            Some(PictureCompositeMode::TileCache { .. }) |
+            Some(PictureCompositeMode::IntermediateSurface) => Self::Identity,
+            Some(PictureCompositeMode::ComponentTransferFilter(handle)) => {
+                Self::ComponentTransfer(handle.uid())
+            }
+            Some(PictureCompositeMode::MixBlend(blend)) => match blend {
+                MixBlendMode::Normal => Self::Identity,
+                MixBlendMode::Multiply => Self::Multiply,
+                MixBlendMode::Screen => Self::Screen,
+                MixBlendMode::Overlay => Self::Overlay,
+                MixBlendMode::Darken => Self::Darken,
+                MixBlendMode::Lighten => Self::Lighten,
+                MixBlendMode::ColorDodge => Self::ColorDodge,
+                MixBlendMode::ColorBurn => Self::ColorBurn,
+                MixBlendMode::HardLight => Self::HardLight,
+                MixBlendMode::SoftLight => Self::SoftLight,
+                MixBlendMode::Difference => Self::Difference,
+                MixBlendMode::Exclusion => Self::Exclusion,
+                MixBlendMode::Hue => Self::Hue,
+                MixBlendMode::Saturation => Self::Saturation,
+                MixBlendMode::Color => Self::Color,
+                MixBlendMode::Luminosity => Self::Luminosity,
+                MixBlendMode::PlusLighter => Self::PlusLighter,
+            },
+            Some(PictureCompositeMode::Filter(op)) => match op {
+                Filter::Identity => Self::Identity,
+                Filter::SrgbToLinear => Self::SrgbToLinear,
+                Filter::LinearToSrgb => Self::LinearToSrgb,
+                Filter::Blur { width, height, should_inflate } => {
+                    Self::Blur(au(width), au(height), should_inflate)
+                }
+                Filter::Brightness(value) => Self::Brightness(au(value)),
+                Filter::Contrast(value) => Self::Contrast(au(value)),
+                Filter::Grayscale(value) => Self::Grayscale(au(value)),
+                Filter::HueRotate(value) => Self::HueRotate(au(value)),
+                Filter::Invert(value) => Self::Invert(au(value)),
+                Filter::Saturate(value) => Self::Saturate(au(value)),
+                Filter::Sepia(value) => Self::Sepia(au(value)),
+                Filter::Flood(color) => Self::Flood(color.into()),
+                Filter::ColorMatrix(values) => Self::ColorMatrix(matrix_key(&values[..])),
+                Filter::Opacity(binding, _) => match binding {
+                    PropertyBinding::Value(value) => Self::Opacity(au(value)),
+                    PropertyBinding::Binding(key, default) => {
+                        Self::OpacityBinding(key.id, au(default))
+                    }
+                },
+                Filter::DropShadows(ref shadows) => {
+                    let keys = shadows.iter().map(|shadow| {
+                        (shadow.offset.into(), au(shadow.blur_radius), shadow.color.into())
+                    });
+                    Self::DropShadows(keys.collect())
+                }
+                Filter::ComponentTransfer => unreachable!(),
+            },
+            Some(PictureCompositeMode::SvgFilter(filter_primitives, filter_data)) => {
+                let keys = filter_primitives.into_iter().map(|primitive| {
+                    let space = primitive.color_space;
+                    match primitive.kind {
+                        FilterPrimitiveKind::Identity(identity) => FilterPrimitiveKey::Identity(space, identity.input),
+                        FilterPrimitiveKind::Flood(flood) => FilterPrimitiveKey::Flood(space, flood.color.into()),
+                        FilterPrimitiveKind::Blend(blend) => {
+                            FilterPrimitiveKey::Blend(space, blend.mode, blend.input1, blend.input2)
+                        }
+                        FilterPrimitiveKind::Blur(blur) => {
+                            FilterPrimitiveKey::Blur(space, au(blur.width), au(blur.height), blur.input)
+                        }
+                        FilterPrimitiveKind::Opacity(opacity) => {
+                            FilterPrimitiveKey::Opacity(space, au(opacity.opacity), opacity.input)
+                        }
+                        FilterPrimitiveKind::ColorMatrix(color_matrix) => {
+                            let values = matrix_key(&color_matrix.matrix[..]);
+                            FilterPrimitiveKey::ColorMatrix(space, values, color_matrix.input)
+                        }
+                        FilterPrimitiveKind::DropShadow(drop_shadow) => {
+                            let offset = drop_shadow.shadow.offset.into();
+                            let blur_radius = au(drop_shadow.shadow.blur_radius);
+                            let color = drop_shadow.shadow.color.into();
+                            let shadow_key = (offset, blur_radius, color);
+                            FilterPrimitiveKey::DropShadow(space, shadow_key, drop_shadow.input)
+                        }
+                        FilterPrimitiveKind::ComponentTransfer(transfer) => {
+                            FilterPrimitiveKey::ComponentTransfer(space, transfer.input, filter_data.clone())
+                        }
+                        FilterPrimitiveKind::Offset(info) => {
+                            FilterPrimitiveKey::Offset(space, info.input, info.offset.into())
+                        }
+                        FilterPrimitiveKind::Composite(info) => {
+                            FilterPrimitiveKey::Composite(space, info.input1, info.input2, info.operator.into())
+                        }
+                    }
+                });
+                Self::SvgFilter(keys.collect())
+            }
+        }
+    }
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Clone, Eq, MallocSizeOf, PartialEq, Hash)]
+pub struct Picture { // internable picture description; `PictureKey::new` copies both fields
+    pub composite_mode_key: PictureCompositeKey,
+    pub raster_space: RasterSpace,
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Clone, Eq, MallocSizeOf, PartialEq, Hash)]
+pub struct PictureKey { // interning key for `Picture` (its `Internable::Key`)
+    pub composite_mode_key: PictureCompositeKey,
+    pub raster_space: RasterSpace,
+}
+
+impl PictureKey {
+    /// Builds the interning key for `pic`.
+    ///
+    /// `Picture` and `PictureKey` have the same two fields, so both values are
+    /// simply moved across.
+    pub fn new(pic: Picture) -> Self {
+        let Picture { composite_mode_key, raster_space } = pic;
+        Self { composite_mode_key, raster_space }
+    }
+}
+
+impl InternDebug for PictureKey {} // no trait items are overridden here
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+pub struct PictureData; // unit struct: carries no fields
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+pub struct PictureTemplate; // unit struct used as `Picture`'s `Internable::StoreData`
+
+impl From<PictureKey> for PictureTemplate {
+    fn from(_key: PictureKey) -> Self { // the key contributes nothing to the unit template
+        Self
+    }
+}
+
+pub type PictureDataHandle = InternHandle<Picture>; // intern handle specialised for `Picture`
+
+impl Internable for Picture {
+    type Key = PictureKey; // built by `PictureKey::new`
+    type StoreData = PictureTemplate; // unit struct, converted from the key
+    type InternData = (); // required by the `InternablePrimitive` bound
+    const PROFILE_COUNTER: usize = crate::profiler::INTERNED_PICTURES;
+}
+
+impl InternablePrimitive for Picture {
+    /// Converts the picture into its key via `PictureKey::new`; the primitive
+    /// info argument is not consulted.
+    fn into_key(self, _info: &LayoutPrimitiveInfo) -> PictureKey {
+        PictureKey::new(self)
+    }
+
+    /// Not expected to run for pictures: every argument is ignored and the
+    /// body is `unreachable!()`, so a call panics.
+    fn make_instance_kind(
+        _key: PictureKey,
+        _handle: PictureDataHandle,
+        _prim_store: &mut PrimitiveStore,
+        _reference_frame_relative_offset: LayoutVector2D,
+    ) -> PrimitiveInstanceKind {
+        // Should never be hit as this method should not be called for pictures.
+        unreachable!()
+    }
+}
+
+impl IsVisible for Picture {
+    fn is_visible(&self) -> bool { // pictures unconditionally report themselves as visible
+        true
+    }
+}
+
+#[test]
+#[cfg(target_pointer_width = "64")]
+fn test_struct_sizes() {
+    use std::mem::size_of;
+    // These sizes matter for performance on several talos stress tests. When an
+    // assertion below fails on CI, one of two things happened:
+    // (a) A structure shrank. Great work! Update the expected value and move on.
+    // (b) A structure grew. That is not necessarily a problem, but it should only
+    //     be done with care, and after checking that talos performance does not
+    //     regress badly.
+    assert_eq!(size_of::<Picture>(), 96, "Picture size changed");
+    assert_eq!(size_of::<PictureTemplate>(), 0, "PictureTemplate size changed");
+    assert_eq!(size_of::<PictureKey>(), 96, "PictureKey size changed");
+}
diff --git a/gfx/wr/webrender/src/prim_store/storage.rs b/gfx/wr/webrender/src/prim_store/storage.rs
new file mode 100644
index 0000000000..4b99d87556
--- /dev/null
+++ b/gfx/wr/webrender/src/prim_store/storage.rs
@@ -0,0 +1,156 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use std::{iter::Extend, ops, marker::PhantomData, u32};
+use crate::util::Recycler;
+
+#[derive(Debug, Hash)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct Index<T>(u32, PhantomData<T>); // `u32` position in a `Storage<T>`; `T` is only a type tag
+
+// We explicitly implement Copy + Clone instead of using #[derive(Copy, Clone)]
+// because we don't want to require that T implements Clone + Copy.
+impl<T> Clone for Index<T> {
+    fn clone(&self) -> Self { *self } // plain copy; `Index<T>` is `Copy` for every `T`
+}
+
+impl<T> Copy for Index<T> {}
+
+impl<T> PartialEq for Index<T> {
+    fn eq(&self, rhs: &Self) -> bool {
+        u32::eq(&self.0, &rhs.0) // only the raw index takes part in the comparison
+    }
+}
+
+impl<T> Index<T> {
+    pub const INVALID: Index<T> = Index(u32::MAX, PhantomData);
+    pub const UNUSED: Index<T> = Index(u32::MAX-1, PhantomData);
+    /// Wraps `idx`; debug builds assert that it is below `u32::MAX`.
+    fn new(idx: usize) -> Self {
+        debug_assert!(idx < u32::MAX as usize);
+        Self(idx as u32, PhantomData)
+    }
+}
+
+#[derive(Debug)]
+pub struct OpenRange<T> { // returned by `Storage::open_range`, finished by `Storage::close_range`
+    start: Index<T>, // storage length at the time the range was opened
+}
+
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+pub struct Range<T> { // span of positions in a `Storage<T>`; indexing yields `data[start..end]`
+    pub start: Index<T>, // first position covered
+    pub end: Index<T>, // exclusive upper bound
+}
+
+// Copy + Clone are implemented by hand instead of via #[derive(Copy, Clone)],
+// because the derive would require that T implements Clone + Copy as well.
+impl<T> Clone for Range<T> {
+    // `Range<T>` is `Copy` (see below), so cloning is a plain copy of both
+    // indices.
+    fn clone(&self) -> Self { *self }
+}
+impl<T> Copy for Range<T> {}
+
+impl<T> Range<T> {
+    /// Returns a range that starts and ends at index zero, so it covers nothing.
+    pub fn empty() -> Self {
+        let zero = Index::new(0);
+        Range { start: zero, end: zero }
+    }
+
+    /// Reports whether the range covers no elements, i.e. `end` does not lie
+    /// beyond `start`. A range whose `start` is past its `end` also counts
+    /// as empty.
+    pub fn is_empty(self) -> bool {
+        self.end.0 <= self.start.0
+    }
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+pub struct Storage<T> { // append-only `Vec<T>` wrapper addressed by `Index<T>` / `Range<T>`
+    data: Vec<T>, // elements in insertion order; an `Index` is a position in here
+}
+
+impl<T> Storage<T> {
+    /// Creates an empty storage with room reserved for `initial_capacity` elements.
+    pub fn new(initial_capacity: usize) -> Self {
+        Storage { data: Vec::with_capacity(initial_capacity) }
+    }
+
+    /// Number of elements currently stored.
+    pub fn len(&self) -> usize { self.data.len() }
+
+    /// Removes every element; indices and ranges handed out earlier no longer refer to anything.
+    pub fn clear(&mut self) {
+        self.data.clear();
+    }
+
+    /// Appends `t` and returns the index it was stored at. The index is built
+    /// with `Index::new`, so debug builds assert that it fits below `u32::MAX`.
+    pub fn push(&mut self, t: T) -> Index<T> {
+        let index = Index::new(self.data.len());
+        self.data.push(t);
+        index
+    }
+
+    /// Reserves room for at least `count` further elements.
+    pub fn reserve(&mut self, count: usize) {
+        self.data.reserve(count);
+    }
+
+    /// Passes the backing vector to `recycler.recycle_vec`.
+    pub fn recycle(&mut self, recycler: &mut Recycler) {
+        recycler.recycle_vec(&mut self.data);
+    }
+
+    /// Appends every item of `iter` and returns the range covering exactly those items.
+    pub fn extend<II: IntoIterator<Item=T>>(&mut self, iter: II) -> Range<T> {
+        let range = self.open_range();
+        self.data.extend(iter);
+        self.close_range(range)
+    }
+
+    /// Starts a range at the current end of the storage.
+    pub fn open_range(&self) -> OpenRange<T> {
+        OpenRange { start: Index::new(self.data.len()) }
+    }
+
+    /// Finishes `range` at the current end of the storage.
+    pub fn close_range(&self, range: OpenRange<T>) -> Range<T> {
+        Range { start: range.start, end: Index::new(self.data.len()) }
+    }
+}
+
+impl<T> ops::Index<Index<T>> for Storage<T> {
+    type Output = T;
+    /// Borrows the element stored at `index`; an out-of-range position panics
+    /// in the underlying vector lookup.
+    fn index(&self, index: Index<T>) -> &T { &self.data[index.0 as usize] }
+}
+
+impl<T> ops::IndexMut<Index<T>> for Storage<T> {
+    /// Mutably borrows the element stored at `index`; an out-of-range
+    /// position panics in the underlying vector lookup.
+    fn index_mut(&mut self, index: Index<T>) -> &mut T { &mut self.data[index.0 as usize] }
+}
+
+impl<T> ops::Index<Range<T>> for Storage<T> {
+    type Output = [T];
+    /// Borrows the elements from `index.start` up to, but excluding, `index.end`.
+    fn index(&self, index: Range<T>) -> &[T] {
+        let Range { start, end } = index;
+        &self.data[start.0 as usize..end.0 as usize]
+    }
+}
+
+impl<T> ops::IndexMut<Range<T>> for Storage<T> {
+    /// Mutably borrows the elements from `index.start` up to, but excluding, `index.end`.
+    fn index_mut(&mut self, index: Range<T>) -> &mut [T] {
+        let Range { start, end } = index;
+        &mut self.data[start.0 as usize..end.0 as usize]
+    }
+}
diff --git a/gfx/wr/webrender/src/prim_store/text_run.rs b/gfx/wr/webrender/src/prim_store/text_run.rs
new file mode 100644
index 0000000000..61562306be
--- /dev/null
+++ b/gfx/wr/webrender/src/prim_store/text_run.rs
@@ -0,0 +1,505 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{ColorF, FontInstanceFlags, GlyphInstance, RasterSpace, Shadow};
+use api::units::{LayoutToWorldTransform, LayoutVector2D, RasterPixelScale, DevicePixelScale};
+use crate::scene_building::{CreateShadow, IsVisible};
+use crate::frame_builder::FrameBuildingState;
+use glyph_rasterizer::{FontInstance, FontTransform, GlyphKey, FONT_SIZE_LIMIT};
+use crate::gpu_cache::GpuCache;
+use crate::intern;
+use crate::internal_types::LayoutPrimitiveInfo;
+use crate::picture::SurfaceInfo;
+use crate::prim_store::{PrimitiveOpacity,  PrimitiveScratchBuffer};
+use crate::prim_store::{PrimitiveStore, PrimKeyCommonData, PrimTemplateCommonData};
+use crate::renderer::{MAX_VERTEX_TEXTURE_WIDTH};
+use crate::resource_cache::{ResourceCache};
+use crate::util::{MatrixHelpers};
+use crate::prim_store::{InternablePrimitive, PrimitiveInstanceKind};
+use crate::spatial_tree::{SpatialTree, SpatialNodeIndex};
+use crate::space::SpaceSnapper;
+use crate::util::PrimaryArc;
+
+use std::ops;
+use std::sync::Arc;
+
+use super::storage;
+
+/// A run of glyphs, with associated font information.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Clone, Eq, MallocSizeOf, PartialEq, Hash)]
+pub struct TextRunKey {
+    pub common: PrimKeyCommonData, // converted from the `LayoutPrimitiveInfo` in `new`
+    pub font: FontInstance,
+    pub glyphs: PrimaryArc<Vec<GlyphInstance>>, // the run's glyph `Arc`, wrapped in `PrimaryArc`
+    pub shadow: bool, // copied into `TextRunPrimitive::shadow` by `make_instance_kind`
+    pub requested_raster_space: RasterSpace,
+}
+
+impl TextRunKey {
+    /// Assembles the key for `text_run`.
+    ///
+    /// The common part is converted from `info`, the glyph list is wrapped in
+    /// `PrimaryArc` and the remaining fields are moved over as they are.
+    pub fn new(
+        info: &LayoutPrimitiveInfo,
+        text_run: TextRun,
+    ) -> Self {
+        let TextRun { font, glyphs, shadow, requested_raster_space } = text_run;
+        let common = info.into();
+        Self { common, font, glyphs: PrimaryArc(glyphs), shadow, requested_raster_space }
+    }
+}
+
+impl intern::InternDebug for TextRunKey {} // no trait items are overridden here
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+pub struct TextRunTemplate { // store data for `TextRun`, converted from a `TextRunKey`
+    pub common: PrimTemplateCommonData, // also reachable through `Deref` / `DerefMut`
+    pub font: FontInstance,
+    #[ignore_malloc_size_of = "Measured via PrimaryArc"]
+    pub glyphs: Arc<Vec<GlyphInstance>>, // glyph positions are uploaded by `write_prim_gpu_blocks`
+}
+
+impl ops::Deref for TextRunTemplate {
+    type Target = PrimTemplateCommonData;
+    fn deref(&self) -> &Self::Target { // exposes the `common` fields directly on the template
+        &self.common
+    }
+}
+
+impl ops::DerefMut for TextRunTemplate {
+    fn deref_mut(&mut self) -> &mut Self::Target { // lets `update` assign `self.opacity`
+        &mut self.common
+    }
+}
+
+impl From<TextRunKey> for TextRunTemplate {
+    /// Builds the template from a key: common data goes through
+    /// `PrimTemplateCommonData::with_key_common`, the glyph `Arc` is unwrapped
+    /// from its `PrimaryArc`, and `shadow`/`requested_raster_space` are dropped.
+    fn from(item: TextRunKey) -> Self {
+        let TextRunKey { common, font, glyphs: PrimaryArc(glyphs), .. } = item;
+        let common = PrimTemplateCommonData::with_key_common(common);
+        Self { common, font, glyphs }
+    }
+}
+
+impl TextRunTemplate {
+    /// Update the GPU cache for this primitive template and mark it translucent.
+    /// This may run several times per frame, once for each primitive reference that
+    /// points at the interned template. The GPU cache request made first ensures that
+    /// work is only done if the cache entry is invalid (due to first use or eviction).
+    pub fn update(
+        &mut self,
+        frame_state: &mut FrameBuildingState,
+    ) {
+        self.write_prim_gpu_blocks(frame_state);
+        self.opacity = PrimitiveOpacity::translucent();
+    }
+
+    /// Writes this run's GPU blocks if the cache hands out a request for the handle:
+    /// the premultiplied font color, the plain background color with alpha 1.0,
+    /// then the glyph positions.
+    fn write_prim_gpu_blocks(
+        &mut self,
+        frame_state: &mut FrameBuildingState,
+    ) {
+        // corresponds to `fetch_glyph` in the shaders
+        let handle = &mut self.common.gpu_cache_handle;
+        let mut request = match frame_state.gpu_cache.request(handle) {
+            Some(request) => request,
+            None => return,
+        };
+
+        request.push(ColorF::from(self.font.color).premultiplied());
+        // this is the only case where we need to provide plain color to GPU
+        let ColorF { r, g, b, .. } = ColorF::from(self.font.bg_color);
+        request.push([r, g, b, 1.0]);
+
+        // Two glyphs are packed per GPU block. With an odd glyph count the final block is
+        // still pushed; its last two slots keep whatever the previous block left there.
+        let mut gpu_block = [0.0; 4];
+        for pair in self.glyphs.chunks(2) {
+            gpu_block[0] = pair[0].point.x;
+            gpu_block[1] = pair[0].point.y;
+            if let Some(second) = pair.get(1) {
+                gpu_block[2] = second.point.x;
+                gpu_block[3] = second.point.y;
+            }
+            request.push(gpu_block);
+        }
+
+        assert!(request.current_used_block_num() <= MAX_VERTEX_TEXTURE_WIDTH);
+    }
+}
+
+pub type TextRunDataHandle = intern::Handle<TextRun>; // intern handle specialised for `TextRun`
+
+#[derive(Debug, MallocSizeOf)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct TextRun { // internable text run; turned into a `TextRunKey` by `into_key`
+    pub font: FontInstance, // `is_visible` checks the alpha of this font's color
+    #[ignore_malloc_size_of = "Measured via PrimaryArc"]
+    pub glyphs: Arc<Vec<GlyphInstance>>, // shared with shadow runs made by `create_shadow`
+    pub shadow: bool, // set to `true` on runs produced by `create_shadow`
+    pub requested_raster_space: RasterSpace,
+}
+
+impl intern::Internable for TextRun {
+    type Key = TextRunKey; // built by `TextRunKey::new`
+    type StoreData = TextRunTemplate; // converted from the key
+    type InternData = (); // required by the `InternablePrimitive` bound
+    const PROFILE_COUNTER: usize = crate::profiler::INTERNED_TEXT_RUNS;
+}
+
+impl InternablePrimitive for TextRun {
+    /// Turns the run into its interning key, combining it with `info`.
+    fn into_key(self, info: &LayoutPrimitiveInfo) -> TextRunKey {
+        TextRunKey::new(info, self)
+    }
+
+    /// Pushes a `TextRunPrimitive` for this run into `prim_store.text_runs` and
+    /// returns the `TextRun` instance kind that refers to it. The run starts with
+    /// an empty glyph key range, a raster scale of 1.0 and an unsnapped offset.
+    fn make_instance_kind(
+        key: TextRunKey,
+        data_handle: TextRunDataHandle,
+        prim_store: &mut PrimitiveStore,
+        reference_frame_relative_offset: LayoutVector2D,
+    ) -> PrimitiveInstanceKind {
+        let run = TextRunPrimitive {
+            // `key` is owned here, so the font is moved out rather than cloned.
+            used_font: key.font,
+            glyph_keys_range: storage::Range::empty(),
+            reference_frame_relative_offset,
+            snapped_reference_frame_relative_offset: reference_frame_relative_offset,
+            shadow: key.shadow,
+            raster_scale: 1.0,
+            requested_raster_space: key.requested_raster_space,
+        };
+        let run_index = prim_store.text_runs.push(run);
+
+        PrimitiveInstanceKind::TextRun { data_handle, run_index }
+    }
+}
+
+impl CreateShadow for TextRun {
+    fn create_shadow(
+        &self,
+        shadow: &Shadow,
+        blur_is_noop: bool,
+        current_raster_space: RasterSpace,
+    ) -> Self {
+        // Start from a copy of this run's font and recolor it for the shadow.
+        let mut shadow_font = self.font.clone();
+        shadow_font.color = shadow.color.into();
+        if shadow.blur_radius > 0.0 { // a blurred shadow is drawn without subpixel AA
+            shadow_font.disable_subpixel_aa();
+        }
+
+        // Only a no-op blur keeps the caller's raster space; otherwise use local space at scale 1.
+        let requested_raster_space = if blur_is_noop {
+            current_raster_space
+        } else {
+            RasterSpace::Local(1.0)
+        };
+
+        Self {
+            font: shadow_font,
+            glyphs: Arc::clone(&self.glyphs),
+            shadow: true,
+            requested_raster_space,
+        }
+    }
+}
+
+impl IsVisible for TextRun {
+    fn is_visible(&self) -> bool {
+        self.font.color.a > 0 // a run whose font color has zero alpha is reported as not visible
+    }
+}
+
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+pub struct TextRunPrimitive {
+    pub used_font: FontInstance, // rebuilt by `update_font_instance` from the specified font
+    pub glyph_keys_range: storage::Range<GlyphKey>, // range into `scratch.glyph_keys`, set by `request_resources`
+    pub reference_frame_relative_offset: LayoutVector2D,
+    pub snapped_reference_frame_relative_offset: LayoutVector2D, // recomputed by `update_font_instance`
+    pub shadow: bool,
+    pub raster_scale: f32, // 1.0 when glyphs are transformed, else the (possibly limited) raster scale
+    pub requested_raster_space: RasterSpace, // consulted by `get_raster_space_for_prim`
+}
+
+impl TextRunPrimitive {
+    pub fn update_font_instance(
+        &mut self,
+        specified_font: &FontInstance,
+        surface: &SurfaceInfo,
+        spatial_node_index: SpatialNodeIndex,
+        transform: &LayoutToWorldTransform,
+        mut allow_subpixel: bool,
+        raster_space: RasterSpace,
+        spatial_tree: &SpatialTree,
+    ) -> bool {
+        // If local raster space is specified, include that in the scale
+        // of the glyphs that get rasterized.
+        // TODO(gw): Once we support proper local space raster modes, this
+        //           will implicitly be part of the device pixel ratio for
+        //           the (cached) local space surface, and so this code
+        //           will no longer be required.
+        let raster_scale = raster_space.local_scale().unwrap_or(1.0).max(0.001); // lower bound keeps the scale above zero
+
+        let dps = surface.device_pixel_scale.0;
+        let font_size = specified_font.size.to_f32_px();
+
+        // Small floating point error can accumulate in the raster * device_pixel scale.
+        // Round that to the nearest 100th of a scale factor to remove this error while
+        // still allowing reasonably accurate scale factors when a pinch-zoom is stopped
+        // at a fractional amount.
+        let quantized_scale = (dps * raster_scale * 100.0).round() / 100.0;
+        let mut device_font_size = font_size * quantized_scale;
+
+        // Check there is a valid transform that doesn't exceed the font size limit.
+        // Ensure the font is supposed to be rasterized in screen-space.
+        // Only support transforms that can be coerced to simple 2D transforms.
+        // Add texture padding to the rasterized glyph buffer when one anticipates
+        // the glyph will need to be scaled when rendered.
+        let (use_subpixel_aa, transform_glyphs, texture_padding, oversized) = if raster_space != RasterSpace::Screen ||
+            transform.has_perspective_component() || !transform.has_2d_inverse()
+        {
+            (false, false, true, device_font_size > FONT_SIZE_LIMIT) // not screen-space rasterizable: padded, untransformed glyphs
+        } else if transform.exceeds_2d_scale((FONT_SIZE_LIMIT / device_font_size) as f64) {
+            (false, false, true, true) // the transform would scale the font past the size limit
+        } else {
+            (true, !transform.is_simple_2d_translation(), false, false) // screen space; transform glyphs unless it is a pure translation
+        };
+
+        let font_transform = if transform_glyphs {
+            // Get the font transform matrix (skew / scale) from the complete transform.
+            // Fold in the device pixel scale.
+            self.raster_scale = 1.0;
+            FontTransform::from(transform)
+        } else {
+            if oversized {
+                // Font sizes larger than the limit need to be scaled, thus can't use subpixels.
+                // In this case we adjust the font size and raster space to ensure
+                // we rasterize at the limit, to minimize the amount of scaling.
+                let limited_raster_scale = FONT_SIZE_LIMIT / (font_size * dps);
+                device_font_size = FONT_SIZE_LIMIT;
+
+                // Record the raster space the text needs to be snapped in. The original raster
+                // scale would have been too big.
+                self.raster_scale = limited_raster_scale;
+            } else {
+                // Record the raster space the text needs to be snapped in. We may have changed
+                // from RasterSpace::Screen due to a transform with perspective or without a 2d
+                // inverse, or it may have been RasterSpace::Local all along.
+                self.raster_scale = raster_scale;
+            }
+
+            // Rasterize the glyph without any transform
+            FontTransform::identity()
+        };
+
+        // TODO(aosmond): Snapping really ought to happen during scene building
+        // as much as possible. This will allow clips to be already adjusted
+        // based on the snapping requirements of the primitive. This may affect
+        // complex clips that create a different task, and when we rasterize
+        // glyphs without the transform (because the shader doesn't have the
+        // snap offsets to adjust its clip). These rects are fairly conservative
+        // to begin with and do not appear to be causing significant issues at
+        // this time.
+        self.snapped_reference_frame_relative_offset = if transform_glyphs {
+            // Don't touch the reference frame relative offset. We'll let the
+            // shader do the snapping in device pixels.
+            self.reference_frame_relative_offset
+        } else {
+            // TODO(dp): The SurfaceInfo struct needs to be updated to use RasterPixelScale
+            //           rather than DevicePixelScale, however this is a large chunk of
+            //           work that will be done as a follow up patch.
+            let raster_pixel_scale = RasterPixelScale::new(surface.device_pixel_scale.0);
+
+            // There may be an animation, so snap the reference frame relative
+            // offset such that it excludes the impact, if any.
+            let snap_to_device = SpaceSnapper::new_with_target(
+                surface.raster_spatial_node_index,
+                spatial_node_index,
+                raster_pixel_scale,
+                spatial_tree,
+            );
+            snap_to_device.snap_point(&self.reference_frame_relative_offset.to_point()).to_vector()
+        };
+
+        let mut flags = specified_font.flags;
+        if transform_glyphs {
+            flags |= FontInstanceFlags::TRANSFORM_GLYPHS;
+        }
+        if texture_padding {
+            flags |= FontInstanceFlags::TEXTURE_PADDING;
+        }
+
+        // If the transform or device size is different, then the caller of
+        // this method needs to know to rebuild the glyphs.
+        let cache_dirty =
+            self.used_font.transform != font_transform ||
+            self.used_font.size != device_font_size.into() ||
+            self.used_font.flags != flags;
+
+        // Construct used font instance from the specified font instance
+        self.used_font = FontInstance {
+            transform: font_transform,
+            size: device_font_size.into(),
+            flags,
+            ..specified_font.clone()
+        };
+
+        // If we are using special estimated background subpixel blending, then
+        // we can allow it regardless of what the surface says.
+        allow_subpixel |= self.used_font.bg_color.a != 0;
+
+        // Subpixel AA is kept only when the caller allows it and the raster mode chosen above supports it.
+        if !allow_subpixel || !use_subpixel_aa {
+            self.used_font.disable_subpixel_aa();
+
+            // Disable subpixel positioning for oversized glyphs to avoid
+            // thrashing the glyph cache with many subpixel variations of
+            // big glyph textures. A possible subpixel positioning error
+            // is small relative to the maximum font size and thus should
+            // not be very noticeable.
+            if oversized {
+                self.used_font.disable_subpixel_position();
+            }
+        }
+
+        cache_dirty // true when transform, size or flags differ from the previously used font
+    }
+
+    /// Gets the raster space to use when rendering this primitive.
+    /// Usually this is derived from the requested raster space. However, if
+    /// the primitive's spatial node or one of its ancestors is being pinch zoomed
+    /// then the scale is quantized (or fixed at 1.0 in low-quality mode). This
+    /// avoids the cost of rasterizing glyphs for every minor change in zoom level.
+    fn get_raster_space_for_prim(
+        &self,
+        prim_spatial_node_index: SpatialNodeIndex,
+        low_quality_pinch_zoom: bool,
+        device_pixel_scale: DevicePixelScale,
+        spatial_tree: &SpatialTree,
+    ) -> RasterSpace {
+        let prim_spatial_node = spatial_tree.get_spatial_node(prim_spatial_node_index);
+        if prim_spatial_node.is_ancestor_or_self_zooming {
+            if low_quality_pinch_zoom {
+                // In low-quality mode, we set the scale to be 1.0. However, the device-pixel
+                // scale selected for the zoom will be taken into account in the caller to this
+                // function when it's converted from local -> device pixels. Since in this mode
+                // the device-pixel scale is constant during the zoom, this gives the desired
+                // performance while also allowing the scale to be adjusted to a new factor at
+                // the end of a pinch-zoom.
+                RasterSpace::Local(1.0)
+            } else {
+                let root_spatial_node_index = spatial_tree.root_reference_frame_index();
+
+                // For high-quality mode, we quantize the exact scale factor as before. However,
+                // we want to _undo_ the effect of the device-pixel scale on the picture cache
+                // tiles (which changes now that they are raster roots). Divide the rounded value
+                // by the device-pixel scale so that the local -> device conversion has no effect.
+                let scale_factors = spatial_tree
+                    .get_relative_transform(prim_spatial_node_index, root_spatial_node_index)
+                    .scale_factors();
+
+                // Clamp the larger scale factor to [1, 8], then round it up to the nearest power of 2.
+                let scale = scale_factors.0.max(scale_factors.1).min(8.0).max(1.0);
+                let rounded_up = 2.0f32.powf(scale.log2().ceil());
+
+                RasterSpace::Local(rounded_up / device_pixel_scale.0)
+            }
+        } else {
+            // Assume that if we have a RasterSpace::Local, it is frequently changing, in which
+            // case we want to undo the device-pixel scale, as we do above.
+            match self.requested_raster_space {
+                RasterSpace::Local(scale) => RasterSpace::Local(scale / device_pixel_scale.0),
+                RasterSpace::Screen => RasterSpace::Screen,
+            }
+        }
+    }
+
+    pub fn request_resources( // refreshes the used font, rebuilds glyph keys if needed, then requests the glyphs
+        &mut self,
+        prim_offset: LayoutVector2D,
+        specified_font: &FontInstance,
+        glyphs: &[GlyphInstance],
+        transform: &LayoutToWorldTransform,
+        surface: &SurfaceInfo,
+        spatial_node_index: SpatialNodeIndex,
+        allow_subpixel: bool,
+        low_quality_pinch_zoom: bool,
+        resource_cache: &mut ResourceCache,
+        gpu_cache: &mut GpuCache,
+        spatial_tree: &SpatialTree,
+        scratch: &mut PrimitiveScratchBuffer,
+    ) {
+        let raster_space = self.get_raster_space_for_prim(
+            spatial_node_index,
+            low_quality_pinch_zoom,
+            surface.device_pixel_scale,
+            spatial_tree,
+        );
+
+        let cache_dirty = self.update_font_instance(
+            specified_font,
+            surface,
+            spatial_node_index,
+            transform,
+            allow_subpixel,
+            raster_space,
+            spatial_tree,
+        );
+
+        if self.glyph_keys_range.is_empty() || cache_dirty { // build keys on first use, rebuild when the used font changed
+            let subpx_dir = self.used_font.get_subpx_dir();
+
+            let dps = surface.device_pixel_scale.0;
+            let transform = match raster_space { // shadows the `transform` parameter with the glyph-position transform
+                RasterSpace::Local(scale) => FontTransform::new(scale * dps, 0.0, 0.0, scale * dps),
+                RasterSpace::Screen => self.used_font.transform.scale(dps),
+            };
+
+            self.glyph_keys_range = scratch.glyph_keys.extend(
+                glyphs.iter().map(|src| {
+                    let src_point = src.point + prim_offset;
+                    let device_offset = transform.transform(&src_point);
+                    GlyphKey::new(src.index, device_offset, subpx_dir)
+                }));
+        }
+
+        resource_cache.request_glyphs( // issued on every call, whether or not the keys were rebuilt
+            self.used_font.clone(),
+            &scratch.glyph_keys[self.glyph_keys_range],
+            gpu_cache,
+        );
+    }
+}
+
+/// These are linux only because FontInstancePlatformOptions varies in size by platform.
+#[test]
+#[cfg(target_os = "linux")]
+fn test_struct_sizes() {
+    use std::mem;
+    // The sizes of these structures are critical for performance on a number of
+    // talos stress tests. If you get a failure here on CI, there are two possibilities:
+    // (a) You made a structure smaller than it currently is. Great work! Update the
+    //     test expectations and move on.
+    // (b) You made a structure larger. This is not necessarily a problem, but should only
+    //     be done with care, and after checking if talos performance regresses badly.
+    assert_eq!(mem::size_of::<TextRun>(), 64, "TextRun size changed");
+    assert_eq!(mem::size_of::<TextRunTemplate>(), 80, "TextRunTemplate size changed");
+    assert_eq!(mem::size_of::<TextRunKey>(), 80, "TextRunKey size changed");
+    assert_eq!(mem::size_of::<TextRunPrimitive>(), 80, "TextRunPrimitive size changed");
+}
diff --git a/gfx/wr/webrender/src/print_tree.rs b/gfx/wr/webrender/src/print_tree.rs
new file mode 100644
index 0000000000..56d7852561
--- /dev/null
+++ b/gfx/wr/webrender/src/print_tree.rs
@@ -0,0 +1,122 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use std::io::Write;
+use std::fmt::Write as FmtWrite;
+
+/// A struct that makes it easier to print out a pretty tree of data, which
+/// can be visually scanned more easily.
+pub struct PrintTree<W>
+where
+    W: Write
+{
+    /// The current level of recursion.
+    level: u32,
+
+    /// An item which is queued up, so that we can determine if we need
+    /// a mid-tree prefix or a branch ending prefix.
+    queued_item: Option<String>,
+
+    /// The line currently being assembled; `flush_line` logs it, writes it
+    /// to the sink, and then clears it.
+    line_buffer: String,
+
+    /// The sink to print to.
+    sink: W,
+}
+
+/// A trait that makes it easy to describe a pretty tree of data,
+/// regardless of the printing destination, to either print it
+/// directly to stdout, or serialize it as in the debugger
+pub trait PrintTreePrinter {
+    fn new_level(&mut self, title: String);
+    fn end_level(&mut self);
+    fn add_item(&mut self, text: String);
+}
+
+// With the default sink (`std::io::sink()`), the tree is only visible through the `debug!` log.
+impl PrintTree<std::io::Sink> {
+    pub fn new(title: &str) -> Self {
+        PrintTree::new_with_sink(title, std::io::sink())
+    }
+}
+
+impl<W> PrintTree<W>
+where
+    W: Write
+{
+    pub fn new_with_sink(title: &str, sink: W) -> Self { // emits the root line for `title` right away
+        let mut tree = PrintTree {
+            level: 1,
+            queued_item: None,
+            line_buffer: String::new(),
+            sink,
+        };
+
+        writeln!(tree.line_buffer, "\u{250c} {}", title).unwrap();
+        tree.flush_line();
+        tree
+    }
+
+    fn print_level_prefix(&mut self) { // one vertical-bar segment per level of nesting
+        for _ in 0 .. self.level {
+            self.line_buffer.push_str("\u{2502}  ");
+        }
+    }
+
+    fn flush_queued_item(&mut self, prefix: &str) { // prints the pending item, if any, behind `prefix`
+        if let Some(pending) = self.queued_item.take() {
+            self.print_level_prefix();
+            writeln!(self.line_buffer, "{} {}", prefix, pending).unwrap();
+            self.flush_line();
+        }
+    }
+
+    fn flush_line(&mut self) { // logs the buffered line, writes it to the sink, then empties the buffer
+        debug!("{}", self.line_buffer);
+        self.sink.write_all(self.line_buffer.as_bytes()).unwrap();
+        self.line_buffer.clear();
+    }
+}
+
+impl<W> PrintTreePrinter for PrintTree<W>
+where
+    W: Write
+{
+    /// Opens a nested level headed by `title`; the level counter grows by one.
+    fn new_level(&mut self, title: String) {
+        self.flush_queued_item("\u{251C}\u{2500}");
+
+        self.print_level_prefix();
+        writeln!(self.line_buffer, "\u{251C}\u{2500} {}", title).unwrap();
+        self.flush_line();
+
+        self.level += 1;
+    }
+
+    /// Closes the current level, printing any pending item with the branch-ending prefix.
+    fn end_level(&mut self) {
+        self.flush_queued_item("\u{2514}\u{2500}");
+        self.level -= 1;
+    }
+
+    /// Queues `text` as the newest item of this level; a previously queued item is printed first.
+    fn add_item(&mut self, text: String) {
+        self.flush_queued_item("\u{251C}\u{2500}");
+        self.queued_item = Some(text);
+    }
+}
+
+impl<W> Drop for PrintTree<W>
+where
+    W: Write
+{
+    fn drop(&mut self) { // flush the last pending item when the tree goes away
+        self.flush_queued_item("\u{2514}\u{2500}"); // U+2514 U+2500, the branch-ending prefix `end_level` uses; `\u{..}` takes hex, and 9492/9472 were the decimal values
+    }
+}
+
+pub trait PrintableTree {
+    fn print_with<T: PrintTreePrinter>(&self, pt: &mut T);
+}
diff --git a/gfx/wr/webrender/src/profiler.rs b/gfx/wr/webrender/src/profiler.rs
new file mode 100644
index 0000000000..ccf86e8647
--- /dev/null
+++ b/gfx/wr/webrender/src/profiler.rs
@@ -0,0 +1,1856 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! # Overlay profiler
+//!
+//! ## Profiler UI string syntax
+//!
+//! Comma-separated list of tokens with trailing and leading spaces trimmed.
+//! Each token can be:
+//! - A counter name with an optional prefix. The name corresponds to the displayed name (see the
+//!   counters vector below).
+//!   - By default (no prefix) the counter is shown as average + max over half a second.
+//!   - With a '#' prefix the counter is shown as a graph.
+//!   - With a '*' prefix the counter is shown as a change indicator.
+//!   - Some special counters such as GPU time queries have specific visualizations ignoring prefixes.
+//! - A preset name to append the preset to the UI (see PROFILER_PRESETS).
+//! - An empty token to insert a bit of vertical space.
+//! - A '|' token to start a new column.
+//! - A '_' token to start a new row.
+
+use api::{ColorF, ColorU};
+use glyph_rasterizer::profiler::GlyphRasterizeProfiler;
+use crate::renderer::DebugRenderer;
+use crate::device::query::GpuTimer;
+use euclid::{Point2D, Rect, Size2D, vec2, default};
+use crate::internal_types::FastHashMap;
+use crate::renderer::{FullFrameStats, MAX_VERTEX_TEXTURE_WIDTH, init::wr_has_been_initialized};
+use api::units::DeviceIntSize;
+use std::collections::vec_deque::VecDeque;
+use std::fmt::{Write, Debug};
+use std::f32;
+use std::ops::Range;
+use std::time::Duration;
+use time::precise_time_ns;
+
+macro_rules! set_text {
+    ($dst:expr, $($arg:tt)*) => {
+        $dst.clear(); // replace, rather than append to, the previous contents
+        write!($dst, $($arg)*).unwrap();
+    };
+}
+
+const GRAPH_WIDTH: f32 = 1024.0;
+const GRAPH_HEIGHT: f32 = 320.0;
+const GRAPH_PADDING: f32 = 8.0;
+const GRAPH_FRAME_HEIGHT: f32 = 16.0;
+const PROFILE_SPACING: f32 = 15.0;
+const PROFILE_PADDING: f32 = 10.0;
+const BACKGROUND_COLOR: ColorU = ColorU { r: 20, g: 20, b: 20, a: 220 };
+
+const ONE_SECOND_NS: u64 = 1_000_000_000;
+
+/// Profiler UI string presets. Defined in the profiler UI string syntax, can contain other presets.
+static PROFILER_PRESETS: &'static[(&'static str, &'static str)] = &[
+    // Default view, doesn't show everything, but still shows quite a bit.
+    (&"Default", &"FPS,|,Slow indicators,_,Time graphs,|,Frame times, ,Transaction times, ,Frame stats, ,Memory, ,Interners,_,GPU time queries,_,Paint phase graph"),
+    // Smaller, less intrusive overview
+    (&"Compact", &"FPS, ,Frame times, ,Frame stats"),
+    // Even less intrusive, only slow transactions and frame indicators.
+    (&"Slow indicators", &"*Slow transaction,*Slow frame"),
+
+    // Counters:
+
+    // Timing information for per layout transaction stages.
+    (&"Transaction times", &"DisplayList,Scene building,Content send,API send"),
+    // Timing information for per-frame stages.
+    (&"Frame times", &"Frame CPU total,Frame building,Visibility,Prepare,Batching,Glyph resolve,Texture cache update,Shader build time,Renderer,GPU"),
+    // Stats about the content of the frame.
+    (&"Frame stats", &"Primitives,Visible primitives,Draw calls,Vertices,Color passes,Alpha passes,Rendered picture tiles,Rasterized glyphs"),
+    // Texture cache allocation stats.
+    (&"Texture cache stats", &"Atlas textures mem, Standalone textures mem, Picture tiles mem, Render targets mem, Depth targets mem, Atlas items mem,
+        Texture cache standalone pressure, Texture cache eviction count, Texture cache youngest evicted, ,
+        Atlas RGBA8 linear pixels, Atlas RGBA8 glyphs pixels, Atlas A8 glyphs pixels, Atlas A8 pixels, Atlas A16 pixels, Atlas RGBA8 nearest pixels,
+        Atlas RGBA8 linear textures, Atlas RGBA8 glyphs textures, Atlas A8 glyphs textures, Atlas A8 textures, Atlas A16 textures, Atlas RGBA8 nearest textures,
+        Atlas RGBA8 linear pressure, Atlas RGBA8 glyphs pressure, Atlas A8 glyphs pressure, Atlas A8 pressure, Atlas A16 pressure, Atlas RGBA8 nearest pressure,"
+    ),
+    // Graphs to investigate driver overhead of texture cache updates.
+    (&"Texture upload perf", &"#Texture cache update,#Texture cache upload, ,#Staging CPU allocation,#Staging GPU allocation,#Staging CPU copy,#Staging GPU copy,#Upload time, ,#Upload copy batches,#Rasterized glyphs, ,#Cache texture creation,#Cache texture deletion"),
+
+    // Graphs:
+
+    // Graph overview of time spent in WebRender's main stages.
+    (&"Time graphs", &"#DisplayList,#Scene building,#Blob rasterization, ,#Frame CPU total,#Frame building,#Renderer,#Texture cache update, ,#GPU,"),
+    // Useful when investigating render backend bottlenecks.
+    (&"Backend graphs", &"#Frame building, #Visibility, #Prepare, #Batching, #Glyph resolve"),
+    // Useful when investigating renderer bottlenecks.
+    (&"Renderer graphs", &"#Rendered picture tiles,#Draw calls,#Rasterized glyphs,#Texture uploads,#Texture uploads mem, ,#Texture cache update,#Renderer,"),
+
+    // Misc:
+
+    (&"Memory", &"Image templates,Image templates mem,Font templates,Font templates mem,DisplayList mem,Picture tiles mem"),
+    (&"Interners", "Interned primitives,Interned clips,Interned pictures,Interned text runs,Interned normal borders,Interned image borders,Interned images,Interned YUV images,Interned line decorations,Interned linear gradients,Interned radial gradients,Interned conic gradients,Interned filter data,Interned backdrops"),
+    // Gpu sampler queries (need the pref gfx.webrender.debug.gpu-sampler-queries).
+    (&"GPU samplers", &"Alpha targets samplers,Transparent pass samplers,Opaque pass samplers,Total samplers"),
+
+    (&"Render reasons", &"Reason scene, Reason animated property, Reason resource update, Reason async image, Reason clear resources, Reason APZ, Reason resize, Reason widget, Reason cache flush, Reason snapshot, Reason resource hook, Reason config change, Reason content sync, Reason flush, On vsync, Reason testing, Reason other"),
+];
+
+fn find_preset(name: &str) -> Option<&'static str> {
+    // Linear scan over the preset table: the first entry whose name is
+    // exactly `name` wins and its UI string is returned; `None` means no
+    // preset carries that name.
+    PROFILER_PRESETS
+        .iter()
+        .find(|preset| preset.0 == name)
+        .map(|preset| preset.1)
+}
+
+// The indices here must match the order of the `profile_counters` array in `Profiler::new` (checked at runtime).
+pub const FRAME_BUILDING_TIME: usize = 0;
+pub const FRAME_VISIBILITY_TIME: usize = 1;
+pub const FRAME_PREPARE_TIME: usize = 2;
+pub const FRAME_BATCHING_TIME: usize = 3;
+
+pub const RENDERER_TIME: usize = 4;
+pub const TOTAL_FRAME_CPU_TIME: usize = 5;
+pub const GPU_TIME: usize = 6;
+
+pub const CONTENT_SEND_TIME: usize = 7;
+pub const API_SEND_TIME: usize = 8;
+
+pub const DISPLAY_LIST_BUILD_TIME: usize = 9;
+pub const DISPLAY_LIST_MEM: usize = 10;
+
+pub const SCENE_BUILD_TIME: usize = 11;
+
+pub const SLOW_FRAME: usize = 12;
+pub const SLOW_TXN: usize = 13;
+
+pub const FRAME_TIME: usize = 14;
+
+pub const TEXTURE_UPLOADS: usize = 15;
+pub const TEXTURE_UPLOADS_MEM: usize = 16;
+pub const TEXTURE_CACHE_UPDATE_TIME: usize = 17;
+pub const CPU_TEXTURE_ALLOCATION_TIME: usize = 18;
+pub const STAGING_TEXTURE_ALLOCATION_TIME: usize = 19;
+pub const UPLOAD_CPU_COPY_TIME: usize = 20;
+pub const UPLOAD_GPU_COPY_TIME: usize = 21;
+pub const UPLOAD_TIME: usize = 22;
+pub const UPLOAD_NUM_COPY_BATCHES: usize = 23;
+pub const TOTAL_UPLOAD_TIME: usize = 24;
+pub const CREATE_CACHE_TEXTURE_TIME: usize = 25;
+pub const DELETE_CACHE_TEXTURE_TIME: usize = 26;
+pub const GPU_CACHE_UPLOAD_TIME: usize = 27;
+
+pub const RASTERIZED_BLOBS: usize = 28;
+pub const RASTERIZED_BLOB_TILES: usize = 29;
+pub const RASTERIZED_BLOBS_PX: usize = 30;
+pub const BLOB_RASTERIZATION_TIME: usize = 31;
+
+pub const RASTERIZED_GLYPHS: usize = 32;
+pub const GLYPH_RESOLVE_TIME: usize = 33;
+
+pub const DRAW_CALLS: usize = 34;
+pub const VERTICES: usize = 35;
+pub const PRIMITIVES: usize = 36;
+pub const VISIBLE_PRIMITIVES: usize = 37;
+
+pub const USED_TARGETS: usize = 38;
+pub const CREATED_TARGETS: usize = 39;
+pub const PICTURE_CACHE_SLICES: usize = 40;
+
+pub const COLOR_PASSES: usize = 41;
+pub const ALPHA_PASSES: usize = 42;
+pub const PICTURE_TILES: usize = 43;
+pub const RENDERED_PICTURE_TILES: usize = 44;
+
+pub const FONT_TEMPLATES: usize = 45;
+pub const FONT_TEMPLATES_MEM: usize = 46;
+pub const IMAGE_TEMPLATES: usize = 47;
+pub const IMAGE_TEMPLATES_MEM: usize = 48;
+
+pub const GPU_CACHE_ROWS_TOTAL: usize = 49;
+pub const GPU_CACHE_ROWS_UPDATED: usize = 50;
+pub const GPU_CACHE_BLOCKS_TOTAL: usize = 51;
+pub const GPU_CACHE_BLOCKS_UPDATED: usize = 52;
+pub const GPU_CACHE_BLOCKS_SAVED: usize = 53;
+
+// Atlas items represents the area occupied by items in the cache textures.
+// The actual texture memory allocated is ATLAS_TEXTURES_MEM.
+pub const ATLAS_ITEMS_MEM: usize = 54;
+pub const ATLAS_A8_PIXELS: usize = 55;
+pub const ATLAS_A8_TEXTURES: usize = 56;
+pub const ATLAS_A16_PIXELS: usize = 57;
+pub const ATLAS_A16_TEXTURES: usize = 58;
+pub const ATLAS_RGBA8_LINEAR_PIXELS: usize = 59;
+pub const ATLAS_RGBA8_LINEAR_TEXTURES: usize = 60;
+pub const ATLAS_RGBA8_NEAREST_PIXELS: usize = 61;
+pub const ATLAS_RGBA8_NEAREST_TEXTURES: usize = 62;
+pub const ATLAS_RGBA8_GLYPHS_PIXELS: usize = 63;
+pub const ATLAS_RGBA8_GLYPHS_TEXTURES: usize = 64;
+pub const ATLAS_A8_GLYPHS_PIXELS: usize = 65;
+pub const ATLAS_A8_GLYPHS_TEXTURES: usize = 66;
+pub const ATLAS_COLOR8_LINEAR_PRESSURE: usize = 67;
+pub const ATLAS_COLOR8_NEAREST_PRESSURE: usize = 68;
+pub const ATLAS_COLOR8_GLYPHS_PRESSURE: usize = 69;
+pub const ATLAS_ALPHA8_PRESSURE: usize = 70;
+pub const ATLAS_ALPHA8_GLYPHS_PRESSURE: usize = 71;
+pub const ATLAS_ALPHA16_PRESSURE: usize = 72;
+pub const ATLAS_STANDALONE_PRESSURE: usize = 73;
+
+pub const TEXTURE_CACHE_EVICTION_COUNT: usize = 74;
+pub const TEXTURE_CACHE_YOUNGEST_EVICTION: usize = 75;
+pub const EXTERNAL_IMAGE_BYTES: usize = 76;
+pub const ATLAS_TEXTURES_MEM: usize = 77;
+pub const STANDALONE_TEXTURES_MEM: usize = 78;
+pub const PICTURE_TILES_MEM: usize = 79;
+pub const RENDER_TARGET_MEM: usize = 80;
+
+pub const ALPHA_TARGETS_SAMPLERS: usize = 81;
+pub const TRANSPARENT_PASS_SAMPLERS: usize = 82;
+pub const OPAQUE_PASS_SAMPLERS: usize = 83;
+pub const TOTAL_SAMPLERS: usize = 84;
+
+pub const INTERNED_PRIMITIVES: usize = 85;
+pub const INTERNED_CLIPS: usize = 86;
+pub const INTERNED_TEXT_RUNS: usize = 87;
+pub const INTERNED_NORMAL_BORDERS: usize = 88;
+pub const INTERNED_IMAGE_BORDERS: usize = 89;
+pub const INTERNED_IMAGES: usize = 90;
+pub const INTERNED_YUV_IMAGES: usize = 91;
+pub const INTERNED_LINE_DECORATIONS: usize = 92;
+pub const INTERNED_LINEAR_GRADIENTS: usize = 93;
+pub const INTERNED_RADIAL_GRADIENTS: usize = 94;
+pub const INTERNED_CONIC_GRADIENTS: usize = 95;
+pub const INTERNED_PICTURES: usize = 96;
+pub const INTERNED_FILTER_DATA: usize = 97;
+pub const INTERNED_BACKDROP_CAPTURES: usize = 98;
+pub const INTERNED_BACKDROP_RENDERS: usize = 99;
+pub const INTERNED_POLYGONS: usize = 100;
+
+pub const DEPTH_TARGETS_MEM: usize = 101;
+
+pub const SHADER_BUILD_TIME: usize = 102;
+
+pub const RENDER_REASON_FIRST: usize = 103;
+pub const RENDER_REASON_SCENE: usize = 103;
+pub const RENDER_REASON_ANIMATED_PROPERTY: usize = 104;
+pub const RENDER_REASON_RESOURCE_UPDATE: usize = 105;
+pub const RENDER_REASON_ASYNC_IMAGE: usize = 106;
+pub const RENDER_REASON_CLEAR_RESOURCES: usize = 107;
+pub const RENDER_REASON_APZ: usize = 108;
+pub const RENDER_REASON_RESIZE: usize = 109;
+pub const RENDER_REASON_WIDGET: usize = 110;
+pub const RENDER_REASON_TEXTURE_CACHE_FLUSH: usize = 111;
+pub const RENDER_REASON_SNAPSHOT: usize = 112;
+pub const RENDER_REASON_POST_RESOURCE_UPDATE_HOOKS: usize = 113;
+pub const RENDER_REASON_CONFIG_CHANGE: usize = 114;
+pub const RENDER_REASON_CONTENT_SYNC: usize = 115;
+pub const RENDER_REASON_FLUSH: usize = 116;
+pub const RENDER_REASON_TESTING: usize = 117;
+pub const RENDER_REASON_OTHER: usize = 118;
+pub const RENDER_REASON_VSYNC: usize = 119;
+
+pub const TEXTURES_CREATED: usize = 120;
+pub const TEXTURES_DELETED: usize = 121;
+
+pub const NUM_PROFILER_EVENTS: usize = 122;
+
+pub struct Profiler {
+    counters: Vec<Counter>,
+    gpu_frames: ProfilerFrameCollection,
+    frame_stats: ProfilerFrameCollection,
+
+    start: u64,
+    avg_over_period: u64,
+    num_graph_samples: usize,
+
+    // For FPS computation. Updated in update().
+    frame_timestamps_within_last_second: Vec<u64>,
+
+    ui: Vec<Item>,
+}
+
+impl Profiler {
+    /// Creates a profiler with one counter per entry of the descriptor table below,
+    /// an averaging period of half of `ONE_SECOND_NS` and an empty UI.
+    pub fn new() -> Self {
+
+        // Builds the descriptor of a counter shown as a float.
+        fn float(name: &'static str, unit: &'static str, index: usize, expected: Expected<f64>) -> CounterDescriptor {
+            CounterDescriptor { name, unit, show_as: ShowAs::Float, index, expected }
+        }
+
+        // Builds the descriptor of a counter shown as an integer. The expected range
+        // is converted with into_float() since descriptors store it as floats.
+        fn int(name: &'static str, unit: &'static str, index: usize, expected: Expected<i64>) -> CounterDescriptor {
+            CounterDescriptor { name, unit, show_as: ShowAs::Int, index, expected: expected.into_float() }
+        }
+
+        // Not in the list below:
+        // - "GPU time queries" shows the details of the GPU time queries if selected as a graph.
+        // - "GPU cache bars" shows some info about the GPU cache.
+
+        // TODO: This should be a global variable but to keep things readable we need to be able to
+        // use match in const fn which isn't supported by the current rustc version in gecko's build
+        // system.
+        let profile_counters = &[
+            float("Frame building", "ms", FRAME_BUILDING_TIME, expected(0.0..6.0).avg(0.0..3.0)),
+            float("Visibility", "ms", FRAME_VISIBILITY_TIME, expected(0.0..3.0).avg(0.0..2.0)),
+            float("Prepare", "ms", FRAME_PREPARE_TIME, expected(0.0..3.0).avg(0.0..2.0)),
+            float("Batching", "ms", FRAME_BATCHING_TIME, expected(0.0..3.0).avg(0.0..2.0)),
+
+            float("Renderer", "ms", RENDERER_TIME, expected(0.0..8.0).avg(0.0..5.0)),
+            float("Frame CPU total", "ms", TOTAL_FRAME_CPU_TIME, expected(0.0..15.0).avg(0.0..6.0)),
+            float("GPU", "ms", GPU_TIME, expected(0.0..15.0).avg(0.0..8.0)),
+
+            float("Content send", "ms", CONTENT_SEND_TIME, expected(0.0..1.0).avg(0.0..1.0)),
+            float("API send", "ms", API_SEND_TIME, expected(0.0..1.0).avg(0.0..0.4)),
+            float("DisplayList", "ms", DISPLAY_LIST_BUILD_TIME, expected(0.0..5.0).avg(0.0..3.0)),
+            float("DisplayList mem", "MB", DISPLAY_LIST_MEM, expected(0.0..20.0)),
+            float("Scene building", "ms", SCENE_BUILD_TIME, expected(0.0..4.0).avg(0.0..3.0)),
+
+            float("Slow frame", "", SLOW_FRAME, expected(0.0..0.0)),
+            float("Slow transaction", "", SLOW_TXN, expected(0.0..0.0)),
+
+            float("Frame", "ms", FRAME_TIME, Expected::none()),
+
+            int("Texture uploads", "", TEXTURE_UPLOADS, expected(0..10)),
+            float("Texture uploads mem", "MB", TEXTURE_UPLOADS_MEM, expected(0.0..10.0)),
+            float("Texture cache update", "ms", TEXTURE_CACHE_UPDATE_TIME, expected(0.0..3.0)),
+            float("Staging CPU allocation", "ms", CPU_TEXTURE_ALLOCATION_TIME, Expected::none()),
+            float("Staging GPU allocation", "ms", STAGING_TEXTURE_ALLOCATION_TIME, Expected::none()),
+            float("Staging CPU copy", "ms", UPLOAD_CPU_COPY_TIME, Expected::none()),
+            float("Staging GPU copy", "ms", UPLOAD_GPU_COPY_TIME, Expected::none()),
+            float("Upload time", "ms", UPLOAD_TIME, Expected::none()),
+            int("Upload copy batches", "", UPLOAD_NUM_COPY_BATCHES, Expected::none()),
+            float("Texture cache upload", "ms", TOTAL_UPLOAD_TIME, expected(0.0..5.0)),
+            float("Cache texture creation", "ms", CREATE_CACHE_TEXTURE_TIME, expected(0.0..2.0)),
+            float("Cache texture deletion", "ms", DELETE_CACHE_TEXTURE_TIME, expected(0.0..1.0)),
+            float("GPU cache upload", "ms", GPU_CACHE_UPLOAD_TIME, expected(0.0..2.0)),
+
+            int("Rasterized blobs", "", RASTERIZED_BLOBS, expected(0..15)),
+            int("Rasterized blob tiles", "", RASTERIZED_BLOB_TILES, expected(0..15)),
+            int("Rasterized blob pixels", "px", RASTERIZED_BLOBS_PX, expected(0..300_000)),
+            float("Blob rasterization", "ms", BLOB_RASTERIZATION_TIME, expected(0.0..8.0)),
+
+            int("Rasterized glyphs", "", RASTERIZED_GLYPHS, expected(0..15)),
+            float("Glyph resolve", "ms", GLYPH_RESOLVE_TIME, expected(0.0..4.0)),
+
+            int("Draw calls", "", DRAW_CALLS, expected(1..120).avg(1..90)),
+            int("Vertices", "", VERTICES, expected(10..5000)),
+            int("Primitives", "", PRIMITIVES, expected(10..5000)),
+            int("Visible primitives", "", VISIBLE_PRIMITIVES, expected(1..5000)),
+
+            int("Used targets", "", USED_TARGETS, expected(1..4)),
+            int("Created targets", "", CREATED_TARGETS, expected(0..3)),
+            int("Picture cache slices", "", PICTURE_CACHE_SLICES, expected(0..5)),
+
+            int("Color passes", "", COLOR_PASSES, expected(1..4)),
+            int("Alpha passes", "", ALPHA_PASSES, expected(0..3)),
+            int("Picture tiles", "", PICTURE_TILES, expected(0..15)),
+            int("Rendered picture tiles", "", RENDERED_PICTURE_TILES, expected(0..5)),
+
+            int("Font templates", "", FONT_TEMPLATES, expected(0..40)),
+            float("Font templates mem", "MB", FONT_TEMPLATES_MEM, expected(0.0..20.0)),
+            int("Image templates", "", IMAGE_TEMPLATES, expected(0..100)),
+            float("Image templates mem", "MB", IMAGE_TEMPLATES_MEM, expected(0.0..50.0)),
+
+            int("GPU cache rows total", "", GPU_CACHE_ROWS_TOTAL, expected(1..50)),
+            int("GPU cache rows updated", "", GPU_CACHE_ROWS_UPDATED, expected(0..25)),
+            int("GPU blocks total", "", GPU_CACHE_BLOCKS_TOTAL, expected(1..65_000)),
+            int("GPU blocks updated", "", GPU_CACHE_BLOCKS_UPDATED, expected(0..1000)),
+            int("GPU blocks saved", "", GPU_CACHE_BLOCKS_SAVED, expected(0..50_000)),
+
+            float("Atlas items mem", "MB", ATLAS_ITEMS_MEM, expected(0.0..100.0)),
+            int("Atlas A8 pixels", "px", ATLAS_A8_PIXELS, expected(0..1_000_000)),
+            int("Atlas A8 textures", "", ATLAS_A8_TEXTURES, expected(0..2)),
+            int("Atlas A16 pixels", "px", ATLAS_A16_PIXELS, expected(0..260_000)),
+            int("Atlas A16 textures", "", ATLAS_A16_TEXTURES, expected(0..2)),
+            int("Atlas RGBA8 linear pixels", "px", ATLAS_RGBA8_LINEAR_PIXELS, expected(0..8_000_000)),
+            int("Atlas RGBA8 linear textures", "", ATLAS_RGBA8_LINEAR_TEXTURES, expected(0..3)),
+            int("Atlas RGBA8 nearest pixels", "px", ATLAS_RGBA8_NEAREST_PIXELS, expected(0..260_000)),
+            int("Atlas RGBA8 nearest textures", "", ATLAS_RGBA8_NEAREST_TEXTURES, expected(0..2)),
+            int("Atlas RGBA8 glyphs pixels", "px", ATLAS_RGBA8_GLYPHS_PIXELS, expected(0..4_000_000)),
+            int("Atlas RGBA8 glyphs textures", "", ATLAS_RGBA8_GLYPHS_TEXTURES, expected(0..2)),
+            int("Atlas A8 glyphs pixels", "px", ATLAS_A8_GLYPHS_PIXELS, expected(0..4_000_000)),
+            int("Atlas A8 glyphs textures", "", ATLAS_A8_GLYPHS_TEXTURES, expected(0..2)),
+            float("Atlas RGBA8 linear pressure", "", ATLAS_COLOR8_LINEAR_PRESSURE, expected(0.0..1.0)),
+            float("Atlas RGBA8 nearest pressure", "", ATLAS_COLOR8_NEAREST_PRESSURE, expected(0.0..1.0)),
+            float("Atlas RGBA8 glyphs pressure", "", ATLAS_COLOR8_GLYPHS_PRESSURE, expected(0.0..1.0)),
+            float("Atlas A8 pressure", "", ATLAS_ALPHA8_PRESSURE, expected(0.0..1.0)),
+            float("Atlas A8 glyphs pressure", "", ATLAS_ALPHA8_GLYPHS_PRESSURE, expected(0.0..1.0)),
+            float("Atlas A16 pressure", "", ATLAS_ALPHA16_PRESSURE, expected(0.0..1.0)),
+            float("Texture cache standalone pressure", "", ATLAS_STANDALONE_PRESSURE, expected(0.0..1.0)),
+
+            int("Texture cache eviction count", "items", TEXTURE_CACHE_EVICTION_COUNT, Expected::none()),
+            int("Texture cache youngest evicted", "frames", TEXTURE_CACHE_YOUNGEST_EVICTION, Expected::none()),
+            float("External image mem", "MB", EXTERNAL_IMAGE_BYTES, Expected::none()),
+            float("Atlas textures mem", "MB", ATLAS_TEXTURES_MEM, Expected::none()),
+            float("Standalone textures mem", "MB", STANDALONE_TEXTURES_MEM, Expected::none()),
+            float("Picture tiles mem", "MB", PICTURE_TILES_MEM, expected(0.0..150.0)),
+            float("Render targets mem", "MB", RENDER_TARGET_MEM, Expected::none()),
+
+            float("Alpha targets samplers", "%", ALPHA_TARGETS_SAMPLERS, Expected::none()),
+            float("Transparent pass samplers", "%", TRANSPARENT_PASS_SAMPLERS, Expected::none()),
+            float("Opaque pass samplers", "%", OPAQUE_PASS_SAMPLERS, Expected::none()),
+            float("Total samplers", "%", TOTAL_SAMPLERS, Expected::none()),
+
+            int("Interned primitives", "", INTERNED_PRIMITIVES, Expected::none()),
+            int("Interned clips", "", INTERNED_CLIPS, Expected::none()),
+            int("Interned text runs", "", INTERNED_TEXT_RUNS, Expected::none()),
+            int("Interned normal borders", "", INTERNED_NORMAL_BORDERS, Expected::none()),
+            int("Interned image borders", "", INTERNED_IMAGE_BORDERS, Expected::none()),
+            int("Interned images", "", INTERNED_IMAGES, Expected::none()),
+            int("Interned YUV images", "", INTERNED_YUV_IMAGES, Expected::none()),
+            int("Interned line decorations", "", INTERNED_LINE_DECORATIONS, Expected::none()),
+            int("Interned linear gradients", "", INTERNED_LINEAR_GRADIENTS, Expected::none()),
+            int("Interned radial gradients", "", INTERNED_RADIAL_GRADIENTS, Expected::none()),
+            int("Interned conic gradients", "", INTERNED_CONIC_GRADIENTS, Expected::none()),
+            int("Interned pictures", "", INTERNED_PICTURES, Expected::none()),
+            int("Interned filter data", "", INTERNED_FILTER_DATA, Expected::none()),
+            int("Interned backdrop captures", "", INTERNED_BACKDROP_CAPTURES, Expected::none()),
+            int("Interned backdrop renders", "", INTERNED_BACKDROP_RENDERS, Expected::none()),
+            int("Interned polygons", "", INTERNED_POLYGONS, Expected::none()),
+
+            float("Depth targets mem", "MB", DEPTH_TARGETS_MEM, Expected::none()),
+            float("Shader build time", "ms", SHADER_BUILD_TIME, Expected::none()),
+            // We use the expected range to highlight render reasons that are happening.
+            float("Reason scene", "", RENDER_REASON_SCENE, expected(0.0..0.01)),
+            float("Reason animated property", "", RENDER_REASON_ANIMATED_PROPERTY, expected(0.0..0.01)),
+            float("Reason resource update", "", RENDER_REASON_RESOURCE_UPDATE, expected(0.0..0.01)),
+            float("Reason async image", "", RENDER_REASON_ASYNC_IMAGE, expected(0.0..0.01)),
+            float("Reason clear resources", "", RENDER_REASON_CLEAR_RESOURCES, expected(0.0..0.01)),
+            float("Reason APZ", "", RENDER_REASON_APZ, expected(0.0..0.01)),
+            float("Reason resize", "", RENDER_REASON_RESIZE, expected(0.0..0.01)),
+            float("Reason widget", "", RENDER_REASON_WIDGET, expected(0.0..0.01)),
+            float("Reason cache flush", "", RENDER_REASON_TEXTURE_CACHE_FLUSH, expected(0.0..0.01)),
+            float("Reason snapshot", "", RENDER_REASON_SNAPSHOT, expected(0.0..0.01)),
+            float("Reason resource hook", "", RENDER_REASON_POST_RESOURCE_UPDATE_HOOKS, expected(0.0..0.01)),
+            float("Reason config change", "", RENDER_REASON_CONFIG_CHANGE, expected(0.0..0.01)),
+            float("Reason content sync", "", RENDER_REASON_CONTENT_SYNC, expected(0.0..0.01)),
+            float("Reason flush", "", RENDER_REASON_FLUSH, expected(0.0..0.01)),
+            float("Reason testing", "", RENDER_REASON_TESTING, expected(0.0..0.01)),
+            float("Reason other", "", RENDER_REASON_OTHER, expected(0.0..0.01)),
+            float("On vsync", "", RENDER_REASON_VSYNC, expected(0.0..0.01)),
+
+            int("Textures created", "", TEXTURES_CREATED, expected(0..5)),
+            int("Textures deleted", "", TEXTURES_DELETED, Expected::none()),
+        ];
+
+        let mut counters = Vec::with_capacity(profile_counters.len());
+
+        // Counters are looked up by index constant, so every descriptor must sit at
+        // the position given by its index. This is only checked in debug builds.
+        for (idx, descriptor) in profile_counters.iter().enumerate() {
+            debug_assert_eq!(descriptor.index, idx);
+            counters.push(Counter::new(descriptor));
+        }
+
+        Profiler {
+            gpu_frames: ProfilerFrameCollection::new(),
+            frame_stats: ProfilerFrameCollection::new(),
+
+            counters,
+            start: precise_time_ns(),
+            avg_over_period: ONE_SECOND_NS / 2,
+
+            num_graph_samples: 500, // Would it be useful to control this via a pref?
+            frame_timestamps_within_last_second: Vec::new(),
+            ui: Vec::new(),
+        }
+    }
+
+    /// Adds up the finite values of the counters listed in `counters` and, when the
+    /// sum exceeds `threshold`, stores that sum in the `dst_counter` counter.
+    ///
+    /// Used to flag slow frames and slow transactions.
+    fn update_slow_event(&mut self, dst_counter: usize, counters: &[usize], threshold: f64) {
+        // Non-finite values are left out of the sum.
+        let sum = counters
+            .iter()
+            .map(|&idx| self.counters[idx].value)
+            .filter(|value| value.is_finite())
+            .fold(0.0, |acc, value| acc + value);
+
+        if sum > threshold {
+            self.counters[dst_counter].set(sum);
+        }
+    }
+
+    /// Call at the end of every frame, after setting the counter values and before
+    /// drawing the counters.
+    ///
+    /// Records the frame timestamp used for FPS computation, refreshes the slow
+    /// frame / slow transaction counters and updates every counter, asking them to
+    /// refresh their averages once per averaging period.
+    pub fn update(&mut self) {
+        let now = precise_time_ns();
+        // Saturating subtractions: a plain `-` on u64 would panic in debug builds (and
+        // wrap in release builds) if the clock reads less than one second or ever
+        // goes backwards.
+        let update_avg = now.saturating_sub(self.start) > self.avg_over_period;
+        if update_avg {
+            self.start = now;
+        }
+        let one_second_ago = now.saturating_sub(ONE_SECOND_NS);
+        self.frame_timestamps_within_last_second.retain(|t| *t > one_second_ago);
+        self.frame_timestamps_within_last_second.push(now);
+
+        // A frame is flagged as slow when its total CPU time exceeds 15.
+        self.update_slow_event(
+            SLOW_FRAME,
+            &[TOTAL_FRAME_CPU_TIME],
+            15.0,
+        );
+        // A transaction is flagged as slow when display list building, content send
+        // and scene building add up to more than 80.
+        self.update_slow_event(
+            SLOW_TXN,
+            &[DISPLAY_LIST_BUILD_TIME, CONTENT_SEND_TIME, SCENE_BUILD_TIME],
+            80.0
+        );
+
+        for counter in &mut self.counters {
+            counter.update(update_avg);
+        }
+    }
+
+    /// Records `stats` in the frame stats collection, ignoring stats whose
+    /// `gecko_display_list_time` is exactly zero.
+    pub fn update_frame_stats(&mut self, stats: FullFrameStats) {
+        if stats.gecko_display_list_time == 0.0 {
+            return;
+        }
+
+        self.frame_stats.push(stats.into());
+    }
+
+    /// Stores the GPU time queries of a frame and sets the `GPU_TIME` counter to the
+    /// sum of their durations, converted with `ns_to_ms`.
+    pub fn set_gpu_time_queries(&mut self, gpu_queries: Vec<GpuTimer>) {
+        let total_ns = gpu_queries
+            .iter()
+            .fold(0, |acc, query| acc + query.time_ns);
+
+        let frame = ProfilerFrame {
+            total_time: total_ns,
+            samples: gpu_queries,
+        };
+        self.gpu_frames.push(frame);
+
+        self.counters[GPU_TIME].set_f64(ns_to_ms(total_ns));
+    }
+
+    /// Returns the index of the first counter called `name`, or `None` when no
+    /// counter has that name.
+    pub fn index_of(&self, name: &str) -> Option<usize> {
+        self.counters
+            .iter()
+            .enumerate()
+            .find(|(_, counter)| counter.name == name)
+            .map(|(idx, _)| idx)
+    }
+
+    /// Defines the profiler UI from a comma-separated list of item names (see the
+    /// comment about the syntax at the top of this file).
+    ///
+    /// Nothing happens when the resulting layout equals the current one. Otherwise
+    /// every graph is disabled, then re-enabled only for the counters that the new
+    /// layout shows as graphs.
+    pub fn set_ui(&mut self, names: &str) {
+        let mut layout = Vec::new();
+        self.append_to_ui(&mut layout, names);
+
+        if self.ui == layout {
+            return;
+        }
+
+        self.counters.iter_mut().for_each(|counter| {
+            counter.disable_graph();
+        });
+
+        let sample_count = self.num_graph_samples;
+        for item in layout.iter() {
+            match item {
+                Item::Graph(idx) => {
+                    self.counters[*idx].enable_graph(sample_count);
+                }
+                _ => {}
+            }
+        }
+
+        self.ui = layout;
+    }
+
+    /// Parses the comma-separated `names` and appends the matching items to `selection`.
+    ///
+    /// A leading `#` requests a graph and a leading `*` a change indicator for the
+    /// named counter. Successive plain counters are grouped into one `Item::Counters`.
+    /// A name that is not a counter but matches a preset is expanded recursively, and
+    /// a name that matches nothing is reported with an `Item::Text` message.
+    fn append_to_ui(&mut self, selection: &mut Vec<Item>, names: &str) {
+        // Group successive counters together.
+        fn flush_counters(counters: &mut Vec<usize>, selection: &mut Vec<Item>) {
+            if !counters.is_empty() {
+                selection.push(Item::Counters(std::mem::take(counters)))
+            }
+        }
+
+        let mut counters = Vec::new();
+
+        for name in names.split(",") {
+            let name = name.trim();
+            let is_graph = name.starts_with("#");
+            let is_indicator = name.starts_with("*");
+            // Both prefixes are a single ASCII byte, so slicing at 1 is always valid.
+            let name = if is_graph || is_indicator {
+                &name[1..]
+            } else {
+                name
+            };
+            // See comment about the ui string syntax at the top of this file.
+            match name {
+                "" => {
+                    flush_counters(&mut counters, selection);
+                    selection.push(Item::Space);
+                }
+                "|" => {
+                    flush_counters(&mut counters, selection);
+                    selection.push(Item::Column);
+                }
+                "_" => {
+                    flush_counters(&mut counters, selection);
+                    selection.push(Item::Row);
+                }
+                "FPS" => {
+                    flush_counters(&mut counters, selection);
+                    selection.push(Item::Fps);
+                }
+                "GPU time queries" => {
+                    flush_counters(&mut counters, selection);
+                    selection.push(Item::GpuTimeQueries);
+                }
+                "GPU cache bars" => {
+                    flush_counters(&mut counters, selection);
+                    selection.push(Item::GpuCacheBars);
+                }
+                "Paint phase graph" => {
+                    flush_counters(&mut counters, selection);
+                    selection.push(Item::PaintPhaseGraph);
+                }
+                _ => {
+                    if let Some(idx) = self.index_of(name) {
+                        if is_graph {
+                            flush_counters(&mut counters, selection);
+                            selection.push(Item::Graph(idx));
+                        } else if is_indicator {
+                            flush_counters(&mut counters, selection);
+                            selection.push(Item::ChangeIndicator(idx));
+                        } else {
+                            counters.push(idx);
+                        }
+                    } else if let Some(preset_str) = find_preset(name) {
+                        flush_counters(&mut counters, selection);
+                        self.append_to_ui(selection, preset_str);
+                    } else {
+                        // Flush first so that the message shows up where the unknown
+                        // name was written, after the counters listed before it.
+                        flush_counters(&mut counters, selection);
+                        selection.push(Item::Text(format!("Unknown counter: {}", name)));
+                    }
+                }
+            }
+        }
+
+        flush_counters(&mut counters, selection);
+    }
+
+    /// Copies every `Event::Value` of the transaction profile into the counter with
+    /// the same index, then resets all events of the profile to `Event::None`.
+    pub fn set_counters(&mut self, counters: &mut TransactionProfile) {
+        for (id, slot) in counters.events.iter_mut().enumerate() {
+            match *slot {
+                Event::Value(value) => {
+                    self.counters[id].set(value);
+                }
+                _ => {}
+            }
+            *slot = Event::None;
+        }
+    }
+
+    /// Returns what the counter at index `id` reports through its `get()`.
+    ///
+    /// Panics if `id` is not a valid counter index.
+    pub fn get(&self, id: usize) -> Option<f64> {
+        let counter = &self.counters[id];
+        counter.get()
+    }
+
+    /// Draws the names of the `selected` counters in one column and their values in
+    /// a second column to the right, over a background quad.
+    ///
+    /// Panics if an entry of `selected` is not a valid index into `counters`.
+    /// Returns the rectangle covered by the background quad.
+    fn draw_counters(
+        counters: &[Counter],
+        selected: &[usize],
+        x: f32, y: f32,
+        text_buffer: &mut String,
+        debug_renderer: &mut DebugRenderer,
+    ) -> default::Rect<f32> {
+        let line_height = debug_renderer.line_height();
+
+        let left = x + PROFILE_PADDING;
+        let top = y + PROFILE_PADDING;
+        let origin = default::Point2D::new(left, top);
+        let first_row_y = top + line_height * 0.5;
+
+        // Rows alternate between two colors; a second pair is used for the values of
+        // counters that report an unexpected average or maximum.
+        let regular_colors = [
+            ColorU::new(255, 255, 255, 255),
+            ColorU::new(255, 255, 0, 255),
+        ];
+        let unexpected_colors = [
+            ColorU::new(255, 80, 0, 255),
+            ColorU::new(255, 0, 0, 255),
+        ];
+
+        let mut bounds = Rect::zero();
+
+        // First column: counter names.
+        let mut row_y = first_row_y;
+        for (row, &idx) in selected.iter().enumerate() {
+            let name_rect = debug_renderer.add_text(
+                left,
+                row_y,
+                counters[idx].name,
+                regular_colors[row % 2],
+                None,
+            );
+
+            bounds = bounds.union(&name_rect);
+            row_y += line_height;
+        }
+
+        // Second column: counter values, placed to the right of the widest name.
+        let value_x = bounds.max_x() + 60.0;
+        let mut row_y = first_row_y;
+        for (row, &idx) in selected.iter().enumerate() {
+            let counter = &counters[idx];
+            let palette = if counter.has_unexpected_avg_max() {
+                &unexpected_colors
+            } else {
+                &regular_colors
+            };
+
+            counter.write_value(text_buffer);
+
+            let value_rect = debug_renderer.add_text(
+                value_x,
+                row_y,
+                &text_buffer,
+                palette[row % 2],
+                None,
+            );
+
+            bounds = bounds.union(&value_rect);
+            row_y += line_height;
+        }
+
+        // Make sure the origin is covered even when nothing was drawn, then pad.
+        let bounds = bounds
+            .union(&Rect { origin, size: Size2D::new(1.0, 1.0) })
+            .inflate(PROFILE_PADDING, PROFILE_PADDING);
+
+        debug_renderer.add_quad(
+            bounds.min_x(),
+            bounds.min_y(),
+            bounds.max_x(),
+            bounds.max_y(),
+            BACKGROUND_COLOR,
+            BACKGROUND_COLOR,
+        );
+
+        bounds
+    }
+
+    /// Draws the graph of `counter`, followed on the right by its name, unit and
+    /// sample statistics.
+    ///
+    /// One quad is drawn per finite sample, scaled so that the largest sample spans
+    /// the full graph height. Samples that the counter reports as unexpected are
+    /// drawn in red, the others in green.
+    ///
+    /// Panics if the counter has no graph. Returns the rectangle covered by the
+    /// background quad.
+    fn draw_graph(
+        counter: &Counter,
+        x: f32,
+        y: f32,
+        text_buffer: &mut String,
+        debug_renderer: &mut DebugRenderer,
+    ) -> default::Rect<f32> {
+        let graph = counter.graph.as_ref().unwrap();
+
+        // The graph is as wide as the capacity of its sample buffer.
+        let max_samples = graph.values.capacity() as f32;
+
+        let size = Size2D::new(max_samples, 100.0);
+        let line_height = debug_renderer.line_height();
+        let graph_rect = Rect::new(Point2D::new(x + PROFILE_PADDING, y + PROFILE_PADDING), size);
+        let mut rect = graph_rect.inflate(PROFILE_PADDING, PROFILE_PADDING);
+
+        let stats = graph.stats();
+
+        let text_color = ColorU::new(255, 255, 0, 255);
+        let text_origin = rect.origin + vec2(rect.size.width, 25.0);
+        // Title: the counter name, followed by its unit when it has one.
+        set_text!(text_buffer, "{} ({})", counter.name, counter.unit);
+        debug_renderer.add_text(
+            text_origin.x,
+            text_origin.y,
+            if counter.unit == "" { counter.name } else { text_buffer },
+            ColorU::new(0, 255, 0, 255),
+            None,
+        );
+
+        set_text!(text_buffer, "Samples: {}", stats.samples);
+
+        debug_renderer.add_text(
+            text_origin.x,
+            text_origin.y + line_height,
+            text_buffer,
+            text_color,
+            None,
+        );
+
+        if stats.samples > 0 {
+            set_text!(text_buffer, "Min: {:.2} {}", stats.min, counter.unit);
+            debug_renderer.add_text(
+                text_origin.x,
+                text_origin.y + line_height * 2.0,
+                text_buffer,
+                text_color,
+                None,
+            );
+
+            set_text!(text_buffer, "Avg: {:.2} {}", stats.avg, counter.unit);
+            debug_renderer.add_text(
+                text_origin.x,
+                text_origin.y + line_height * 3.0,
+                text_buffer,
+                text_color,
+                None,
+            );
+
+            set_text!(text_buffer, "Max: {:.2} {}", stats.max, counter.unit);
+            debug_renderer.add_text(
+                text_origin.x,
+                text_origin.y + line_height * 4.0,
+                text_buffer,
+                text_color,
+                None,
+            );
+        }
+
+        // Widen the background to make room for the text on the right.
+        rect.size.width += 220.0;
+        debug_renderer.add_quad(
+            rect.min_x(),
+            rect.min_y(),
+            rect.max_x(),
+            rect.max_y(),
+            BACKGROUND_COLOR,
+            BACKGROUND_COLOR,
+        );
+
+        let bx1 = graph_rect.max_x();
+        let by1 = graph_rect.max_y();
+
+        let w = graph_rect.size.width / max_samples;
+        let h = graph_rect.size.height;
+        let max_value = stats.max as f32;
+
+        let color_t0 = ColorU::new(0, 255, 0, 255);
+        let color_b0 = ColorU::new(0, 180, 0, 255);
+
+        let color_t2 = ColorU::new(255, 0, 0, 255);
+        let color_b2 = ColorU::new(180, 0, 0, 255);
+
+        for (index, sample) in graph.values.iter().enumerate() {
+            if !sample.is_finite() {
+                // NAN means no sample this frame.
+                continue;
+            }
+            let sample = *sample as f32;
+            // Sample 0 is drawn at the right edge, the following ones further left.
+            let x1 = bx1 - index as f32 * w;
+            let x0 = x1 - w;
+
+            // When every sample is zero the maximum is zero too; dividing by it would
+            // produce NaN coordinates (0/0), so draw an empty bar instead.
+            let y0 = if max_value != 0.0 {
+                by1 - (sample / max_value) * h
+            } else {
+                by1
+            };
+            let y1 = by1;
+
+            let (color_top, color_bottom) = if counter.is_unexpected_value(sample as f64) {
+                (color_t2, color_b2)
+            } else {
+                (color_t0, color_b0)
+            };
+
+            debug_renderer.add_quad(x0, y0, x1, y1, color_top, color_bottom);
+        }
+
+        rect
+    }
+
+
+    /// Draws a dark track with a small marker whose horizontal position follows the
+    /// counter's `change_indicator` value.
+    ///
+    /// The marker is red when the counter reports an unexpected value or an
+    /// unexpected average/maximum, and blue otherwise. Returns the rectangle
+    /// reserved for the indicator.
+    fn draw_change_indicator(
+        counter: &Counter,
+        x: f32, y: f32,
+        debug_renderer: &mut DebugRenderer
+    ) -> default::Rect<f32> {
+        const MARKER_WIDTH: f32 = 20.0;
+        const MARKER_HEIGHT: f32 = 10.0;
+        // The track is 15 markers wide.
+        const TRACK_WIDTH: f32 = 15.0 * MARKER_WIDTH;
+
+        let within_expected_ranges =
+            !counter.has_unexpected_value() && !counter.has_unexpected_avg_max();
+        let marker_color = if within_expected_ranges {
+            ColorU::new(0, 100, 250, 255)
+        } else {
+            ColorU::new(255, 20, 20, 255)
+        };
+
+        let marker_offset = counter.change_indicator as f32 * MARKER_WIDTH;
+
+        // Track.
+        let track_color = ColorU::new(0, 0, 0, 150);
+        debug_renderer.add_quad(
+            x,
+            y,
+            x + TRACK_WIDTH,
+            y + MARKER_HEIGHT,
+            track_color,
+            track_color,
+        );
+
+        // Marker.
+        let marker_left = x + marker_offset;
+        debug_renderer.add_quad(
+            marker_left,
+            y,
+            marker_left + MARKER_WIDTH,
+            y + MARKER_HEIGHT,
+            marker_color,
+            ColorU::new(25, 25, 25, 255),
+        );
+
+        Rect {
+            origin: Point2D::new(x, y),
+            size: Size2D::new(TRACK_WIDTH + 20.0, MARKER_HEIGHT),
+        }
+    }
+
+    /// Draws `label` followed by a horizontal bar made of consecutive segments.
+    ///
+    /// Each entry of `counters` is a color and a cumulative value: its segment
+    /// starts where the previous one stopped and ends at its own value. The last
+    /// entry is the total and maps to the full width of the bar.
+    ///
+    /// Returns the rectangle of the label, widened by the space reserved for the bar.
+    fn draw_bar(
+        label: &str,
+        label_color: ColorU,
+        counters: &[(ColorU, usize)],
+        x: f32, y: f32,
+        debug_renderer: &mut DebugRenderer,
+    ) -> default::Rect<f32> {
+        let x = x + 8.0;
+        let y = y + 24.0;
+        let text_rect = debug_renderer.add_text(
+            x, y,
+            label,
+            label_color,
+            None,
+        );
+
+        let x_base = text_rect.max_x() + 10.0;
+        let width = 300.0;
+        // The last entry holds the total. With no entries or a total of zero there
+        // is nothing to scale against: dividing would produce infinite or NaN
+        // coordinates, so collapse every segment to zero width instead.
+        let total_value = counters.last().map_or(0, |&(_, total)| total);
+        let scale = if total_value > 0 {
+            width / total_value as f32
+        } else {
+            0.0
+        };
+        let mut x_current = x_base;
+
+        for &(color, counter) in counters {
+            let x_stop = x_base + counter as f32 * scale;
+            debug_renderer.add_quad(
+                x_current,
+                text_rect.origin.y,
+                x_stop,
+                text_rect.max_y(),
+                color,
+                color,
+            );
+            x_current = x_stop;
+        }
+
+        let mut total_rect = text_rect;
+        total_rect.size.width += width + 10.0;
+
+        total_rect
+    }
+
+    /// Draws two bars summarizing the GPU cache counters: one for rows (updated,
+    /// allocated) and one for blocks (updated, requested, allocated, total), over a
+    /// background quad.
+    ///
+    /// Counters without a value count as zero. Returns the rectangle covered by
+    /// the background quad.
+    fn draw_gpu_cache_bars(&self, x: f32, mut y: f32, text_buffer: &mut String, debug_renderer: &mut DebugRenderer) -> default::Rect<f32> {
+        let counter_value = |id: usize| self.get(id).unwrap_or(0.0) as usize;
+
+        let updated_blocks = counter_value(GPU_CACHE_BLOCKS_UPDATED);
+        let saved_blocks = counter_value(GPU_CACHE_BLOCKS_SAVED);
+        let allocated_blocks = counter_value(GPU_CACHE_BLOCKS_TOTAL);
+        let allocated_rows = counter_value(GPU_CACHE_ROWS_TOTAL);
+        let updated_rows = counter_value(GPU_CACHE_ROWS_UPDATED);
+        // Requested blocks are those updated plus those saved.
+        let requested_blocks = updated_blocks + saved_blocks;
+        let total_blocks = allocated_rows * MAX_VERTEX_TEXTURE_WIDTH;
+
+        let red = ColorU::new(0xFF, 0, 0, 0xFF);
+        let green = ColorU::new(0, 0xFF, 0, 0xFF);
+        let blue = ColorU::new(0, 0, 0xFF, 0xFF);
+        let black = ColorU::new(0, 0, 0, 0xFF);
+
+        // Rows: updated (red) out of allocated (blue).
+        set_text!(text_buffer, "GPU cache rows ({}):", allocated_rows);
+        let row_segments = [
+            (red, updated_rows),
+            (blue, allocated_rows),
+        ];
+        let rows_rect = Profiler::draw_bar(
+            text_buffer,
+            ColorU::new(0xFF, 0xFF, 0xFF, 0xFF),
+            &row_segments,
+            x, y,
+            debug_renderer,
+        );
+
+        y = rows_rect.max_y();
+
+        // Blocks: updated (red), requested (green), allocated (blue), total (black).
+        let block_segments = [
+            (red, updated_blocks),
+            (green, requested_blocks),
+            (blue, allocated_blocks),
+            (black, total_blocks),
+        ];
+        let blocks_rect = Profiler::draw_bar(
+            "GPU cache blocks",
+            ColorU::new(0xFF, 0xFF, 0, 0xFF),
+            &block_segments,
+            x, y,
+            debug_renderer,
+        );
+
+        let total_rect = rows_rect.union(&blocks_rect).inflate(10.0, 10.0);
+        let left = total_rect.origin.x;
+        let top = total_rect.origin.y;
+        debug_renderer.add_quad(
+            left,
+            top,
+            left + total_rect.size.width,
+            top + total_rect.size.height,
+            ColorF::new(0.1, 0.1, 0.1, 0.8).into(),
+            ColorF::new(0.2, 0.2, 0.2, 0.8).into(),
+        );
+
+        total_rect
+    }
+
+    /// Draws a frame graph for a given frame collection.
+    ///
+    /// Each frame is one row in which its samples are laid out left to right,
+    /// proportionally to their duration. The horizontal scale covers the longest
+    /// frame, but never less than 16ms. A legend listing the tags present is drawn
+    /// to the right of the graph.
+    ///
+    /// Returns the rectangle of the graph including its padding, or a zero
+    /// rectangle when no frame has any sample.
+    fn draw_frame_graph(
+        frame_collection: &ProfilerFrameCollection,
+        x: f32, y: f32,
+        debug_renderer: &mut DebugRenderer,
+    ) -> default::Rect<f32> {
+        // Nothing to draw unless at least one frame has samples.
+        if frame_collection.frames.iter().all(|frame| frame.samples.is_empty()) {
+            return Rect::zero();
+        }
+
+        let graph_rect = Rect::new(
+            Point2D::new(x + GRAPH_PADDING, y + GRAPH_PADDING),
+            Size2D::new(GRAPH_WIDTH, GRAPH_HEIGHT),
+        );
+        let bounding_rect = graph_rect.inflate(GRAPH_PADDING, GRAPH_PADDING);
+
+        debug_renderer.add_quad(
+            bounding_rect.origin.x,
+            bounding_rect.origin.y,
+            bounding_rect.origin.x + bounding_rect.size.width,
+            bounding_rect.origin.y + bounding_rect.size.height,
+            BACKGROUND_COLOR,
+            BACKGROUND_COLOR,
+        );
+
+        let w = graph_rect.size.width;
+
+        // If the longest frame is shorter than 16ms, fix the scale at 16ms so that
+        // the graph is easier to interpret.
+        let baseline_ns = 16_000_000.0; // 16ms
+        let longest_frame = frame_collection.frames
+            .iter()
+            .map(|frame| frame.total_time)
+            .max()
+            .unwrap() as f32;
+        let max_time = longest_frame.max(baseline_ns);
+
+        // Label and color of every tag seen, for the legend.
+        let mut tags_present = FastHashMap::default();
+
+        let mut row_top = graph_rect.origin.y;
+        for frame in frame_collection.frames.iter() {
+            let row_bottom = row_top + GRAPH_FRAME_HEIGHT;
+
+            // Time elapsed in this frame before the current sample.
+            let mut current_ns = 0;
+            for sample in frame.samples.iter() {
+                let x0 = graph_rect.origin.x + w * current_ns as f32 / max_time;
+                current_ns += sample.time_ns;
+                let x1 = graph_rect.origin.x + w * current_ns as f32 / max_time;
+
+                let top_color = sample.tag.color;
+                let mut bottom_color = top_color;
+                bottom_color.a *= 0.5;
+
+                debug_renderer.add_quad(
+                    x0,
+                    row_top,
+                    x1,
+                    row_bottom,
+                    top_color.into(),
+                    bottom_color.into(),
+                );
+
+                tags_present.insert(sample.tag.label, sample.tag.color);
+            }
+
+            row_top = row_bottom;
+        }
+
+        // If the max time is higher than 16ms, show a vertical line at the
+        // 16ms mark.
+        if max_time > baseline_ns {
+            let marker_x = graph_rect.origin.x + w * baseline_ns as f32 / max_time;
+            let marker_height = frame_collection.frames.len() as f32 * GRAPH_FRAME_HEIGHT;
+
+            debug_renderer.add_quad(
+                marker_x,
+                graph_rect.origin.y,
+                marker_x + 4.0,
+                graph_rect.origin.y + marker_height,
+                ColorU::new(120, 00, 00, 150),
+                ColorU::new(120, 00, 00, 100),
+            );
+        }
+
+        // Add a legend to see which color corresponds to which tag.
+        const LEGEND_SIZE: f32 = 20.0;
+        const PADDED_LEGEND_SIZE: f32 = 25.0;
+        if !tags_present.is_empty() {
+            let legend_left = bounding_rect.max_x() + GRAPH_PADDING;
+            debug_renderer.add_quad(
+                legend_left,
+                bounding_rect.origin.y,
+                bounding_rect.max_x() + GRAPH_PADDING + 200.0,
+                bounding_rect.origin.y + tags_present.len() as f32 * PADDED_LEGEND_SIZE + GRAPH_PADDING,
+                BACKGROUND_COLOR,
+                BACKGROUND_COLOR,
+            );
+        }
+
+        // Every legend entry starts at the same horizontal position.
+        let entry_x = bounding_rect.origin.x + bounding_rect.size.width + GRAPH_PADDING * 2.0;
+        for (i, (label, &color)) in tags_present.iter().enumerate() {
+            let entry_y = bounding_rect.origin.y + GRAPH_PADDING + i as f32 * PADDED_LEGEND_SIZE;
+
+            debug_renderer.add_quad(
+                entry_x, entry_y, entry_x + LEGEND_SIZE, entry_y + LEGEND_SIZE,
+                color.into(),
+                color.into(),
+            );
+
+            debug_renderer.add_text(
+                entry_x + PADDED_LEGEND_SIZE,
+                entry_y + LEGEND_SIZE * 0.75,
+                label,
+                ColorU::new(255, 255, 0, 255),
+                None,
+            );
+        }
+
+        bounding_rect
+    }
+
+    /// Draws every item of the profiler UI (`self.ui`) with `debug_renderer`.
+    ///
+    /// Items are stacked top to bottom in columns. Layout starts at
+    /// `(20, 150)`; each item returns the rectangle it occupied, which is
+    /// used to advance the cursor. A new column is started either explicitly
+    /// (`Item::Column`), or automatically once the cursor gets within 100
+    /// pixels of the bottom of the device (`device_size.height`). `Item::Row`
+    /// starts a new row below everything drawn so far.
+    ///
+    /// `_frame_index` is currently unused.
+    pub fn draw_profile(
+        &mut self,
+        _frame_index: u64,
+        debug_renderer: &mut DebugRenderer,
+        device_size: DeviceIntSize,
+    ) {
+        let x_start = 20.0;
+        let mut y_start = 150.0;
+        let default_column_width = 400.0;
+
+        // set_text!(..) into this string instead of using format!(..) to avoid
+        // unnecessary allocations.
+        let mut text_buffer = String::with_capacity(32);
+
+        // Width of the current column: at least the default, grown to fit the
+        // widest item drawn in it.
+        let mut column_width = default_column_width;
+        // Lowest y reached by any finished column of the current row.
+        let mut max_y = y_start;
+
+        // Layout cursor: top-left corner of the next item.
+        let mut x = x_start;
+        let mut y = y_start;
+
+        for elt in &self.ui {
+            // Each arm draws one item and yields the rectangle it covers.
+            // `Column` and `Row` only move the cursor and `continue`.
+            let rect = match elt {
+                Item::Counters(indices) => {
+                    Profiler::draw_counters(&self.counters, &indices, x, y, &mut text_buffer, debug_renderer)
+                }
+                Item::Graph(idx) => {
+                    Profiler::draw_graph(&self.counters[*idx], x, y, &mut text_buffer, debug_renderer)
+                }
+                Item::ChangeIndicator(idx) => {
+                    Profiler::draw_change_indicator(&self.counters[*idx], x, y, debug_renderer)
+                }
+                Item::GpuTimeQueries => {
+                    Profiler::draw_frame_graph(&self.gpu_frames, x, y, debug_renderer)
+                }
+                Item::GpuCacheBars => {
+                    self.draw_gpu_cache_bars(x, y, &mut text_buffer, debug_renderer)
+                }
+                Item::PaintPhaseGraph => {
+                    Profiler::draw_frame_graph(&self.frame_stats, x, y, debug_renderer)
+                }
+                Item::Text(text) => {
+                    // Padding around the text, in the same units as x and y.
+                    let p = 10.0;
+                    let mut rect = debug_renderer.add_text(
+                        x + p,
+                        y + p,
+                        &text,
+                        ColorU::new(255, 255, 255, 255),
+                        None,
+                    );
+                    rect = rect.inflate(p, p);
+
+                    // Background quad covering the padded text rectangle.
+                    debug_renderer.add_quad(
+                        rect.origin.x,
+                        rect.origin.y,
+                        rect.max_x(),
+                        rect.max_y(),
+                        BACKGROUND_COLOR,
+                        BACKGROUND_COLOR,
+                    );
+
+                    rect
+                }
+                Item::Fps => {
+                    // The displayed value is the number of frame timestamps
+                    // currently held in the last-second window.
+                    let fps = self.frame_timestamps_within_last_second.len();
+                    set_text!(&mut text_buffer, "{} fps", fps);
+                    let mut rect = debug_renderer.add_text(
+                        x + PROFILE_PADDING,
+                        y + PROFILE_PADDING + 5.0,
+                        &text_buffer,
+                        ColorU::new(255, 255, 255, 255),
+                        None,
+                    );
+                    rect = rect.inflate(PROFILE_PADDING, PROFILE_PADDING);
+
+                    debug_renderer.add_quad(
+                        rect.min_x(),
+                        rect.min_y(),
+                        rect.max_x(),
+                        rect.max_y(),
+                        BACKGROUND_COLOR,
+                        BACKGROUND_COLOR,
+                    );
+
+                    rect
+                }
+                Item::Space => {
+                    // Nothing is drawn: a zero-width rectangle that only
+                    // advances the cursor by PROFILE_SPACING.
+                    Rect { origin: Point2D::new(x, y), size: Size2D::new(0.0, PROFILE_SPACING) }
+                }
+                Item::Column => {
+                    // Explicit column break: move right past the current
+                    // column and go back to the top of the current row.
+                    max_y = max_y.max(y);
+                    x += column_width + PROFILE_SPACING;
+                    y = y_start;
+                    column_width = default_column_width;
+
+                    continue;
+                }
+                Item::Row => {
+                    // Explicit row break: restart at the left edge, below the
+                    // lowest point reached so far.
+                    max_y = max_y.max(y);
+                    y_start = max_y + PROFILE_SPACING;
+                    y = y_start;
+                    x = x_start;
+                    column_width = default_column_width;
+
+                    continue;
+                }
+            };
+
+            column_width = column_width.max(rect.size.width);
+            y = rect.max_y();
+
+            // Automatic column break when the cursor gets too close to the
+            // bottom of the device.
+            if y > device_size.height as f32 - 100.0 {
+                max_y = max_y.max(y);
+                x += column_width + PROFILE_SPACING;
+                y = y_start;
+                column_width = default_column_width;
+            }
+        }
+    }
+
+    /// Writes one `<name> <value><unit>` line to `sink` for every counter
+    /// that currently holds a finite value, stopping at the first I/O error.
+    #[cfg(feature = "capture")]
+    pub fn dump_stats(&self, sink: &mut dyn std::io::Write) -> std::io::Result<()> {
+        self.counters
+            .iter()
+            .filter(|counter| counter.value.is_finite())
+            .try_for_each(|counter| {
+                writeln!(sink, "{} {:?}{}", counter.name, counter.value, counter.unit)
+            })
+    }
+}
+
+/// Defines the interface for hooking up an external profiler to WR.
+///
+/// Implementations must be `Send + Sync`: the installed hooks are stored in
+/// the `PROFILER_HOOKS` global and are only ever called through `&self`.
+pub trait ProfilerHooks : Send + Sync {
+    /// Register a thread with the profiler.
+    fn register_thread(&self, thread_name: &str);
+
+    /// Unregister a thread with the profiler.
+    fn unregister_thread(&self);
+
+    /// Called at the beginning of a profile scope.
+    fn begin_marker(&self, label: &str);
+
+    /// Called at the end of a profile scope, with the same label that was
+    /// given to `begin_marker`.
+    fn end_marker(&self, label: &str);
+
+    /// Called to mark an event happening.
+    fn event_marker(&self, label: &str);
+
+    /// Called with a duration to indicate a text marker that just ended. Text
+    /// markers allow different types of entries to be recorded on the same row
+    /// in the timeline, by adding labels to the entry.
+    ///
+    /// This variant is also useful when the caller only wants to record events
+    /// longer than a certain threshold, and thus they don't know in advance
+    /// whether the event will qualify.
+    fn add_text_marker(&self, label: &str, text: &str, duration: Duration);
+
+    /// Returns true if the current thread is being profiled.
+    fn thread_is_being_profiled(&self) -> bool;
+}
+
+/// The current global profiler callbacks, if set by embedder.
+///
+/// This is a `static mut`, so every read or write requires `unsafe`.
+/// `set_profiler_hooks` is the setter provided by this module.
+pub static mut PROFILER_HOOKS: Option<&'static dyn ProfilerHooks> = None;
+
+/// Installs the global profiler callbacks; pass `None` to disable profiling.
+///
+/// Must be called before any WR instance has been created. Once WR has been
+/// initialized the call is ignored and the previous hooks stay in place.
+pub fn set_profiler_hooks(hooks: Option<&'static dyn ProfilerHooks>) {
+    if wr_has_been_initialized() {
+        return;
+    }
+
+    // SAFETY: presumably nothing reads the hooks before WR is initialized, so
+    // this write cannot race with a reader -- TODO confirm against callers.
+    unsafe {
+        PROFILER_HOOKS = hooks;
+    }
+}
+
+/// A simple RAII style struct to manage a profile scope.
+///
+/// Creating it calls the `begin_marker` hook and dropping it calls the
+/// `end_marker` hook, both with `name` as the label.
+pub struct ProfileScope {
+    /// Label handed to the profiler hooks for this scope.
+    name: &'static str,
+}
+
+
+/// Registers a thread under `thread_name` with the installed profiler hooks.
+/// Does nothing when no hooks are set.
+pub fn register_thread(thread_name: &str) {
+    // SAFETY: copies the hook reference out of the global; assumes the global
+    // is not written concurrently (see `set_profiler_hooks`).
+    let installed = unsafe { PROFILER_HOOKS };
+
+    if let Some(hooks) = installed {
+        hooks.register_thread(thread_name);
+    }
+}
+
+
+/// Unregisters a thread from the installed profiler hooks.
+/// Does nothing when no hooks are set.
+pub fn unregister_thread() {
+    // SAFETY: copies the hook reference out of the global; assumes the global
+    // is not written concurrently (see `set_profiler_hooks`).
+    let installed = unsafe { PROFILER_HOOKS };
+
+    if let Some(hooks) = installed {
+        hooks.unregister_thread();
+    }
+}
+
+/// Records a text marker of the given duration that just ended, by
+/// forwarding it to the installed profiler hooks (no-op when none are set).
+pub fn add_text_marker(label: &str, text: &str, duration: Duration) {
+    // SAFETY: copies the hook reference out of the global; assumes the global
+    // is not written concurrently (see `set_profiler_hooks`).
+    let installed = unsafe { PROFILER_HOOKS };
+
+    if let Some(hooks) = installed {
+        hooks.add_text_marker(label, text, duration);
+    }
+}
+
+/// Records an event marker with the given label, by forwarding it to the
+/// `event_marker` hook (no-op when no hooks are set).
+pub fn add_event_marker(label: &str) {
+    // SAFETY: copies the hook reference out of the global; assumes the global
+    // is not written concurrently (see `set_profiler_hooks`).
+    let installed = unsafe { PROFILER_HOOKS };
+
+    if let Some(hooks) = installed {
+        hooks.event_marker(label);
+    }
+}
+
+/// Returns true if the current thread is being profiled, as reported by the
+/// installed profiler hooks. Returns false when no hooks are set.
+pub fn thread_is_being_profiled() -> bool {
+    // SAFETY: copies the hook reference out of the global; assumes the global
+    // is not written concurrently (see `set_profiler_hooks`).
+    let installed = unsafe { PROFILER_HOOKS };
+
+    match installed {
+        Some(hooks) => hooks.thread_is_being_profiled(),
+        None => false,
+    }
+}
+
+impl ProfileScope {
+    /// Begins a new profile scope labelled `name`.
+    ///
+    /// Calls the `begin_marker` hook if profiler hooks are installed; the
+    /// matching `end_marker` call is made when the returned value is dropped.
+    pub fn new(name: &'static str) -> Self {
+        // SAFETY: copies the hook reference out of the global; assumes the
+        // global is not written concurrently (see `set_profiler_hooks`).
+        let installed = unsafe { PROFILER_HOOKS };
+
+        if let Some(hooks) = installed {
+            hooks.begin_marker(name);
+        }
+
+        ProfileScope { name }
+    }
+}
+
+impl Drop for ProfileScope {
+    /// Ends the profile scope by calling the `end_marker` hook, if profiler
+    /// hooks are installed.
+    fn drop(&mut self) {
+        // SAFETY: copies the hook reference out of the global; assumes the
+        // global is not written concurrently (see `set_profiler_hooks`).
+        let installed = unsafe { PROFILER_HOOKS };
+
+        if let Some(hooks) = installed {
+            hooks.end_marker(self.name);
+        }
+    }
+}
+
+/// A helper macro to define profile scopes.
+///
+/// Expands to a `let` binding holding a `ProfileScope`, so the scope begins
+/// where the macro is invoked and ends when the enclosing block is left.
+macro_rules! profile_marker {
+    ($string:expr) => {
+        let _scope = $crate::profiler::ProfileScope::new($string);
+    };
+}
+
+/// Label and color identifying one kind of sample in the frame graphs.
+#[derive(Debug, Clone)]
+pub struct GpuProfileTag {
+    /// Text shown next to the color in the graph legend.
+    pub label: &'static str,
+    /// Color used to draw samples carrying this tag.
+    pub color: ColorF,
+}
+
+/// Ranges of expected value for a profile counter.
+#[derive(Clone, Debug)]
+pub struct Expected<T> {
+    /// Expected range for the counter's values, if any.
+    pub range: Option<Range<T>>,
+    /// Expected range for the counter's average, if any.
+    pub avg: Option<Range<T>>,
+}
+
+impl<T> Expected<T> {
+    /// No expectation at all: neither a value range nor an average range.
+    const fn none() -> Self {
+        Expected {
+            range: None,
+            avg: None,
+        }
+    }
+}
+
+/// Builds an expectation with the given value range and no average range.
+const fn expected<T>(range: Range<T>) -> Expected<T> {
+    Expected {
+        range: Some(range),
+        avg: None,
+    }
+}
+
+impl Expected<f64> {
+    /// Returns `self` with the expected average range set to `avg`.
+    const fn avg(mut self, avg: Range<f64>) -> Self {
+        self.avg = Some(avg);
+        self
+    }
+}
+
+impl Expected<i64> {
+    /// Returns `self` with the expected average range set to `avg`.
+    const fn avg(mut self, avg: Range<i64>) -> Self {
+        self.avg = Some(avg);
+        self
+    }
+
+    /// Converts both ranges to `f64` bounds; absent ranges stay absent.
+    fn into_float(self) -> Expected<f64> {
+        // Lossy for magnitudes beyond 2^53, like any `i64 as f64` cast.
+        fn widen(r: Range<i64>) -> Range<f64> {
+            r.start as f64 .. r.end as f64
+        }
+
+        Expected {
+            range: self.range.map(widen),
+            avg: self.avg.map(widen),
+        }
+    }
+}
+
+/// Static description from which a `Counter` is created (see `Counter::new`).
+pub struct CounterDescriptor {
+    /// Display name of the counter.
+    pub name: &'static str,
+    /// Unit string printed after the counter's value.
+    pub unit: &'static str,
+    /// NOTE(review): presumably the counter's position in the profiler's
+    /// counter list -- not used by `Counter::new`; confirm against callers.
+    pub index: usize,
+    /// How the counter's value is formatted.
+    pub show_as: ShowAs,
+    /// Ranges outside of which the counter is considered unexpected.
+    pub expected: Expected<f64>,
+}
+
+/// A named value tracked over time, with a per-time-slice average and max.
+#[derive(Debug)]
+pub struct Counter {
+    /// Display name of the counter.
+    pub name: &'static str,
+    /// Unit string printed after the value.
+    pub unit: &'static str,
+    /// How the value is formatted by `write_value`.
+    pub show_as: ShowAs,
+    /// Ranges outside of which the counter is considered unexpected.
+    pub expected: Expected<f64>,
+
+    /// The value set since the last call to `update`; NaN when none was set.
+    value: f64,
+    /// Number of samples in the current time slice.
+    num_samples: u64,
+    /// Sum of the values recorded during the current time slice.
+    sum: f64,
+    /// The max value in in-progress time slice.
+    next_max: f64,
+    /// The max value of the previous time slice (displayed).
+    max: f64,
+    /// The average value of the previous time slice (displayed).
+    avg: f64,
+    /// Incremented (modulo 15) on every update that saw a finite value.
+    change_indicator: u8,
+
+    /// History of values, present only while the graph is enabled.
+    graph: Option<Graph>,
+}
+
+impl Counter {
+    /// Creates a counter from its descriptor. The counter starts without a
+    /// value (NaN), with zeroed statistics and with its graph disabled.
+    pub fn new(descriptor: &CounterDescriptor) -> Self {
+        Counter {
+            name: descriptor.name,
+            unit: descriptor.unit,
+            show_as: descriptor.show_as,
+            expected: descriptor.expected.clone(),
+            value: std::f64::NAN,
+            num_samples: 0,
+            sum: 0.0,
+            next_max: 0.0,
+            max: 0.0,
+            avg: 0.0,
+            change_indicator: 0,
+            graph: None,
+        }
+    }
+
+    /// Sets the current value of the counter.
+    pub fn set_f64(&mut self, val: f64) {
+        self.value = val;
+    }
+
+    /// Sets the current value from anything that converts losslessly to `f64`.
+    pub fn set<T>(&mut self, val: T) where T: Into<f64> {
+        let as_float: f64 = val.into();
+        self.set_f64(as_float);
+    }
+
+    /// Returns the current value, or `None` if it is not finite (which is the
+    /// case when no value was set since the last update).
+    pub fn get(&self) -> Option<f64> {
+        Some(self.value).filter(|current| current.is_finite())
+    }
+
+    /// Replaces the content of `output` with the displayed average and max,
+    /// formatted according to `show_as`.
+    pub fn write_value(&self, output: &mut String) {
+        match self.show_as {
+            ShowAs::Int => {
+                set_text!(output, "{:.0} {} (max: {:.0})", self.avg.round(), self.unit, self.max.round());
+            }
+            ShowAs::Float => {
+                set_text!(output, "{:.2} {} (max: {:.2})", self.avg, self.unit, self.max);
+            }
+        }
+    }
+
+    /// Starts recording a history of values; no-op if already recording.
+    pub fn enable_graph(&mut self, max_samples: usize) {
+        if self.graph.is_none() {
+            self.graph = Some(Graph::new(max_samples));
+        }
+    }
+
+    /// Stops recording and drops the history of values.
+    pub fn disable_graph(&mut self) {
+        self.graph = None;
+    }
+
+    /// Returns true if `value` is finite and at or above the end of the
+    /// expected range. Always false when there is no expected range.
+    pub fn is_unexpected_value(&self, value: f64) -> bool {
+        match &self.expected.range {
+            Some(range) => value.is_finite() && value >= range.end,
+            None => false,
+        }
+    }
+
+    /// Same as `is_unexpected_value`, applied to the current value.
+    pub fn has_unexpected_value(&self) -> bool {
+        self.is_unexpected_value(self.value)
+    }
+
+    /// Returns true if the displayed max is at or above the end of the
+    /// expected range, or if the displayed average is outside of the
+    /// expected average range.
+    pub fn has_unexpected_avg_max(&self) -> bool {
+        let max_is_unexpected = self.expected.range.as_ref().map_or(false, |range| {
+            self.max.is_finite() && self.max >= range.end
+        });
+
+        let avg_is_unexpected = self.expected.avg.as_ref().map_or(false, |range| {
+            self.avg < range.start || self.avg >= range.end
+        });
+
+        max_is_unexpected || avg_is_unexpected
+    }
+
+    /// Folds the current value into the in-progress time slice and clears it.
+    ///
+    /// When `update_avg` is true the time slice ends: the displayed average
+    /// and max are refreshed from it and a new slice is started.
+    fn update(&mut self, update_avg: bool) {
+        if self.value.is_finite() {
+            self.next_max = self.next_max.max(self.value);
+            self.sum += self.value;
+            self.num_samples += 1;
+            self.change_indicator = (self.change_indicator + 1) % 15;
+        }
+
+        // The graph receives the raw value, including NaN when nothing was set.
+        if let Some(graph) = &mut self.graph {
+            graph.set(self.value);
+        }
+
+        self.value = std::f64::NAN;
+
+        if !update_avg {
+            return;
+        }
+
+        // With no sample in the averaging window, just show zero.
+        let (avg, max) = if self.num_samples > 0 {
+            (self.sum / self.num_samples as f64, self.next_max)
+        } else {
+            (0.0, 0.0)
+        };
+        self.avg = avg;
+        self.max = max;
+
+        self.sum = 0.0;
+        self.num_samples = 0;
+        self.next_max = std::f64::MIN;
+    }
+}
+
+/// State of one slot of a `TransactionProfile`.
+#[derive(Copy, Clone, Debug)]
+pub enum Event {
+    /// A timer is running; holds its start time in nanoseconds.
+    Start(u64),
+    /// A recorded value (for timers, the elapsed time in milliseconds).
+    Value(f64),
+    /// Nothing was recorded.
+    None,
+}
+
+// std::convert::From/TryFrom can't deal with integer to f64 so we roll our own...
+/// Conversion of the numeric types accepted by `TransactionProfile` to `f64`.
+pub trait EventValue {
+    /// Converts the value to `f64` with the semantics of an `as` cast.
+    fn into_f64(self) -> f64;
+}
+
+impl EventValue for f64 {
+    fn into_f64(self) -> f64 {
+        self
+    }
+}
+
+impl EventValue for f32 {
+    fn into_f64(self) -> f64 {
+        self as f64
+    }
+}
+
+impl EventValue for u32 {
+    fn into_f64(self) -> f64 {
+        self as f64
+    }
+}
+
+impl EventValue for i32 {
+    fn into_f64(self) -> f64 {
+        self as f64
+    }
+}
+
+impl EventValue for u64 {
+    fn into_f64(self) -> f64 {
+        self as f64
+    }
+}
+
+impl EventValue for usize {
+    fn into_f64(self) -> f64 {
+        self as f64
+    }
+}
+
+/// A container for profiling information that moves along the rendering pipeline
+/// and is handed off to the profiler at the end.
+pub struct TransactionProfile {
+    /// One slot per profiler event, indexed by event id.
+    pub events: Vec<Event>,
+}
+
+impl TransactionProfile {
+    /// Creates a profile with `NUM_PROFILER_EVENTS` slots, all `Event::None`.
+    pub fn new() -> Self {
+        TransactionProfile {
+            events: vec![Event::None; NUM_PROFILER_EVENTS],
+        }
+    }
+
+    /// Starts the timer `id` by storing the current time in its slot.
+    pub fn start_time(&mut self, id: usize) {
+        let ns = precise_time_ns();
+        self.events[id] = Event::Start(ns);
+    }
+
+    /// Stops the timer `id` and returns the elapsed time in milliseconds.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the slot does not hold a running timer, i.e. if this call is
+    /// not matched with a previous `start_time`.
+    pub fn end_time(&mut self, id: usize) -> f64 {
+        self.end_time_if_started(id).unwrap()
+    }
+
+    /// Similar to end_time, but doesn't panic if not matched with start_time.
+    ///
+    /// On success the slot is replaced by the elapsed time in milliseconds,
+    /// which is also returned. Otherwise the slot is left untouched.
+    pub fn end_time_if_started(&mut self, id: usize) -> Option<f64> {
+        if let Event::Start(start) = self.events[id] {
+            let now = precise_time_ns();
+            let time_ns = now - start;
+
+            let time_ms = ns_to_ms(time_ns);
+            self.events[id] = Event::Value(time_ms);
+
+            Some(time_ms)
+        } else {
+            None
+        }
+    }
+
+    /// Stores `value` in slot `id`, replacing whatever it held.
+    pub fn set<T>(&mut self, id: usize, value: T) where T: EventValue {
+        self.set_f64(id, value.into_f64());
+    }
+
+    /// Stores `value` in slot `id`, replacing whatever it held.
+    pub fn set_f64(&mut self, id: usize, value: f64) {
+        self.events[id] = Event::Value(value);
+    }
+
+    /// Returns the value in slot `id`, or `None` if the slot is empty or
+    /// holds a running timer.
+    pub fn get(&self, id: usize) -> Option<f64> {
+        if let Event::Value(val) = self.events[id] {
+            Some(val)
+        } else {
+            None
+        }
+    }
+
+    /// Like `get`, falling back to `or` when the slot holds no value.
+    pub fn get_or(&self, id: usize, or: f64) -> f64 {
+        self.get(id).unwrap_or(or)
+    }
+
+    /// Adds `n` to the value in slot `id`; an empty slot counts as zero.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the slot holds a running timer.
+    pub fn add<T>(&mut self, id: usize, n: T) where T: EventValue {
+        let n = n.into_f64();
+
+        let evt = &mut self.events[id];
+
+        let val = match *evt {
+            Event::Value(v) => v + n,
+            Event::None => n,
+            Event::Start(..) => {
+                panic!("TransactionProfile::add called on a slot that holds a running timer");
+            }
+        };
+
+        *evt = Event::Value(val);
+    }
+
+    /// Adds one to the value in slot `id` (see `add`).
+    pub fn inc(&mut self, id: usize) {
+        self.add(id, 1.0);
+    }
+
+    /// Moves the events out into a new profile, leaving `self` with an empty
+    /// event list. Use `take_and_reset` to keep `self` usable afterwards.
+    pub fn take(&mut self) -> Self {
+        TransactionProfile {
+            events: std::mem::take(&mut self.events),
+        }
+    }
+
+    /// Moves the events out into a new profile and resets `self` to a fresh
+    /// profile, as created by `new`.
+    pub fn take_and_reset(&mut self) -> Self {
+        let events = std::mem::take(&mut self.events);
+
+        *self = TransactionProfile::new();
+
+        TransactionProfile { events }
+    }
+
+    /// Merges `other` into `self` slot by slot, and clears the merged slots
+    /// of `other`.
+    ///
+    /// - Two values are merged into their max.
+    /// - A value wins over an empty slot or a running timer.
+    /// - An empty slot in `self` takes whatever `other` holds.
+    /// - Two running timers are merged into the latest start time.
+    ///
+    /// Slots are paired up to the shorter of the two event lists, so merging
+    /// a profile that was emptied by `take` is a no-op rather than a panic.
+    pub fn merge(&mut self, other: &mut Self) {
+        for (mine, theirs) in self.events.iter_mut().zip(other.events.iter_mut()) {
+            match (*mine, *theirs) {
+                (Event::Value(v1), Event::Value(v2)) => {
+                    *mine = Event::Value(v1.max(v2));
+                }
+                (Event::Value(_), _) => {}
+                (_, Event::Value(v2)) => {
+                    *mine = Event::Value(v2);
+                }
+                (Event::None, evt) => {
+                    *mine = evt;
+                }
+                (Event::Start(s1), Event::Start(s2)) => {
+                    *mine = Event::Start(s1.max(s2));
+                }
+                // (Start, None): keep our running timer.
+                _ => {}
+            }
+            *theirs = Event::None;
+        }
+    }
+
+    /// Resets every slot to `Event::None`.
+    pub fn clear(&mut self) {
+        for evt in &mut self.events {
+            *evt = Event::None;
+        }
+    }
+}
+
+impl GlyphRasterizeProfiler for TransactionProfile {
+    /// Starts the `GLYPH_RESOLVE_TIME` timer.
+    fn start_time(&mut self) {
+        self.events[GLYPH_RESOLVE_TIME] = Event::Start(precise_time_ns());
+    }
+
+    /// Stops the `GLYPH_RESOLVE_TIME` timer and returns the elapsed time in
+    /// milliseconds. Panics if the timer was not started.
+    fn end_time(&mut self) -> f64 {
+        let elapsed_ms = self.end_time_if_started(GLYPH_RESOLVE_TIME);
+        elapsed_ms.unwrap()
+    }
+
+    /// Records `value` in the `RASTERIZED_GLYPHS` slot.
+    fn set(&mut self, value: f64) {
+        self.set_f64(RASTERIZED_GLYPHS, value);
+    }
+}
+
+/// Summary of the finite values held by a `Graph`.
+#[derive(Debug)]
+pub struct GraphStats {
+    /// Smallest finite value (`f64::MAX` when there is none).
+    pub min: f64,
+    /// Average of the finite values (zero when there is none).
+    pub avg: f64,
+    /// Largest finite value (`-f64::MAX` when there is none).
+    pub max: f64,
+    /// Sum of the finite values.
+    pub sum: f64,
+    /// Number of finite values.
+    pub samples: usize,
+}
+
+/// A bounded history of values, most recent first.
+#[derive(Debug)]
+pub struct Graph {
+    values: VecDeque<f64>,
+}
+
+impl Graph {
+    /// Creates an empty history with room reserved for `max_samples` values.
+    fn new(max_samples: usize) -> Self {
+        let mut history = VecDeque::new();
+        history.reserve(max_samples);
+
+        Graph { values: history }
+    }
+
+    /// Pushes `val` at the front, dropping the oldest value first if the
+    /// history has reached its capacity.
+    fn set(&mut self, val: f64) {
+        let is_full = self.values.len() == self.values.capacity();
+        if is_full {
+            self.values.pop_back();
+        }
+        self.values.push_front(val);
+    }
+
+    /// Computes min, max, sum and average over the finite values of the
+    /// history. Non-finite values (such as NaN) are ignored.
+    pub fn stats(&self) -> GraphStats {
+        let mut stats = GraphStats {
+            min: f64::MAX,
+            avg: 0.0,
+            max: -f64::MAX,
+            sum: 0.0,
+            samples: 0,
+        };
+
+        for &value in self.values.iter().filter(|value| value.is_finite()) {
+            stats.min = stats.min.min(value);
+            stats.max = stats.max.max(value);
+            stats.sum += value;
+            stats.samples += 1;
+        }
+
+        if stats.samples > 0 {
+            stats.avg = stats.sum / stats.samples as f64;
+        }
+
+        stats
+    }
+}
+
+/// How a counter's average and max are formatted for display.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum ShowAs {
+    /// Printed with two decimals.
+    Float,
+    /// Rounded and printed without decimals.
+    Int,
+}
+
+/// The timed samples of one frame, as drawn by the frame graphs.
+struct ProfilerFrame {
+    /// Total time of the frame, in nanoseconds.
+    total_time: u64,
+    /// The individual samples, each with its tag and duration.
+    samples: Vec<GpuTimer>,
+}
+
+/// The most recent frames, newest first.
+struct ProfilerFrameCollection {
+    frames: VecDeque<ProfilerFrame>,
+}
+
+impl ProfilerFrameCollection {
+    /// Creates an empty collection.
+    fn new() -> Self {
+        let frames = VecDeque::new();
+        ProfilerFrameCollection { frames }
+    }
+
+    /// Adds `frame` at the front, evicting the oldest frame first when the
+    /// collection already holds the maximum number of frames.
+    fn push(&mut self, frame: ProfilerFrame) {
+        const MAX_FRAMES: usize = 20;
+
+        let is_full = self.frames.len() == MAX_FRAMES;
+        if is_full {
+            self.frames.pop_back();
+        }
+        self.frames.push_front(frame);
+    }
+}
+
+impl From<FullFrameStats> for ProfilerFrame {
+    /// Builds a frame with one sample per paint phase (Gecko display list,
+    /// WR display list, scene build, frame build), converting the times of
+    /// `stats` from milliseconds to nanoseconds.
+    fn from(stats: FullFrameStats) -> ProfilerFrame {
+        // One tagged sample lasting `time_ms` milliseconds.
+        fn phase(time_ms: f64, label: &'static str, color: ColorF) -> GpuTimer {
+            GpuTimer {
+                tag: GpuProfileTag { label, color },
+                time_ns: ms_to_ns(time_ms),
+            }
+        }
+
+        let green = ColorF { r: 0.0, g: 1.0, b: 0.0, a: 1.0 };
+        let cyan = ColorF { r: 0.0, g: 1.0, b: 1.0, a: 1.0 };
+        let magenta = ColorF { r: 1.0, g: 0.0, b: 1.0, a: 1.0 };
+        let red = ColorF { r: 1.0, g: 0.0, b: 0.0, a: 1.0 };
+
+        let samples = vec![
+            phase(stats.gecko_display_list_time, "Gecko DL", green),
+            phase(stats.wr_display_list_time, "WR DL", cyan),
+            phase(stats.scene_build_time, "Scene Build", magenta),
+            phase(stats.frame_build_time, "Frame Build", red),
+        ];
+
+        let total_time = ms_to_ns(stats.total());
+
+        ProfilerFrame { total_time, samples }
+    }
+}
+
+/// Converts a duration from nanoseconds to (fractional) milliseconds.
+pub fn ns_to_ms(ns: u64) -> f64 {
+    const NS_PER_MS: f64 = 1_000_000.0;
+    ns as f64 / NS_PER_MS
+}
+
+/// Converts a duration from milliseconds to whole nanoseconds.
+///
+/// The fractional part is truncated; the float-to-integer cast maps negative
+/// values and NaN to zero and saturates at `u64::MAX`.
+pub fn ms_to_ns(ms: f64) -> u64 {
+    const NS_PER_MS: f64 = 1_000_000.0;
+    let ns = ms * NS_PER_MS;
+    ns as u64
+}
+
+/// Converts a size in bytes to megabytes, with one megabyte being 10^6 bytes.
+pub fn bytes_to_mb(bytes: usize) -> f64 {
+    const BYTES_PER_MB: f64 = 1_000_000.0;
+    bytes as f64 / BYTES_PER_MB
+}
+
+/// One element of the profiler UI, laid out and drawn by `draw_profile`.
+#[derive(Debug, PartialEq)]
+enum Item {
+    /// A list of counters, given by their indices.
+    Counters(Vec<usize>),
+    /// The graph of the counter at the given index.
+    Graph(usize),
+    /// The change indicator of the counter at the given index.
+    ChangeIndicator(usize),
+    /// A frames-per-second readout.
+    Fps,
+    /// The frame graph of the GPU frames.
+    GpuTimeQueries,
+    /// The GPU cache bars.
+    GpuCacheBars,
+    /// The frame graph of the frame stats (paint phases).
+    PaintPhaseGraph,
+    /// A piece of text drawn as is.
+    Text(String),
+    /// Vertical space between two items.
+    Space,
+    /// Starts a new column.
+    Column,
+    /// Starts a new row.
+    Row,
+}
+
diff --git a/gfx/wr/webrender/src/rectangle_occlusion.rs b/gfx/wr/webrender/src/rectangle_occlusion.rs
new file mode 100644
index 0000000000..a79e4ba026
--- /dev/null
+++ b/gfx/wr/webrender/src/rectangle_occlusion.rs
@@ -0,0 +1,208 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! A simple occlusion culling algorithm for axis-aligned rectangles.
+//!
+//! ## Output
+//!
+//! Occlusion culling results in two lists of rectangles:
+//! 
+//! - The opaque list should be rendered first. None of its rectangles overlap so order doesn't matter
+//!   within the opaque pass.
+//! - The non-opaque list (or alpha list) which should be rendered in back-to-front order after the opaque pass.
+//!
+//! The output has minimal overdraw (no overdraw at all for opaque items and as little as possible for alpha ones).
+//!
+//! ## Algorithm overview
+//!
+//! The occlusion culling algorithm works in front-to-back order, accumulating rectangles in opaque and non-opaque lists.
+//! Each time a rectangle is added, it is first tested against existing opaque rectangles and potentially split into visible
+//! sub-rectangles, or even discarded completely. The front-to-back order ensures that once a rectangle is added it does not
+//! have to be modified again, making the underlying data structure trivial (append-only).
+//!
+//! ## Splitting
+//!
+//! Partially visible rectangles are split into up to 4 visible sub-rectangles by each intersecting occluder.
+//!
+//! ```ascii
+//!  +----------------------+       +----------------------+
+//!  | rectangle            |       |                      |
+//!  |                      |       |                      |
+//!  |  +-----------+       |       +--+-----------+-------+
+//!  |  |occluder   |       |  -->  |  |\\\\\\\\\\\|       |
+//!  |  +-----------+       |       +--+-----------+-------+
+//!  |                      |       |                      |
+//!  +----------------------+       +----------------------+
+//! ```
+//!
+//! In the example above the rectangle is split into 4 visible parts with the central occluded part left out.
+//!
+//! This implementation favors longer horizontal bands instead of creating nine-patches to deal with the corners.
+//! The advantage is that it produces fewer rectangles which is good for the performance of the algorithm and
+//! for SWGL which likes long horizontal spans, however it would cause artifacts if the resulting rectangles
+//! were to be drawn with a non-axis-aligned transformation.
+//!
+//! ## Performance
+//!
+//! The cost of the algorithm grows with the number of opaque rectangles as each new rectangle is tested against
+//! all previously added opaque rectangles.
+//!
+//! Note that opaque rectangles can either be added as opaque or non-opaque. This means a trade-off between
+//! overdraw and number of rectangles can be explored to adjust performance: Small opaque rectangles, especially
+//! towards the front of the scene, could be added as non-opaque to avoid causing many splits while adding only 
+//! a small amount of overdraw.
+//!
+//! This implementation is intended to be used with a small number of (opaque) items. A similar implementation
+//! could use a spatial acceleration structure for opaque rectangles to perform better with a large number of
+//! occluders.
+//!
+
+use euclid::point2;
+use smallvec::SmallVec;
+use api::units::*;
+
+/// A visible part of a rectangle after occlusion culling.
+#[derive(Debug, PartialEq)]
+pub struct Item {
+    /// The visible fragment, in device space.
+    pub rectangle: DeviceBox2D,
+    /// The key that was passed to `FrontToBackBuilder::add` for the rectangle
+    /// this fragment comes from.
+    pub key: usize,
+}
+
+/// A builder that applies occlusion culling with rectangles provided in front-to-back order.
+pub struct FrontToBackBuilder {
+    /// Visible fragments of the opaque rectangles; these act as occluders
+    /// for every rectangle added later.
+    opaque_items: Vec<Item>,
+    /// Visible fragments of the non-opaque rectangles.
+    alpha_items: Vec<Item>,
+}
+
+impl FrontToBackBuilder {
+
+    /// Pre-allocating constructor.
+    ///
+    /// `opaque` and `alpha` are the initial capacities of the opaque and
+    /// non-opaque item lists.
+    pub fn with_capacity(opaque: usize, alpha: usize) -> Self {
+        FrontToBackBuilder {
+            opaque_items: Vec::with_capacity(opaque),
+            alpha_items: Vec::with_capacity(alpha),
+        }
+    }
+
+    /// Add a rectangle, potentially splitting it and discarding the occluded parts if any.
+    ///
+    /// The visible fragments are appended to the opaque or to the non-opaque
+    /// list depending on `is_opaque`, each tagged with `key`.
+    ///
+    /// Returns true if the rectangle is at least partially visible.
+    pub fn add(&mut self, rect: &DeviceBox2D, is_opaque: bool, key: usize) -> bool {
+        let mut fragments: SmallVec<[DeviceBox2D; 16]> = SmallVec::new();
+        fragments.push(*rect);
+
+        for item in &self.opaque_items {
+            // Fully occluded: nothing left to split.
+            if fragments.is_empty() {
+                break;
+            }
+            if item.rectangle.intersects(rect) {
+                apply_occluder(&item.rectangle, &mut fragments);
+            }
+        }
+
+        let list = if is_opaque {
+            &mut self.opaque_items
+        } else {
+            &mut self.alpha_items
+        };
+
+        list.extend(fragments.iter().map(|fragment| Item {
+            rectangle: *fragment,
+            key,
+        }));
+
+        !fragments.is_empty()
+    }
+
+    /// Returns true if the provided rect is at least partially visible, without adding it.
+    pub fn test(&self, rect: &DeviceBox2D) -> bool {
+        let mut fragments: SmallVec<[DeviceBox2D; 16]> = SmallVec::new();
+        fragments.push(*rect);
+
+        for item in &self.opaque_items {
+            // Fully occluded: the answer is known, and the occluder must not
+            // be applied to an empty list of fragments (same early-out as in
+            // `add`).
+            if fragments.is_empty() {
+                return false;
+            }
+            if item.rectangle.intersects(rect) {
+                apply_occluder(&item.rectangle, &mut fragments);
+            }
+        }
+
+        !fragments.is_empty()
+    }
+
+    /// The visible opaque rectangles (front-to-back order).
+    pub fn opaque_items(&self) -> &[Item] {
+        &self.opaque_items
+    }
+
+    /// The visible non-opaque rectangles (front-to-back order).
+    pub fn alpha_items(&self) -> &[Item] {
+        &self.alpha_items
+    }
+}
+
+
+// Split out the parts of the rects in the provided vector that are covered
+// by the occluder.
+//
+// Every rect intersecting the occluder is replaced by up to 4 rects covering
+// its parts that are outside of the occluder: full-width bands above and
+// below, and left/right pieces restricted to the vertical extent shared with
+// the occluder. Rects that do not intersect the occluder are left untouched.
+// The order of the rects is not preserved. An empty vector is left as is.
+fn apply_occluder(occluder: &DeviceBox2D, rects: &mut SmallVec<[DeviceBox2D; 16]>) {
+    // Iterate in reverse order so that we can push new rects at the back without
+    // visiting them. Counting down from `len` (rather than starting at
+    // `len - 1`) also makes an empty vector a no-op instead of an underflow.
+    let mut i = rects.len();
+    while i > 0 {
+        i -= 1;
+        let r = rects[i];
+
+        if !r.intersects(occluder) {
+            continue;
+        }
+
+        let top = r.min.y < occluder.min.y;
+        let bottom = r.max.y > occluder.max.y;
+        let left = r.min.x < occluder.min.x;
+        let right = r.max.x > occluder.max.x;
+
+        // Vertical extent shared by `r` and the occluder, used by the left
+        // and right pieces so that they don't overlap the top/bottom bands.
+        let min_y = r.min.y.max(occluder.min.y);
+        let max_y = r.max.y.min(occluder.max.y);
+
+        if top {
+            rects.push(DeviceBox2D {
+                min: r.min,
+                max: point2(r.max.x, occluder.min.y),
+            });
+        }
+
+        if bottom {
+            rects.push(DeviceBox2D {
+                min: point2(r.min.x, occluder.max.y),
+                max: r.max,
+            });
+        }
+
+        if left {
+            rects.push(DeviceBox2D {
+                min: point2(r.min.x, min_y),
+                max: point2(occluder.min.x, max_y),
+            });
+        }
+
+        if right {
+            rects.push(DeviceBox2D {
+                min: point2(occluder.max.x, min_y),
+                max: point2(r.max.x, max_y),
+            });
+        }
+
+        // Remove the original rectangle. `swap_remove` moves the last rect
+        // into its place (either one of the new rects we just added or one
+        // that was already visited), or simply pops it if it is the last one.
+        rects.swap_remove(i);
+    }
+}
diff --git a/gfx/wr/webrender/src/render_api.rs b/gfx/wr/webrender/src/render_api.rs
new file mode 100644
index 0000000000..3c30d01085
--- /dev/null
+++ b/gfx/wr/webrender/src/render_api.rs
@@ -0,0 +1,1426 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#![deny(missing_docs)]
+
+use std::cell::Cell;
+use std::fmt;
+use std::marker::PhantomData;
+use std::path::PathBuf;
+use std::sync::Arc;
+use std::u32;
+use time::precise_time_ns;
+//use crate::api::peek_poke::PeekPoke;
+use crate::api::channel::{Sender, single_msg_channel, unbounded_channel};
+use crate::api::{ColorF, BuiltDisplayList, IdNamespace, ExternalScrollId, Parameter, BoolParameter};
+use crate::api::{FontKey, FontInstanceKey, NativeFontHandle};
+use crate::api::{BlobImageData, BlobImageKey, ImageData, ImageDescriptor, ImageKey, Epoch, QualitySettings};
+use crate::api::{BlobImageParams, BlobImageRequest, BlobImageResult, AsyncBlobImageRasterizer, BlobImageHandler};
+use crate::api::{DocumentId, PipelineId, PropertyBindingId, PropertyBindingKey, ExternalEvent};
+use crate::api::{HitTestResult, HitTesterRequest, ApiHitTester, PropertyValue, DynamicProperties};
+use crate::api::{SampledScrollOffset, TileSize, NotificationRequest, DebugFlags};
+use crate::api::{GlyphDimensionRequest, GlyphIndexRequest, GlyphIndex, GlyphDimensions};
+use crate::api::{FontInstanceOptions, FontInstancePlatformOptions, FontVariation, RenderReasons};
+use crate::api::DEFAULT_TILE_SIZE;
+use crate::api::units::*;
+use crate::api_resources::ApiResources;
+use glyph_rasterizer::SharedFontResources;
+use crate::scene_builder_thread::{SceneBuilderRequest, SceneBuilderResult};
+use crate::intern::InterningMemoryReport;
+use crate::profiler::{self, TransactionProfile};
+
+/// Newtype wrapping a raw `u32` identifier.
+#[repr(C)]
+#[derive(Clone, Copy, Debug)]
+#[cfg_attr(any(feature = "serde"), derive(Deserialize, Serialize))]
+struct ResourceId(pub u32);
+
+/// Update of a persistent resource in WebRender.
+///
+/// ResourceUpdate changes keep their effect across display list changes.
+#[derive(Clone)]
+#[cfg_attr(any(feature = "serde"), derive(Deserialize, Serialize))]
+pub enum ResourceUpdate {
+    /// See `AddImage`.
+    AddImage(AddImage),
+    /// See `UpdateImage`.
+    UpdateImage(UpdateImage),
+    /// Delete an existing image resource.
+    ///
+    /// It is invalid to continue referring to the image key in any display list
+    /// in the transaction that contains the `DeleteImage` message and subsequent
+    /// transactions.
+    DeleteImage(ImageKey),
+    /// See `AddBlobImage`.
+    AddBlobImage(AddBlobImage),
+    /// See `UpdateBlobImage`.
+    UpdateBlobImage(UpdateBlobImage),
+    /// Delete an existing blob image resource.
+    DeleteBlobImage(BlobImageKey),
+    /// See `AddBlobImage::visible_rect`.
+    SetBlobImageVisibleArea(BlobImageKey, DeviceIntRect),
+    /// See `AddFont`.
+    AddFont(AddFont),
+    /// Deletes an already existing font resource.
+    ///
+    /// It is invalid to continue referring to the font key in any display list
+    /// in the transaction that contains the `DeleteFont` message and subsequent
+    /// transactions.
+    DeleteFont(FontKey),
+    /// See `AddFontInstance`.
+    AddFontInstance(AddFontInstance),
+    /// Deletes an already existing font instance resource.
+    ///
+    /// It is invalid to continue referring to the font instance in any display
+    /// list in the transaction that contains the `DeleteFontInstance` message and
+    /// subsequent transactions.
+    DeleteFontInstance(FontInstanceKey),
+}
+
+impl fmt::Debug for ResourceUpdate {
+    /// Writes a compact description of the update: the variant name, plus the
+    /// descriptor size for the variants that add or update (blob) image data.
+    /// Payloads of the other variants are intentionally omitted.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            ResourceUpdate::AddImage(i) => write!(
+                f,
+                "ResourceUpdate::AddImage size({:?})",
+                i.descriptor.size
+            ),
+            ResourceUpdate::UpdateImage(i) => write!(
+                f,
+                "ResourceUpdate::UpdateImage size({:?})",
+                i.descriptor.size
+            ),
+            // The original message was misspelled "ResourceUFpdate".
+            ResourceUpdate::AddBlobImage(i) => write!(
+                f,
+                "ResourceUpdate::AddBlobImage size({:?})",
+                i.descriptor.size
+            ),
+            ResourceUpdate::UpdateBlobImage(i) => write!(
+                f,
+                "ResourceUpdate::UpdateBlobImage size({:?})",
+                i.descriptor.size
+            ),
+            ResourceUpdate::DeleteImage(..) => f.write_str("ResourceUpdate::DeleteImage"),
+            ResourceUpdate::DeleteBlobImage(..) => f.write_str("ResourceUpdate::DeleteBlobImage"),
+            ResourceUpdate::SetBlobImageVisibleArea(..) => f.write_str("ResourceUpdate::SetBlobImageVisibleArea"),
+            ResourceUpdate::AddFont(..) => f.write_str("ResourceUpdate::AddFont"),
+            ResourceUpdate::DeleteFont(..) => f.write_str("ResourceUpdate::DeleteFont"),
+            ResourceUpdate::AddFontInstance(..) => f.write_str("ResourceUpdate::AddFontInstance"),
+            ResourceUpdate::DeleteFontInstance(..) => f.write_str("ResourceUpdate::DeleteFontInstance"),
+        }
+    }
+}
+
+/// Whether to generate a frame, and if so, an id that allows tracking this
+/// transaction through the various frame stages.
+#[derive(Clone, Debug)]
+pub enum GenerateFrame {
+    /// Generate a frame if something changed.
+    Yes {
+        /// An id that allows tracking the frame transaction through the various
+        /// frame stages. Specified by the caller of `Transaction::generate_frame`.
+        id: u64,
+    },
+    /// Don't generate a frame even if something has changed.
+    No,
+}
+
+impl GenerateFrame {
+    /// Returns `true` for `GenerateFrame::Yes` and `false` for `GenerateFrame::No`.
+    pub fn as_bool(&self) -> bool {
+        matches!(self, GenerateFrame::Yes { .. })
+    }
+
+    /// Returns the id carried by `GenerateFrame::Yes`, or `None` for
+    /// `GenerateFrame::No`.
+    pub fn id(&self) -> Option<u64> {
+        if let GenerateFrame::Yes { id } = *self {
+            Some(id)
+        } else {
+            None
+        }
+    }
+}
+
+/// A Transaction is a group of commands to apply atomically to a document.
+///
+/// This mechanism ensures that:
+///  - no other message can be interleaved between two commands that need to be applied together.
+///  - no redundant work is performed if two commands in the same transaction cause the scene or
+///    the frame to be rebuilt.
+pub struct Transaction {
+    /// Operations affecting the scene (applied before scene building).
+    scene_ops: Vec<SceneMsg>,
+    /// Operations affecting the generation of frames (applied after scene building).
+    frame_ops: Vec<FrameMsg>,
+
+    notifications: Vec<NotificationRequest>,
+
+    /// Persistent resource updates to apply as part of this transaction.
+    pub resource_updates: Vec<ResourceUpdate>,
+
+    /// True if the transaction needs the scene building thread's attention.
+    /// False for things that can skip the scene builder, like APZ changes and
+    /// async images.
+    ///
+    /// Before this `Transaction` is converted to a `TransactionMsg`, we look
+    /// over its contents and set this if we're doing anything the scene builder
+    /// needs to know about, so this is only a default.
+    use_scene_builder_thread: bool,
+
+    /// Whether to generate a frame, and if so, an id that allows tracking this
+    /// transaction through the various frame stages. Specified by the caller of
+    /// generate_frame().
+    generate_frame: GenerateFrame,
+
+    /// Time when this transaction was constructed.
+    creation_time: u64,
+
+    /// Set to true in order to force re-rendering even if WebRender can't internally
+    /// detect that something has changed.
+    pub invalidate_rendered_frame: bool,
+
+    low_priority: bool,
+
+    /// Reasons for rendering, accumulated by `generate_frame()` and `invalidate_rendered_frame()`.
+    pub render_reasons: RenderReasons,
+}
+
+impl Transaction {
+    /// Creates an empty transaction that generates no frame and uses the scene builder thread.
+    pub fn new() -> Self {
+        Transaction {
+            scene_ops: Vec::new(),
+            frame_ops: Vec::new(),
+            resource_updates: Vec::new(),
+            notifications: Vec::new(),
+            use_scene_builder_thread: true,
+            generate_frame: GenerateFrame::No,
+            creation_time: precise_time_ns(),
+            invalidate_rendered_frame: false,
+            low_priority: false,
+            render_reasons: RenderReasons::empty(),
+        }
+    }
+
+    /// Marks this transaction to allow it to skip going through the scene builder
+    /// thread.
+    ///
+    /// This is useful to avoid jank in transactions associated with animated
+    /// property updates, panning and zooming.
+    ///
+    /// Note that transactions that skip the scene builder thread can race ahead of
+    /// transactions that don't skip it.
+    pub fn skip_scene_builder(&mut self) {
+        self.use_scene_builder_thread = false;
+    }
+
+    /// Marks this transaction to enforce going through the scene builder thread.
+    pub fn use_scene_builder_thread(&mut self) {
+        self.use_scene_builder_thread = true;
+    }
+
+    /// Returns true if the transaction has no effect.
+    pub fn is_empty(&self) -> bool {
+        !self.generate_frame.as_bool() &&
+            !self.invalidate_rendered_frame &&
+            self.scene_ops.is_empty() &&
+            self.frame_ops.is_empty() &&
+            self.resource_updates.is_empty() &&
+            self.notifications.is_empty()
+    }
+
+    /// Update a pipeline's epoch.
+    pub fn update_epoch(&mut self, pipeline_id: PipelineId, epoch: Epoch) {
+        // We track epochs before and after scene building.
+        // This one will be applied to the pending scene right away:
+        self.scene_ops.push(SceneMsg::UpdateEpoch(pipeline_id, epoch));
+        // And this one will be applied to the currently built scene at the end
+        // of the transaction (potentially long after the scene_ops one).
+        self.frame_ops.push(FrameMsg::UpdateEpoch(pipeline_id, epoch));
+        // We could avoid the duplication here by storing the epoch updates in a
+        // separate array and let the render backend schedule the updates at the
+        // proper times, but it wouldn't make things simpler.
+    }
+
+    /// Sets the root pipeline.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use webrender::api::{PipelineId};
+    /// # use webrender::api::units::{DeviceIntSize};
+    /// # use webrender::render_api::{RenderApiSender, Transaction};
+    /// # fn example() {
+    /// let pipeline_id = PipelineId(0, 0);
+    /// let mut txn = Transaction::new();
+    /// txn.set_root_pipeline(pipeline_id);
+    /// # }
+    /// ```
+    pub fn set_root_pipeline(&mut self, pipeline_id: PipelineId) {
+        self.scene_ops.push(SceneMsg::SetRootPipeline(pipeline_id));
+    }
+
+    /// Removes data associated with a pipeline from the internal data structures.
+    /// If the specified `pipeline_id` is for the root pipeline, the root pipeline
+    /// is reset back to `None`.
+    pub fn remove_pipeline(&mut self, pipeline_id: PipelineId) {
+        self.scene_ops.push(SceneMsg::RemovePipeline(pipeline_id));
+    }
+
+    /// Supplies a new frame to WebRender.
+    ///
+    /// Non-blocking, it notifies a worker process which processes the display list.
+    ///
+    /// Note: Scrolling doesn't require its own frame.
+    ///
+    /// Arguments:
+    ///
+    /// * `epoch`: The unique Frame ID, monotonically increasing.
+    /// * `background`: The background color of this pipeline.
+    /// * `viewport_size`: The size of the viewport for this frame.
+    /// * `pipeline_id`: The ID of the pipeline that is supplying this display list.
+    /// * `display_list`: The root Display list used in this frame.
+    pub fn set_display_list(
+        &mut self,
+        epoch: Epoch,
+        background: Option<ColorF>,
+        viewport_size: LayoutSize,
+        (pipeline_id, mut display_list): (PipelineId, BuiltDisplayList),
+    ) {
+        display_list.set_send_time_ns(precise_time_ns());
+        self.scene_ops.push(
+            SceneMsg::SetDisplayList {
+                display_list,
+                epoch,
+                pipeline_id,
+                background,
+                viewport_size,
+            }
+        );
+    }
+
+    /// Add a set of persistent resource updates to apply as part of this transaction.
+    pub fn update_resources(&mut self, mut resources: Vec<ResourceUpdate>) {
+        self.resource_updates.append(&mut resources);
+    }
+
+    // Note: Gecko uses this to get notified when a transaction that contains
+    // potentially long blob rasterization or scene build is ready to be rendered,
+    // so that the tab-switching integration can react adequately when tab
+    // switching takes too long. For this use case what matters is that the
+    // notification doesn't fire before scene building and blob rasterization.
+
+    /// Trigger a notification at a certain stage of the rendering pipeline.
+    ///
+    /// Note that notification requests are skipped during serialization, so it is
+    /// best to use them for synchronization purposes and not for things that could
+    /// affect WebRender's state.
+    pub fn notify(&mut self, event: NotificationRequest) {
+        self.notifications.push(event);
+    }
+
+    /// Setup the output region in the framebuffer for a given document.
+    pub fn set_document_view(
+        &mut self,
+        device_rect: DeviceIntRect,
+    ) {
+        window_size_sanity_check(device_rect.size());
+        self.scene_ops.push(
+            SceneMsg::SetDocumentView {
+                device_rect,
+            },
+        );
+    }
+
+    /// Set multiple scroll offsets with generations to the node identified by
+    /// the given external scroll id, the scroll offsets are relative to the
+    /// pre-scrolled offset for the scrolling layer.
+    pub fn set_scroll_offsets(
+        &mut self,
+        id: ExternalScrollId,
+        sampled_scroll_offsets: Vec<SampledScrollOffset>,
+    ) {
+        self.frame_ops.push(FrameMsg::SetScrollOffsets(id, sampled_scroll_offsets));
+    }
+
+    /// Set the current quality / performance settings for this document.
+    pub fn set_quality_settings(&mut self, settings: QualitySettings) {
+        self.scene_ops.push(SceneMsg::SetQualitySettings { settings });
+    }
+
+    /// Queues a `FrameMsg::SetIsTransformAsyncZooming` carrying `is_zooming` for the given `animation_id`.
+    pub fn set_is_transform_async_zooming(&mut self, is_zooming: bool, animation_id: PropertyBindingId) {
+        self.frame_ops.push(FrameMsg::SetIsTransformAsyncZooming(is_zooming, animation_id));
+    }
+
+    /// Generate a new frame. When it's done and a RenderNotifier has been set
+    /// in `webrender::Renderer`, [new_frame_ready()][notifier] gets called.
+    /// Note that the notifier is called even if the frame generation was a
+    /// no-op; the arguments passed to `new_frame_ready` will provide information
+    /// as to what happened.
+    ///
+    /// [notifier]: trait.RenderNotifier.html#tymethod.new_frame_ready
+    pub fn generate_frame(&mut self, id: u64, reasons: RenderReasons) {
+        self.generate_frame = GenerateFrame::Yes{ id };
+        self.render_reasons |= reasons;
+    }
+
+    /// Invalidate the rendered frame. This ensures that the frame will be rendered
+    /// during the next frame generation. WebRender could skip frame rendering if
+    /// there is no update.
+    /// But there are cases that need to force rendering:
+    ///  - Content of image is updated by reusing same ExternalImageId.
+    ///  - Platform requests it if pixels become stale (like wakeup from standby).
+    pub fn invalidate_rendered_frame(&mut self, reasons: RenderReasons) {
+        self.invalidate_rendered_frame = true;
+        self.render_reasons |= reasons
+    }
+
+    /// Reset the list of animated property bindings that should be used to resolve
+    /// bindings in the current display list.
+    pub fn reset_dynamic_properties(&mut self) {
+        self.frame_ops.push(FrameMsg::ResetDynamicProperties);
+    }
+
+    /// Add to the list of animated property bindings that should be used to resolve
+    /// bindings in the current display list.
+    pub fn append_dynamic_properties(&mut self, properties: DynamicProperties) {
+        self.frame_ops.push(FrameMsg::AppendDynamicProperties(properties));
+    }
+
+    /// Add to the list of animated property bindings that should be used to
+    /// resolve bindings in the current display list. This is a convenience method
+    /// so the caller doesn't have to figure out all the dynamic properties before
+    /// setting them on the transaction but can do them incrementally.
+    pub fn append_dynamic_transform_properties(&mut self, transforms: Vec<PropertyValue<LayoutTransform>>) {
+        self.frame_ops.push(FrameMsg::AppendDynamicTransformProperties(transforms));
+    }
+
+    /// Consumes this object and just returns the frame ops.
+    pub fn get_frame_ops(self) -> Vec<FrameMsg> {
+        self.frame_ops
+    }
+
+    fn finalize(self, document_id: DocumentId) -> Box<TransactionMsg> {
+        Box::new(TransactionMsg {
+            document_id,
+            scene_ops: self.scene_ops,
+            frame_ops: self.frame_ops,
+            resource_updates: self.resource_updates,
+            notifications: self.notifications,
+            use_scene_builder_thread: self.use_scene_builder_thread,
+            generate_frame: self.generate_frame,
+            creation_time: Some(self.creation_time),
+            invalidate_rendered_frame: self.invalidate_rendered_frame,
+            low_priority: self.low_priority,
+            blob_rasterizer: None,
+            blob_requests: Vec::new(),
+            rasterized_blobs: Vec::new(),
+            profile: TransactionProfile::new(),
+            render_reasons: self.render_reasons,
+        })
+    }
+
+    /// See `ResourceUpdate::AddImage`.
+    pub fn add_image(
+        &mut self,
+        key: ImageKey,
+        descriptor: ImageDescriptor,
+        data: ImageData,
+        tiling: Option<TileSize>,
+    ) {
+        self.resource_updates.push(ResourceUpdate::AddImage(AddImage {
+            key,
+            descriptor,
+            data,
+            tiling,
+        }));
+    }
+
+    /// See `ResourceUpdate::UpdateImage`.
+    pub fn update_image(
+        &mut self,
+        key: ImageKey,
+        descriptor: ImageDescriptor,
+        data: ImageData,
+        dirty_rect: &ImageDirtyRect,
+    ) {
+        self.resource_updates.push(ResourceUpdate::UpdateImage(UpdateImage {
+            key,
+            descriptor,
+            data,
+            dirty_rect: *dirty_rect,
+        }));
+    }
+
+    /// See `ResourceUpdate::DeleteImage`.
+    pub fn delete_image(&mut self, key: ImageKey) {
+        self.resource_updates.push(ResourceUpdate::DeleteImage(key));
+    }
+
+    /// See `ResourceUpdate::AddBlobImage`.
+    pub fn add_blob_image(
+        &mut self,
+        key: BlobImageKey,
+        descriptor: ImageDescriptor,
+        data: Arc<BlobImageData>,
+        visible_rect: DeviceIntRect,
+        tile_size: Option<TileSize>,
+    ) {
+        self.resource_updates.push(
+            ResourceUpdate::AddBlobImage(AddBlobImage {
+                key,
+                descriptor,
+                data,
+                visible_rect,
+                tile_size: tile_size.unwrap_or(DEFAULT_TILE_SIZE),
+            })
+        );
+    }
+
+    /// See `ResourceUpdate::UpdateBlobImage`.
+    pub fn update_blob_image(
+        &mut self,
+        key: BlobImageKey,
+        descriptor: ImageDescriptor,
+        data: Arc<BlobImageData>,
+        visible_rect: DeviceIntRect,
+        dirty_rect: &BlobDirtyRect,
+    ) {
+        self.resource_updates.push(
+            ResourceUpdate::UpdateBlobImage(UpdateBlobImage {
+                key,
+                descriptor,
+                data,
+                visible_rect,
+                dirty_rect: *dirty_rect,
+            })
+        );
+    }
+
+    /// See `ResourceUpdate::DeleteBlobImage`.
+    pub fn delete_blob_image(&mut self, key: BlobImageKey) {
+        self.resource_updates.push(ResourceUpdate::DeleteBlobImage(key));
+    }
+
+    /// See `ResourceUpdate::SetBlobImageVisibleArea`.
+    pub fn set_blob_image_visible_area(&mut self, key: BlobImageKey, area: DeviceIntRect) {
+        self.resource_updates.push(ResourceUpdate::SetBlobImageVisibleArea(key, area));
+    }
+
+    /// See `ResourceUpdate::AddFont`.
+    pub fn add_raw_font(&mut self, key: FontKey, bytes: Vec<u8>, index: u32) {
+        self.resource_updates
+            .push(ResourceUpdate::AddFont(AddFont::Raw(key, Arc::new(bytes), index)));
+    }
+
+    /// See `ResourceUpdate::AddFont`.
+    pub fn add_native_font(&mut self, key: FontKey, native_handle: NativeFontHandle) {
+        self.resource_updates
+            .push(ResourceUpdate::AddFont(AddFont::Native(key, native_handle)));
+    }
+
+    /// See `ResourceUpdate::DeleteFont`.
+    pub fn delete_font(&mut self, key: FontKey) {
+        self.resource_updates.push(ResourceUpdate::DeleteFont(key));
+    }
+
+    /// See `ResourceUpdate::AddFontInstance`.
+    pub fn add_font_instance(
+        &mut self,
+        key: FontInstanceKey,
+        font_key: FontKey,
+        glyph_size: f32,
+        options: Option<FontInstanceOptions>,
+        platform_options: Option<FontInstancePlatformOptions>,
+        variations: Vec<FontVariation>,
+    ) {
+        self.resource_updates
+            .push(ResourceUpdate::AddFontInstance(AddFontInstance {
+                key,
+                font_key,
+                glyph_size,
+                options,
+                platform_options,
+                variations,
+            }));
+    }
+
+    /// See `ResourceUpdate::DeleteFontInstance`.
+    pub fn delete_font_instance(&mut self, key: FontInstanceKey) {
+        self.resource_updates.push(ResourceUpdate::DeleteFontInstance(key));
+    }
+
+    /// A hint that this transaction can be processed at a lower priority. High-
+    /// priority transactions can jump ahead of regular-priority transactions,
+    /// but both high- and regular-priority transactions are processed in order
+    /// relative to other transactions of the same priority.
+    pub fn set_low_priority(&mut self, low_priority: bool) {
+        self.low_priority = low_priority;
+    }
+
+    /// Returns whether this transaction is marked as low priority.
+    pub fn is_low_priority(&self) -> bool {
+        self.low_priority
+    }
+}
+
+/// A `Transaction` bundled with a `DocumentId`.
+pub struct DocumentTransaction {
+    /// Id of the document this transaction is associated with.
+    pub document_id: DocumentId,
+    /// The transaction itself.
+    pub transaction: Transaction,
+}
+
+/// Represents a transaction in the format sent through the channel.
+pub struct TransactionMsg {
+    /// Id of the document, as passed to `Transaction::finalize`.
+    pub document_id: DocumentId,
+    /// Changes that require re-building the scene.
+    pub scene_ops: Vec<SceneMsg>,
+    /// Changes to animated properties that do not require re-building the scene.
+    pub frame_ops: Vec<FrameMsg>,
+    /// Updates to resources that persist across display lists.
+    pub resource_updates: Vec<ResourceUpdate>,
+    /// Whether to trigger frame building and rendering if something has changed.
+    pub generate_frame: GenerateFrame,
+    /// Creation time of this transaction.
+    pub creation_time: Option<u64>,
+    /// Whether to force frame building and rendering even if no changes are internally
+    /// observed.
+    pub invalidate_rendered_frame: bool,
+    /// Whether to enforce that this transaction go through the scene builder.
+    pub use_scene_builder_thread: bool,
+    /// Low-priority hint, copied from the `Transaction` (see `Transaction::set_low_priority`).
+    pub low_priority: bool,
+
+    /// Handlers to notify at certain points of the pipeline.
+    pub notifications: Vec<NotificationRequest>,
+    /// Optional async blob image rasterizer; `None` when created by `Transaction::finalize`.
+    pub blob_rasterizer: Option<Box<dyn AsyncBlobImageRasterizer>>,
+    /// Blob image request parameters; empty when created by `Transaction::finalize`.
+    pub blob_requests: Vec<BlobImageParams>,
+    /// Blob image requests paired with their results; empty when created by `Transaction::finalize`.
+    pub rasterized_blobs: Vec<(BlobImageRequest, BlobImageResult)>,
+    /// Collect various data along the rendering pipeline to display it in the embedded profiler.
+    pub profile: TransactionProfile,
+    /// Keep track of who asks rendering to happen.
+    pub render_reasons: RenderReasons,
+}
+
+impl fmt::Debug for TransactionMsg {
+    /// Writes a one-line summary of the transaction flags, followed by one
+    /// tab-indented line per scene op, frame op and resource update.
+    ///
+    /// Formatter errors are propagated to the caller rather than turned into
+    /// a panic, as expected from a `fmt` implementation.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        writeln!(
+            f,
+            "threaded={}, genframe={:?}, invalidate={}, low_priority={}",
+            self.use_scene_builder_thread,
+            self.generate_frame,
+            self.invalidate_rendered_frame,
+            self.low_priority,
+        )?;
+
+        for scene_op in &self.scene_ops {
+            writeln!(f, "\t\t{:?}", scene_op)?;
+        }
+
+        for frame_op in &self.frame_ops {
+            writeln!(f, "\t\t{:?}", frame_op)?;
+        }
+
+        for resource_update in &self.resource_updates {
+            writeln!(f, "\t\t{:?}", resource_update)?;
+        }
+
+        Ok(())
+    }
+}
+
+impl TransactionMsg {
+    /// Returns true when the message requests no frame, forces no re-render,
+    /// and carries no scene ops, frame ops, resource updates or notifications.
+    pub fn is_empty(&self) -> bool {
+        let requests_render = self.generate_frame.as_bool() || self.invalidate_rendered_frame;
+        let has_payload = !self.scene_ops.is_empty()
+            || !self.frame_ops.is_empty()
+            || !self.resource_updates.is_empty()
+            || !self.notifications.is_empty();
+
+        !(requests_render || has_payload)
+    }
+}
+
+/// Creates an image resource with provided parameters.
+///
+/// Must be matched with a `DeleteImage` at some point to prevent memory leaks.
+#[derive(Clone)]
+#[cfg_attr(any(feature = "serde"), derive(Deserialize, Serialize))]
+pub struct AddImage {
+    /// A key to identify the image resource.
+    pub key: ImageKey,
+    /// Properties of the image.
+    pub descriptor: ImageDescriptor,
+    /// The pixels of the image.
+    pub data: ImageData,
+    /// An optional tiling scheme to apply when storing the image's data
+    /// on the GPU. Applies to both the width and height of the tiles.
+    ///
+    /// Note that WebRender may internally choose to tile large images
+    /// even if this member is set to `None`.
+    pub tiling: Option<TileSize>,
+}
+
+/// Updates an already existing image resource.
+#[derive(Clone)]
+#[cfg_attr(any(feature = "serde"), derive(Deserialize, Serialize))]
+pub struct UpdateImage {
+    /// The key identifying the image resource to update.
+    pub key: ImageKey,
+    /// Properties of the image.
+    pub descriptor: ImageDescriptor,
+    /// The pixels of the image.
+    pub data: ImageData,
+    /// An optional dirty rect that lets WebRender optimize the amount of
+    /// data to transfer to the GPU.
+    ///
+    /// The data provided must still represent the entire image.
+    pub dirty_rect: ImageDirtyRect,
+}
+
+/// Creates a blob-image resource with provided parameters.
+///
+/// Must be matched with a `DeleteBlobImage` at some point to prevent memory leaks.
+#[derive(Clone)]
+#[cfg_attr(any(feature = "serde"), derive(Deserialize, Serialize))]
+pub struct AddBlobImage {
+    /// A key to identify the blob-image resource.
+    pub key: BlobImageKey,
+    /// Properties of the image.
+    pub descriptor: ImageDescriptor,
+    /// The blob-image's serialized commands.
+    pub data: Arc<BlobImageData>,
+    /// The portion of the plane in the blob-image's internal coordinate
+    /// system that is stretched to fill the image display item.
+    ///
+    /// Unlike regular images, blob images are not limited in size. The
+    /// top-left corner of their internal coordinate system is also not
+    /// necessarily at (0, 0).
+    /// This means that blob images can be updated to insert/remove content
+    /// in any direction to support panning and zooming.
+    pub visible_rect: DeviceIntRect,
+    /// The blob image's tile size to apply when rasterizing the blob-image
+    /// and when storing its rasterized data on the GPU.
+    /// Applies to both the width and height of the tiles.
+    ///
+    /// All blob images are tiled.
+    pub tile_size: TileSize,
+}
+
+/// Updates an already existing blob-image resource.
+#[derive(Clone)]
+#[cfg_attr(any(feature = "serde"), derive(Deserialize, Serialize))]
+pub struct UpdateBlobImage {
+    /// The key identifying the blob-image resource to update.
+    pub key: BlobImageKey,
+    /// Properties of the image.
+    pub descriptor: ImageDescriptor,
+    /// The blob-image's serialized commands.
+    pub data: Arc<BlobImageData>,
+    /// See `AddBlobImage::visible_rect`.
+    pub visible_rect: DeviceIntRect,
+    /// An optional dirty rect that lets WebRender optimize the amount of
+    /// data to rasterize and transfer to the GPU.
+    pub dirty_rect: BlobDirtyRect,
+}
+
+/// Creates a font resource.
+///
+/// Must be matched with a corresponding `ResourceUpdate::DeleteFont` at some point to prevent
+/// memory leaks.
+#[derive(Clone)]
+#[cfg_attr(any(feature = "serde"), derive(Deserialize, Serialize))]
+pub enum AddFont {
+    /// Font key, raw font bytes and index (see `Transaction::add_raw_font`).
+    Raw(FontKey, Arc<Vec<u8>>, u32),
+    /// Font key and native font handle (see `Transaction::add_native_font`).
+    Native(FontKey, NativeFontHandle),
+}
+
+/// Creates a font instance resource.
+///
+/// Must be matched with a corresponding `DeleteFontInstance` at some point
+/// to prevent memory leaks.
+#[derive(Clone)]
+#[cfg_attr(any(feature = "serde"), derive(Deserialize, Serialize))]
+pub struct AddFontInstance {
+    /// A key to identify the font instance.
+    pub key: FontInstanceKey,
+    /// The font resource's key.
+    pub font_key: FontKey,
+    /// Glyph size in app units.
+    pub glyph_size: f32,
+    /// Optional font instance options.
+    pub options: Option<FontInstanceOptions>,
+    /// Optional platform-specific font instance options.
+    pub platform_options: Option<FontInstancePlatformOptions>,
+    /// Font variations to use for this instance.
+    pub variations: Vec<FontVariation>,
+}
+
+/// Scene messages affect building the scene.
+pub enum SceneMsg {
+    /// See `Transaction::update_epoch`.
+    UpdateEpoch(PipelineId, Epoch),
+    /// See `Transaction::set_root_pipeline`.
+    SetRootPipeline(PipelineId),
+    /// See `Transaction::remove_pipeline`.
+    RemovePipeline(PipelineId),
+    /// See `Transaction::set_display_list`.
+    SetDisplayList {
+        /// The built display list.
+        display_list: BuiltDisplayList,
+        /// The epoch supplied with the display list.
+        epoch: Epoch,
+        /// The pipeline supplying the display list.
+        pipeline_id: PipelineId,
+        /// Optional background color.
+        background: Option<ColorF>,
+        /// The viewport size.
+        viewport_size: LayoutSize,
+    },
+    /// See `Transaction::set_document_view`.
+    SetDocumentView {
+        /// The document's output rectangle, in device pixels.
+        device_rect: DeviceIntRect,
+    },
+    /// Set the current quality / performance configuration for this document.
+    SetQualitySettings {
+        /// The set of available quality / performance config values.
+        settings: QualitySettings,
+    },
+}
+
+/// Frame messages affect frame generation (applied after building the scene).
+pub enum FrameMsg {
+    /// See `Transaction::update_epoch`.
+    UpdateEpoch(PipelineId, Epoch),
+    /// Carries a `WorldPoint` and a `Sender` for the resulting `HitTestResult`.
+    HitTest(WorldPoint, Sender<HitTestResult>),
+    /// Carries a `Sender` for an `Arc<dyn ApiHitTester>`.
+    RequestHitTester(Sender<Arc<dyn ApiHitTester>>),
+    /// See `Transaction::set_scroll_offsets`.
+    SetScrollOffsets(ExternalScrollId, Vec<SampledScrollOffset>),
+    /// See `Transaction::reset_dynamic_properties`.
+    ResetDynamicProperties,
+    /// See `Transaction::append_dynamic_properties`.
+    AppendDynamicProperties(DynamicProperties),
+    /// See `Transaction::append_dynamic_transform_properties`.
+    AppendDynamicTransformProperties(Vec<PropertyValue<LayoutTransform>>),
+    /// See `Transaction::set_is_transform_async_zooming`.
+    SetIsTransformAsyncZooming(bool, PropertyBindingId),
+}
+
+impl fmt::Debug for SceneMsg {
+    /// Writes only the variant name; payloads are omitted.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let variant_name = match self {
+            SceneMsg::UpdateEpoch(..) => "SceneMsg::UpdateEpoch",
+            SceneMsg::SetRootPipeline(..) => "SceneMsg::SetRootPipeline",
+            SceneMsg::RemovePipeline(..) => "SceneMsg::RemovePipeline",
+            SceneMsg::SetDisplayList { .. } => "SceneMsg::SetDisplayList",
+            SceneMsg::SetDocumentView { .. } => "SceneMsg::SetDocumentView",
+            SceneMsg::SetQualitySettings { .. } => "SceneMsg::SetQualitySettings",
+        };
+        f.write_str(variant_name)
+    }
+}
+
+impl fmt::Debug for FrameMsg {
+    /// Writes only the variant name; payloads are omitted.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let variant_name = match self {
+            FrameMsg::UpdateEpoch(..) => "FrameMsg::UpdateEpoch",
+            FrameMsg::HitTest(..) => "FrameMsg::HitTest",
+            FrameMsg::RequestHitTester(..) => "FrameMsg::RequestHitTester",
+            FrameMsg::SetScrollOffsets(..) => "FrameMsg::SetScrollOffsets",
+            FrameMsg::ResetDynamicProperties => "FrameMsg::ResetDynamicProperties",
+            FrameMsg::AppendDynamicProperties(..) => "FrameMsg::AppendDynamicProperties",
+            FrameMsg::AppendDynamicTransformProperties(..) => "FrameMsg::AppendDynamicTransformProperties",
+            FrameMsg::SetIsTransformAsyncZooming(..) => "FrameMsg::SetIsTransformAsyncZooming",
+        };
+        f.write_str(variant_name)
+    }
+}
+
+bitflags!{
+    /// Bit flags for WR stages to store in a capture.
+    // Note: capturing `FRAME` without `SCENE` is not currently supported.
+    pub struct CaptureBits: u8 {
+        /// Store the scene stage.
+        const SCENE = 0x1;
+        /// Store the frame stage (must be combined with `SCENE`, see the note above).
+        const FRAME = 0x2;
+        /// Store tile cache state (NOTE(review): inferred from the name; confirm).
+        const TILE_CACHE = 0x4;
+        /// Store external resources (NOTE(review): inferred from the name; confirm).
+        const EXTERNAL_RESOURCES = 0x8;
+    }
+}
+
+bitflags!{
+    /// Mask for clearing caches in debug commands.
+    pub struct ClearCache: u8 {
+        /// Clear cached images (NOTE(review): inferred from the name; confirm).
+        const IMAGES = 0b1;
+        /// Clear cached glyphs (NOTE(review): inferred from the name; confirm).
+        const GLYPHS = 0b10;
+        /// Clear cached glyph dimensions (NOTE(review): inferred from the name; confirm).
+        const GLYPH_DIMENSIONS = 0b100;
+        /// Clear cached render tasks (NOTE(review): inferred from the name; confirm).
+        const RENDER_TASKS = 0b1000;
+        /// Clear the texture cache (NOTE(review): inferred from the name; confirm).
+        const TEXTURE_CACHE = 0b10000;
+        /// Clear render target pool
+        const RENDER_TARGETS = 0b100000;
+    }
+}
+
+/// Information about a loaded capture of each document
+/// that is returned by `RenderBackend`.
+#[derive(Clone, Debug)]
+pub struct CapturedDocument {
+    /// Id of the document that was loaded from the capture.
+    pub document_id: DocumentId,
+    /// Root pipeline of that document, if any (presumably `None` when no root pipeline was set; confirm).
+    pub root_pipeline_id: Option<PipelineId>,
+}
+
+/// Update of the state of built-in debugging facilities.
+#[derive(Clone)]
+pub enum DebugCommand {
+    /// Sets the provided debug flags.
+    SetFlags(DebugFlags),
+    /// Save a capture of all the documents' state at the given path, for the stages selected by the bits.
+    SaveCapture(PathBuf, CaptureBits),
+    /// Load a capture of all the documents' state; each loaded document is reported on the sender.
+    LoadCapture(PathBuf, Option<(u32, u32)>, Sender<CapturedDocument>),
+    /// Start capturing a sequence of scene/frame changes.
+    StartCaptureSequence(PathBuf, CaptureBits),
+    /// Stop capturing a sequence of scene/frame changes.
+    StopCaptureSequence,
+    /// Clear the cached resources selected by the mask, forcing them to be re-uploaded from templates.
+    ClearCaches(ClearCache),
+    /// Enable/disable native compositor usage
+    EnableNativeCompositor(bool),
+    /// Sets the maximum amount of existing batches to visit before creating a new one.
+    SetBatchingLookback(u32),
+    /// Invalidate GPU cache, forcing the update from the CPU mirror.
+    InvalidateGpuCache,
+    /// Causes the scene builder to pause for a given amount of milliseconds each time it
+    /// processes a transaction.
+    SimulateLongSceneBuild(u32),
+    /// Set an override tile size to use for picture caches (presumably `None` removes the override; confirm).
+    SetPictureTileSize(Option<DeviceIntSize>),
+    /// Set an override for max off-screen surface size (presumably `None` removes the override; confirm).
+    SetMaximumSurfaceSize(Option<usize>),
+}
+
+/// Message sent by the `RenderApi` to the render backend thread.
+pub enum ApiMsg {
+    /// Requests a new id namespace; the reply is sent back on the provided sender.
+    CloneApi(Sender<IdNamespace>),
+    /// Announces an id namespace that was allocated by the client.
+    CloneApiByClient(IdNamespace),
+    /// Adds a new document with given initial size.
+    AddDocument(DocumentId, DeviceIntSize),
+    /// A batch of transactions, each targeted at a particular document.
+    UpdateDocuments(Vec<Box<TransactionMsg>>),
+    /// Flush from the caches anything that isn't necessary, to free some memory.
+    MemoryPressure,
+    /// Collects a memory report, which is sent back on the provided sender.
+    ReportMemory(Sender<Box<MemoryReport>>),
+    /// Change debugging options.
+    DebugCommand(DebugCommand),
+    /// Message from the scene builder thread.
+    SceneBuilderResult(SceneBuilderResult),
+}
+
+impl fmt::Debug for ApiMsg {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.write_str(match *self { // Only the variant name is written; payloads are omitted.
+            ApiMsg::CloneApi(..) => "ApiMsg::CloneApi",
+            ApiMsg::CloneApiByClient(..) => "ApiMsg::CloneApiByClient",
+            ApiMsg::AddDocument(..) => "ApiMsg::AddDocument",
+            ApiMsg::UpdateDocuments(..) => "ApiMsg::UpdateDocuments",
+            ApiMsg::MemoryPressure => "ApiMsg::MemoryPressure",
+            ApiMsg::ReportMemory(..) => "ApiMsg::ReportMemory",
+            ApiMsg::DebugCommand(..) => "ApiMsg::DebugCommand",
+            ApiMsg::SceneBuilderResult(..) => "ApiMsg::SceneBuilderResult",
+        })
+    }
+}
+
+/// Allows the API to communicate with WebRender.
+///
+/// This object is created along with the `Renderer` and its main use from a
+/// user perspective is to create one or several `RenderApi` objects.
+pub struct RenderApiSender {
+    api_sender: Sender<ApiMsg>,
+    scene_sender: Sender<SceneBuilderRequest>,
+    low_priority_scene_sender: Sender<SceneBuilderRequest>,
+    blob_image_handler: Option<Box<dyn BlobImageHandler>>,
+    fonts: SharedFontResources,
+}
+
+impl RenderApiSender {
+    /// Used internally by the `Renderer`; stores the given channels, blob handler and fonts as-is.
+    pub fn new(
+        api_sender: Sender<ApiMsg>,
+        scene_sender: Sender<SceneBuilderRequest>,
+        low_priority_scene_sender: Sender<SceneBuilderRequest>,
+        blob_image_handler: Option<Box<dyn BlobImageHandler>>,
+        fonts: SharedFontResources,
+    ) -> Self {
+        RenderApiSender {
+            api_sender,
+            scene_sender,
+            low_priority_scene_sender,
+            blob_image_handler,
+            fonts,
+        }
+    }
+
+    /// Creates a new resource API object with a dedicated namespace; blocks until the namespace id is received.
+    pub fn create_api(&self) -> RenderApi {
+        let (sync_tx, sync_rx) = single_msg_channel();
+        let msg = ApiMsg::CloneApi(sync_tx);
+        self.api_sender.send(msg).expect("Failed to send CloneApi message");
+        let namespace_id = sync_rx.recv().expect("Failed to receive CloneApi reply");
+        RenderApi {
+            api_sender: self.api_sender.clone(),
+            scene_sender: self.scene_sender.clone(),
+            low_priority_scene_sender: self.low_priority_scene_sender.clone(),
+            namespace_id,
+            next_id: Cell::new(ResourceId(0)),
+            resources: ApiResources::new(
+                self.blob_image_handler.as_ref().map(|handler| handler.create_similar()),
+                self.fonts.clone(),
+            ),
+        }
+    }
+
+    /// Creates a new resource API object with a dedicated namespace.
+    /// Namespace id is allocated by client, so no reply is awaited.
+    ///
+    /// The function can be used only when WebRenderOptions::namespace_alloc_by_client is true.
+    /// When the option is true, create_api() must not be used, to prevent namespace id conflicts.
+    pub fn create_api_by_client(&self, namespace_id: IdNamespace) -> RenderApi {
+        let msg = ApiMsg::CloneApiByClient(namespace_id);
+        self.api_sender.send(msg).expect("Failed to send CloneApiByClient message");
+        RenderApi {
+            api_sender: self.api_sender.clone(),
+            scene_sender: self.scene_sender.clone(),
+            low_priority_scene_sender: self.low_priority_scene_sender.clone(),
+            namespace_id,
+            next_id: Cell::new(ResourceId(0)),
+            resources: ApiResources::new(
+                self.blob_image_handler.as_ref().map(|handler| handler.create_similar()),
+                self.fonts.clone(),
+            ),
+        }
+    }
+}
+
+/// The main entry point to interact with WebRender.
+pub struct RenderApi {
+    api_sender: Sender<ApiMsg>, // Channel carrying `ApiMsg` values.
+    scene_sender: Sender<SceneBuilderRequest>, // Channel for regular scene builder requests.
+    low_priority_scene_sender: Sender<SceneBuilderRequest>, // Channel for low-priority scene builder requests.
+    namespace_id: IdNamespace, // Namespace used for every key and document id generated by this object.
+    next_id: Cell<ResourceId>, // Counter behind `next_unique_id`; starts at 0 when created by `RenderApiSender`.
+    resources: ApiResources,
+}
+
+impl RenderApi {
+    /// Returns the namespace ID used by this API object for the keys and document ids it generates.
+    pub fn get_namespace_id(&self) -> IdNamespace {
+        self.namespace_id
+    }
+
+    /// Creates a `RenderApiSender` from clones of this object's channels, a `create_similar()` copy of its blob image handler (if any) and its fonts.
+    pub fn create_sender(&self) -> RenderApiSender {
+        RenderApiSender::new(
+            self.api_sender.clone(),
+            self.scene_sender.clone(),
+            self.low_priority_scene_sender.clone(),
+            self.resources.blob_image_handler.as_ref().map(|handler| handler.create_similar()),
+            self.resources.get_fonts(),
+        )
+    }
+
+    /// Add a document to the WebRender instance, using a freshly generated unique id.
+    ///
+    /// Instances can manage one or several documents (using the same render backend thread).
+    /// Each document will internally correspond to a single scene, and scenes are made of
+    /// one or several pipelines.
+    pub fn add_document(&self, initial_size: DeviceIntSize) -> DocumentId {
+        let new_id = self.next_unique_id();
+        self.add_document_with_id(initial_size, new_id)
+    }
+
+    /// Like `add_document`, but with a caller-chosen id. Panics if `initial_size` fails the window size sanity check.
+    pub fn add_document_with_id(&self,
+                                initial_size: DeviceIntSize,
+                                id: u32) -> DocumentId {
+        window_size_sanity_check(initial_size);
+
+        let document_id = DocumentId::new(self.namespace_id, id);
+
+        // We send this message to both the render backend and the scene builder instead of having
+        // the scene builder thread forward it to the render backend as we do elsewhere. This is because
+        // some transactions can skip the scene builder thread and we want to avoid them arriving before
+        // the render backend knows about the existence of the corresponding document id.
+        // It may not be necessary, though.
+        self.api_sender.send(
+            ApiMsg::AddDocument(document_id, initial_size)
+        ).unwrap();
+        self.scene_sender.send(
+            SceneBuilderRequest::AddDocument(document_id, initial_size)
+        ).unwrap();
+
+        document_id
+    }
+
+    /// Delete a document. The request is sent on the low-priority scene builder channel; panics if the send fails.
+    pub fn delete_document(&self, document_id: DocumentId) {
+        self.low_priority_scene_sender.send(
+            SceneBuilderRequest::DeleteDocument(document_id)
+        ).unwrap();
+    }
+
+    /// Generate a new font key in this API object's namespace.
+    pub fn generate_font_key(&self) -> FontKey {
+        let new_id = self.next_unique_id();
+        FontKey::new(self.namespace_id, new_id)
+    }
+
+    /// Generate a new font instance key in this API object's namespace.
+    pub fn generate_font_instance_key(&self) -> FontInstanceKey {
+        let new_id = self.next_unique_id();
+        FontInstanceKey::new(self.namespace_id, new_id)
+    }
+
+    /// Gets the dimensions for the supplied glyph indices, blocking until the reply arrives.
+    ///
+    /// Note: Internally, the internal texture cache doesn't store
+    /// 'empty' textures (height or width = 0)
+    /// This means that glyph dimensions e.g. for spaces (' ') will mostly be None.
+    pub fn get_glyph_dimensions(
+        &self,
+        key: FontInstanceKey,
+        glyph_indices: Vec<GlyphIndex>,
+    ) -> Vec<Option<GlyphDimensions>> {
+        let (sender, rx) = single_msg_channel();
+        let msg = SceneBuilderRequest::GetGlyphDimensions(GlyphDimensionRequest {
+            key,
+            glyph_indices,
+            sender
+        });
+        self.low_priority_scene_sender.send(msg).unwrap();
+        rx.recv().unwrap()
+    }
+
+    /// Gets the glyph indices for the supplied string, blocking until the reply
+    /// arrives. These can be used to construct GlyphKeys.
+    pub fn get_glyph_indices(&self, key: FontKey, text: &str) -> Vec<Option<u32>> {
+        let (sender, rx) = single_msg_channel();
+        let msg = SceneBuilderRequest::GetGlyphIndices(GlyphIndexRequest {
+            key,
+            text: text.to_string(),
+            sender,
+        });
+        self.low_priority_scene_sender.send(msg).unwrap();
+        rx.recv().unwrap()
+    }
+
+    /// Creates an `ImageKey` in this API object's namespace.
+    pub fn generate_image_key(&self) -> ImageKey {
+        let new_id = self.next_unique_id();
+        ImageKey::new(self.namespace_id, new_id)
+    }
+
+    /// Creates a `BlobImageKey`, which wraps a freshly generated `ImageKey`.
+    pub fn generate_blob_image_key(&self) -> BlobImageKey {
+        BlobImageKey(self.generate_image_key())
+    }
+
+    /// A Gecko-specific notification mechanism to get some code executed on the
+    /// `Renderer`'s thread, mostly replaced by `NotificationHandler`. You should
+    /// probably use the latter instead. The event goes through the low-priority scene builder channel.
+    pub fn send_external_event(&self, evt: ExternalEvent) {
+        let msg = SceneBuilderRequest::ExternalEvent(evt);
+        self.low_priority_scene_sender.send(msg).unwrap();
+    }
+
+    /// Notify WebRender that now is a good time to flush caches and release
+    /// as much memory as possible. Panics if the API channel send fails.
+    pub fn notify_memory_pressure(&self) {
+        self.api_sender.send(ApiMsg::MemoryPressure).unwrap();
+    }
+
+    /// Synchronously requests a memory report, blocking until it arrives. The `_ops` argument is currently unused here.
+    pub fn report_memory(&self, _ops: malloc_size_of::MallocSizeOfOps) -> MemoryReport {
+        let (tx, rx) = single_msg_channel();
+        self.api_sender.send(ApiMsg::ReportMemory(tx)).unwrap();
+        *rx.recv().unwrap()
+    }
+
+    /// Update debugging flags by sending a `DebugCommand::SetFlags` on the API channel.
+    pub fn set_debug_flags(&self, flags: DebugFlags) {
+        let cmd = DebugCommand::SetFlags(flags);
+        self.api_sender.send(ApiMsg::DebugCommand(cmd)).unwrap();
+    }
+
+    /// Stop RenderBackend's task until shut down (requested through the low-priority scene builder channel).
+    pub fn stop_render_backend(&self) {
+        self.low_priority_scene_sender.send(SceneBuilderRequest::StopRenderBackend).unwrap();
+    }
+
+    /// Shut the WebRender instance down. When `synchronously` is true, blocks until a reply to the request is received.
+    pub fn shut_down(&self, synchronously: bool) {
+        if synchronously {
+            let (tx, rx) = single_msg_channel();
+            self.low_priority_scene_sender.send(SceneBuilderRequest::ShutDown(Some(tx))).unwrap();
+            rx.recv().unwrap();
+        } else {
+            self.low_priority_scene_sender.send(SceneBuilderRequest::ShutDown(None)).unwrap();
+        }
+    }
+
+    /// Create a new unique key, in this API object's namespace, that can be
+    /// used for animated property bindings.
+    pub fn generate_property_binding_key<T: Copy>(&self) -> PropertyBindingKey<T> {
+        let new_id = self.next_unique_id();
+        PropertyBindingKey {
+            id: PropertyBindingId {
+                namespace: self.namespace_id,
+                uid: new_id,
+            },
+            _phantom: PhantomData,
+        }
+    }
+
+    #[inline]
+    fn next_unique_id(&self) -> u32 { // Returns the current counter value, then advances the counter by one.
+        let ResourceId(id) = self.next_id.get();
+        self.next_id.set(ResourceId(id + 1));
+        id
+    }
+
+    // For use in Wrench only: sends a raw `ApiMsg` on the API channel and panics if the send fails.
+    #[doc(hidden)]
+    pub fn send_message(&self, msg: ApiMsg) {
+        self.api_sender.send(msg).unwrap();
+    }
+
+    /// Creates a transaction message from a single frame message: no scene ops, no resource updates, no frame generation.
+    fn frame_message(&self, msg: FrameMsg, document_id: DocumentId) -> Box<TransactionMsg> {
+        Box::new(TransactionMsg {
+            document_id,
+            scene_ops: Vec::new(),
+            frame_ops: vec![msg],
+            resource_updates: Vec::new(),
+            notifications: Vec::new(),
+            generate_frame: GenerateFrame::No,
+            creation_time: None,
+            invalidate_rendered_frame: false,
+            use_scene_builder_thread: false,
+            low_priority: false,
+            blob_rasterizer: None,
+            blob_requests: Vec::new(),
+            rasterized_blobs: Vec::new(),
+            profile: TransactionProfile::new(),
+            render_reasons: RenderReasons::empty(),
+        })
+    }
+
+    /// A helper method to send a single frame message to a document over the API channel.
+    fn send_frame_msg(&self, document_id: DocumentId, msg: FrameMsg) {
+        // This assertion fails on Servo use-cases, because it creates different
+        // `RenderApi` instances for layout and compositor.
+        //assert_eq!(document_id.0, self.namespace_id);
+        self.api_sender
+            .send(ApiMsg::UpdateDocuments(vec![self.frame_message(msg, document_id)]))
+            .unwrap()
+    }
+
+    /// Send a transaction to WebRender, via a scene builder channel (regular or low priority) or directly on the API channel.
+    pub fn send_transaction(&mut self, document_id: DocumentId, transaction: Transaction) {
+        let mut transaction = transaction.finalize(document_id);
+
+        self.resources.update(&mut transaction);
+
+        if transaction.generate_frame.as_bool() {
+            transaction.profile.start_time(profiler::API_SEND_TIME);
+            transaction.profile.start_time(profiler::TOTAL_FRAME_CPU_TIME);
+        }
+
+        if transaction.use_scene_builder_thread {
+            let sender = if transaction.low_priority {
+                &mut self.low_priority_scene_sender
+            } else {
+                &mut self.scene_sender
+            };
+
+            sender.send(SceneBuilderRequest::Transactions(vec![transaction]))
+                .expect("send by scene sender failed");
+        } else {
+            self.api_sender.send(ApiMsg::UpdateDocuments(vec![transaction]))
+                .expect("send by api sender failed");
+        }
+    }
+
+    /// Does a hit test on display items in the specified document, at the given
+    /// point, blocking until the result is received. NOTE(review): this method
+    /// takes no pipeline_id, so it does not restrict the results to a pipeline
+    /// itself. The vector of hit results will contain all display items that
+    /// match, ordered from front to back.
+    pub fn hit_test(&self,
+        document_id: DocumentId,
+        point: WorldPoint,
+    ) -> HitTestResult {
+        let (tx, rx) = single_msg_channel();
+
+        self.send_frame_msg(
+            document_id,
+            FrameMsg::HitTest(point, tx)
+        );
+        rx.recv().unwrap()
+    }
+
+    /// Request an object that can perform fast hit testing queries. This call does not wait for the reply; the returned request holds the receiver.
+    pub fn request_hit_tester(&self, document_id: DocumentId) -> HitTesterRequest {
+        let (tx, rx) = single_msg_channel();
+        self.send_frame_msg(
+            document_id,
+            FrameMsg::RequestHitTester(tx)
+        );
+
+        HitTesterRequest { rx }
+    }
+
+    // Some internal scheduling magic that leaked into the API: sends a `WakeUp` request on the regular scene builder channel.
+    // Buckle up and see APZUpdater.cpp for more info about what this is about.
+    #[doc(hidden)]
+    pub fn wake_scene_builder(&self) {
+        self.scene_sender.send(SceneBuilderRequest::WakeUp).unwrap();
+    }
+
+    /// Block until a round-trip to the scene builder thread has completed. This
+    /// ensures that any transactions (including ones deferred to the scene
+    /// builder thread) have been processed. Uses the low-priority channel.
+    pub fn flush_scene_builder(&self) {
+        let (tx, rx) = single_msg_channel();
+        self.low_priority_scene_sender.send(SceneBuilderRequest::Flush(tx)).unwrap();
+        rx.recv().unwrap(); // Block until done.
+    }
+
+    /// Save a capture of the current frame state for debugging; `bits` selects which stages are stored.
+    pub fn save_capture(&self, path: PathBuf, bits: CaptureBits) {
+        let msg = ApiMsg::DebugCommand(DebugCommand::SaveCapture(path, bits));
+        self.send_message(msg);
+    }
+
+    /// Load a capture of the current frame state for debugging. Blocks, collecting documents until the reply channel is closed.
+    pub fn load_capture(&self, path: PathBuf, ids: Option<(u32, u32)>) -> Vec<CapturedDocument> {
+        // First flush the scene builder otherwise async scenes might clobber
+        // the capture we are about to load.
+        self.flush_scene_builder();
+
+        let (tx, rx) = unbounded_channel();
+        let msg = ApiMsg::DebugCommand(DebugCommand::LoadCapture(path, ids, tx));
+        self.send_message(msg);
+
+        let mut documents = Vec::new();
+        while let Ok(captured_doc) = rx.recv() {
+            documents.push(captured_doc);
+        }
+        documents
+    }
+
+    /// Start capturing a sequence of frames at `path`; `bits` selects which stages are stored.
+    pub fn start_capture_sequence(&self, path: PathBuf, bits: CaptureBits) {
+        let msg = ApiMsg::DebugCommand(DebugCommand::StartCaptureSequence(path, bits));
+        self.send_message(msg);
+    }
+
+    /// Stop capturing sequences of frames (counterpart of `start_capture_sequence`).
+    pub fn stop_capture_sequence(&self) {
+        let msg = ApiMsg::DebugCommand(DebugCommand::StopCaptureSequence);
+        self.send_message(msg);
+    }
+
+    /// Update the state of builtin debugging facilities by sending an arbitrary `DebugCommand`.
+    pub fn send_debug_cmd(&self, cmd: DebugCommand) {
+        let msg = ApiMsg::DebugCommand(cmd);
+        self.send_message(msg);
+    }
+
+    /// Update an instance-global parameter. A failure to send the request is deliberately ignored.
+    pub fn set_parameter(&mut self, parameter: Parameter) {
+        if let Parameter::Bool(BoolParameter::Multithreading, enabled) = parameter {
+            self.resources.enable_multithreading(enabled);
+        }
+
+        let _ = self.low_priority_scene_sender.send(
+            SceneBuilderRequest::SetParameter(parameter)
+        );
+    }
+}
+
+impl Drop for RenderApi {
+    fn drop(&mut self) { // Asks the scene builder to clear this object's namespace.
+        let msg = SceneBuilderRequest::ClearNamespace(self.namespace_id);
+        let _ = self.low_priority_scene_sender.send(msg); // Best effort: a send failure is ignored rather than panicking in drop.
+    }
+}
+
+
+fn window_size_sanity_check(size: DeviceIntSize) {
+    // Panics if either dimension exceeds `MAX_RENDER_TASK_SIZE`: anything bigger
+    // than this will crash later when attempting to create a render task.
+    use crate::api::MAX_RENDER_TASK_SIZE;
+    if size.width > MAX_RENDER_TASK_SIZE || size.height > MAX_RENDER_TASK_SIZE {
+        panic!("Attempting to create a {}x{} window/document", size.width, size.height);
+    }
+}
+
+/// Collection of heap sizes, in bytes, grouped into CPU and GPU memory.
+/// cbindgen:derive-eq=false
+/// cbindgen:derive-ostream=false
+#[repr(C)]
+#[allow(missing_docs)]
+#[derive(AddAssign, Clone, Debug, Default)]
+pub struct MemoryReport {
+    //
+    // CPU Memory.
+    //
+    pub clip_stores: usize,
+    pub gpu_cache_metadata: usize,
+    pub gpu_cache_cpu_mirror: usize,
+    pub render_tasks: usize,
+    pub hit_testers: usize,
+    pub fonts: usize,
+    pub weak_fonts: usize,
+    pub images: usize,
+    pub rasterized_blobs: usize,
+    pub shader_cache: usize,
+    pub interning: InterningMemoryReport,
+    pub display_list: usize,
+    pub upload_staging_memory: usize,
+    pub swgl: usize,
+
+    //
+    // GPU memory.
+    //
+    pub gpu_cache_textures: usize,
+    pub vertex_data_textures: usize,
+    pub render_target_textures: usize,
+    pub picture_tile_textures: usize,
+    pub atlas_textures: usize,
+    pub standalone_textures: usize,
+    pub texture_cache_structures: usize,
+    pub depth_target_textures: usize,
+    pub texture_upload_pbos: usize,
+    pub swap_chain: usize,
+    pub render_texture_hosts: usize,
+    pub upload_staging_textures: usize,
+}
diff --git a/gfx/wr/webrender/src/render_backend.rs b/gfx/wr/webrender/src/render_backend.rs
new file mode 100644
index 0000000000..0119adc612
--- /dev/null
+++ b/gfx/wr/webrender/src/render_backend.rs
@@ -0,0 +1,1919 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! The high-level module responsible for managing the pipeline and preparing
+//! commands to be issued by the `Renderer`.
+//!
+//! See the comment at the top of the `renderer` module for a description of
+//! how these two pieces interact.
+
+use api::{DebugFlags, Parameter, BoolParameter, PrimitiveFlags};
+use api::{DocumentId, ExternalScrollId, HitTestResult};
+use api::{IdNamespace, PipelineId, RenderNotifier, SampledScrollOffset};
+use api::{NotificationRequest, Checkpoint, QualitySettings};
+use api::{PrimitiveKeyKind, RenderReasons};
+use api::units::*;
+use api::channel::{single_msg_channel, Sender, Receiver};
+use crate::AsyncPropertySampler;
+#[cfg(any(feature = "capture", feature = "replay"))]
+use crate::render_api::CaptureBits;
+#[cfg(feature = "replay")]
+use crate::render_api::CapturedDocument;
+use crate::render_api::{MemoryReport, TransactionMsg, ResourceUpdate, ApiMsg, FrameMsg, ClearCache, DebugCommand};
+use crate::clip::{ClipIntern, PolygonIntern, ClipStoreScratchBuffer};
+use crate::filterdata::FilterDataIntern;
+#[cfg(any(feature = "capture", feature = "replay"))]
+use crate::capture::CaptureConfig;
+use crate::composite::{CompositorKind, CompositeDescriptor};
+use crate::frame_builder::{FrameBuilder, FrameBuilderConfig, FrameScratchBuffer};
+use glyph_rasterizer::{FontInstance};
+use crate::gpu_cache::GpuCache;
+use crate::hit_test::{HitTest, HitTester, SharedHitTester};
+use crate::intern::DataStore;
+#[cfg(any(feature = "capture", feature = "replay"))]
+use crate::internal_types::{DebugOutput};
+use crate::internal_types::{FastHashMap, RenderedDocument, ResultMsg, FrameId, FrameStamp};
+use malloc_size_of::{MallocSizeOf, MallocSizeOfOps};
+use crate::picture::{PictureScratchBuffer, SliceId, TileCacheInstance, TileCacheParams, SurfaceInfo, RasterConfig};
+use crate::picture::{PicturePrimitive};
+use crate::prim_store::{PrimitiveScratchBuffer, PrimitiveInstance};
+use crate::prim_store::{PrimitiveInstanceKind, PrimTemplateCommonData};
+use crate::prim_store::interned::*;
+use crate::profiler::{self, TransactionProfile};
+use crate::render_task_graph::RenderTaskGraphBuilder;
+use crate::renderer::{FullFrameStats, PipelineInfo};
+use crate::resource_cache::ResourceCache;
+#[cfg(feature = "replay")]
+use crate::resource_cache::PlainCacheOwn;
+#[cfg(feature = "replay")]
+use crate::resource_cache::PlainResources;
+#[cfg(feature = "replay")]
+use crate::scene::Scene;
+use crate::scene::{BuiltScene, SceneProperties};
+use crate::scene_builder_thread::*;
+use crate::spatial_tree::SpatialTree;
+#[cfg(feature = "replay")]
+use crate::spatial_tree::SceneSpatialTree;
+use crate::telemetry::Telemetry;
+#[cfg(feature = "serialize")]
+use serde::{Serialize, Deserialize};
+#[cfg(feature = "replay")]
+use std::collections::hash_map::Entry::{Occupied, Vacant};
+use std::sync::Arc;
+use std::sync::atomic::{AtomicUsize, Ordering};
+use std::{mem, u32};
+#[cfg(feature = "capture")]
+use std::path::PathBuf;
+#[cfg(feature = "replay")]
+use crate::frame_builder::Frame;
+use time::precise_time_ns;
+use core::time::Duration;
+use crate::util::{Recycler, VecHelper, drain_filter};
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Copy, Clone)]
+pub struct DocumentView { // View parameters of a document; currently only wraps the scene-level view.
+    scene: SceneView,
+}
+
+/// Some rendering parameters applying at the scene level.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Copy, Clone)]
+pub struct SceneView {
+    pub device_rect: DeviceIntRect, // Rectangle of the view in device space (presumably set via `SceneMsg::SetDocumentView`; confirm).
+    pub quality_settings: QualitySettings, // Quality / performance configuration (presumably set via `SceneMsg::SetQualitySettings`; confirm).
+}
+
+enum RenderBackendStatus { // NOTE(review): presumably the outcome of handling a message in the backend loop; confirm at the use sites.
+    Continue,
+    StopRenderBackend,
+    ShutDown(Option<Sender<()>>), // Optional sender, presumably used to acknowledge a synchronous shutdown; confirm.
+}
+
+macro_rules! declare_data_stores { // Generates `DataStores` with one `DataStore<$ty>` field per `name: type` pair.
+    ( $( $name:ident : $ty:ty, )+ ) => {
+        /// A collection of resources that are shared by clips, primitives
+        /// between display lists.
+        #[cfg_attr(feature = "capture", derive(Serialize))]
+        #[cfg_attr(feature = "replay", derive(Deserialize))]
+        #[derive(Default)]
+        pub struct DataStores {
+            $(
+                pub $name: DataStore<$ty>,
+            )+
+        }
+
+        impl DataStores {
+            /// Reports CPU heap usage by adding each store's size to the matching `interning.data_stores` field.
+            fn report_memory(&self, ops: &mut MallocSizeOfOps, r: &mut MemoryReport) {
+                $(
+                    r.interning.data_stores.$name += self.$name.size_of(ops);
+                )+
+            }
+
+            fn apply_updates( // Forwards each per-interner update list to the store of the same name.
+                &mut self,
+                updates: InternerUpdates,
+                profile: &mut TransactionProfile,
+            ) {
+                $(
+                    self.$name.apply_updates(
+                        updates.$name,
+                        profile,
+                    );
+                )+
+            }
+        }
+    }
+}
+
+crate::enumerate_interners!(declare_data_stores); // Expands the macro above over the crate's list of interners.
+
+impl DataStores {
+    /// Returns the local rect for a primitive. For most primitives, this is
+    /// stored in the template. For pictures, it is obtained from the picture's surface via its
+    /// composite mode (determined during frame building); panics on pass-through pictures.
+    pub fn get_local_prim_rect(
+        &self,
+        prim_instance: &PrimitiveInstance,
+        pictures: &[PicturePrimitive],
+        surfaces: &[SurfaceInfo],
+    ) -> LayoutRect {
+        match prim_instance.kind {
+            PrimitiveInstanceKind::Picture { pic_index, .. } => {
+                let pic = &pictures[pic_index.0];
+
+                match pic.raster_config {
+                    Some(RasterConfig { surface_index, ref composite_mode, .. }) => {
+                        let surface = &surfaces[surface_index.0];
+
+                        composite_mode.get_rect(surface, None)
+                    }
+                    None => {
+                        panic!("bug: get_local_prim_rect should not be called for pass-through pictures");
+                    }
+                }
+            }
+            _ => {
+                self.as_common_data(prim_instance).prim_rect
+            }
+        }
+    }
+
+    /// Returns the local coverage (space occupied) for a primitive. For most primitives,
+    /// this is the `prim_rect` stored in the template. For pictures, it is obtained from the picture's
+    /// surface via its composite mode (determined during frame building); panics on pass-through pictures.
+    pub fn get_local_prim_coverage_rect(
+        &self,
+        prim_instance: &PrimitiveInstance,
+        pictures: &[PicturePrimitive],
+        surfaces: &[SurfaceInfo],
+    ) -> LayoutRect {
+        match prim_instance.kind {
+            PrimitiveInstanceKind::Picture { pic_index, .. } => {
+                let pic = &pictures[pic_index.0];
+
+                match pic.raster_config {
+                    Some(RasterConfig { surface_index, ref composite_mode, .. }) => {
+                        let surface = &surfaces[surface_index.0];
+
+                        composite_mode.get_coverage(surface, None)
+                    }
+                    None => {
+                        panic!("bug: get_local_prim_coverage_rect should not be called for pass-through pictures");
+                    }
+                }
+            }
+            _ => {
+                self.as_common_data(prim_instance).prim_rect
+            }
+        }
+    }
+
+    /// Returns true if this primitive might need repetition (always false for pictures).
+    // TODO(gw): This seems like the wrong place for this - maybe this flag should
+    //           not be in the common prim template data?
+    pub fn prim_may_need_repetition(
+        &self,
+        prim_instance: &PrimitiveInstance,
+    ) -> bool {
+        match prim_instance.kind {
+            PrimitiveInstanceKind::Picture { .. } => {
+                false
+            }
+            _ => {
+                self.as_common_data(prim_instance).may_need_repetition
+            }
+        }
+    }
+
+    /// Returns true if this primitive has anti-aliasing enabled in its common flags (always false for pictures).
+    pub fn prim_has_anti_aliasing(
+        &self,
+        prim_instance: &PrimitiveInstance,
+    ) -> bool {
+        match prim_instance.kind {
+            PrimitiveInstanceKind::Picture { .. } => {
+                false
+            }
+            _ => {
+                self.as_common_data(prim_instance).flags.contains(PrimitiveFlags::ANTIALISED)
+            }
+        }
+    }
+
+    pub fn as_common_data( // Returns the common template data of a primitive instance; panics for pictures.
+        &self,
+        prim_inst: &PrimitiveInstance
+    ) -> &PrimTemplateCommonData {
+        match prim_inst.kind { // Each kind is looked up by its data handle in the store that holds its template.
+            PrimitiveInstanceKind::Rectangle { data_handle, .. } |
+            PrimitiveInstanceKind::Clear { data_handle, .. } => {
+                let prim_data = &self.prim[data_handle];
+                &prim_data.common
+            }
+            PrimitiveInstanceKind::Image { data_handle, .. } => {
+                let prim_data = &self.image[data_handle];
+                &prim_data.common
+            }
+            PrimitiveInstanceKind::ImageBorder { data_handle, .. } => {
+                let prim_data = &self.image_border[data_handle];
+                &prim_data.common
+            }
+            PrimitiveInstanceKind::LineDecoration { data_handle, .. } => {
+                let prim_data = &self.line_decoration[data_handle];
+                &prim_data.common
+            }
+            PrimitiveInstanceKind::LinearGradient { data_handle, .. }
+            | PrimitiveInstanceKind::CachedLinearGradient { data_handle, .. } => {
+                let prim_data = &self.linear_grad[data_handle];
+                &prim_data.common
+            }
+            PrimitiveInstanceKind::NormalBorder { data_handle, .. } => {
+                let prim_data = &self.normal_border[data_handle];
+                &prim_data.common
+            }
+            PrimitiveInstanceKind::Picture { .. } => {
+                panic!("BUG: picture prims don't have common data!");
+            }
+            PrimitiveInstanceKind::RadialGradient { data_handle, .. } => {
+                let prim_data = &self.radial_grad[data_handle];
+                &prim_data.common
+            }
+            PrimitiveInstanceKind::ConicGradient { data_handle, .. } => {
+                let prim_data = &self.conic_grad[data_handle];
+                &prim_data.common
+            }
+            PrimitiveInstanceKind::TextRun { data_handle, .. }  => {
+                let prim_data = &self.text_run[data_handle];
+                &prim_data.common
+            }
+            PrimitiveInstanceKind::YuvImage { data_handle, .. } => {
+                let prim_data = &self.yuv_image[data_handle];
+                &prim_data.common
+            }
+            PrimitiveInstanceKind::BackdropCapture { data_handle, .. } => {
+                let prim_data = &self.backdrop_capture[data_handle];
+                &prim_data.common
+            }
+            PrimitiveInstanceKind::BackdropRender { data_handle, .. } => {
+                let prim_data = &self.backdrop_render[data_handle];
+                &prim_data.common
+            }
+        }
+    }
+}
+
+/// Groups the primitive, picture, frame and clip store scratch buffers so that
+/// they can be driven together through the methods on `ScratchBuffer`.
+#[derive(Default)]
+pub struct ScratchBuffer {
+    /// Scratch data for primitives.
+    pub primitive: PrimitiveScratchBuffer,
+    /// Scratch data for pictures.
+    pub picture: PictureScratchBuffer,
+    /// Scratch data for the frame.
+    pub frame: FrameScratchBuffer,
+    /// Scratch data for the clip store.
+    pub clip_store: ClipStoreScratchBuffer,
+}
+
+impl ScratchBuffer {
+    /// Forwards the start-of-frame notification to the primitive, picture and
+    /// frame scratch buffers (the clip store buffer is not notified).
+    pub fn begin_frame(&mut self) {
+        let Self { primitive, picture, frame, .. } = self;
+
+        primitive.begin_frame();
+        picture.begin_frame();
+        frame.begin_frame();
+    }
+
+    /// Forwards the end-of-frame notification to the primitive scratch buffer.
+    pub fn end_frame(&mut self) {
+        let Self { primitive, .. } = self;
+
+        primitive.end_frame();
+    }
+
+    /// Lets the primitive and picture scratch buffers recycle their storage
+    /// through `recycler`.
+    pub fn recycle(&mut self, recycler: &mut Recycler) {
+        let Self { primitive, picture, .. } = self;
+
+        primitive.recycle(recycler);
+        picture.recycle(recycler);
+    }
+
+    /// Replaces the picture, frame and clip store scratch buffers with fresh
+    /// default instances. The primitive buffer is deliberately left alone, see
+    /// the TODO below.
+    pub fn memory_pressure(&mut self) {
+        // TODO: causes browser chrome test crashes on windows.
+        //self.primitive = Default::default();
+        self.picture = PictureScratchBuffer::default();
+        self.frame = FrameScratchBuffer::default();
+        self.clip_store = ClipStoreScratchBuffer::default();
+    }
+}
+
+/// Per-document state held by the render backend: the latest built scene, the
+/// retained frame building state and the validity flags that track whether a
+/// new frame or hit tester has to be produced.
+struct Document {
+    /// The id of this document
+    id: DocumentId,
+
+    /// Temporary list of removed pipelines received from the scene builder
+    /// thread and forwarded to the renderer.
+    removed_pipelines: Vec<(PipelineId, DocumentId)>,
+
+    /// The current view of the document (device rect and quality settings).
+    view: DocumentView,
+
+    /// The id and time of the current frame.
+    stamp: FrameStamp,
+
+    /// The latest built scene, usable to build frames, as
+    /// received from the scene builder thread.
+    scene: BuiltScene,
+
+    /// The builder object that produces frames, kept around to preserve some retained state.
+    frame_builder: FrameBuilder,
+
+    /// Allows graphs of render tasks to be created, and then built into an immutable graph output.
+    rg_builder: RenderTaskGraphBuilder,
+
+    /// A data structure to allow hit testing against rendered frames. This is updated
+    /// every time we produce a fully rendered frame.
+    hit_tester: Option<Arc<HitTester>>,
+    /// To avoid synchronous messaging we update a shared hit-tester that other threads
+    /// can query.
+    shared_hit_tester: Arc<SharedHitTester>,
+
+    /// Properties that are resolved during frame building and can be changed at any time
+    /// without requiring the scene to be re-built.
+    dynamic_properties: SceneProperties,
+
+    /// Track whether the last built frame is up to date or if it will need to be re-built
+    /// before rendering again.
+    frame_is_valid: bool,
+    /// Cleared whenever the hit tester has to be rebuilt before its next use.
+    hit_tester_is_valid: bool,
+    rendered_frame_is_valid: bool,
+    /// We track this information to be able to display debugging information from the
+    /// renderer.
+    has_built_scene: bool,
+
+    /// The data stores handed to the frame builder; interner updates from
+    /// transactions are applied to them.
+    data_stores: DataStores,
+
+    /// Retained frame-building version of the spatial tree
+    spatial_tree: SpatialTree,
+
+    /// Contains various vecs of data that is used only during frame building,
+    /// where we want to recycle the memory each new display list, to avoid constantly
+    /// re-allocating and moving memory around.
+    scratch: ScratchBuffer,
+
+    /// Only present when the "replay" feature is enabled.
+    #[cfg(feature = "replay")]
+    loaded_scene: Scene,
+
+    /// Tracks the state of the picture cache tiles that were composited on the previous frame.
+    prev_composite_descriptor: CompositeDescriptor,
+
+    /// Tracks if we need to invalidate dirty rects for this document, due to the picture
+    /// cache slice configuration having changed when a new scene is swapped in.
+    dirty_rects_are_valid: bool,
+
+    /// Profiling counters accumulated for this document until the next frame is built.
+    profile: TransactionProfile,
+    /// Frame statistics merged from the transactions applied to this document.
+    frame_stats: Option<FullFrameStats>,
+}
+
+impl Document {
+    /// Creates an empty document with the given id whose device rect is derived
+    /// from `size`. The document starts without a built scene and with all of
+    /// its frame / hit tester validity flags cleared.
+    pub fn new(
+        id: DocumentId,
+        size: DeviceIntSize,
+    ) -> Self {
+        let view = DocumentView {
+            scene: SceneView {
+                device_rect: size.into(),
+                quality_settings: QualitySettings::default(),
+            },
+        };
+
+        Self {
+            id,
+            removed_pipelines: Vec::new(),
+            view,
+            stamp: FrameStamp::first(id),
+            scene: BuiltScene::empty(),
+            frame_builder: FrameBuilder::new(),
+            rg_builder: RenderTaskGraphBuilder::new(),
+            hit_tester: None,
+            shared_hit_tester: Arc::new(SharedHitTester::new()),
+            dynamic_properties: SceneProperties::new(),
+            // Nothing has been built yet, so neither the frame nor the hit
+            // tester can be considered valid.
+            frame_is_valid: false,
+            hit_tester_is_valid: false,
+            rendered_frame_is_valid: false,
+            has_built_scene: false,
+            data_stores: DataStores::default(),
+            spatial_tree: SpatialTree::new(),
+            scratch: ScratchBuffer::default(),
+            #[cfg(feature = "replay")]
+            loaded_scene: Scene::new(),
+            prev_composite_descriptor: CompositeDescriptor::empty(),
+            dirty_rects_are_valid: true,
+            profile: TransactionProfile::new(),
+            frame_stats: None,
+        }
+    }
+
+    /// Whether the current scene reports having a root pipeline.
+    fn can_render(&self) -> bool {
+        let Self { scene, .. } = self;
+        scene.has_root_pipeline
+    }
+
+    /// Whether the document's device rect is non-empty.
+    fn has_pixels(&self) -> bool {
+        let device_rect = &self.view.scene.device_rect;
+        !device_rect.is_empty()
+    }
+
+    /// Applies a single frame message to this document.
+    ///
+    /// The returned `DocumentOps` has `scroll` set only for
+    /// `FrameMsg::SetScrollOffsets`; every other message yields a no-op result.
+    fn process_frame_msg(
+        &mut self,
+        message: FrameMsg,
+    ) -> DocumentOps {
+        let mut ops = DocumentOps::nop();
+
+        match message {
+            FrameMsg::UpdateEpoch(pipeline_id, epoch) => {
+                self.scene.pipeline_epochs.insert(pipeline_id, epoch);
+            }
+            FrameMsg::HitTest(point, tx) => {
+                // Make sure the test runs against an up to date hit tester.
+                if !self.hit_tester_is_valid {
+                    self.rebuild_hit_tester();
+                }
+
+                let result = if let Some(hit_tester) = &self.hit_tester {
+                    hit_tester.hit_test(HitTest::new(point))
+                } else {
+                    HitTestResult { items: Vec::new() }
+                };
+
+                tx.send(result).unwrap();
+            }
+            FrameMsg::RequestHitTester(tx) => {
+                let shared = self.shared_hit_tester.clone();
+                tx.send(shared).unwrap();
+            }
+            FrameMsg::SetScrollOffsets(id, offset) => {
+                profile_scope!("SetScrollOffset");
+
+                // Only invalidate when the scroll node actually moved.
+                let moved = self.set_scroll_offsets(id, offset);
+                if moved {
+                    self.hit_tester_is_valid = false;
+                    self.frame_is_valid = false;
+                }
+
+                ops.scroll = true;
+            }
+            FrameMsg::ResetDynamicProperties => {
+                self.dynamic_properties.reset_properties();
+            }
+            FrameMsg::AppendDynamicProperties(property_bindings) => {
+                self.dynamic_properties.add_properties(property_bindings);
+            }
+            FrameMsg::AppendDynamicTransformProperties(property_bindings) => {
+                self.dynamic_properties.add_transforms(property_bindings);
+            }
+            FrameMsg::SetIsTransformAsyncZooming(is_zooming, animation_id) => {
+                let found = self.spatial_tree.find_spatial_node_by_anim_id(animation_id);
+
+                if let Some(node_index) = found {
+                    let node = self.spatial_tree.get_spatial_node_mut(node_index);
+
+                    // A frame rebuild is only needed when the state flips.
+                    if node.is_async_zooming != is_zooming {
+                        node.is_async_zooming = is_zooming;
+                        self.frame_is_valid = false;
+                    }
+                }
+            }
+        }
+
+        ops
+    }
+
+    /// Advances the frame stamp and builds a new frame for the current scene.
+    ///
+    /// Marks the frame and the dirty rects as valid, records the time spent in
+    /// the profile (and adds it to `frame_stats` when provided), and returns the
+    /// result as a `RenderedDocument`.
+    fn build_frame(
+        &mut self,
+        resource_cache: &mut ResourceCache,
+        gpu_cache: &mut GpuCache,
+        debug_flags: DebugFlags,
+        tile_caches: &mut FastHashMap<SliceId, Box<TileCacheInstance>>,
+        frame_stats: Option<FullFrameStats>,
+        render_reasons: RenderReasons,
+    ) -> RenderedDocument {
+        let start_ns = precise_time_ns();
+
+        // Move on to the next frame before building anything.
+        self.stamp.advance();
+
+        assert!(self.stamp.frame_id() != FrameId::INVALID,
+                "First frame increment must happen before build_frame()");
+
+        let frame = self.frame_builder.build(
+            &mut self.scene,
+            resource_cache,
+            gpu_cache,
+            &mut self.rg_builder,
+            self.stamp,
+            self.view.scene.device_rect.min,
+            &self.dynamic_properties,
+            &mut self.data_stores,
+            &mut self.scratch,
+            debug_flags,
+            tile_caches,
+            &mut self.spatial_tree,
+            self.dirty_rects_are_valid,
+            &mut self.profile,
+        );
+
+        self.frame_is_valid = true;
+        self.dirty_rects_are_valid = true;
+
+        // Report the "scene was built" flag with this frame and clear it.
+        let is_new_scene = mem::replace(&mut self.has_built_scene, false);
+
+        let elapsed_ms = profiler::ns_to_ms(precise_time_ns() - start_ns);
+        self.profile.set(profiler::FRAME_BUILDING_TIME, elapsed_ms);
+
+        let frame_stats = frame_stats.map(|mut stats| {
+            stats.frame_build_time += elapsed_ms;
+            stats
+        });
+        let profile = self.profile.take_and_reset();
+
+        RenderedDocument {
+            frame,
+            is_new_scene,
+            profile,
+            frame_stats,
+            render_reasons,
+        }
+    }
+
+    /// Updates the spatial tree with the current dynamic properties, creates a
+    /// new hit tester from the scene, and publishes it both locally and through
+    /// the shared hit tester.
+    fn rebuild_hit_tester(&mut self) {
+        self.spatial_tree.update_tree(&self.dynamic_properties);
+
+        let fresh = self.scene.create_hit_tester(&self.spatial_tree);
+        let fresh = Arc::new(fresh);
+
+        self.hit_tester = Some(fresh.clone());
+        self.shared_hit_tester.update(fresh);
+        self.hit_tester_is_valid = true;
+    }
+
+    /// Builds a `PipelineInfo` holding the scene's pipeline epochs, keyed by
+    /// `(pipeline id, document id)`, together with the pending list of removed
+    /// pipelines, which is taken out of the document.
+    pub fn updated_pipeline_info(&mut self) -> PipelineInfo {
+        let removed_pipelines = self.removed_pipelines.take_and_preallocate();
+
+        let document_id = self.id;
+        let epochs = self.scene.pipeline_epochs
+            .iter()
+            .map(|(&pipeline_id, &epoch)| ((pipeline_id, document_id), epoch))
+            .collect();
+
+        PipelineInfo { epochs, removed_pipelines }
+    }
+
+    /// Forwards the sampled scroll offsets for the scroll node `id` to the
+    /// spatial tree.
+    ///
+    /// Returns true if the node actually changed position or false otherwise.
+    pub fn set_scroll_offsets(
+        &mut self,
+        id: ExternalScrollId,
+        offsets: Vec<SampledScrollOffset>,
+    ) -> bool {
+        let Self { spatial_tree, .. } = self;
+        spatial_tree.set_scroll_offsets(id, offsets)
+    }
+
+    /// Reconciles the backend's tile caches with those requested by a newly
+    /// swapped-in scene.
+    ///
+    /// Caches whose slice id is requested again are kept and prepared for the
+    /// new scene, missing ones are created, and caches that are no longer
+    /// requested are destroyed. When anything was destroyed the dirty rects of
+    /// this document are marked as invalid.
+    fn update_tile_caches_for_new_scene(
+        &mut self,
+        mut requested_tile_caches: FastHashMap<SliceId, TileCacheParams>,
+        tile_caches: &mut FastHashMap<SliceId, Box<TileCacheInstance>>,
+        resource_cache: &mut ResourceCache,
+    ) {
+        let mut new_tile_caches = FastHashMap::default();
+        new_tile_caches.reserve(requested_tile_caches.len());
+
+        // Walk the caches wanted by the new scene, reusing an existing cache
+        // for the same slice whenever there is one.
+        for (slice_id, params) in requested_tile_caches.drain() {
+            let tile_cache = if let Some(mut reused) = tile_caches.remove(&slice_id) {
+                // Keep the existing cache, but hand it the new parameters.
+                reused.prepare_for_new_scene(params, resource_cache);
+                reused
+            } else {
+                // Nothing to reuse for this slice, start from scratch.
+                Box::new(TileCacheInstance::new(params))
+            };
+
+            new_tile_caches.insert(slice_id, tile_cache);
+        }
+
+        // Install the new map. Whatever remained in the old one was not
+        // requested by the new scene.
+        let leftover_tile_caches = mem::replace(tile_caches, new_tile_caches);
+
+        if leftover_tile_caches.is_empty() {
+            return;
+        }
+
+        // The slice configuration changed, so the current dirty rects can't be
+        // relied upon for the next composite.
+        self.dirty_rects_are_valid = false;
+
+        // Release any native surfaces owned by the now unused caches.
+        for (_, tile_cache) in leftover_tile_caches {
+            tile_cache.destroy(resource_cache);
+        }
+    }
+
+    /// Swaps a freshly built scene into this document.
+    ///
+    /// Invalidates the current frame and hit tester, updates the tile caches to
+    /// match the configuration requested by the new scene, stores the scene and
+    /// finally recycles the scratch buffers.
+    pub fn new_async_scene_ready(
+        &mut self,
+        mut built_scene: BuiltScene,
+        recycler: &mut Recycler,
+        tile_caches: &mut FastHashMap<SliceId, Box<TileCacheInstance>>,
+        resource_cache: &mut ResourceCache,
+    ) {
+        self.frame_is_valid = false;
+        self.hit_tester_is_valid = false;
+
+        // Move the requested tile cache parameters out of the scene, leaving
+        // an empty map behind.
+        let requested_tile_caches = mem::take(
+            &mut built_scene.tile_cache_config.tile_caches,
+        );
+        self.update_tile_caches_for_new_scene(
+            requested_tile_caches,
+            tile_caches,
+            resource_cache,
+        );
+
+        self.scene = built_scene;
+        self.scratch.recycle(recycler);
+    }
+}
+
+/// Describes what processing a frame message did to a document
+/// (returned by `Document::process_frame_msg`).
+struct DocumentOps {
+    /// True when the processed message was a scroll offset update.
+    scroll: bool,
+}
+
+impl DocumentOps {
+    /// The result of a message that did not scroll anything.
+    fn nop() -> Self {
+        Self { scroll: false }
+    }
+}
+
+/// Counter from which unique namespace ids for WR resource identification are
+/// allocated (see `RenderBackend::next_namespace_id`).
+/// The namespace_id should start from 1.
+static NEXT_NAMESPACE_ID: AtomicUsize = AtomicUsize::new(1);
+
+// Plain-data view of the render backend that is serialized when the "capture"
+// feature is enabled and deserialized when the "replay" feature is enabled.
+#[cfg(any(feature = "capture", feature = "replay"))]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct PlainRenderBackend {
+    // The frame builder configuration of the backend.
+    frame_config: FrameBuilderConfig,
+    // The view of each document, keyed by document id.
+    documents: FastHashMap<DocumentId, DocumentView>,
+    // NOTE(review): presumably identifies the matching resource capture - confirm
+    // against the capture / replay code.
+    resource_sequence_id: u32,
+}
+
+/// The render backend is responsible for transforming high level display lists into
+/// GPU-friendly work which is then submitted to the renderer in the form of a frame::Frame.
+///
+/// The render backend operates on its own thread.
+pub struct RenderBackend {
+    /// Incoming API messages; `run` blocks on this channel.
+    api_rx: Receiver<ApiMsg>,
+    /// Channel on which `ResultMsg`s are sent.
+    result_tx: Sender<ResultMsg>,
+    /// Channel to the scene builder.
+    scene_tx: Sender<SceneBuilderRequest>,
+
+    gpu_cache: GpuCache,
+    resource_cache: ResourceCache,
+
+    /// The current frame builder configuration; debug commands may change it.
+    frame_config: FrameBuilderConfig,
+    /// The compositor kind the backend was created with, restored when the
+    /// native compositor is re-enabled through a debug command.
+    default_compositor_kind: CompositorKind,
+    /// All live documents, keyed by their id.
+    documents: FastHashMap<DocumentId, Document>,
+
+    notifier: Box<dyn RenderNotifier>,
+    /// Optional sampler, registered when `run` starts and deregistered when it ends.
+    sampler: Option<Box<dyn AsyncPropertySampler + Send>>,
+    size_of_ops: Option<MallocSizeOfOps>,
+    debug_flags: DebugFlags,
+    /// When true, namespace ids are provided by the client (`CloneApiByClient`)
+    /// rather than allocated by the backend (`CloneApi`).
+    namespace_alloc_by_client: bool,
+
+    recycler: Recycler,
+
+    #[cfg(feature = "capture")]
+    /// If `Some`, do 'sequence capture' logging, recording updated documents,
+    /// frames, etc. This is set only through messages from the scene builder,
+    /// so all control of sequence capture goes through there.
+    capture_config: Option<CaptureConfig>,
+
+    #[cfg(feature = "replay")]
+    loaded_resource_sequence_id: u32,
+
+    /// A map of tile caches. These are stored in the backend as they are
+    /// persisted between both frames and scenes.
+    tile_caches: FastHashMap<SliceId, Box<TileCacheInstance>>,
+}
+
+impl RenderBackend {
+    /// Creates a render backend from its channels, resource cache and
+    /// configuration.
+    ///
+    /// The backend starts with no documents, an empty GPU cache and no tile
+    /// caches. The compositor kind found in `frame_config` is remembered as the
+    /// default compositor kind.
+    pub fn new(
+        api_rx: Receiver<ApiMsg>,
+        result_tx: Sender<ResultMsg>,
+        scene_tx: Sender<SceneBuilderRequest>,
+        resource_cache: ResourceCache,
+        notifier: Box<dyn RenderNotifier>,
+        frame_config: FrameBuilderConfig,
+        sampler: Option<Box<dyn AsyncPropertySampler + Send>>,
+        size_of_ops: Option<MallocSizeOfOps>,
+        debug_flags: DebugFlags,
+        namespace_alloc_by_client: bool,
+    ) -> RenderBackend {
+        let default_compositor_kind = frame_config.compositor_kind;
+
+        Self {
+            api_rx,
+            result_tx,
+            scene_tx,
+            gpu_cache: GpuCache::new(),
+            resource_cache,
+            frame_config,
+            default_compositor_kind,
+            documents: FastHashMap::default(),
+            notifier,
+            sampler,
+            size_of_ops,
+            debug_flags,
+            namespace_alloc_by_client,
+            recycler: Recycler::new(),
+            #[cfg(feature = "capture")]
+            capture_config: None,
+            #[cfg(feature = "replay")]
+            loaded_resource_sequence_id: 0,
+            tile_caches: FastHashMap::default(),
+        }
+    }
+
+    /// Allocates the next namespace id from the global `NEXT_NAMESPACE_ID`
+    /// counter.
+    pub fn next_namespace_id() -> IdNamespace {
+        let raw_id = NEXT_NAMESPACE_ID.fetch_add(1, Ordering::Relaxed);
+        IdNamespace(raw_id as u32)
+    }
+
+    /// Runs the render backend message loop until shut down.
+    ///
+    /// Messages are processed until `process_api_msg` stops returning
+    /// `Continue` or the API channel is closed. After a `StopRenderBackend`
+    /// status the loop keeps receiving, handling only a few scene builder
+    /// results, until the scene builder reports its shut down. Pending messages
+    /// are then drained, the documents dropped, the notifier and sampler
+    /// informed, and the shut down sender (if any) is signalled.
+    pub fn run(&mut self) {
+        let mut frame_counter: u32 = 0;
+        let mut status = RenderBackendStatus::Continue;
+
+        if let Some(sampler) = &self.sampler {
+            sampler.register();
+        }
+
+        while let RenderBackendStatus::Continue = status {
+            status = if let Ok(msg) = self.api_rx.recv() {
+                self.process_api_msg(msg, &mut frame_counter)
+            } else {
+                // The channel is closed, nothing more will ever arrive.
+                RenderBackendStatus::ShutDown(None)
+            };
+        }
+
+        if let RenderBackendStatus::StopRenderBackend = status {
+            while let Ok(msg) = self.api_rx.recv() {
+                // Only results coming from the scene builder are of interest
+                // at this point, everything else is dropped.
+                if let ApiMsg::SceneBuilderResult(result) = msg {
+                    match result {
+                        SceneBuilderResult::ExternalEvent(evt) => {
+                            self.notifier.external_event(evt);
+                        }
+                        SceneBuilderResult::FlushComplete(tx) => {
+                            // If somebody's blocked waiting for a flush, how did they
+                            // trigger the RB thread to shut down? This shouldn't happen
+                            // but handle it gracefully anyway.
+                            debug_assert!(false);
+                            tx.send(()).ok();
+                        }
+                        SceneBuilderResult::ShutDown(sender) => {
+                            info!("Recycling stats: {:?}", self.recycler);
+                            status = RenderBackendStatus::ShutDown(sender);
+                            break;
+                        }
+                        _ => {},
+                    }
+                }
+            }
+        }
+
+        // Ensure we read everything the scene builder is sending us from
+        // inflight messages, otherwise the scene builder might panic.
+        while let Ok(msg) = self.api_rx.try_recv() {
+            if let ApiMsg::SceneBuilderResult(SceneBuilderResult::FlushComplete(tx)) = msg {
+                // If somebody's blocked waiting for a flush, how did they
+                // trigger the RB thread to shut down? This shouldn't happen
+                // but handle it gracefully anyway.
+                debug_assert!(false);
+                tx.send(()).ok();
+            }
+        }
+
+        self.documents.clear();
+
+        self.notifier.shut_down();
+
+        if let Some(sampler) = &self.sampler {
+            sampler.deregister();
+        }
+
+        // Let whoever requested the shut down know that it has completed.
+        if let RenderBackendStatus::ShutDown(Some(sender)) = status {
+            let _ = sender.send(());
+        }
+    }
+
+    /// Applies a batch of built transactions to their documents.
+    ///
+    /// For every transaction whose document still exists this merges the
+    /// transaction's view, profile and frame stats into the document, applies
+    /// spatial tree and interner updates, swaps in the built scene (if any),
+    /// rebuilds the hit tester when needed and then calls `update_document`.
+    /// When `result_tx` is provided, a `SceneSwapResult` is sent on it for each
+    /// transaction: `Complete` after the swap, or `Aborted` when the document
+    /// no longer exists.
+    ///
+    /// Returns the logical OR of the values returned by `update_document`.
+    fn process_transaction(
+        &mut self,
+        mut txns: Vec<Box<BuiltTransaction>>,
+        result_tx: Option<Sender<SceneSwapResult>>,
+        frame_counter: &mut u32,
+    ) -> bool {
+        self.prepare_for_frames();
+        // The closure tells whether a document is targeted by one of these transactions.
+        self.maybe_force_nop_documents(
+            frame_counter,
+            |document_id| txns.iter().any(|txn| txn.document_id == document_id));
+
+        let mut built_frame = false;
+        for mut txn in txns.drain(..) {
+           // Remember this now: `built_scene` is taken out of the transaction below.
+           let has_built_scene = txn.built_scene.is_some();
+
+            if let Some(doc) = self.documents.get_mut(&txn.document_id) {
+                doc.removed_pipelines.append(&mut txn.removed_pipelines);
+                doc.view.scene = txn.view;
+                doc.profile.merge(&mut txn.profile);
+
+                // Accumulate the stats with whatever the document already holds.
+                doc.frame_stats = if let Some(stats) = &doc.frame_stats {
+                    Some(stats.merge(&txn.frame_stats))
+                } else {
+                    Some(txn.frame_stats)
+                };
+
+                if let Some(updates) = txn.spatial_tree_updates.take() {
+                    doc.spatial_tree.apply_updates(updates);
+                }
+
+                if let Some(built_scene) = txn.built_scene.take() {
+                    doc.new_async_scene_ready(
+                        built_scene,
+                        &mut self.recycler,
+                        &mut self.tile_caches,
+                        &mut self.resource_cache,
+                    );
+                }
+
+                // If there are any additions or removals of clip modes
+                // during the scene build, apply them to the data store now.
+                // This needs to happen before we build the hit tester.
+                if let Some(updates) = txn.interner_updates.take() {
+                    doc.data_stores.apply_updates(updates, &mut doc.profile);
+                }
+
+                // Build the hit tester while the APZ lock is held so that its content
+                // is in sync with the gecko APZ tree.
+                if !doc.hit_tester_is_valid {
+                    doc.rebuild_hit_tester();
+                }
+
+                if let Some(ref tx) = result_tx {
+                    let (resume_tx, resume_rx) = single_msg_channel();
+                    tx.send(SceneSwapResult::Complete(resume_tx)).unwrap();
+                    // Block until the post-swap hook has completed on
+                    // the scene builder thread. We need to do this before
+                    // we can sample from the sampler hook which might happen
+                    // in the update_document call below.
+                    resume_rx.recv().ok();
+                }
+
+                self.resource_cache.add_rasterized_blob_images(
+                    txn.rasterized_blobs.take(),
+                    &mut doc.profile,
+                );
+
+            } else {
+                // The document was removed while we were building it, skip it.
+                // TODO: we might want to just ensure that removed documents are
+                // always forwarded to the scene builder thread to avoid this case.
+                if let Some(ref tx) = result_tx {
+                    tx.send(SceneSwapResult::Aborted).unwrap();
+                }
+                continue;
+            }
+
+            built_frame |= self.update_document(
+                txn.document_id,
+                txn.resource_updates.take(),
+                txn.frame_ops.take(),
+                txn.notifications.take(),
+                txn.render_frame,
+                RenderReasons::SCENE,
+                None,
+                txn.invalidate_rendered_frame,
+                frame_counter,
+                has_built_scene,
+                None,
+            );
+        }
+
+        built_frame
+    }
+
+    /// Handles a single message received on the API channel.
+    ///
+    /// Most messages are handled in place and yield
+    /// `RenderBackendStatus::Continue`. Debug commands that are not consumed by
+    /// the backend are forwarded to the renderer as `ResultMsg::DebugCommand`.
+    /// Scene builder results are delegated to `process_scene_builder_result`,
+    /// whose status is returned as is.
+    fn process_api_msg(
+        &mut self,
+        msg: ApiMsg,
+        frame_counter: &mut u32,
+    ) -> RenderBackendStatus {
+        match msg {
+            ApiMsg::CloneApi(sender) => {
+                assert!(!self.namespace_alloc_by_client);
+                sender.send(Self::next_namespace_id()).unwrap();
+            }
+            ApiMsg::CloneApiByClient(namespace_id) => {
+                assert!(self.namespace_alloc_by_client);
+                debug_assert!(!self.documents.iter().any(|(did, _doc)| did.namespace_id == namespace_id));
+            }
+            ApiMsg::AddDocument(document_id, initial_size) => {
+                let document = Document::new(
+                    document_id,
+                    initial_size,
+                );
+                let old = self.documents.insert(document_id, document);
+                debug_assert!(old.is_none());
+            }
+            ApiMsg::MemoryPressure => {
+                // This is drastic. It will basically flush everything out of the cache,
+                // and the next frame will have to rebuild all of its resources.
+                // We may want to look into something less extreme, but on the other hand this
+                // should only be used in situations where we are running low enough on memory
+                // that we risk crashing if we don't do something about it.
+                // The advantage of clearing the cache completely is that it gets rid of any
+                // remaining fragmentation that could have persisted if we kept around the most
+                // recently used resources.
+                self.resource_cache.clear(ClearCache::all());
+
+                self.gpu_cache.clear();
+
+                for (_, doc) in &mut self.documents {
+                    doc.scratch.memory_pressure();
+                }
+
+                // The tile caches are owned by the backend rather than by any
+                // single document, so notify each of them exactly once instead
+                // of once per document.
+                for tile_cache in self.tile_caches.values_mut() {
+                    tile_cache.memory_pressure(&mut self.resource_cache);
+                }
+
+                let resource_updates = self.resource_cache.pending_updates();
+                let msg = ResultMsg::UpdateResources {
+                    resource_updates,
+                    memory_pressure: true,
+                };
+                self.result_tx.send(msg).unwrap();
+                self.notifier.wake_up(false);
+            }
+            ApiMsg::ReportMemory(tx) => {
+                self.report_memory(tx);
+            }
+            ApiMsg::DebugCommand(option) => {
+                // Arms that fully handle the command return early; the others
+                // produce a message that is sent to the renderer below.
+                let msg = match option {
+                    DebugCommand::SetPictureTileSize(tile_size) => {
+                        self.frame_config.tile_size_override = tile_size;
+                        self.update_frame_builder_config();
+
+                        return RenderBackendStatus::Continue;
+                    }
+                    DebugCommand::SetMaximumSurfaceSize(surface_size) => {
+                        self.frame_config.max_surface_override = surface_size;
+                        self.update_frame_builder_config();
+
+                        return RenderBackendStatus::Continue;
+                    }
+                    #[cfg(feature = "capture")]
+                    DebugCommand::SaveCapture(root, bits) => {
+                        let output = self.save_capture(root, bits);
+                        ResultMsg::DebugOutput(output)
+                    },
+                    #[cfg(feature = "capture")]
+                    DebugCommand::StartCaptureSequence(root, bits) => {
+                        self.start_capture_sequence(root, bits);
+                        return RenderBackendStatus::Continue;
+                    },
+                    #[cfg(feature = "capture")]
+                    DebugCommand::StopCaptureSequence => {
+                        self.stop_capture_sequence();
+                        return RenderBackendStatus::Continue;
+                    },
+                    #[cfg(feature = "replay")]
+                    DebugCommand::LoadCapture(path, ids, tx) => {
+                        NEXT_NAMESPACE_ID.fetch_add(1, Ordering::Relaxed);
+                        *frame_counter += 1;
+
+                        let mut config = CaptureConfig::new(path, CaptureBits::all());
+                        if let Some((scene_id, frame_id)) = ids {
+                            config.scene_id = scene_id;
+                            config.frame_id = frame_id;
+                        }
+
+                        self.load_capture(config);
+
+                        for (id, doc) in &self.documents {
+                            let captured = CapturedDocument {
+                                document_id: *id,
+                                root_pipeline_id: doc.loaded_scene.root_pipeline_id,
+                            };
+                            tx.send(captured).unwrap();
+                        }
+
+                        // Note: we can't pass `LoadCapture` here since it needs to arrive
+                        // before the `PublishDocument` messages sent by `load_capture`.
+                        return RenderBackendStatus::Continue;
+                    }
+                    DebugCommand::ClearCaches(mask) => {
+                        self.resource_cache.clear(mask);
+                        return RenderBackendStatus::Continue;
+                    }
+                    DebugCommand::EnableNativeCompositor(enable) => {
+                        // Default CompositorKind should be Native
+                        if let CompositorKind::Draw { .. } = self.default_compositor_kind {
+                            unreachable!();
+                        }
+
+                        let compositor_kind = if enable {
+                            self.default_compositor_kind
+                        } else {
+                            CompositorKind::default()
+                        };
+
+                        for (_, doc) in &mut self.documents {
+                            doc.scene.config.compositor_kind = compositor_kind;
+                            doc.frame_is_valid = false;
+                        }
+
+                        self.frame_config.compositor_kind = compositor_kind;
+                        self.update_frame_builder_config();
+
+                        // We don't want to forward this message to the renderer.
+                        return RenderBackendStatus::Continue;
+                    }
+                    DebugCommand::SetBatchingLookback(count) => {
+                        self.frame_config.batch_lookback_count = count as usize;
+                        self.update_frame_builder_config();
+
+                        return RenderBackendStatus::Continue;
+                    }
+                    DebugCommand::SimulateLongSceneBuild(time_ms) => {
+                        let _ = self.scene_tx.send(SceneBuilderRequest::SimulateLongSceneBuild(time_ms));
+                        return RenderBackendStatus::Continue;
+                    }
+                    DebugCommand::SetFlags(flags) => {
+                        self.resource_cache.set_debug_flags(flags);
+                        self.gpu_cache.set_debug_flags(flags);
+
+                        let force_invalidation = flags.contains(DebugFlags::FORCE_PICTURE_INVALIDATION);
+                        if self.frame_config.force_invalidation != force_invalidation {
+                            self.frame_config.force_invalidation = force_invalidation;
+                            self.update_frame_builder_config();
+                        }
+
+                        // If we're toggling on the GPU cache debug display, we
+                        // need to blow away the cache. This is because we only
+                        // send allocation/free notifications to the renderer
+                        // thread when the debug display is enabled, and thus
+                        // enabling it when the cache is partially populated will
+                        // give the renderer an incomplete view of the world.
+                        // And since we might as well drop all the debugging state
+                        // from the renderer when we disable the debug display,
+                        // we just clear the cache on toggle.
+                        let changed = self.debug_flags ^ flags;
+                        if changed.contains(DebugFlags::GPU_CACHE_DBG) {
+                            self.gpu_cache.clear();
+                        }
+                        self.debug_flags = flags;
+
+                        ResultMsg::DebugCommand(option)
+                    }
+                    _ => ResultMsg::DebugCommand(option),
+                };
+                self.result_tx.send(msg).unwrap();
+                self.notifier.wake_up(true);
+            }
+            ApiMsg::UpdateDocuments(transaction_msgs) => {
+                self.prepare_transactions(
+                    transaction_msgs,
+                    frame_counter,
+                );
+            }
+            ApiMsg::SceneBuilderResult(msg) => {
+                return self.process_scene_builder_result(msg, frame_counter);
+            }
+        }
+
+        RenderBackendStatus::Continue
+    }
+
+    fn process_scene_builder_result(
+        &mut self,
+        msg: SceneBuilderResult,
+        frame_counter: &mut u32, // passed through to `process_transaction`
+    ) -> RenderBackendStatus {
+        profile_scope!("sb_msg");
+        // Handle a single `SceneBuilderResult`. Only the `StopRenderBackend` and `ShutDown` arms return early; every other arm falls through to `Continue`.
+        match msg {
+            SceneBuilderResult::Transactions(txns, result_tx) => {
+                self.process_transaction(
+                    txns,
+                    result_tx,
+                    frame_counter,
+                );
+                self.bookkeep_after_frames();
+            },
+            #[cfg(feature = "capture")]
+            SceneBuilderResult::CapturedTransactions(txns, capture_config, result_tx) => {
+                if let Some(ref mut old_config) = self.capture_config {
+                    assert!(old_config.scene_id <= capture_config.scene_id); // scene ids must not go backwards
+                    if old_config.scene_id < capture_config.scene_id {
+                        old_config.scene_id = capture_config.scene_id;
+                        old_config.frame_id = 0; // a newer scene restarts the frame numbering
+                    }
+                } else {
+                    self.capture_config = Some(capture_config); // nothing stored yet: adopt the incoming config
+                }
+                // Apply the transactions; a capture step is only saved when a frame was actually built.
+                let built_frame = self.process_transaction(
+                    txns,
+                    result_tx,
+                    frame_counter,
+                );
+
+                if built_frame {
+                    self.save_capture_sequence();
+                }
+
+                self.bookkeep_after_frames();
+            },
+            #[cfg(feature = "capture")]
+            SceneBuilderResult::StopCaptureSequence => {
+                self.capture_config = None;
+            }
+            SceneBuilderResult::GetGlyphDimensions(request) => {
+                let mut glyph_dimensions = Vec::with_capacity(request.glyph_indices.len());
+                let instance_key = self.resource_cache.map_font_instance_key(request.key);
+                if let Some(base) = self.resource_cache.get_font_instance(instance_key) { // unknown instance: the reply stays empty
+                    let font = FontInstance::from_base(Arc::clone(&base));
+                    for glyph_index in &request.glyph_indices {
+                        let glyph_dim = self.resource_cache.get_glyph_dimensions(&font, *glyph_index);
+                        glyph_dimensions.push(glyph_dim);
+                    }
+                }
+                request.sender.send(glyph_dimensions).unwrap(); // panics if the send fails
+            }
+            SceneBuilderResult::GetGlyphIndices(request) => {
+                let mut glyph_indices = Vec::with_capacity(request.text.len()); // `len()` is used as the capacity hint; one index is pushed per char
+                let font_key = self.resource_cache.map_font_key(request.key);
+                for ch in request.text.chars() {
+                    let index = self.resource_cache.get_glyph_index(font_key, ch);
+                    glyph_indices.push(index);
+                }
+                request.sender.send(glyph_indices).unwrap(); // panics if the send fails
+            }
+            SceneBuilderResult::FlushComplete(tx) => {
+                tx.send(()).ok(); // a failed send is deliberately ignored
+            }
+            SceneBuilderResult::ExternalEvent(evt) => {
+                self.notifier.external_event(evt);
+            }
+            SceneBuilderResult::ClearNamespace(id) => {
+                self.resource_cache.clear_namespace(id);
+                self.documents.retain(|doc_id, _doc| doc_id.namespace_id != id); // drop every document in that namespace
+            }
+            SceneBuilderResult::DeleteDocument(document_id) => {
+                self.documents.remove(&document_id);
+            }
+            SceneBuilderResult::SetParameter(param) => {
+                if let Parameter::Bool(BoolParameter::Multithreading, enabled) = param { // the only parameter acted on here
+                    self.resource_cache.enable_multithreading(enabled);
+                }
+                let _ = self.result_tx.send(ResultMsg::SetParameter(param)); // every parameter is forwarded; send errors are ignored
+            }
+            SceneBuilderResult::StopRenderBackend => {
+                return RenderBackendStatus::StopRenderBackend;
+            }
+            SceneBuilderResult::ShutDown(sender) => {
+                info!("Recycling stats: {:?}", self.recycler);
+                return RenderBackendStatus::ShutDown(sender);
+            }
+        }
+
+        RenderBackendStatus::Continue
+    }
+
+    /// Sends a clone of the current `frame_config` to the scene builder as a
+    /// `SetFrameBuilderConfig` request.
+    fn update_frame_builder_config(&self) {
+        let config = self.frame_config.clone();
+        let request = SceneBuilderRequest::SetFrameBuilderConfig(config);
+        self.send_backend_message(request);
+    }
+
+    fn prepare_for_frames(&mut self) { // Called before frame work, e.g. at the top of `prepare_transactions`.
+        self.gpu_cache.prepare_for_frames(); // thin wrapper: the GPU cache does the actual work
+    }
+
+    fn bookkeep_after_frames(&mut self) { // Counterpart of `prepare_for_frames`; called once a batch of transactions has been handled.
+        self.gpu_cache.bookkeep_after_frames(); // thin wrapper: the GPU cache does the actual work
+    }
+
+    fn requires_frame_build(&mut self) -> bool { // Whether documents must build a frame even without a request of their own.
+        self.gpu_cache.requires_frame_build() // the answer comes solely from the GPU cache
+    }
+
+    /// Applies a batch of transactions to their documents, bracketed by
+    /// `prepare_for_frames` and `bookkeep_after_frames`.
+    fn prepare_transactions(&mut self, txns: Vec<Box<TransactionMsg>>, frame_counter: &mut u32) {
+        self.prepare_for_frames();
+        // Documents with no transaction in this batch may still need a forced nop update.
+        let has_txn = |document_id: DocumentId| txns.iter().any(|txn| txn.document_id == document_id);
+        self.maybe_force_nop_documents(frame_counter, has_txn);
+
+        let mut built_frame = false;
+        for mut txn in txns {
+            let wants_frame = txn.generate_frame.as_bool();
+            if wants_frame {
+                txn.profile.end_time(profiler::API_SEND_TIME);
+            }
+            // Panics if the transaction targets a document that is not registered.
+            let doc = self.documents.get_mut(&txn.document_id).unwrap();
+            doc.profile.merge(&mut txn.profile);
+
+            built_frame |= self.update_document(
+                txn.document_id,
+                txn.resource_updates.take(),
+                txn.frame_ops.take(),
+                txn.notifications.take(),
+                wants_frame,
+                txn.render_reasons,
+                txn.generate_frame.id(),
+                txn.invalidate_rendered_frame,
+                frame_counter,
+                false,
+                txn.creation_time,
+            );
+        }
+        if built_frame {
+            #[cfg(feature = "capture")]
+            self.save_capture_sequence();
+        }
+        self.bookkeep_after_frames();
+    }
+
+    /// Shared resources sometimes need maintenance (such as cleaning up unused
+    /// cache items) that forces every document to build a frame, even though
+    /// not every document has a transaction ready. For each document that
+    /// `document_already_present` does not claim, this calls `update_document`
+    /// with an empty, no-op transaction purely to force that frame build.
+    fn maybe_force_nop_documents<F>(
+        &mut self,
+        frame_counter: &mut u32,
+        document_already_present: F,
+    ) where F: Fn(DocumentId) -> bool {
+        if !self.requires_frame_build() { return; }
+        // Snapshot the ids first: `update_document` needs `&mut self`.
+        let missing: Vec<DocumentId> = self.documents.keys()
+            .filter(|&&id| !document_already_present(id))
+            .cloned()
+            .collect();
+        let mut built_frame = false;
+        for document_id in missing {
+            built_frame |= self.update_document(
+                document_id,
+                Vec::default(),
+                Vec::default(),
+                Vec::default(),
+                false,
+                RenderReasons::empty(),
+                None,
+                false,
+                frame_counter,
+                false,
+                None,
+            );
+        }
+        if built_frame {
+            #[cfg(feature = "capture")]
+            self.save_capture_sequence();
+        }
+    }
+
+
+    /// Applies one (possibly empty) transaction to the document `document_id` and
+    /// builds and publishes a new frame for it when one is needed. Returns `true`
+    /// if a frame was built. Panics if `document_id` is not in `self.documents`.
+    fn update_document(
+        &mut self,
+        document_id: DocumentId,
+        resource_updates: Vec<ResourceUpdate>,
+        mut frame_ops: Vec<FrameMsg>,
+        mut notifications: Vec<NotificationRequest>,
+        mut render_frame: bool,
+        render_reasons: RenderReasons,
+        generated_frame_id: Option<u64>,
+        invalidate_rendered_frame: bool,
+        frame_counter: &mut u32,
+        has_built_scene: bool,
+        start_time: Option<u64>
+    ) -> bool {
+        let requested_frame = render_frame;
+
+        let requires_frame_build = self.requires_frame_build();
+        let doc = self.documents.get_mut(&document_id).unwrap();
+
+        // If we have a sampler, get more frame ops from it and add them
+        // to the transaction. This is a hook to allow the WR user code to
+        // fiddle with things after a potentially long scene build, but just
+        // before rendering. This is useful for rendering with the latest
+        // async transforms.
+        if requested_frame {
+            if let Some(ref sampler) = self.sampler {
+                frame_ops.append(&mut sampler.sample(document_id, generated_frame_id));
+            }
+        }
+
+        doc.has_built_scene |= has_built_scene;
+
+        // TODO: this scroll variable doesn't necessarily mean we scrolled. It is only used
+        // for something wrench specific and we should remove it.
+        let mut scroll = false;
+        for frame_msg in frame_ops {
+            let op = doc.process_frame_msg(frame_msg);
+            scroll |= op.scroll;
+        }
+
+        for update in &resource_updates {
+            if let ResourceUpdate::UpdateImage(..) = update {
+                doc.frame_is_valid = false;
+            }
+        }
+
+        self.resource_cache.post_scene_building_update(resource_updates, &mut doc.profile);
+
+        if doc.dynamic_properties.flush_pending_updates() {
+            doc.frame_is_valid = false;
+            doc.hit_tester_is_valid = false;
+        }
+
+        if !doc.can_render() {
+            // TODO: this happens if we are building the first scene asynchronously and
+            // scroll at the same time. we should keep track of the fact that we skipped
+            // composition here and do it as soon as we receive the scene.
+            render_frame = false;
+        }
+
+        // Avoid re-building the frame if the current built frame is still valid.
+        // However, if `requires_frame_build()` reported that a build is needed, _always_
+        // do that, unless doc.can_render() is false, as in that case a frame build can't
+        // happen anyway. We want to ensure we do this because even if the doc doesn't
+        // have pixels it can still try to access stale texture cache items.
+        let build_frame = (render_frame && !doc.frame_is_valid && doc.has_pixels()) ||
+            (requires_frame_build && doc.can_render());
+
+        // Request composite is true when we want to composite frame even when
+        // there is no frame update. This happens when video frame is updated under
+        // external image with NativeTexture or when platform requested to composite frame.
+        if invalidate_rendered_frame {
+            doc.rendered_frame_is_valid = false;
+            if doc.scene.config.compositor_kind.should_redraw_on_invalidation() {
+                let msg = ResultMsg::ForceRedraw;
+                self.result_tx.send(msg).unwrap();
+            }
+        }
+
+        if build_frame {
+            // Saturate so that a start time ahead of our clock records zero instead of underflowing.
+            if let Some(start_time) = start_time {
+                Telemetry::record_time_to_frame_build(Duration::from_nanos(precise_time_ns().saturating_sub(start_time)));
+            }
+            profile_scope!("generate frame");
+
+            *frame_counter += 1;
+
+            // borrow ck hack for profile_counters
+            let (pending_update, mut rendered_document) = {
+                let timer_id = Telemetry::start_framebuild_time();
+
+                let frame_stats = doc.frame_stats.take();
+
+                let rendered_document = doc.build_frame(
+                    &mut self.resource_cache,
+                    &mut self.gpu_cache,
+                    self.debug_flags,
+                    &mut self.tile_caches,
+                    frame_stats,
+                    render_reasons,
+                );
+
+                debug!("generated frame for document {:?} with {} passes", document_id, rendered_document.frame.passes.len());
+
+                let msg = ResultMsg::UpdateGpuCache(self.gpu_cache.extract_updates());
+                self.result_tx.send(msg).unwrap();
+
+                Telemetry::stop_and_accumulate_framebuild_time(timer_id);
+
+                let pending_update = self.resource_cache.pending_updates();
+                (pending_update, rendered_document)
+            };
+
+            // Invalidate dirty rects if the compositing config has changed significantly
+            rendered_document
+                .frame
+                .composite_state
+                .update_dirty_rect_validity(&doc.prev_composite_descriptor);
+
+            // Build a small struct that represents the state of the tiles to be composited.
+            let composite_descriptor = rendered_document
+                .frame
+                .composite_state
+                .descriptor
+                .clone();
+
+            // If there are texture cache updates to apply, or if the produced
+            // frame is not a no-op, or the compositor state has changed,
+            // then we cannot skip compositing this frame.
+            if !pending_update.is_nop() ||
+               !rendered_document.frame.is_nop() ||
+               composite_descriptor != doc.prev_composite_descriptor {
+                doc.rendered_frame_is_valid = false;
+            }
+            doc.prev_composite_descriptor = composite_descriptor;
+
+            #[cfg(feature = "capture")]
+            match self.capture_config {
+                Some(ref mut config) => {
+                    // FIXME(aosmond): document splitting causes multiple prepare frames
+                    config.prepare_frame();
+
+                    if config.bits.contains(CaptureBits::FRAME) {
+                        let file_name = format!("frame-{}-{}", document_id.namespace_id.0, document_id.id);
+                        config.serialize_for_frame(&rendered_document.frame, file_name);
+                    }
+
+                    let data_stores_name = format!("data-stores-{}-{}", document_id.namespace_id.0, document_id.id);
+                    config.serialize_for_frame(&doc.data_stores, data_stores_name);
+
+                    let frame_spatial_tree_name = format!("frame-spatial-tree-{}-{}", document_id.namespace_id.0, document_id.id);
+                    config.serialize_for_frame::<SpatialTree, _>(&doc.spatial_tree, frame_spatial_tree_name);
+
+                    let properties_name = format!("properties-{}-{}", document_id.namespace_id.0, document_id.id);
+                    config.serialize_for_frame(&doc.dynamic_properties, properties_name);
+                },
+                None => {},
+            }
+
+            let msg = ResultMsg::PublishPipelineInfo(doc.updated_pipeline_info());
+            self.result_tx.send(msg).unwrap();
+
+            // Publish the frame
+            let msg = ResultMsg::PublishDocument(
+                document_id,
+                rendered_document,
+                pending_update,
+            );
+            self.result_tx.send(msg).unwrap();
+        } else if requested_frame {
+            // WR-internal optimization to avoid doing a bunch of render work if
+            // there's no pixels. We still want to pretend to render and request
+            // a render to make sure that the callbacks (particularly the
+            // new_frame_ready callback below) have the right flags.
+            let msg = ResultMsg::PublishPipelineInfo(doc.updated_pipeline_info());
+            self.result_tx.send(msg).unwrap();
+        }
+
+        drain_filter(
+            &mut notifications,
+            |n| { n.when() == Checkpoint::FrameBuilt },
+            |n| { n.notify(); },
+        );
+
+        if !notifications.is_empty() {
+            self.result_tx.send(ResultMsg::AppendNotificationRequests(notifications)).unwrap();
+        }
+
+        // Always forward the transaction to the renderer if a frame was requested,
+        // otherwise gecko can get into a state where it waits (forever) for the
+        // transaction to complete before sending new work.
+        if requested_frame {
+            // If rendered frame is already valid, there is no need to render frame.
+            if doc.rendered_frame_is_valid {
+                render_frame = false;
+            } else if render_frame {
+                doc.rendered_frame_is_valid = true;
+            }
+            self.notifier.new_frame_ready(document_id, scroll, render_frame);
+        }
+
+        if !doc.hit_tester_is_valid {
+            doc.rebuild_hit_tester();
+        }
+
+        build_frame
+    }
+
+    fn send_backend_message(&self, msg: SceneBuilderRequest) { // Posts `msg` on the `scene_tx` channel.
+        self.scene_tx.send(msg).unwrap(); // panics if the send fails
+    }
+
+    /// Fills in this backend's part of a `MemoryReport`, then hands the report
+    /// (together with `tx`) to the scene builder in a `ReportMemory` request.
+    /// Panics if `size_of_ops` is `None`.
+    fn report_memory(&mut self, tx: Sender<Box<MemoryReport>>) {
+        let mut report = Box::new(MemoryReport::default());
+        let ops = self.size_of_ops.as_mut().unwrap();
+        let op = ops.size_of_op;
+        report.gpu_cache_metadata = self.gpu_cache.size_of(ops);
+
+        for doc in self.documents.values() {
+            report.clip_stores += doc.scene.clip_store.size_of(ops);
+            if let Some(hit_tester) = &doc.hit_tester {
+                report.hit_testers += hit_tester.size_of(ops);
+            }
+            doc.data_stores.report_memory(ops, &mut report);
+        }
+
+        *report += self.resource_cache.report_memory(op);
+        report.texture_cache_structures = self.resource_cache.texture_cache.report_memory(ops);
+
+        // The report travels with the request: the scene-builder thread adds
+        // its own numbers to it and then sends the combined result back to
+        // the thread that is waiting on `tx`.
+        let request = SceneBuilderRequest::ReportMemory(report, tx);
+        self.send_backend_message(request);
+    }
+
+
+    /// While a capture sequence is active (`capture_config` is set), serializes
+    /// the backend state for the current step; does nothing otherwise.
+    #[cfg(feature = "capture")]
+    fn save_capture_sequence(&mut self) {
+        let config = match self.capture_config {
+            Some(ref mut config) => config,
+            None => return,
+        };
+        let deferred = self.resource_cache.save_capture_sequence(config);
+        let backend = PlainRenderBackend {
+            frame_config: self.frame_config.clone(),
+            resource_sequence_id: config.resource_id,
+            documents: self.documents.iter().map(|(id, doc)| (*id, doc.view)).collect(),
+        };
+        config.serialize_for_frame(&backend, "backend");
+        // Only send on `result_tx` when the resource cache handed back deferred items.
+        if !deferred.is_empty() {
+            let output = DebugOutput::SaveCapture(config.clone(), deferred);
+            self.result_tx.send(ResultMsg::DebugOutput(output)).unwrap();
+        }
+    }
+}
+
+impl RenderBackend {
+    #[cfg(feature = "capture")]
+    // Saves a one-off capture under `root`; `bits` selects what gets written. Note: the mutable `self` is only needed here for resolving blob images
+    fn save_capture(
+        &mut self,
+        root: PathBuf,
+        bits: CaptureBits,
+    ) -> DebugOutput {
+        use std::fs;
+        use crate::render_task_graph::dump_render_tasks_as_svg;
+        // Make sure the capture directory exists; failing to create it is fatal.
+        debug!("capture: saving {:?}", root);
+        if !root.is_dir() {
+            if let Err(e) = fs::create_dir_all(&root) {
+                panic!("Unable to create capture dir: {:?}", e);
+            }
+        }
+        let config = CaptureConfig::new(root, bits);
+        // Frame captures rebuild every document's frame below, so run the pre-frame hook first.
+        if config.bits.contains(CaptureBits::FRAME) {
+            self.prepare_for_frames();
+        }
+
+        for (&id, doc) in &mut self.documents {
+            debug!("\tdocument {:?}", id);
+            if config.bits.contains(CaptureBits::FRAME) {
+                // Temporarily force invalidation otherwise the render task graph dump is empty.
+                let force_invalidation = std::mem::replace(&mut doc.scene.config.force_invalidation, true);
+
+                let rendered_document = doc.build_frame(
+                    &mut self.resource_cache,
+                    &mut self.gpu_cache,
+                    self.debug_flags,
+                    &mut self.tile_caches,
+                    None,
+                    RenderReasons::empty(),
+                );
+
+                doc.scene.config.force_invalidation = force_invalidation; // restore the caller's setting
+
+                // After we rendered the frames, there are pending updates to both
+                // GPU cache and resources. Instead of serializing them, we are going to make sure
+                // they are applied on the `Renderer` side.
+                let msg_update_gpu_cache = ResultMsg::UpdateGpuCache(self.gpu_cache.extract_updates());
+                self.result_tx.send(msg_update_gpu_cache).unwrap();
+                //TODO: write down doc's pipeline info?
+                // it has `pipeline_epoch_map`,
+                // which may capture necessary details for some cases.
+                let file_name = format!("frame-{}-{}", id.namespace_id.0, id.id);
+                config.serialize_for_frame(&rendered_document.frame, file_name);
+                let file_name = format!("spatial-{}-{}", id.namespace_id.0, id.id);
+                config.serialize_tree_for_frame(&doc.spatial_tree, file_name);
+                let file_name = format!("built-primitives-{}-{}", id.namespace_id.0, id.id);
+                config.serialize_for_frame(&doc.scene.prim_store, file_name);
+                let file_name = format!("built-clips-{}-{}", id.namespace_id.0, id.id);
+                config.serialize_for_frame(&doc.scene.clip_store, file_name);
+                let file_name = format!("scratch-{}-{}", id.namespace_id.0, id.id);
+                config.serialize_for_frame(&doc.scratch.primitive, file_name);
+                let file_name = format!("render-tasks-{}-{}.svg", id.namespace_id.0, id.id); // NOTE(review): the name already ends in ".svg" and "svg" is passed again below (same for the four texture-cache dumps) - confirm the extension is not doubled
+                let mut render_tasks_file = fs::File::create(&config.file_path_for_frame(file_name, "svg"))
+                    .expect("Failed to open the SVG file.");
+                dump_render_tasks_as_svg(
+                    &rendered_document.frame.render_tasks,
+                    &mut render_tasks_file
+                ).unwrap();
+                // Dump each of the four texture cache views to its own SVG file.
+                let file_name = format!("texture-cache-color-linear-{}-{}.svg", id.namespace_id.0, id.id);
+                let mut texture_file = fs::File::create(&config.file_path_for_frame(file_name, "svg"))
+                    .expect("Failed to open the SVG file.");
+                self.resource_cache.texture_cache.dump_color8_linear_as_svg(&mut texture_file).unwrap();
+
+                let file_name = format!("texture-cache-color8-glyphs-{}-{}.svg", id.namespace_id.0, id.id);
+                let mut texture_file = fs::File::create(&config.file_path_for_frame(file_name, "svg"))
+                    .expect("Failed to open the SVG file.");
+                self.resource_cache.texture_cache.dump_color8_glyphs_as_svg(&mut texture_file).unwrap();
+
+                let file_name = format!("texture-cache-alpha8-glyphs-{}-{}.svg", id.namespace_id.0, id.id);
+                let mut texture_file = fs::File::create(&config.file_path_for_frame(file_name, "svg"))
+                    .expect("Failed to open the SVG file.");
+                self.resource_cache.texture_cache.dump_alpha8_glyphs_as_svg(&mut texture_file).unwrap();
+
+                let file_name = format!("texture-cache-alpha8-linear-{}-{}.svg", id.namespace_id.0, id.id);
+                let mut texture_file = fs::File::create(&config.file_path_for_frame(file_name, "svg"))
+                    .expect("Failed to open the SVG file.");
+                self.resource_cache.texture_cache.dump_alpha8_linear_as_svg(&mut texture_file).unwrap();
+            }
+            // The next three files are written whether or not FRAME was requested.
+            let data_stores_name = format!("data-stores-{}-{}", id.namespace_id.0, id.id);
+            config.serialize_for_frame(&doc.data_stores, data_stores_name);
+
+            let frame_spatial_tree_name = format!("frame-spatial-tree-{}-{}", id.namespace_id.0, id.id);
+            config.serialize_for_frame::<SpatialTree, _>(&doc.spatial_tree, frame_spatial_tree_name);
+
+            let properties_name = format!("properties-{}-{}", id.namespace_id.0, id.id);
+            config.serialize_for_frame(&doc.dynamic_properties, properties_name);
+        }
+
+        if config.bits.contains(CaptureBits::FRAME) {
+            // TODO: there is no guarantee that we won't hit this case, but we want to
+            // report it here if we do. If we don't, it will simply crash in
+            // Renderer::render_impl and give us less information about the source.
+            assert!(!self.requires_frame_build(), "Caches were cleared during a capture.");
+            self.bookkeep_after_frames();
+        }
+        // Ask the scene builder to save its side of the capture with the same config.
+        debug!("\tscene builder");
+        self.send_backend_message(
+            SceneBuilderRequest::SaveScene(config.clone())
+        );
+
+        debug!("\tresource cache");
+        let (resources, deferred) = self.resource_cache.save_capture(&config.root);
+
+        info!("\tbackend");
+        let backend = PlainRenderBackend {
+            frame_config: self.frame_config.clone(),
+            resource_sequence_id: 0, // `load_capture` reads zero as "still capture", not a sequence
+            documents: self.documents
+                .iter()
+                .map(|(id, doc)| (*id, doc.view))
+                .collect(),
+        };
+
+        config.serialize_for_frame(&backend, "backend");
+        config.serialize_for_frame(&resources, "plain-resources");
+        // For frame captures, also send the pending resource updates on `result_tx` and persist the caches.
+        if config.bits.contains(CaptureBits::FRAME) {
+            let msg_update_resources = ResultMsg::UpdateResources {
+                resource_updates: self.resource_cache.pending_updates(),
+                memory_pressure: false,
+            };
+            self.result_tx.send(msg_update_resources).unwrap();
+            // Save the texture/glyph/image caches.
+            info!("\tresource cache");
+            let caches = self.resource_cache.save_caches(&config.root);
+            config.serialize_for_resource(&caches, "resource_cache");
+            info!("\tgpu cache");
+            config.serialize_for_resource(&self.gpu_cache, "gpu_cache");
+        }
+
+        DebugOutput::SaveCapture(config, deferred)
+    }
+
+    /// Builds a `CaptureConfig` from `root` and `bits` and sends it in a `StartCaptureSequence` request.
+    #[cfg(feature = "capture")]
+    fn start_capture_sequence(
+        &mut self,
+        root: PathBuf,
+        bits: CaptureBits,
+    ) {
+        let config = CaptureConfig::new(root, bits);
+        self.send_backend_message(SceneBuilderRequest::StartCaptureSequence(config));
+    }
+
+    /// Sends a `StopCaptureSequence` request through `send_backend_message`.
+    /// `capture_config` itself is only cleared once the matching
+    /// `SceneBuilderResult::StopCaptureSequence` is processed.
+    #[cfg(feature = "capture")]
+    fn stop_capture_sequence(&mut self) {
+        let request = SceneBuilderRequest::StopCaptureSequence;
+        self.send_backend_message(request);
+    }
+
+    #[cfg(feature = "replay")]
+    fn load_capture(
+        &mut self,
+        mut config: CaptureConfig,
+    ) {
+        debug!("capture: loading {:?}", config.frame_root());
+        let backend = config.deserialize_for_frame::<PlainRenderBackend, _>("backend")
+            .expect("Unable to open backend.ron");
+
+        // If this is a capture sequence, then the ID will be non-zero, and won't
+        // match what is loaded, but for still captures, the ID will be zero.
+        let first_load = backend.resource_sequence_id == 0;
+        if self.loaded_resource_sequence_id != backend.resource_sequence_id || first_load {
+            // FIXME(aosmond): We clear the documents because when we update the
+            // resource cache, we actually wipe and reload, because we don't
+            // know what is the same and what has changed. If we were to keep as
+            // much of the resource cache state as possible, we could avoid
+            // flushing the document state (which has its own dependencies on the
+            // cache).
+            //
+            // FIXME(aosmond): If we try to load the next capture in the
+            // sequence too quickly, we may lose resources we depend on in the
+            // current frame. This can cause panics. Ideally we would not
+            // advance to the next frame until the FrameRendered event for all
+            // of the pipelines.
+            self.documents.clear();
+
+            config.resource_id = backend.resource_sequence_id;
+            self.loaded_resource_sequence_id = backend.resource_sequence_id;
+
+            let plain_resources = config.deserialize_for_resource::<PlainResources, _>("plain-resources")
+                .expect("Unable to open plain-resources.ron");
+            let caches_maybe = config.deserialize_for_resource::<PlainCacheOwn, _>("resource_cache");
+
+            // Note: it would be great to have `RenderBackend` to be split
+            // rather explicitly on what's used before and after scene building
+            // so that, for example, we never miss anything in the code below:
+
+            let plain_externals = self.resource_cache.load_capture(
+                plain_resources,
+                caches_maybe,
+                &config,
+            );
+
+            let msg_load = ResultMsg::DebugOutput(
+                DebugOutput::LoadCapture(config.clone(), plain_externals)
+            );
+            self.result_tx.send(msg_load).unwrap();
+
+            self.gpu_cache = match config.deserialize_for_resource::<GpuCache, _>("gpu_cache") {
+                Some(gpu_cache) => gpu_cache,
+                None => GpuCache::new(),
+            };
+        }
+
+        self.frame_config = backend.frame_config;
+
+        let mut scenes_to_build = Vec::new();
+
+        for (id, view) in backend.documents {
+            debug!("\tdocument {:?}", id);
+            let scene_name = format!("scene-{}-{}", id.namespace_id.0, id.id);
+            let scene = config.deserialize_for_scene::<Scene, _>(&scene_name)
+                .expect(&format!("Unable to open {}.ron", scene_name));
+
+            let scene_spatial_tree_name = format!("scene-spatial-tree-{}-{}", id.namespace_id.0, id.id);
+            let scene_spatial_tree = config.deserialize_for_scene::<SceneSpatialTree, _>(&scene_spatial_tree_name)
+                .expect(&format!("Unable to open {}.ron", scene_spatial_tree_name));
+
+            let interners_name = format!("interners-{}-{}", id.namespace_id.0, id.id);
+            let interners = config.deserialize_for_scene::<Interners, _>(&interners_name)
+                .expect(&format!("Unable to open {}.ron", interners_name));
+
+            let data_stores_name = format!("data-stores-{}-{}", id.namespace_id.0, id.id);
+            let data_stores = config.deserialize_for_frame::<DataStores, _>(&data_stores_name)
+                .expect(&format!("Unable to open {}.ron", data_stores_name));
+
+            let properties_name = format!("properties-{}-{}", id.namespace_id.0, id.id);
+            let properties = config.deserialize_for_frame::<SceneProperties, _>(&properties_name)
+                .expect(&format!("Unable to open {}.ron", properties_name));
+
+            let frame_spatial_tree_name = format!("frame-spatial-tree-{}-{}", id.namespace_id.0, id.id);
+            let frame_spatial_tree = config.deserialize_for_frame::<SpatialTree, _>(&frame_spatial_tree_name)
+                .expect(&format!("Unable to open {}.ron", frame_spatial_tree_name));
+
+            // Update the document if it still exists, rather than replace it entirely.
+            // This allows us to preserve state information such as the frame stamp,
+            // which is necessary for cache sanity.
+            match self.documents.entry(id) {
+                Occupied(entry) => {
+                    let doc = entry.into_mut();
+                    doc.view = view;
+                    doc.loaded_scene = scene.clone();
+                    doc.data_stores = data_stores;
+                    doc.spatial_tree = frame_spatial_tree;
+                    doc.dynamic_properties = properties;
+                    doc.frame_is_valid = false;
+                    doc.rendered_frame_is_valid = false;
+                    doc.has_built_scene = false;
+                    doc.hit_tester_is_valid = false;
+                }
+                Vacant(entry) => {
+                    let doc = Document {
+                        id,
+                        scene: BuiltScene::empty(),
+                        removed_pipelines: Vec::new(),
+                        view,
+                        stamp: FrameStamp::first(id),
+                        frame_builder: FrameBuilder::new(),
+                        dynamic_properties: properties,
+                        hit_tester: None,
+                        shared_hit_tester: Arc::new(SharedHitTester::new()),
+                        frame_is_valid: false,
+                        hit_tester_is_valid: false,
+                        rendered_frame_is_valid: false,
+                        has_built_scene: false,
+                        data_stores,
+                        scratch: ScratchBuffer::default(),
+                        spatial_tree: frame_spatial_tree,
+                        loaded_scene: scene.clone(),
+                        prev_composite_descriptor: CompositeDescriptor::empty(),
+                        dirty_rects_are_valid: false,
+                        profile: TransactionProfile::new(),
+                        rg_builder: RenderTaskGraphBuilder::new(),
+                        frame_stats: None,
+                    };
+                    entry.insert(doc);
+                }
+            };
+
+            let frame_name = format!("frame-{}-{}", id.namespace_id.0, id.id);
+            let frame = config.deserialize_for_frame::<Frame, _>(frame_name);
+            let build_frame = match frame {
+                Some(frame) => {
+                    info!("\tloaded a built frame with {} passes", frame.passes.len());
+
+                    let msg_update = ResultMsg::UpdateGpuCache(self.gpu_cache.extract_updates());
+                    self.result_tx.send(msg_update).unwrap();
+
+                    let msg_publish = ResultMsg::PublishDocument(
+                        id,
+                        RenderedDocument {
+                            frame,
+                            is_new_scene: true,
+                            profile: TransactionProfile::new(),
+                            render_reasons: RenderReasons::empty(),
+                            frame_stats: None,
+                        },
+                        self.resource_cache.pending_updates(),
+                    );
+                    self.result_tx.send(msg_publish).unwrap();
+
+                    self.notifier.new_frame_ready(id, false, true);
+
+                    // We deserialized the state of the frame so we don't want to build
+                    // it (but we do want to update the scene builder's state)
+                    false
+                }
+                None => true,
+            };
+
+            scenes_to_build.push(LoadScene {
+                document_id: id,
+                scene,
+                view: view.scene.clone(),
+                config: self.frame_config.clone(),
+                fonts: self.resource_cache.get_fonts(),
+                build_frame,
+                interners,
+                spatial_tree: scene_spatial_tree,
+            });
+        }
+
+        if !scenes_to_build.is_empty() {
+            self.send_backend_message(
+                SceneBuilderRequest::LoadScenes(scenes_to_build)
+            );
+        }
+    }
+}
diff --git a/gfx/wr/webrender/src/render_target.rs b/gfx/wr/webrender/src/render_target.rs
new file mode 100644
index 0000000000..71515cd084
--- /dev/null
+++ b/gfx/wr/webrender/src/render_target.rs
@@ -0,0 +1,883 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+
+use api::units::*;
+use api::{ColorF, ImageFormat, LineOrientation, BorderStyle};
+use crate::batch::{AlphaBatchBuilder, AlphaBatchContainer, BatchTextures};
+use crate::batch::{ClipBatcher, BatchBuilder};
+use crate::command_buffer::CommandBufferList;
+use crate::spatial_tree::SpatialTree;
+use crate::clip::ClipStore;
+use crate::frame_builder::{FrameGlobalResources};
+use crate::gpu_cache::{GpuCache, GpuCacheAddress};
+use crate::gpu_types::{BorderInstance, SvgFilterInstance, BlurDirection, BlurInstance, PrimitiveHeaders, ScalingInstance};
+use crate::gpu_types::{TransformPalette, ZBufferIdGenerator};
+use crate::internal_types::{FastHashMap, TextureSource, CacheTextureId};
+use crate::picture::{SliceId, SurfaceInfo, ResolvedSurfaceTexture, TileCacheInstance};
+use crate::prim_store::{PrimitiveInstance, PrimitiveStore, PrimitiveScratchBuffer};
+use crate::prim_store::gradient::{
+    FastLinearGradientInstance, LinearGradientInstance, RadialGradientInstance,
+    ConicGradientInstance,
+};
+use crate::renderer::{GpuBufferBuilder};
+use crate::render_backend::DataStores;
+use crate::render_task::{RenderTaskKind, RenderTaskAddress};
+use crate::render_task::{RenderTask, ScalingTask, SvgFilterInfo};
+use crate::render_task_graph::{RenderTaskGraph, RenderTaskId};
+use crate::resource_cache::ResourceCache;
+use crate::spatial_tree::SpatialNodeIndex;
+
+
+const STYLE_SOLID: i32 = ((BorderStyle::Solid as i32) << 8) | ((BorderStyle::Solid as i32) << 16); // `Solid` in both 8-bit style fields
+const STYLE_MASK: i32 = 0x00FF_FF00; // Selects bits 8..=23, i.e. the two style fields compared against STYLE_SOLID
+
+/// A tag used to identify the output format (color or alpha) of a `RenderTarget`.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum RenderTargetKind {
+    Color, // RGBA8 surface; see `ColorRenderTarget`.
+    Alpha, // R8 surface; see `AlphaRenderTarget`.
+}
+
+/// Identifies a given `RenderTarget` in a `RenderTargetList` (presumably its position in `targets` — confirm at use sites).
+#[derive(Debug, Copy, Clone)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct RenderTargetIndex(pub usize);
+
+pub struct RenderTargetContext<'a, 'rc> { // State passed as `ctx` to `RenderTarget::add_task` and `RenderTarget::build`.
+    pub global_device_pixel_scale: DevicePixelScale,
+    pub prim_store: &'a PrimitiveStore,
+    pub resource_cache: &'rc mut ResourceCache, // The only mutable borrow; it has its own lifetime `'rc`.
+    pub use_dual_source_blending: bool,
+    pub use_advanced_blending: bool,
+    pub break_advanced_blend_batches: bool, // Forwarded to `AlphaBatchBuilder::new`.
+    pub batch_lookback_count: usize, // Forwarded to `AlphaBatchBuilder::new`.
+    pub spatial_tree: &'a SpatialTree,
+    pub data_stores: &'a DataStores,
+    pub surfaces: &'a [SurfaceInfo],
+    pub scratch: &'a PrimitiveScratchBuffer,
+    pub screen_world_rect: WorldRect,
+    pub globals: &'a FrameGlobalResources,
+    pub tile_caches: &'a FastHashMap<SliceId, Box<TileCacheInstance>>, // Keyed by `SliceId`.
+    pub root_spatial_node_index: SpatialNodeIndex,
+}
+
+/// Represents a number of rendering operations on a surface.
+///
+/// In graphics parlance, a "render target" usually means "a surface (texture or
+/// framebuffer) bound to the output of a shader". This trait has a slightly
+/// different meaning, in that it represents the operations on that surface
+/// _before_ it's actually bound and rendered. So a `RenderTarget` is built by
+/// the `RenderBackend` by inserting tasks, and then shipped over to the
+/// `Renderer` where a device surface is resolved and the tasks are transformed
+/// into draw commands on that surface.
+///
+/// We express this as a trait to generalize over color and alpha surfaces.
+/// A given `RenderTask` will draw to one or the other, depending on its type
+/// and sometimes on its parameters. See `RenderTask::target_kind`.
+pub trait RenderTarget {
+    /// Creates a new RenderTarget of the given type.
+    fn new(
+        texture_id: CacheTextureId,
+        screen_size: DeviceIntSize,
+        gpu_supports_fast_clears: bool,
+        used_rect: DeviceIntRect,
+    ) -> Self;
+
+    /// Optional hook to provide additional processing for the target at the
+    /// end of the build phase. The default implementation does nothing.
+    fn build(
+        &mut self,
+        _ctx: &mut RenderTargetContext,
+        _gpu_cache: &mut GpuCache,
+        _render_tasks: &RenderTaskGraph,
+        _prim_headers: &mut PrimitiveHeaders,
+        _transforms: &mut TransformPalette,
+        _z_generator: &mut ZBufferIdGenerator,
+        _prim_instances: &[PrimitiveInstance],
+        _cmd_buffers: &CommandBufferList,
+    ) {
+    }
+
+    /// Associates a `RenderTask` with this target. That task must be assigned
+    /// to a region returned by invoking `allocate()` on this target.
+    /// NOTE(review): this trait declares no `allocate()`; confirm the wording.
+    ///
+    /// TODO(gw): It's a bit odd that we need the deferred resolves and mutable GPU cache
+    /// here. They are typically used by the build step above. They are used for the blit
+    /// jobs to allow resolve_image to be called. It's a bit of extra overhead to store
+    /// the image key here and then resolve them in the build step separately. BUT: if/when
+    /// we add more texture cache target jobs, we might want to tidy this up.
+    fn add_task(
+        &mut self,
+        task_id: RenderTaskId,
+        ctx: &RenderTargetContext,
+        gpu_cache: &mut GpuCache,
+        render_tasks: &RenderTaskGraph,
+        clip_store: &ClipStore,
+        transforms: &mut TransformPalette,
+    );
+
+    fn needs_depth(&self) -> bool; // Presumably whether drawing this target requires a depth buffer — confirm in the renderer.
+    fn texture_id(&self) -> CacheTextureId; // The cache texture id the target was created with.
+}
+
+/// A series of `RenderTarget` instances, serving as the high-level container
+/// into which `RenderTasks` are assigned.
+///
+/// During the build phase, we iterate over the tasks in each `RenderPass`. For
+/// each task, we invoke `allocate()` on the `RenderTargetList`, which in turn
+/// attempts to allocate an output region in the last `RenderTarget` in the
+/// list. If allocation fails (or if the list is empty), a new `RenderTarget` is
+/// created and appended to the list. The build phase then assigns the task into
+/// the target associated with the final allocation.
+///
+/// The result is that each `RenderPass` is associated with one or two
+/// `RenderTargetLists`, depending on whether all our tasks have the
+/// same `RenderTargetKind`. The lists are then shipped to the `Renderer`, which
+/// allocates a device texture array, with one slice per render target in the
+/// list.
+///
+/// The upshot of this scheme is that it maximizes batching. In a given pass,
+/// we need to do a separate batch for each individual render target. But with
+/// the texture array, we can expose the entirety of the previous pass to each
+/// task in the current pass in a single batch, which generally allows each
+/// task to be drawn in a single batch regardless of how many results from the
+/// previous pass it depends on.
+///
+/// Note that in some cases (like drop-shadows), we can depend on the output of
+/// a pass earlier than the immediately-preceding pass.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct RenderTargetList<T> {
+    pub format: ImageFormat, // Supplied to `new` and stored as-is.
+    pub targets: Vec<T>, // Starts empty; `build` and `needs_depth` visit every entry.
+}
+
+impl<T: RenderTarget> RenderTargetList<T> {
+    /// Creates an empty list tagged with the given image `format`.
+    pub fn new(format: ImageFormat) -> Self {
+        Self {
+            targets: Vec::new(),
+            format,
+        }
+    }
+
+    /// Invokes `RenderTarget::build` on every target in the list, in order,
+    /// handing each one the same shared state; a no-op for an empty list.
+    pub fn build(
+        &mut self,
+        ctx: &mut RenderTargetContext,
+        gpu_cache: &mut GpuCache,
+        render_tasks: &RenderTaskGraph,
+        prim_headers: &mut PrimitiveHeaders,
+        transforms: &mut TransformPalette,
+        z_generator: &mut ZBufferIdGenerator,
+        prim_instances: &[PrimitiveInstance],
+        cmd_buffers: &CommandBufferList,
+    ) {
+        // Looping over an empty `Vec` is already a no-op, so no separate
+        // emptiness guard is required here.
+        for target in self.targets.iter_mut() {
+            target.build(
+                ctx,
+                gpu_cache,
+                render_tasks,
+                prim_headers,
+                transforms,
+                z_generator,
+                prim_instances,
+                cmd_buffers,
+            );
+        }
+    }
+
+    /// Returns `true` if at least one target in the list needs depth.
+    pub fn needs_depth(&self) -> bool {
+        self.targets.iter().any(T::needs_depth)
+    }
+}
+
+
+/// Contains the work (in the form of instance arrays) needed to fill a
+/// color output surface (RGBA8).
+///
+/// See `RenderTarget`.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ColorRenderTarget {
+    pub alpha_batch_containers: Vec<AlphaBatchContainer>,
+    // Blur instances to apply for this render target, grouped by `TextureSource`.
+    pub vertical_blurs: FastHashMap<TextureSource, Vec<BlurInstance>>,
+    pub horizontal_blurs: FastHashMap<TextureSource, Vec<BlurInstance>>,
+    pub scalings: FastHashMap<TextureSource, Vec<ScalingInstance>>,
+    pub svg_filters: Vec<(BatchTextures, Vec<SvgFilterInstance>)>,
+    pub blits: Vec<BlitJob>,
+    alpha_tasks: Vec<RenderTaskId>, // Picture tasks queued by `add_task` and batched later in `build`.
+    screen_size: DeviceIntSize,
+    pub texture_id: CacheTextureId,
+    // Track the used rect of the render target, so that
+    // we can set a scissor rect and only clear to the
+    // used portion of the target as an optimization.
+    pub used_rect: DeviceIntRect,
+    pub resolve_ops: Vec<ResolveOp>, // Cloned from picture tasks that carry a `resolve_op`.
+    pub clear_color: Option<ColorF>, // `Some(TRANSPARENT)` initially; `build` may overwrite it from a picture task.
+}
+
+impl RenderTarget for ColorRenderTarget {
+    fn new(
+        texture_id: CacheTextureId,
+        screen_size: DeviceIntSize,
+        _: bool, // `gpu_supports_fast_clears` in the trait; unused by color targets.
+        used_rect: DeviceIntRect,
+    ) -> Self {
+        ColorRenderTarget {
+            alpha_batch_containers: Vec::new(),
+            vertical_blurs: FastHashMap::default(),
+            horizontal_blurs: FastHashMap::default(),
+            scalings: FastHashMap::default(),
+            svg_filters: Vec::new(),
+            blits: Vec::new(),
+            alpha_tasks: Vec::new(),
+            screen_size,
+            texture_id,
+            used_rect,
+            resolve_ops: Vec::new(),
+            clear_color: Some(ColorF::TRANSPARENT), // Default; `build` may replace it.
+        }
+    }
+
+    fn build(
+        &mut self,
+        ctx: &mut RenderTargetContext,
+        gpu_cache: &mut GpuCache,
+        render_tasks: &RenderTaskGraph,
+        prim_headers: &mut PrimitiveHeaders,
+        transforms: &mut TransformPalette,
+        z_generator: &mut ZBufferIdGenerator,
+        prim_instances: &[PrimitiveInstance],
+        cmd_buffers: &CommandBufferList,
+    ) {
+        profile_scope!("build");
+        let mut merged_batches = AlphaBatchContainer::new(None); // Filled by `AlphaBatchBuilder::build`; appended last if non-empty.
+        let mut gpu_buffer_builder = GpuBufferBuilder::new(); // Local to this call; lent to every `add_prim_to_batch` below.
+
+        for task_id in &self.alpha_tasks { // Only `Picture` tasks are queued here (see `add_task`).
+            profile_scope!("alpha_task");
+            let task = &render_tasks[*task_id];
+
+            match task.kind {
+                RenderTaskKind::Picture(ref pic_task) => {
+                    let target_rect = task.get_target_rect();
+
+                    let scissor_rect = if pic_task.can_merge { // Mergeable tasks get no scissor; others use their target rect.
+                        None
+                    } else {
+                        Some(target_rect)
+                    };
+
+                    if !pic_task.can_use_shared_surface { // Such a task dictates the clear color of the whole target.
+                        self.clear_color = pic_task.clear_color;
+                    }
+
+                    // TODO(gw): The type names of AlphaBatchBuilder and BatchBuilder
+                    //           are still confusing. Once more of the picture caching
+                    //           improvement code lands, the AlphaBatchBuilder and
+                    //           AlphaBatchList types will be collapsed into one, which
+                    //           should simplify coming up with better type names.
+                    let alpha_batch_builder = AlphaBatchBuilder::new(
+                        self.screen_size,
+                        ctx.break_advanced_blend_batches,
+                        ctx.batch_lookback_count,
+                        *task_id,
+                        (*task_id).into(),
+                    );
+
+                    let mut batch_builder = BatchBuilder::new(alpha_batch_builder);
+                    let cmd_buffer = cmd_buffers.get(pic_task.cmd_buffer_index);
+
+                    cmd_buffer.iter_prims(&mut |cmd, spatial_node_index| { // One `add_prim_to_batch` call per command in the task's buffer.
+                        batch_builder.add_prim_to_batch(
+                            cmd,
+                            spatial_node_index,
+                            ctx,
+                            gpu_cache,
+                            render_tasks,
+                            prim_headers,
+                            transforms,
+                            pic_task.raster_spatial_node_index,
+                            pic_task.surface_spatial_node_index,
+                            z_generator,
+                            prim_instances,
+                            &mut gpu_buffer_builder,
+                        );
+                    });
+
+                    let alpha_batch_builder = batch_builder.finalize();
+
+                    alpha_batch_builder.build(
+                        &mut self.alpha_batch_containers,
+                        &mut merged_batches,
+                        target_rect,
+                        scissor_rect,
+                    );
+                }
+                _ => { // `add_task` only ever queues `Picture` tasks in `alpha_tasks`.
+                    unreachable!();
+                }
+            }
+        }
+
+        if !merged_batches.is_empty() {
+            self.alpha_batch_containers.push(merged_batches);
+        }
+    }
+
+    fn texture_id(&self) -> CacheTextureId {
+        self.texture_id
+    }
+
+    fn add_task(
+        &mut self,
+        task_id: RenderTaskId,
+        _ctx: &RenderTargetContext,
+        gpu_cache: &mut GpuCache,
+        render_tasks: &RenderTaskGraph,
+        _: &ClipStore, // Unused by color targets.
+        _: &mut TransformPalette, // Unused by color targets.
+    ) {
+        profile_scope!("add_task");
+        let task = &render_tasks[task_id];
+
+        match task.kind {
+            RenderTaskKind::VerticalBlur(..) => {
+                add_blur_instances(
+                    &mut self.vertical_blurs,
+                    BlurDirection::Vertical,
+                    task_id.into(),
+                    task.children[0],
+                    render_tasks,
+                );
+            }
+            RenderTaskKind::HorizontalBlur(..) => {
+                add_blur_instances(
+                    &mut self.horizontal_blurs,
+                    BlurDirection::Horizontal,
+                    task_id.into(),
+                    task.children[0],
+                    render_tasks,
+                );
+            }
+            RenderTaskKind::Picture(ref pic_task) => {
+                if let Some(ref resolve_op) = pic_task.resolve_op {
+                    self.resolve_ops.push(resolve_op.clone());
+                }
+                self.alpha_tasks.push(task_id); // Batched later, in `build`.
+            }
+            RenderTaskKind::SvgFilter(ref task_info) => {
+                add_svg_filter_instances(
+                    &mut self.svg_filters,
+                    render_tasks,
+                    &task_info.info,
+                    task_id,
+                    task.children.get(0).cloned(),
+                    task.children.get(1).cloned(),
+                    task_info.extra_gpu_cache_handle.map(|handle| gpu_cache.get_address(&handle)),
+                )
+            }
+            RenderTaskKind::Image(..) |
+            RenderTaskKind::Cached(..) |
+            RenderTaskKind::ClipRegion(..) |
+            RenderTaskKind::Border(..) |
+            RenderTaskKind::CacheMask(..) |
+            RenderTaskKind::FastLinearGradient(..) |
+            RenderTaskKind::LinearGradient(..) |
+            RenderTaskKind::RadialGradient(..) |
+            RenderTaskKind::ConicGradient(..) |
+            RenderTaskKind::TileComposite(..) |
+            RenderTaskKind::LineDecoration(..) => {
+                panic!("Should not be added to color target!");
+            }
+            RenderTaskKind::Readback(..) => {} // Nothing is recorded on the target for readback tasks.
+            RenderTaskKind::Scaling(ref info) => {
+                add_scaling_instances(
+                    info,
+                    &mut self.scalings,
+                    task,
+                    task.children.first().map(|&child| &render_tasks[child]),
+                );
+            }
+            RenderTaskKind::Blit(ref task_info) => {
+                let target_rect = task
+                    .get_target_rect();
+                self.blits.push(BlitJob {
+                    source: task_info.source,
+                    target_rect,
+                });
+            }
+            #[cfg(test)]
+            RenderTaskKind::Test(..) => {}
+        }
+    }
+
+    fn needs_depth(&self) -> bool { // True once any batch container holds opaque batches.
+        self.alpha_batch_containers.iter().any(|ab| {
+            !ab.opaque_batches.is_empty()
+        })
+    }
+}
+
+/// Contains the work (in the form of instance arrays) needed to fill an alpha
+/// output surface (R8).
+///
+/// See `RenderTarget`.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct AlphaRenderTarget {
+    pub clip_batcher: ClipBatcher, // Receives the `CacheMask` and `ClipRegion` tasks added to this target.
+    // Blur instances to apply for this render target, grouped by `TextureSource`.
+    pub vertical_blurs: FastHashMap<TextureSource, Vec<BlurInstance>>,
+    pub horizontal_blurs: FastHashMap<TextureSource, Vec<BlurInstance>>,
+    pub scalings: FastHashMap<TextureSource, Vec<ScalingInstance>>,
+    pub zero_clears: Vec<RenderTaskId>, // Every blur task added to this target is listed here.
+    pub one_clears: Vec<RenderTaskId>, // Clip tasks flagged `clear_to_one`, by the task itself or by the clip batcher.
+    pub texture_id: CacheTextureId,
+}
+
+impl RenderTarget for AlphaRenderTarget {
+    /// Creates an empty alpha target for `texture_id`. The size and rect
+    /// arguments are ignored; only `gpu_supports_fast_clears` is used.
+    fn new(
+        texture_id: CacheTextureId,
+        _: DeviceIntSize,
+        gpu_supports_fast_clears: bool,
+        _: DeviceIntRect,
+    ) -> Self {
+        Self {
+            texture_id,
+            clip_batcher: ClipBatcher::new(gpu_supports_fast_clears),
+            zero_clears: Vec::new(),
+            one_clears: Vec::new(),
+            scalings: FastHashMap::default(),
+            horizontal_blurs: FastHashMap::default(),
+            vertical_blurs: FastHashMap::default(),
+        }
+    }
+
+    fn texture_id(&self) -> CacheTextureId {
+        self.texture_id
+    }
+
+    /// Alpha targets never need depth.
+    fn needs_depth(&self) -> bool {
+        false
+    }
+
+    /// Records `task_id` on this target: blurs and scalings become instances,
+    /// clip tasks go to the clip batcher, any other non-test task kind panics.
+    fn add_task(
+        &mut self,
+        task_id: RenderTaskId,
+        ctx: &RenderTargetContext,
+        gpu_cache: &mut GpuCache,
+        render_tasks: &RenderTaskGraph,
+        clip_store: &ClipStore,
+        transforms: &mut TransformPalette,
+    ) {
+        profile_scope!("add_task");
+        let task = &render_tasks[task_id];
+        let task_rect = task.get_target_rect();
+
+        match task.kind {
+            RenderTaskKind::CacheMask(ref mask) => {
+                let batcher_wants_one = self.clip_batcher.add(
+                    mask.clip_node_range,
+                    mask.root_spatial_node_index,
+                    render_tasks,
+                    gpu_cache,
+                    clip_store,
+                    transforms,
+                    mask.actual_rect,
+                    mask.device_pixel_scale,
+                    task_rect.min.to_f32(),
+                    mask.actual_rect.min,
+                    ctx,
+                );
+                if mask.clear_to_one || batcher_wants_one {
+                    self.one_clears.push(task_id);
+                }
+            }
+            RenderTaskKind::ClipRegion(ref region) => {
+                if region.clear_to_one {
+                    self.one_clears.push(task_id);
+                }
+                let origin = task_rect.min.to_f32();
+                let local_rect = DeviceRect::from_size(task_rect.size().to_f32());
+                self.clip_batcher.add_clip_region(
+                    region.local_pos,
+                    local_rect,
+                    region.clip_data.clone(),
+                    origin,
+                    DevicePoint::zero(),
+                    region.device_pixel_scale.0,
+                );
+            }
+            RenderTaskKind::VerticalBlur(..) => {
+                self.zero_clears.push(task_id);
+                add_blur_instances(
+                    &mut self.vertical_blurs,
+                    BlurDirection::Vertical,
+                    task_id.into(),
+                    task.children[0],
+                    render_tasks,
+                );
+            }
+            RenderTaskKind::HorizontalBlur(..) => {
+                self.zero_clears.push(task_id);
+                add_blur_instances(
+                    &mut self.horizontal_blurs,
+                    BlurDirection::Horizontal,
+                    task_id.into(),
+                    task.children[0],
+                    render_tasks,
+                );
+            }
+            RenderTaskKind::Scaling(ref info) => {
+                let source = task.children.first().map(|&id| &render_tasks[id]);
+                add_scaling_instances(info, &mut self.scalings, task, source);
+            }
+            RenderTaskKind::Image(..) |
+            RenderTaskKind::Cached(..) |
+            RenderTaskKind::Readback(..) |
+            RenderTaskKind::Picture(..) |
+            RenderTaskKind::Blit(..) |
+            RenderTaskKind::Border(..) |
+            RenderTaskKind::LineDecoration(..) |
+            RenderTaskKind::FastLinearGradient(..) |
+            RenderTaskKind::LinearGradient(..) |
+            RenderTaskKind::RadialGradient(..) |
+            RenderTaskKind::ConicGradient(..) |
+            RenderTaskKind::TileComposite(..) |
+            RenderTaskKind::SvgFilter(..) => {
+                panic!("BUG: should not be added to alpha target!");
+            }
+            #[cfg(test)]
+            RenderTaskKind::Test(..) => {}
+        }
+    }
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, PartialEq, Clone)]
+pub struct ResolveOp { // Carried by picture tasks (`resolve_op`); `ColorRenderTarget` collects clones in `resolve_ops`.
+    pub src_task_ids: Vec<RenderTaskId>, // Presumably the tasks resolved from — confirm in the renderer.
+    pub dest_task_id: RenderTaskId, // Presumably the task resolved into — confirm in the renderer.
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum PictureCacheTargetKind { // The kind of work stored in `PictureCacheTarget::kind`.
+    Draw { // Carries a container of batches.
+        alpha_batch_container: AlphaBatchContainer,
+    },
+    Blit { // Refers to a render task by id. NOTE(review): presumably a copy from that task's output — confirm.
+        task_id: RenderTaskId,
+        sub_rect_offset: DeviceIntVector2D,
+    },
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct PictureCacheTarget { // Pairs a resolved surface texture with the work (`kind`) recorded for it.
+    pub surface: ResolvedSurfaceTexture,
+    pub kind: PictureCacheTargetKind,
+    pub clear_color: Option<ColorF>, // NOTE(review): `None` presumably means "do not clear" — confirm in the renderer.
+    pub dirty_rect: DeviceIntRect,
+    pub valid_rect: DeviceIntRect,
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct TextureCacheRenderTarget { // Work queued by `add_task`, one list per kind of job.
+    pub target_kind: RenderTargetKind,
+    pub horizontal_blurs: FastHashMap<TextureSource, Vec<BlurInstance>>,
+    pub blits: Vec<BlitJob>,
+    pub border_segments_complex: Vec<BorderInstance>, // Border instances whose style bits are not solid/solid.
+    pub border_segments_solid: Vec<BorderInstance>, // Border instances with `flags & STYLE_MASK == STYLE_SOLID`.
+    pub clears: Vec<DeviceIntRect>, // Target rects of the line decoration and border tasks added.
+    pub line_decorations: Vec<LineDecorationJob>,
+    pub fast_linear_gradients: Vec<FastLinearGradientInstance>,
+    pub linear_gradients: Vec<LinearGradientInstance>,
+    pub radial_gradients: Vec<RadialGradientInstance>,
+    pub conic_gradients: Vec<ConicGradientInstance>,
+}
+
+impl TextureCacheRenderTarget {
+    /// Creates an empty target of the given kind, with no queued work.
+    pub fn new(target_kind: RenderTargetKind) -> Self {
+        TextureCacheRenderTarget {
+            target_kind,
+            horizontal_blurs: FastHashMap::default(),
+            blits: vec![],
+            border_segments_complex: vec![],
+            border_segments_solid: vec![],
+            clears: vec![],
+            line_decorations: vec![],
+            fast_linear_gradients: vec![],
+            linear_gradients: vec![],
+            radial_gradients: vec![],
+            conic_gradients: vec![],
+        }
+    }
+
+    /// Queues the work for `task_id` on this target, dispatching on the task
+    /// kind. Line decorations and borders also record a clear of their rect.
+    ///
+    /// # Panics
+    /// Panics if the task kind is never drawn into a texture cache target.
+    pub fn add_task(&mut self, task_id: RenderTaskId, render_tasks: &RenderTaskGraph) {
+        profile_scope!("add_task");
+        let task_address = task_id.into();
+        let task = &render_tasks[task_id];
+        let target_rect = task.get_target_rect();
+
+        match task.kind {
+            RenderTaskKind::LineDecoration(ref info) => {
+                self.clears.push(target_rect);
+
+                self.line_decorations.push(LineDecorationJob {
+                    task_rect: target_rect.to_f32(),
+                    local_size: info.local_size,
+                    style: info.style as i32,
+                    axis_select: match info.orientation {
+                        LineOrientation::Horizontal => 0.0,
+                        LineOrientation::Vertical => 1.0,
+                    },
+                    wavy_line_thickness: info.wavy_line_thickness,
+                });
+            }
+            RenderTaskKind::HorizontalBlur(..) => {
+                add_blur_instances(
+                    &mut self.horizontal_blurs,
+                    BlurDirection::Horizontal,
+                    task_address,
+                    task.children[0],
+                    render_tasks,
+                );
+            }
+            RenderTaskKind::Blit(ref task_info) => {
+                // Add a blit job to copy from an existing render
+                // task to this target.
+                self.blits.push(BlitJob {
+                    source: task_info.source,
+                    target_rect,
+                });
+            }
+            RenderTaskKind::Border(ref task_info) => {
+                self.clears.push(target_rect);
+
+                let task_origin = target_rect.min.to_f32();
+                // The frame graph is immutable by this point, so instances are cloned
+                // one at a time as we iterate instead of being moved out of the task
+                // (or cloning the whole vec up front). TODO(gw): borders are rarely
+                // drawn since they persist in the texture cache, but this could improve.
+                for mut instance in task_info.instances.iter().cloned() {
+                    // TODO(gw): It may be better to store the task origin in
+                    //           the render task data instead of per instance.
+                    instance.task_origin = task_origin;
+                    if instance.flags & STYLE_MASK == STYLE_SOLID {
+                        self.border_segments_solid.push(instance);
+                    } else {
+                        self.border_segments_complex.push(instance);
+                    }
+                }
+            }
+            RenderTaskKind::FastLinearGradient(ref task_info) => {
+                self.fast_linear_gradients.push(task_info.to_instance(&target_rect));
+            }
+            RenderTaskKind::LinearGradient(ref task_info) => {
+                self.linear_gradients.push(task_info.to_instance(&target_rect));
+            }
+            RenderTaskKind::RadialGradient(ref task_info) => {
+                self.radial_gradients.push(task_info.to_instance(&target_rect));
+            }
+            RenderTaskKind::ConicGradient(ref task_info) => {
+                self.conic_gradients.push(task_info.to_instance(&target_rect));
+            }
+            RenderTaskKind::Image(..) |
+            RenderTaskKind::Cached(..) |
+            RenderTaskKind::VerticalBlur(..) |
+            RenderTaskKind::Picture(..) |
+            RenderTaskKind::ClipRegion(..) |
+            RenderTaskKind::CacheMask(..) |
+            RenderTaskKind::Readback(..) |
+            RenderTaskKind::Scaling(..) |
+            RenderTaskKind::TileComposite(..) |
+            RenderTaskKind::SvgFilter(..) => {
+                panic!("BUG: unexpected task kind for texture cache target");
+            }
+            #[cfg(test)]
+            RenderTaskKind::Test(..) => {}
+        }
+    }
+}
+
+/// Queues a blur instance pairing the target `task_address` with the
+/// source task `src_task_id`, blurring along `blur_direction`.
+///
+/// Instances are grouped by the texture source of the source task.
+fn add_blur_instances(
+    instances: &mut FastHashMap<TextureSource, Vec<BlurInstance>>,
+    blur_direction: BlurDirection,
+    task_address: RenderTaskAddress,
+    src_task_id: RenderTaskId,
+    render_tasks: &RenderTaskGraph,
+) {
+    let source = render_tasks[src_task_id].get_texture_source();
+    let instance = BlurInstance {
+        task_address,
+        src_task_address: src_task_id.into(),
+        blur_direction,
+    };
+
+    instances.entry(source).or_default().push(instance);
+}
+
+/// Queues a scaling instance from `source_task`'s target rect to
+/// `target_task`'s target rect inset by the scaling task's padding.
+///
+/// Panics if `source_task` is `None`.
+fn add_scaling_instances(
+    task: &ScalingTask,
+    instances: &mut FastHashMap<TextureSource, Vec<ScalingInstance>>,
+    target_task: &RenderTask,
+    source_task: Option<&RenderTask>,
+) {
+    let target_rect = target_task
+        .get_target_rect()
+        .inner_box(task.padding)
+        .to_f32();
+
+    // Unwrap once, with a message, instead of a bare `unwrap()` per use.
+    let source_task = source_task.expect("bug: scaling task has no source task");
+    let source = source_task.get_texture_source();
+    let source_rect = source_task.get_target_rect().to_f32();
+
+    // Instances are grouped by the texture they read from.
+    instances.entry(source).or_default().push(ScalingInstance { target_rect, source_rect });
+}
+
+/// Builds an `SvgFilterInstance` for `filter` and appends it to the first
+/// batch whose textures can be combined with its inputs, or to a new batch.
+fn add_svg_filter_instances(
+    instances: &mut Vec<(BatchTextures, Vec<SvgFilterInstance>)>,
+    render_tasks: &RenderTaskGraph,
+    filter: &SvgFilterInfo,
+    task_id: RenderTaskId,
+    input_1_task: Option<RenderTaskId>,
+    input_2_task: Option<RenderTaskId>,
+    extra_data_address: Option<GpuCacheAddress>,
+) {
+    let mut textures = BatchTextures::empty();
+
+    if let Some(id) = input_1_task {
+        textures.input.colors[0] = render_tasks[id].get_texture_source();
+    }
+
+    if let Some(id) = input_2_task {
+        textures.input.colors[1] = render_tasks[id].get_texture_source();
+    }
+
+    let kind = match filter {
+        SvgFilterInfo::Blend(..) => 0,
+        SvgFilterInfo::Flood(..) => 1,
+        SvgFilterInfo::LinearToSrgb => 2,
+        SvgFilterInfo::SrgbToLinear => 3,
+        SvgFilterInfo::Opacity(..) => 4,
+        SvgFilterInfo::ColorMatrix(..) => 5,
+        SvgFilterInfo::DropShadow(..) => 6,
+        SvgFilterInfo::Offset(..) => 7,
+        SvgFilterInfo::ComponentTransfer(..) => 8,
+        SvgFilterInfo::Identity => 9,
+        SvgFilterInfo::Composite(..) => 10,
+    };
+
+    let input_count = match filter {
+        SvgFilterInfo::Flood(..) => 0,
+        SvgFilterInfo::LinearToSrgb |
+        SvgFilterInfo::SrgbToLinear |
+        SvgFilterInfo::Opacity(..) |
+        SvgFilterInfo::ColorMatrix(..) |
+        SvgFilterInfo::Offset(..) |
+        SvgFilterInfo::ComponentTransfer(..) |
+        SvgFilterInfo::Identity => 1,
+
+        // Not technically a 2 input filter, but we have 2 inputs here: original content & blurred content.
+        SvgFilterInfo::DropShadow(..) |
+        SvgFilterInfo::Blend(..) |
+        SvgFilterInfo::Composite(..) => 2,
+    };
+
+    let generic_int = match filter {
+        SvgFilterInfo::Blend(mode) => *mode as u16,
+        SvgFilterInfo::ComponentTransfer(data) => // four function ids, shifted 4 bits apart
+            (data.r_func.to_int() << 12 |
+             data.g_func.to_int() << 8 |
+             data.b_func.to_int() << 4 |
+             data.a_func.to_int()) as u16,
+        SvgFilterInfo::Composite(operator) => operator.as_int() as u16,
+        SvgFilterInfo::LinearToSrgb |
+        SvgFilterInfo::SrgbToLinear |
+        SvgFilterInfo::Flood(..) |
+        SvgFilterInfo::Opacity(..) |
+        SvgFilterInfo::ColorMatrix(..) |
+        SvgFilterInfo::DropShadow(..) |
+        SvgFilterInfo::Offset(..) |
+        SvgFilterInfo::Identity => 0,
+    };
+
+    let instance = SvgFilterInstance {
+        task_address: task_id.into(),
+        input_1_task_address: input_1_task.map(|id| id.into()).unwrap_or(RenderTaskAddress(0)),
+        input_2_task_address: input_2_task.map(|id| id.into()).unwrap_or(RenderTaskAddress(0)),
+        kind,
+        input_count,
+        generic_int,
+        extra_data_address: extra_data_address.unwrap_or(GpuCacheAddress::INVALID),
+    };
+
+    for (batch_textures, batch) in instances.iter_mut() {
+        if let Some(combined_textures) = batch_textures.combine_textures(textures) {
+            batch.push(instance);
+            // Update the batch textures to the newly combined batch textures
+            *batch_textures = combined_textures;
+            return;
+        }
+    }
+
+    instances.push((textures, vec![instance]));
+}
+
+/// Information required to do a blit from a source to a target.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct BlitJob {
+    pub source: RenderTaskId, // existing render task that is copied from
+    pub target_rect: DeviceIntRect, // target rect of the blit task
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Clone, Debug)]
+pub struct LineDecorationJob { // one line-decoration draw, built from a `LineDecorationTask`
+    pub task_rect: DeviceRect, // target rect of the task, converted to f32
+    pub local_size: LayoutSize,
+    pub wavy_line_thickness: f32,
+    pub style: i32, // the task's `LineStyle`, cast to i32
+    pub axis_select: f32, // 0.0 for horizontal lines, 1.0 for vertical lines
+}
diff --git a/gfx/wr/webrender/src/render_task.rs b/gfx/wr/webrender/src/render_task.rs
new file mode 100644
index 0000000000..cfc3b4375f
--- /dev/null
+++ b/gfx/wr/webrender/src/render_task.rs
@@ -0,0 +1,1512 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{CompositeOperator, FilterPrimitive, FilterPrimitiveInput, FilterPrimitiveKind};
+use api::{LineStyle, LineOrientation, ClipMode, MixBlendMode, ColorF, ColorSpace};
+use api::MAX_RENDER_TASK_SIZE;
+use api::units::*;
+use crate::clip::{ClipDataStore, ClipItemKind, ClipStore, ClipNodeRange};
+use crate::command_buffer::CommandBufferIndex;
+use crate::spatial_tree::SpatialNodeIndex;
+use crate::filterdata::SFilterData;
+use crate::frame_builder::{FrameBuilderConfig};
+use crate::gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle};
+use crate::gpu_types::{BorderInstance, ImageSource, UvRectKind};
+use crate::internal_types::{CacheTextureId, FastHashMap, TextureSource, Swizzle};
+use crate::picture::ResolvedSurfaceTexture;
+use crate::prim_store::ClipData;
+use crate::prim_store::gradient::{
+    FastLinearGradientTask, RadialGradientTask,
+    ConicGradientTask, LinearGradientTask,
+};
+use crate::resource_cache::{ResourceCache, ImageRequest};
+use std::{usize, f32, i32, u32};
+use crate::renderer::GpuBufferBuilder;
+use crate::render_target::{ResolveOp, RenderTargetKind};
+use crate::render_task_graph::{PassId, RenderTaskId, RenderTaskGraphBuilder};
+use crate::render_task_cache::{RenderTaskCacheEntryHandle, RenderTaskCacheKey, RenderTaskCacheKeyKind, RenderTaskParent};
+use crate::surface::SurfaceBuilder;
+use smallvec::SmallVec;
+
+const FLOATS_PER_RENDER_TASK_INFO: usize = 8; // length of `RenderTaskData::data`
+pub const MAX_BLUR_STD_DEVIATION: f32 = 4.0; // `adjusted_blur_source_size` keeps halving the std-dev while both axes exceed this
+pub const MIN_DOWNSCALING_RT_SIZE: i32 = 8; // `adjusted_blur_source_size` stops scaling once width or height drops below this
+
+/// Logs an error and panics if either dimension exceeds `MAX_RENDER_TASK_SIZE`.
+fn render_task_sanity_check(size: &DeviceIntSize) {
+    if size.width > MAX_RENDER_TASK_SIZE || size.height > MAX_RENDER_TASK_SIZE {
+        error!("Attempting to create a render task of size {}x{}", size.width, size.height);
+        panic!("render task size {}x{} exceeds MAX_RENDER_TASK_SIZE", size.width, size.height);
+    }
+}
+
+/// A render task index narrowed to 16 bits (see the `From<RenderTaskId>` impl).
+#[derive(Debug, Copy, Clone, PartialEq)]
+#[repr(C)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct RenderTaskAddress(pub u16);
+
+impl From<RenderTaskId> for RenderTaskAddress {
+    /// NOTE(review): assumes the index fits in 16 bits; `as` would silently truncate otherwise.
+    fn from(id: RenderTaskId) -> Self { RenderTaskAddress(id.index as u16) }
+}
+
+/// A render task location that targets a persistent output buffer which
+/// will be retained over multiple frames.
+#[derive(Clone, Debug, Eq, PartialEq, Hash)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum StaticRenderTaskSurface {
+    /// The output of the `RenderTask` will be persisted beyond this frame, and
+    /// thus should be drawn into the `TextureCache`.
+    TextureCache {
+        /// Which texture in the texture cache should be drawn into.
+        texture: CacheTextureId,
+        /// The render target kind (format) of this texture cache surface.
+        target_kind: RenderTargetKind,
+    },
+    /// Only used as a source for render tasks, can be any texture including an
+    /// external one.
+    ReadOnly {
+        source: TextureSource, // the texture that is read from
+    },
+    /// This render task will be drawn to a picture cache texture that is
+    /// persisted between both frames and scenes, if the content remains valid.
+    PictureCache {
+        /// Describes either a WR texture or a native OS compositor target
+        surface: ResolvedSurfaceTexture,
+    },
+}
+
+/// Identifies the output buffer location for a given `RenderTask`.
+#[derive(Clone, Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum RenderTaskLocation {
+    // Towards the beginning of the frame, most task locations are typically not
+    // known yet, in which case they are set to one of the following variants:
+
+    /// A dynamic task that has not yet been allocated a texture and rect.
+    Unallocated {
+        /// Requested size of this render task
+        size: DeviceIntSize,
+    },
+    /// Will be replaced by a Static location after the texture cache update.
+    CacheRequest {
+        size: DeviceIntSize, // reported by `RenderTaskLocation::size()` until the location is replaced
+    },
+    /// Same allocation as an existing task deeper in the dependency graph
+    Existing {
+        parent_task_id: RenderTaskId, // the existing task whose allocation is shared
+        /// Requested size of this render task
+        size: DeviceIntSize,
+    },
+
+    // Before batching begins, we expect that locations have been resolved to
+    // one of the following variants:
+
+    /// The `RenderTask` should be drawn to a target provided by the atlas
+    /// allocator. This is the most common case.
+    Dynamic {
+        /// Texture that this task was allocated to render on
+        texture_id: CacheTextureId,
+        /// Rectangle in the texture this task occupies
+        rect: DeviceIntRect,
+    },
+    /// A task that is output to a persistent / retained target.
+    Static {
+        /// Target to draw to
+        surface: StaticRenderTaskSurface,
+        /// Rectangle in the texture this task occupies
+        rect: DeviceIntRect,
+    },
+}
+
+impl RenderTaskLocation {
+    /// Returns true if this is a dynamic location.
+    pub fn is_dynamic(&self) -> bool {
+        matches!(self, RenderTaskLocation::Dynamic { .. })
+    }
+
+    /// Returns the size of this location.
+    ///
+    /// Unresolved variants report their requested size; resolved ones the size of their rect.
+    pub fn size(&self) -> DeviceIntSize {
+        match self {
+            RenderTaskLocation::Unallocated { size } |
+            RenderTaskLocation::CacheRequest { size } |
+            RenderTaskLocation::Existing { size, .. } => *size,
+            RenderTaskLocation::Dynamic { rect, .. } |
+            RenderTaskLocation::Static { rect, .. } => rect.size(),
+        }
+    }
+}
+
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct CachedTask { // payload of `RenderTaskKind::Cached`
+    pub target_kind: RenderTargetKind, // returned by `RenderTaskKind::target_kind()` for cached tasks
+}
+
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct CacheMaskTask { // payload of `RenderTaskKind::CacheMask`, built by `RenderTaskKind::new_mask`
+    pub actual_rect: DeviceRect, // the `outer_rect` given to `new_mask`; its min corner is written to the task data
+    pub root_spatial_node_index: SpatialNodeIndex,
+    pub clip_node_range: ClipNodeRange, // range of clip instances that make up this mask
+    pub device_pixel_scale: DevicePixelScale, // written as the first task-specific float of the task data
+    pub clear_to_one: bool, // set from `FrameBuilderConfig::gpu_supports_fast_clears`
+}
+
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ClipRegionTask { // payload of `RenderTaskKind::ClipRegion`, built by `new_rounded_rect_mask`
+    pub local_pos: LayoutPoint,
+    pub device_pixel_scale: DevicePixelScale, // written as the first task-specific float of the task data
+    pub clip_data: ClipData,
+    pub clear_to_one: bool, // set from `FrameBuilderConfig::gpu_supports_fast_clears`
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct TileCompositeTask { // payload of `RenderTaskKind::TileComposite`
+    pub clear_color: ColorF,
+    pub scissor_rect: DeviceIntRect,
+    pub valid_rect: DeviceIntRect,
+    pub task_id: Option<RenderTaskId>, // `None` when created by `new_tile_composite`
+    pub sub_rect_offset: DeviceIntVector2D,
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct PictureTask { // payload of `RenderTaskKind::Picture`
+    pub can_merge: bool, // `new_picture` sets this to `!needs_scissor_rect`
+    pub content_origin: DevicePoint, // written to the task data after the device pixel scale
+    pub surface_spatial_node_index: SpatialNodeIndex,
+    pub raster_spatial_node_index: SpatialNodeIndex,
+    pub device_pixel_scale: DevicePixelScale, // written as the first task-specific float of the task data
+    pub clear_color: Option<ColorF>, // always `None` on tasks produced by `duplicate`
+    pub scissor_rect: Option<DeviceIntRect>,
+    pub valid_rect: Option<DeviceIntRect>,
+    pub cmd_buffer_index: CommandBufferIndex, // command buffer this task builds in to
+    pub resolve_op: Option<ResolveOp>, // `None` when created by `new_picture`
+
+    pub can_use_shared_surface: bool, // returned by `RenderTaskKind::can_use_shared_surface()` for pictures
+}
+
+impl PictureTask {
+    /// Copy an existing picture task, but set a new command buffer for it to build in to.
+    /// Used for pictures that are split between render tasks (e.g. pre/post a backdrop
+    /// filter). The copy has no clear color (subsequent picture tasks write to an existing
+    /// target) and never uses a shared surface. Asserts that `self` has no resolve op.
+    pub fn duplicate(
+        &self,
+        cmd_buffer_index: CommandBufferIndex,
+    ) -> Self {
+        assert_eq!(self.resolve_op, None);
+
+        PictureTask {
+            clear_color: None,
+            cmd_buffer_index,
+            resolve_op: None,
+            can_use_shared_surface: false,
+            ..*self // every other field is copied from the original task
+        }
+    }
+}
+
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct BlurTask { // payload of the `VerticalBlur` and `HorizontalBlur` task kinds
+    pub blur_std_deviation: f32, // written as the first task-specific float of the task data
+    pub target_kind: RenderTargetKind, // returned by `RenderTaskKind::target_kind()` for blur tasks
+    pub blur_region: DeviceIntSize, // width and height are written to the task data as f32
+}
+
+impl BlurTask {
+    /// In order to do the blur down-scaling passes without introducing errors, we need the
+    /// source of each down-scale pass to be a multiple of two. If need be, this inflates
+    /// the source size so that each down-scale pass will sample correctly.
+    pub fn adjusted_blur_source_size(original_size: DeviceSize, mut std_dev: DeviceSize) -> DeviceIntSize {
+        let mut adjusted_size = original_size;
+        let mut scale_factor = 1.0;
+        while std_dev.width > MAX_BLUR_STD_DEVIATION && std_dev.height > MAX_BLUR_STD_DEVIATION {
+            if adjusted_size.width < MIN_DOWNSCALING_RT_SIZE as f32 ||
+               adjusted_size.height < MIN_DOWNSCALING_RT_SIZE as f32 {
+                break; // stop once either dimension is below the minimum down-scaling size
+            }
+            std_dev = std_dev * 0.5; // each iteration halves the std-dev and doubles the scale factor
+            scale_factor *= 2.0;
+            adjusted_size = (original_size.to_f32() / scale_factor).ceil(); // scaled-down size, rounded up
+        }
+
+        (adjusted_size * scale_factor).round().to_i32() // scale back up, then round to integer pixels
+    }
+}
+
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ScalingTask { // payload of `RenderTaskKind::Scaling`
+    pub target_kind: RenderTargetKind, // returned by `RenderTaskKind::target_kind()` for scaling tasks
+    pub padding: DeviceIntSideOffsets, // applied via `inner_box` to the target rect when scaling instances are built
+}
+
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct BorderTask { // payload of `RenderTaskKind::Border`
+    pub instances: Vec<BorderInstance>, // cloned into the solid / complex border segment lists when the task is added to a target
+}
+
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct BlitTask { // payload of `RenderTaskKind::Blit`
+    pub source: RenderTaskId, // existing render task to copy from; becomes `BlitJob::source`
+}
+
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct LineDecorationTask { // payload of `RenderTaskKind::LineDecoration`; copied into a `LineDecorationJob`
+    pub wavy_line_thickness: f32,
+    pub style: LineStyle, // stored in the job as an i32
+    pub orientation: LineOrientation, // stored in the job as `axis_select` (0.0 horizontal, 1.0 vertical)
+    pub local_size: LayoutSize,
+}
+
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum SvgFilterInfo { // input counts below are the ones assigned in `add_svg_filter_instances`
+    Blend(MixBlendMode), // two inputs
+    Flood(ColorF), // no inputs
+    LinearToSrgb,
+    SrgbToLinear,
+    Opacity(f32), // the opacity is written to the task data
+    ColorMatrix(Box<[f32; 20]>),
+    DropShadow(ColorF), // two inputs: original content and blurred content
+    Offset(DeviceVector2D), // the offset x/y is written to the task data
+    ComponentTransfer(SFilterData),
+    Composite(CompositeOperator), // two inputs
+    // TODO: This is used as a hack to ensure that a blur task's input is always in the blur's previous pass.
+    Identity,
+}
+
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct SvgFilterTask { // payload of `RenderTaskKind::SvgFilter`
+    pub info: SvgFilterInfo, // which filter to apply, plus its parameters
+    pub extra_gpu_cache_handle: Option<GpuCacheHandle>, // NOTE(review): presumably resolves to the instance's `extra_data_address` — confirm
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ReadbackTask { // payload of `RenderTaskKind::Readback`
+    /// The offset of the rect that needs to be read back, in the
+    /// device space of the surface that will be read back from.
+    /// If this is None, there is no readback surface available
+    /// and this is a dummy (empty) readback.
+    pub readback_origin: Option<DevicePoint>,
+}
+
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct RenderTaskData { // produced by `RenderTaskKind::write_task_data`
+    pub data: [f32; FLOATS_PER_RENDER_TASK_INFO], // target rect min/max (4 floats) followed by task-specific floats
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum RenderTaskKind { // the kind of work a render task performs, with its kind-specific payload
+    Image(ImageRequest), // not a rendering operation (see `is_a_rendering_operation`)
+    Cached(CachedTask), // not a rendering operation (see `is_a_rendering_operation`)
+    Picture(PictureTask),
+    CacheMask(CacheMaskTask), // alpha target
+    ClipRegion(ClipRegionTask), // alpha target
+    VerticalBlur(BlurTask),
+    HorizontalBlur(BlurTask),
+    Readback(ReadbackTask),
+    Scaling(ScalingTask),
+    Blit(BlitTask),
+    Border(BorderTask),
+    LineDecoration(LineDecorationTask),
+    FastLinearGradient(FastLinearGradientTask),
+    LinearGradient(LinearGradientTask),
+    RadialGradient(RadialGradientTask),
+    ConicGradient(ConicGradientTask),
+    SvgFilter(SvgFilterTask),
+    TileComposite(TileCompositeTask),
+    #[cfg(test)]
+    Test(RenderTargetKind), // test builds only; carries the target kind to report
+}
+
+impl RenderTaskKind {
+    /// `Image` and `Cached` tasks are not rendering operations; every other kind is.
+    pub fn is_a_rendering_operation(&self) -> bool {
+        match *self {
+            RenderTaskKind::Image(..) | RenderTaskKind::Cached(..) => false,
+            _ => true,
+        }
+    }
+
+    /// Whether this task can be allocated on a shared render target surface
+    pub fn can_use_shared_surface(&self) -> bool {
+        if let RenderTaskKind::Picture(ref info) = *self {
+            return info.can_use_shared_surface;
+        }
+        true // every non-picture kind may use a shared surface
+    }
+
+    /// Returns false for `Image` and `Cached` tasks, true for every other kind.
+    pub fn should_advance_pass(&self) -> bool {
+        match *self {
+            RenderTaskKind::Image(..) | RenderTaskKind::Cached(..) => false,
+            _ => true,
+        }
+    }
+
+    pub fn as_str(&self) -> &'static str { // returns the variant's name as a static string
+        match *self {
+            RenderTaskKind::Image(..) => "Image",
+            RenderTaskKind::Cached(..) => "Cached",
+            RenderTaskKind::Picture(..) => "Picture",
+            RenderTaskKind::CacheMask(..) => "CacheMask",
+            RenderTaskKind::ClipRegion(..) => "ClipRegion",
+            RenderTaskKind::VerticalBlur(..) => "VerticalBlur",
+            RenderTaskKind::HorizontalBlur(..) => "HorizontalBlur",
+            RenderTaskKind::Readback(..) => "Readback",
+            RenderTaskKind::Scaling(..) => "Scaling",
+            RenderTaskKind::Blit(..) => "Blit",
+            RenderTaskKind::Border(..) => "Border",
+            RenderTaskKind::LineDecoration(..) => "LineDecoration",
+            RenderTaskKind::FastLinearGradient(..) => "FastLinearGradient",
+            RenderTaskKind::LinearGradient(..) => "LinearGradient",
+            RenderTaskKind::RadialGradient(..) => "RadialGradient",
+            RenderTaskKind::ConicGradient(..) => "ConicGradient",
+            RenderTaskKind::SvgFilter(..) => "SvgFilter",
+            RenderTaskKind::TileComposite(..) => "TileComposite",
+            #[cfg(test)]
+            RenderTaskKind::Test(..) => "Test", // only exists in test builds
+        }
+    }
+
+    /// Returns the kind of render target this task draws to.
+    ///
+    /// Most kinds map to a fixed target kind; blur, scaling and cached tasks
+    /// report the `target_kind` stored in their own task data.
+    pub fn target_kind(&self) -> RenderTargetKind {
+        match self {
+            // Kinds that always use a color target.
+            RenderTaskKind::Image(..) |
+            RenderTaskKind::LineDecoration(..) |
+            RenderTaskKind::Readback(..) |
+            RenderTaskKind::Border(..) |
+            RenderTaskKind::FastLinearGradient(..) |
+            RenderTaskKind::LinearGradient(..) |
+            RenderTaskKind::RadialGradient(..) |
+            RenderTaskKind::ConicGradient(..) |
+            RenderTaskKind::Picture(..) |
+            RenderTaskKind::Blit(..) |
+            RenderTaskKind::TileComposite(..) |
+            RenderTaskKind::SvgFilter(..) => {
+                RenderTargetKind::Color
+            }
+
+            // Kinds that always use an alpha target.
+            RenderTaskKind::ClipRegion(..) |
+            RenderTaskKind::CacheMask(..) => {
+                RenderTargetKind::Alpha
+            }
+
+            // Kinds that carry their own target kind.
+            RenderTaskKind::VerticalBlur(blur) |
+            RenderTaskKind::HorizontalBlur(blur) => blur.target_kind,
+
+            RenderTaskKind::Scaling(scaling) => scaling.target_kind,
+            RenderTaskKind::Cached(cached) => cached.target_kind,
+
+            #[cfg(test)]
+            RenderTaskKind::Test(kind) => *kind,
+        }
+    }
+
+    /// Creates a tile composite task kind from the given rects, offset and
+    /// clear color. The task's `task_id` starts out as `None`.
+    pub fn new_tile_composite(
+        sub_rect_offset: DeviceIntVector2D,
+        scissor_rect: DeviceIntRect,
+        valid_rect: DeviceIntRect,
+        clear_color: ColorF,
+    ) -> Self {
+        let tile = TileCompositeTask {
+            task_id: None,
+            clear_color, scissor_rect, valid_rect, sub_rect_offset,
+        };
+        RenderTaskKind::TileComposite(tile)
+    }
+
+    /// Creates a picture task kind after sanity-checking `size`.
+    /// `can_merge` is the inverse of `needs_scissor_rect`; no resolve op is set.
+    pub fn new_picture(
+        size: DeviceIntSize,
+        needs_scissor_rect: bool,
+        content_origin: DevicePoint,
+        surface_spatial_node_index: SpatialNodeIndex,
+        raster_spatial_node_index: SpatialNodeIndex,
+        device_pixel_scale: DevicePixelScale,
+        scissor_rect: Option<DeviceIntRect>,
+        valid_rect: Option<DeviceIntRect>,
+        clear_color: Option<ColorF>,
+        cmd_buffer_index: CommandBufferIndex,
+        can_use_shared_surface: bool,
+    ) -> Self {
+        render_task_sanity_check(&size);
+        let picture = PictureTask {
+            can_merge: !needs_scissor_rect,
+            resolve_op: None,
+            content_origin,
+            surface_spatial_node_index,
+            raster_spatial_node_index,
+            device_pixel_scale,
+            scissor_rect, valid_rect, clear_color,
+            cmd_buffer_index,
+            can_use_shared_surface,
+        };
+        RenderTaskKind::Picture(picture)
+    }
+
+    /// Creates a readback task kind.
+    ///
+    /// A `readback_origin` of `None` makes this a dummy (empty) readback.
+    pub fn new_readback(
+        readback_origin: Option<DevicePoint>,
+    ) -> Self {
+        let readback = ReadbackTask { readback_origin };
+        RenderTaskKind::Readback(readback)
+    }
+
+    /// Creates a line decoration task kind holding the given parameters unchanged.
+    pub fn new_line_decoration(
+        style: LineStyle,
+        orientation: LineOrientation,
+        wavy_line_thickness: f32,
+        local_size: LayoutSize,
+    ) -> Self {
+        let decoration = LineDecorationTask {
+            style, orientation,
+            wavy_line_thickness, local_size,
+        };
+        RenderTaskKind::LineDecoration(decoration)
+    }
+
+    /// Creates a border task kind that takes ownership of the given
+    /// border instances.
+    pub fn new_border_segment(
+        instances: Vec<BorderInstance>,
+    ) -> Self {
+        RenderTaskKind::Border(BorderTask { instances })
+    }
+
+    /// Creates a clip region task kind for the given clip data.
+    /// `clear_to_one` is taken from `fb_config.gpu_supports_fast_clears`.
+    pub fn new_rounded_rect_mask(
+        local_pos: LayoutPoint,
+        clip_data: ClipData,
+        device_pixel_scale: DevicePixelScale,
+        fb_config: &FrameBuilderConfig,
+    ) -> Self {
+        let clear_to_one = fb_config.gpu_supports_fast_clears;
+        RenderTaskKind::ClipRegion(ClipRegionTask {
+            local_pos, device_pixel_scale, clip_data, clear_to_one,
+        })
+    }
+
+    pub fn new_mask(
+        outer_rect: DeviceRect,
+        clip_node_range: ClipNodeRange,
+        root_spatial_node_index: SpatialNodeIndex,
+        clip_store: &mut ClipStore,
+        gpu_cache: &mut GpuCache,
+        gpu_buffer_builder: &mut GpuBufferBuilder,
+        resource_cache: &mut ResourceCache,
+        rg_builder: &mut RenderTaskGraphBuilder,
+        clip_data_store: &mut ClipDataStore,
+        device_pixel_scale: DevicePixelScale,
+        fb_config: &FrameBuilderConfig,
+        surface_builder: &mut SurfaceBuilder,
+    ) -> RenderTaskId {
+        // Step through the clip sources that make up this mask. If we find
+        // any box-shadow clip sources, request that image from the render
+        // task cache. This allows the blurred box-shadow rect to be cached
+        // in the texture cache across frames.
+        // TODO(gw): Consider moving this logic outside this function, especially
+        //           as we add more clip sources that depend on render tasks.
+        // TODO(gw): If this ever shows up in a profile, we could pre-calculate
+        //           whether a ClipSources contains any box-shadows and skip
+        //           this iteration for the majority of cases.
+        let task_size = outer_rect.size().to_i32(); // size of the dynamic mask task
+
+        // If we have a potentially tiled clip mask, clear the mask area first. Otherwise,
+        // the first (primary) clip mask will overwrite all the clip mask pixels with
+        // blending disabled to set to the initial value.
+
+        let clip_task_id = rg_builder.add().init( // the mask task itself; this id is what the function returns
+            RenderTask::new_dynamic(
+                task_size,
+                RenderTaskKind::CacheMask(CacheMaskTask {
+                    actual_rect: outer_rect,
+                    clip_node_range,
+                    root_spatial_node_index,
+                    device_pixel_scale,
+                    clear_to_one: fb_config.gpu_supports_fast_clears,
+                }),
+            )
+        );
+
+        for i in 0 .. clip_node_range.count {
+            let clip_instance = clip_store.get_instance_from_range(&clip_node_range, i);
+            let clip_node = &mut clip_data_store[clip_instance.handle];
+            match clip_node.item.kind {
+                ClipItemKind::BoxShadow { ref mut source } => {
+                    let (cache_size, cache_key) = source.cache_key
+                        .as_ref()
+                        .expect("bug: no cache key set")
+                        .clone();
+                    let blur_radius_dp = cache_key.blur_radius_dp as f32;
+                    let device_pixel_scale = DevicePixelScale::new(cache_key.device_pixel_scale.to_f32_px()); // shadows the parameter: scale comes from the cache key here
+
+                    // Request a cacheable render task with a blurred, minimal
+                    // sized box-shadow rect.
+                    source.render_task = Some(resource_cache.request_render_task(
+                        RenderTaskCacheKey {
+                            size: cache_size,
+                            kind: RenderTaskCacheKeyKind::BoxShadow(cache_key),
+                        },
+                        gpu_cache,
+                        gpu_buffer_builder,
+                        rg_builder,
+                        None,
+                        false,
+                        RenderTaskParent::RenderTask(clip_task_id), // the box-shadow task is parented to the mask task
+                        surface_builder,
+                        |rg_builder, _| {
+                            let clip_data = ClipData::rounded_rect(
+                                source.minimal_shadow_rect.size(),
+                                &source.shadow_radius,
+                                ClipMode::Clip,
+                            );
+
+                            // Draw the rounded rect.
+                            let mask_task_id = rg_builder.add().init(RenderTask::new_dynamic(
+                                cache_size,
+                                RenderTaskKind::new_rounded_rect_mask(
+                                    source.minimal_shadow_rect.min,
+                                    clip_data,
+                                    device_pixel_scale,
+                                    fb_config,
+                                ),
+                            ));
+
+                            // Blur it
+                            RenderTask::new_blur(
+                                DeviceSize::new(blur_radius_dp, blur_radius_dp),
+                                mask_task_id,
+                                rg_builder,
+                                RenderTargetKind::Alpha,
+                                None,
+                                cache_size,
+                            )
+                        }
+                    ));
+                }
+                ClipItemKind::Rectangle { .. } |
+                ClipItemKind::RoundedRectangle { .. } |
+                ClipItemKind::Image { .. } => {} // these clip kinds request no extra render task here
+            }
+        }
+
+        clip_task_id
+    }
+
+    /// Packs this task's parameters into the fixed-size record that is
+    /// provided to the GPU shaders via a vertex texture.
+    ///
+    /// The record holds 8 floats: the corners of `target_rect` (`min.x`,
+    /// `min.y`, `max.x`, `max.y`) followed by 4 floats whose meaning depends
+    /// on the kind of render task. Kinds that have nothing extra to pass
+    /// leave those four slots at zero.
+    pub fn write_task_data(
+        &self,
+        target_rect: DeviceIntRect,
+    ) -> RenderTaskData {
+        // NOTE: The ordering and layout of these values are required to
+        //       match both the GPU structures declared in prim_shared.glsl,
+        //       and also the uses in submit_batch() in renderer.rs.
+        // TODO(gw): Maybe there's a way to make this stuff a bit
+        //           more type-safe. Although, it will always need
+        //           to be kept in sync with the GLSL code anyway.
+
+        let [kind0, kind1, kind2, kind3] = match self {
+            // Note: has to match `PICTURE_TYPE_*` in shaders
+            RenderTaskKind::Picture(task) => [
+                task.device_pixel_scale.0,
+                task.content_origin.x,
+                task.content_origin.y,
+                0.0,
+            ],
+            RenderTaskKind::CacheMask(task) => [
+                task.device_pixel_scale.0,
+                task.actual_rect.min.x,
+                task.actual_rect.min.y,
+                0.0,
+            ],
+            RenderTaskKind::ClipRegion(task) => [
+                task.device_pixel_scale.0,
+                0.0,
+                0.0,
+                0.0,
+            ],
+            // Both blur directions share the same layout.
+            RenderTaskKind::VerticalBlur(task) |
+            RenderTaskKind::HorizontalBlur(task) => [
+                task.blur_std_deviation,
+                task.blur_region.width as f32,
+                task.blur_region.height as f32,
+                0.0,
+            ],
+            // Only a couple of SVG filters fit their parameters in here.
+            RenderTaskKind::SvgFilter(task) => match task.info {
+                SvgFilterInfo::Opacity(opacity) => [opacity, 0.0, 0.0, 0.0],
+                SvgFilterInfo::Offset(offset) => [offset.x, offset.y, 0.0, 0.0],
+                _ => [0.0; 4],
+            },
+
+            // Kinds without any task-specific floats.
+            RenderTaskKind::Image(..) |
+            RenderTaskKind::Cached(..) |
+            RenderTaskKind::Readback(..) |
+            RenderTaskKind::Scaling(..) |
+            RenderTaskKind::Border(..) |
+            RenderTaskKind::LineDecoration(..) |
+            RenderTaskKind::FastLinearGradient(..) |
+            RenderTaskKind::LinearGradient(..) |
+            RenderTaskKind::RadialGradient(..) |
+            RenderTaskKind::ConicGradient(..) |
+            RenderTaskKind::TileComposite(..) |
+            RenderTaskKind::Blit(..) => [0.0; 4],
+
+            #[cfg(test)]
+            RenderTaskKind::Test(..) => [0.0; 4],
+        };
+
+        RenderTaskData {
+            data: [
+                target_rect.min.x as f32,
+                target_rect.min.y as f32,
+                target_rect.max.x as f32,
+                target_rect.max.y as f32,
+                kind0,
+                kind1,
+                kind2,
+                kind3,
+            ]
+        }
+    }
+
+    /// Uploads the extra filter parameters of an SVG filter task to the
+    /// GPU cache.
+    ///
+    /// Only `SvgFilter` tasks with a `ColorMatrix`, `DropShadow`, `Flood`,
+    /// `ComponentTransfer` or arithmetic `Composite` filter write anything;
+    /// every other task is left untouched. The task's extra GPU cache
+    /// handle is created the first time one of those filters is seen.
+    pub fn write_gpu_blocks(
+        &mut self,
+        gpu_cache: &mut GpuCache,
+    ) {
+        let filter_task = match self {
+            RenderTaskKind::SvgFilter(filter_task) => filter_task,
+            _ => return,
+        };
+
+        match filter_task.info {
+            SvgFilterInfo::ColorMatrix(ref matrix) => {
+                let handle = filter_task.extra_gpu_cache_handle.get_or_insert_with(GpuCacheHandle::new);
+                if let Some(mut request) = gpu_cache.request(handle) {
+                    // Five blocks of four consecutive matrix entries each.
+                    for block in 0..5 {
+                        let base = block * 4;
+                        request.push([
+                            matrix[base],
+                            matrix[base + 1],
+                            matrix[base + 2],
+                            matrix[base + 3],
+                        ]);
+                    }
+                }
+            }
+            SvgFilterInfo::DropShadow(color) |
+            SvgFilterInfo::Flood(color) => {
+                let handle = filter_task.extra_gpu_cache_handle.get_or_insert_with(GpuCacheHandle::new);
+                if let Some(mut request) = gpu_cache.request(handle) {
+                    request.push(color.to_array());
+                }
+            }
+            SvgFilterInfo::ComponentTransfer(ref data) => {
+                let handle = filter_task.extra_gpu_cache_handle.get_or_insert_with(GpuCacheHandle::new);
+                if let Some(request) = gpu_cache.request(handle) {
+                    data.update(request);
+                }
+            }
+            // Of the composite operators, only the arithmetic one carries
+            // values that are pushed to the GPU cache.
+            SvgFilterInfo::Composite(CompositeOperator::Arithmetic(ref k_vals)) => {
+                let handle = filter_task.extra_gpu_cache_handle.get_or_insert_with(GpuCacheHandle::new);
+                if let Some(mut request) = gpu_cache.request(handle) {
+                    request.push(*k_vals);
+                }
+            }
+            _ => {},
+        }
+    }
+}
+
+/// In order to avoid duplicating the down-scaling and blur passes when a picture has several blurs,
+/// we use a local (primitive-level) cache of the render tasks generated for a single shadowed primitive
+/// in a single frame.
+///
+/// Maps a `BlurTaskKey` to the id of the render task that was created for it.
+pub type BlurTaskCache = FastHashMap<BlurTaskKey, RenderTaskId>;
+
+/// Key of a `BlurTaskCache` entry.
+///
+/// Since we only use it within a single primitive, the key only needs to contain the down-scaling level
+/// and the blur std deviation.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum BlurTaskKey {
+    /// A down-scaling task, identified by its down-scaling level.
+    DownScale(u32),
+    /// A blur task, identified by the down-scaling level it runs at and
+    /// its std deviation along each axis, stored as quantised integers
+    /// (see `BlurTaskKey::downscale_and_blur`).
+    Blur { downscale_level: u32, stddev_x: u32, stddev_y: u32 },
+}
+
+impl BlurTaskKey {
+    /// Builds the `Blur` key for a blur of `blur_stddev` applied at the
+    /// given down-scaling level.
+    fn downscale_and_blur(downscale_level: u32, blur_stddev: DeviceSize) -> Self {
+        // Eq and Hash are allergic to f32, so the std deviations are
+        // quantised and stored as integers instead.
+        // The blur radius is rounded before RenderTask::new_blur so we don't need
+        // a lot of precision.
+        const QUANTIZATION_FACTOR: f32 = 1024.0;
+        let quantize = |stddev: f32| (stddev * QUANTIZATION_FACTOR) as u32;
+
+        BlurTaskKey::Blur {
+            downscale_level,
+            stddev_x: quantize(blur_stddev.width),
+            stddev_y: quantize(blur_stddev.height),
+        }
+    }
+}
+
+/// The list of render tasks that a render task depends on.
+///
+/// The majority of render tasks have 0, 1 or 2 dependencies, except for pictures that
+/// typically have dozens to hundreds of dependencies. SmallVec with 2 inline elements
+/// avoids many tiny heap allocations in pages with a lot of text shadows and other
+/// types of render tasks.
+pub type TaskDependencies = SmallVec<[RenderTaskId;2]>;
+
+/// A single node of the render task graph: what to render (`kind`),
+/// where to render it (`location`) and which tasks it depends on
+/// (`children`).
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct RenderTask {
+    /// Where this task renders to. The constructors start with an
+    /// `Unallocated` or `CacheRequest` location; `get_target_rect` can only
+    /// be used once the location is `Dynamic` or `Static`.
+    pub location: RenderTaskLocation,
+    /// The tasks this task depends on.
+    pub children: TaskDependencies,
+    /// The kind of work this task performs, with its parameters.
+    pub kind: RenderTaskKind,
+
+    // TODO(gw): These fields and perhaps others can become private once the
+    //           frame_graph / render_task source files are unified / cleaned up.
+    // NOTE(review): presumably the pass after which the task's target can be
+    // freed, and the pass the task is rendered on. The constructors
+    // initialize them to `PassId::MAX` and `PassId::MIN`; confirm against
+    // the render task graph code.
+    pub free_after: PassId,
+    pub render_on: PassId,
+
+    /// The gpu cache handle for the render task's destination rect.
+    ///
+    /// If the render task is cached (`cache_handle` is `Some`),
+    /// `write_gpu_blocks` does not request this handle; the uv rect handle
+    /// of a cached task is requested and set by the render task cache.
+    pub uv_rect_handle: GpuCacheHandle,
+    /// Set by `mark_cached` when the render task cache takes over this task.
+    pub cache_handle: Option<RenderTaskCacheEntryHandle>,
+    /// Written to the gpu cache together with the destination rect.
+    /// Defaults to `UvRectKind::Rect`; see `with_uv_rect_kind`.
+    uv_rect_kind: UvRectKind,
+}
+
+impl RenderTask {
+    /// Creates a task of the given `kind` at `location`, with no
+    /// dependencies, the default `UvRectKind::Rect` and no cache handle.
+    ///
+    /// The size of `location` goes through `render_task_sanity_check` first.
+    pub fn new(
+        location: RenderTaskLocation,
+        kind: RenderTaskKind,
+    ) -> Self {
+        let size = location.size();
+        render_task_sanity_check(&size);
+
+        Self {
+            kind,
+            location,
+            children: TaskDependencies::new(),
+            render_on: PassId::MIN,
+            free_after: PassId::MAX,
+            uv_rect_kind: UvRectKind::Rect,
+            uv_rect_handle: GpuCacheHandle::new(),
+            cache_handle: None,
+        }
+    }
+
+    /// Creates a task of the given `kind` whose location is still
+    /// unallocated and only records the requested `size`.
+    pub fn new_dynamic(
+        size: DeviceIntSize,
+        kind: RenderTaskKind,
+    ) -> Self {
+        let location = RenderTaskLocation::Unallocated { size };
+        Self::new(location, kind)
+    }
+
+    /// Builder-style setter: returns the task with its `uv_rect_kind`
+    /// replaced by the given one.
+    pub fn with_uv_rect_kind(self, uv_rect_kind: UvRectKind) -> Self {
+        Self { uv_rect_kind, ..self }
+    }
+
+    /// Creates the render task for an image request of the given `size`.
+    ///
+    /// Unlike `new`, this constructor deliberately skips the render task
+    /// size sanity check.
+    pub fn new_image(
+        size: DeviceIntSize,
+        request: ImageRequest,
+    ) -> Self {
+        // Why there is no size sanity check here: with SWGL we purposefully
+        // avoid tiling large images. There is no upload with SWGL, so whatever
+        // was successfully allocated earlier will be what shaders read,
+        // regardless of the size, and copying into tiles would only slow
+        // things down.
+        // As a result we can run into very large images being added to the
+        // frame graph (this is covered by a few reftests on the CI).
+
+        Self {
+            kind: RenderTaskKind::Image(request),
+            location: RenderTaskLocation::CacheRequest { size },
+            children: TaskDependencies::new(),
+            render_on: PassId::MIN,
+            free_after: PassId::MAX,
+            uv_rect_kind: UvRectKind::Rect,
+            uv_rect_handle: GpuCacheHandle::new(),
+            cache_handle: None,
+        }
+    }
+
+
+    /// Test-only constructor: creates a `Test` task for the given `target`
+    /// kind at `location`, without running the size sanity check.
+    #[cfg(test)]
+    pub fn new_test(
+        location: RenderTaskLocation,
+        target: RenderTargetKind,
+    ) -> Self {
+        Self {
+            kind: RenderTaskKind::Test(target),
+            location,
+            children: TaskDependencies::new(),
+            render_on: PassId::MIN,
+            free_after: PassId::MAX,
+            uv_rect_kind: UvRectKind::Rect,
+            uv_rect_handle: GpuCacheHandle::new(),
+            cache_handle: None,
+        }
+    }
+
+    /// Adds a blit task of the given `size` that reads from the `source`
+    /// task, and returns the id of the new task.
+    pub fn new_blit(
+        size: DeviceIntSize,
+        source: RenderTaskId,
+        rg_builder: &mut RenderTaskGraphBuilder,
+    ) -> RenderTaskId {
+        let task_id = rg_builder.add().init(
+            Self::new_dynamic(size, RenderTaskKind::Blit(BlitTask { source })),
+        );
+
+        // The blit uses a render task as its source, so that task has to be
+        // added as a child. This ensures it gets allocated in the correct
+        // pass and is made available as an input when this task executes.
+        rg_builder.add_dependency(task_id, source);
+
+        task_id
+    }
+
+    // Construct a render task to apply a blur to a primitive.
+    // The render task chain that is constructed looks like:
+    //
+    //    PrimitiveCacheTask: Draw the primitives.
+    //           ^
+    //           |
+    //    DownscalingTask(s): Each downscaling task reduces the size of the render
+    //           ^            target to half and also halves the std deviation.
+    //           |            Downscaling repeats for as long as both components
+    //           |            of the std deviation exceed MAX_BLUR_STD_DEVIATION
+    //           |            and the target is at least MIN_DOWNSCALING_RT_SIZE
+    //           |            in both dimensions.
+    //    VerticalBlurTask: Apply the separable vertical blur to the primitive.
+    //           ^
+    //           |
+    //    HorizontalBlurTask: Apply the separable horizontal blur to the vertical blur.
+    //           |
+    //           +---- This is stored as the input task to the primitive shader.
+    //
+    // `blur_std_deviation` is the requested std deviation along each axis and
+    // `src_task_id` the task whose output gets blurred. `blur_region` is
+    // divided by the accumulated downscale factor and stored in both blur
+    // tasks. When `blur_cache` is provided, downscaling and blur tasks are
+    // looked up in it before being created, and recorded in it afterwards.
+    // Returns the id of the horizontal blur task (or of the cached task that
+    // was found for the same key).
+    pub fn new_blur(
+        blur_std_deviation: DeviceSize,
+        src_task_id: RenderTaskId,
+        rg_builder: &mut RenderTaskGraphBuilder,
+        target_kind: RenderTargetKind,
+        mut blur_cache: Option<&mut BlurTaskCache>,
+        blur_region: DeviceIntSize,
+    ) -> RenderTaskId {
+        // Adjust large std deviation value.
+        let mut adjusted_blur_std_deviation = blur_std_deviation;
+        // The blur tasks inherit the size and uv rect kind of the source task.
+        let (blur_target_size, uv_rect_kind) = {
+            let src_task = rg_builder.get_task(src_task_id);
+            (src_task.location.size(), src_task.uv_rect_kind())
+        };
+        let mut adjusted_blur_target_size = blur_target_size;
+        let mut downscaling_src_task_id = src_task_id;
+        let mut scale_factor = 1.0;
+        // Doubles as the cache key of the next downscaling task and, after the
+        // loop, as the downscale level of the blur key. Note that it starts at 1.
+        let mut n_downscales = 1;
+        while adjusted_blur_std_deviation.width > MAX_BLUR_STD_DEVIATION &&
+              adjusted_blur_std_deviation.height > MAX_BLUR_STD_DEVIATION {
+            // Stop downscaling once the current target is too small.
+            if adjusted_blur_target_size.width < MIN_DOWNSCALING_RT_SIZE ||
+               adjusted_blur_target_size.height < MIN_DOWNSCALING_RT_SIZE {
+                break;
+            }
+            adjusted_blur_std_deviation = adjusted_blur_std_deviation * 0.5;
+            scale_factor *= 2.0;
+            // Always derived from the original size rather than from the
+            // previous step.
+            adjusted_blur_target_size = (blur_target_size.to_f32() / scale_factor).to_i32();
+
+            let cached_task = match blur_cache {
+                Some(ref mut cache) => cache.get(&BlurTaskKey::DownScale(n_downscales)).cloned(),
+                None => None,
+            };
+
+            // Reuse the downscaling task of this level if there is one,
+            // otherwise chain a new one onto the previous level.
+            downscaling_src_task_id = cached_task.unwrap_or_else(|| {
+                RenderTask::new_scaling(
+                    downscaling_src_task_id,
+                    rg_builder,
+                    target_kind,
+                    adjusted_blur_target_size,
+                )
+            });
+
+            if let Some(ref mut cache) = blur_cache {
+                cache.insert(BlurTaskKey::DownScale(n_downscales), downscaling_src_task_id);
+            }
+
+            n_downscales += 1;
+        }
+
+
+        // NOTE(review): the key only covers the downscale level and the std
+        // deviation, so a cached blur is reused regardless of `blur_region`
+        // and `target_kind` -- presumably all users of one cache pass
+        // consistent values; confirm against the callers.
+        let blur_key = BlurTaskKey::downscale_and_blur(n_downscales, adjusted_blur_std_deviation);
+
+        let cached_task = match blur_cache {
+            Some(ref mut cache) => cache.get(&blur_key).cloned(),
+            None => None,
+        };
+
+        // Bring the blur region into the downscaled space.
+        let blur_region = blur_region / (scale_factor as i32);
+
+        let blur_task_id = cached_task.unwrap_or_else(|| {
+            // The vertical pass reads the (possibly downscaled) source and
+            // uses the std deviation along the y axis.
+            let blur_task_v = rg_builder.add().init(RenderTask::new_dynamic(
+                adjusted_blur_target_size,
+                RenderTaskKind::VerticalBlur(BlurTask {
+                    blur_std_deviation: adjusted_blur_std_deviation.height,
+                    target_kind,
+                    blur_region,
+                }),
+            ).with_uv_rect_kind(uv_rect_kind));
+            rg_builder.add_dependency(blur_task_v, downscaling_src_task_id);
+
+            // The horizontal pass reads the vertical pass and uses the
+            // std deviation along the x axis.
+            let task_id = rg_builder.add().init(RenderTask::new_dynamic(
+                adjusted_blur_target_size,
+                RenderTaskKind::HorizontalBlur(BlurTask {
+                    blur_std_deviation: adjusted_blur_std_deviation.width,
+                    target_kind,
+                    blur_region,
+                }),
+            ).with_uv_rect_kind(uv_rect_kind));
+            rg_builder.add_dependency(task_id, blur_task_v);
+
+            task_id
+        });
+
+        if let Some(ref mut cache) = blur_cache {
+            cache.insert(blur_key, blur_task_id);
+        }
+
+        blur_task_id
+    }
+
+    /// Adds a scaling task of the given `size` that depends on
+    /// `src_task_id`, with zero padding, and returns its id.
+    pub fn new_scaling(
+        src_task_id: RenderTaskId,
+        rg_builder: &mut RenderTaskGraphBuilder,
+        target_kind: RenderTargetKind,
+        size: DeviceIntSize,
+    ) -> RenderTaskId {
+        let no_padding = DeviceIntSideOffsets::zero();
+
+        RenderTask::new_scaling_with_padding(
+            src_task_id,
+            rg_builder,
+            target_kind,
+            size,
+            no_padding,
+        )
+    }
+
+    /// Adds a scaling task of size `padded_size` that depends on `source`
+    /// and records the given `padding`, and returns its id.
+    ///
+    /// The new task uses the same uv rect kind as `source`.
+    pub fn new_scaling_with_padding(
+        source: RenderTaskId,
+        rg_builder: &mut RenderTaskGraphBuilder,
+        target_kind: RenderTargetKind,
+        padded_size: DeviceIntSize,
+        padding: DeviceIntSideOffsets,
+    ) -> RenderTaskId {
+        let source_uv_rect_kind = rg_builder.get_task(source).uv_rect_kind();
+        let kind = RenderTaskKind::Scaling(ScalingTask { target_kind, padding });
+
+        let scaling_task_id = rg_builder.add().init(
+            Self::new_dynamic(padded_size, kind).with_uv_rect_kind(source_uv_rect_kind),
+        );
+        rg_builder.add_dependency(scaling_task_id, source);
+
+        scaling_task_id
+    }
+
+    /// Builds the render tasks for a chain of SVG filter primitives and
+    /// returns the id of the task that produces the final result.
+    ///
+    /// `original_task_id` is the task used as input by primitives that do not
+    /// refer to the output of an earlier primitive; it is also what is
+    /// returned when `filter_primitives` is empty. `filter_datas` provides one
+    /// entry per `ComponentTransfer` primitive, in order. All filter tasks are
+    /// created with size `content_size` and the given `uv_rect_kind`. Blur
+    /// radii and offsets are multiplied by `device_pixel_scale`.
+    ///
+    /// Inputs are converted between sRGB and linear RGB as required by the
+    /// color space of each primitive, and the final output is converted back
+    /// to sRGB if the last primitive works in linear RGB.
+    pub fn new_svg_filter(
+        filter_primitives: &[FilterPrimitive],
+        filter_datas: &[SFilterData],
+        rg_builder: &mut RenderTaskGraphBuilder,
+        content_size: DeviceIntSize,
+        uv_rect_kind: UvRectKind,
+        original_task_id: RenderTaskId,
+        device_pixel_scale: DevicePixelScale,
+    ) -> RenderTaskId {
+
+        if filter_primitives.is_empty() {
+            return original_task_id;
+        }
+
+        // Resolves the input to a filter primitive
+        //
+        // Returns the task providing the input, wrapped in a color space
+        // conversion task when the input's color space differs from the
+        // `color_space` requested by the primitive.
+        let get_task_input = |
+            input: &FilterPrimitiveInput,
+            filter_primitives: &[FilterPrimitive],
+            rg_builder: &mut RenderTaskGraphBuilder,
+            cur_index: usize,
+            outputs: &[RenderTaskId],
+            original: RenderTaskId,
+            color_space: ColorSpace,
+        | {
+            // TODO(cbrewster): Not sure we can assume that the original input is sRGB.
+            let (mut task_id, input_color_space) = match input.to_index(cur_index) {
+                Some(index) => (outputs[index], filter_primitives[index].color_space),
+                None => (original, ColorSpace::Srgb),
+            };
+
+            match (input_color_space, color_space) {
+                (ColorSpace::Srgb, ColorSpace::LinearRgb) => {
+                    task_id = RenderTask::new_svg_filter_primitive(
+                        smallvec![task_id],
+                        content_size,
+                        uv_rect_kind,
+                        SvgFilterInfo::SrgbToLinear,
+                        rg_builder,
+                    );
+                },
+                (ColorSpace::LinearRgb, ColorSpace::Srgb) => {
+                    task_id = RenderTask::new_svg_filter_primitive(
+                        smallvec![task_id],
+                        content_size,
+                        uv_rect_kind,
+                        SvgFilterInfo::LinearToSrgb,
+                        rg_builder,
+                    );
+                },
+                // Same color space on both sides: nothing to convert.
+                _ => {},
+            }
+
+            task_id
+        };
+
+        // outputs[i] is the task producing the result of filter_primitives[i].
+        let mut outputs = vec![];
+        // Index of the next unused entry of `filter_datas`.
+        let mut cur_filter_data = 0;
+        for (cur_index, primitive) in filter_primitives.iter().enumerate() {
+            let render_task_id = match primitive.kind {
+                FilterPrimitiveKind::Identity(ref identity) => {
+                    // Identity does not create a task, it provides its input's render task
+                    get_task_input(
+                        &identity.input,
+                        filter_primitives,
+                        rg_builder,
+                        cur_index,
+                        &outputs,
+                        original_task_id,
+                        primitive.color_space
+                    )
+                }
+                FilterPrimitiveKind::Blend(ref blend) => {
+                    let input_1_task_id = get_task_input(
+                        &blend.input1,
+                        filter_primitives,
+                        rg_builder,
+                        cur_index,
+                        &outputs,
+                        original_task_id,
+                        primitive.color_space
+                    );
+                    let input_2_task_id = get_task_input(
+                        &blend.input2,
+                        filter_primitives,
+                        rg_builder,
+                        cur_index,
+                        &outputs,
+                        original_task_id,
+                        primitive.color_space
+                    );
+
+                    RenderTask::new_svg_filter_primitive(
+                        smallvec![input_1_task_id, input_2_task_id],
+                        content_size,
+                        uv_rect_kind,
+                        SvgFilterInfo::Blend(blend.mode),
+                        rg_builder,
+                    )
+                },
+                FilterPrimitiveKind::Flood(ref flood) => {
+                    // Flood has no input task.
+                    RenderTask::new_svg_filter_primitive(
+                        smallvec![],
+                        content_size,
+                        uv_rect_kind,
+                        SvgFilterInfo::Flood(flood.color),
+                        rg_builder,
+                    )
+                }
+                FilterPrimitiveKind::Blur(ref blur) => {
+                    let width_std_deviation = blur.width * device_pixel_scale.0;
+                    let height_std_deviation = blur.height * device_pixel_scale.0;
+                    let input_task_id = get_task_input(
+                        &blur.input,
+                        filter_primitives,
+                        rg_builder,
+                        cur_index,
+                        &outputs,
+                        original_task_id,
+                        primitive.color_space
+                    );
+
+                    RenderTask::new_blur(
+                        DeviceSize::new(width_std_deviation, height_std_deviation),
+                        // TODO: This is a hack to ensure that a blur task's input is always
+                        // in the blur's previous pass.
+                        RenderTask::new_svg_filter_primitive(
+                            smallvec![input_task_id],
+                            content_size,
+                            uv_rect_kind,
+                            SvgFilterInfo::Identity,
+                            rg_builder,
+                        ),
+                        rg_builder,
+                        RenderTargetKind::Color,
+                        None,
+                        content_size,
+                    )
+                }
+                FilterPrimitiveKind::Opacity(ref opacity) => {
+                    let input_task_id = get_task_input(
+                        &opacity.input,
+                        filter_primitives,
+                        rg_builder,
+                        cur_index,
+                        &outputs,
+                        original_task_id,
+                        primitive.color_space
+                    );
+
+                    RenderTask::new_svg_filter_primitive(
+                        smallvec![input_task_id],
+                        content_size,
+                        uv_rect_kind,
+                        SvgFilterInfo::Opacity(opacity.opacity),
+                        rg_builder,
+                    )
+                }
+                FilterPrimitiveKind::ColorMatrix(ref color_matrix) => {
+                    let input_task_id = get_task_input(
+                        &color_matrix.input,
+                        filter_primitives,
+                        rg_builder,
+                        cur_index,
+                        &outputs,
+                        original_task_id,
+                        primitive.color_space
+                    );
+
+                    RenderTask::new_svg_filter_primitive(
+                        smallvec![input_task_id],
+                        content_size,
+                        uv_rect_kind,
+                        SvgFilterInfo::ColorMatrix(Box::new(color_matrix.matrix)),
+                        rg_builder,
+                    )
+                }
+                FilterPrimitiveKind::DropShadow(ref drop_shadow) => {
+                    let input_task_id = get_task_input(
+                        &drop_shadow.input,
+                        filter_primitives,
+                        rg_builder,
+                        cur_index,
+                        &outputs,
+                        original_task_id,
+                        primitive.color_space
+                    );
+
+                    let blur_std_deviation = drop_shadow.shadow.blur_radius * device_pixel_scale.0;
+                    let offset = drop_shadow.shadow.offset * LayoutToWorldScale::new(1.0) * device_pixel_scale;
+
+                    // The shadow is built as: input -> offset -> blur. The
+                    // drop shadow task then takes both the input and the
+                    // blurred, offset copy as its dependencies.
+                    let offset_task_id = RenderTask::new_svg_filter_primitive(
+                        smallvec![input_task_id],
+                        content_size,
+                        uv_rect_kind,
+                        SvgFilterInfo::Offset(offset),
+                        rg_builder,
+                    );
+
+                    let blur_task_id = RenderTask::new_blur(
+                        DeviceSize::new(blur_std_deviation, blur_std_deviation),
+                        offset_task_id,
+                        rg_builder,
+                        RenderTargetKind::Color,
+                        None,
+                        content_size,
+                    );
+
+                    RenderTask::new_svg_filter_primitive(
+                        smallvec![input_task_id, blur_task_id],
+                        content_size,
+                        uv_rect_kind,
+                        SvgFilterInfo::DropShadow(drop_shadow.shadow.color),
+                        rg_builder,
+                    )
+                }
+                FilterPrimitiveKind::ComponentTransfer(ref component_transfer) => {
+                    let input_task_id = get_task_input(
+                        &component_transfer.input,
+                        filter_primitives,
+                        rg_builder,
+                        cur_index,
+                        &outputs,
+                        original_task_id,
+                        primitive.color_space
+                    );
+
+                    // Every ComponentTransfer primitive consumes one entry of
+                    // `filter_datas`, even when no task ends up being created.
+                    let filter_data = &filter_datas[cur_filter_data];
+                    cur_filter_data += 1;
+                    if filter_data.is_identity() {
+                        // An identity transfer does not need a task of its own.
+                        input_task_id
+                    } else {
+                        RenderTask::new_svg_filter_primitive(
+                            smallvec![input_task_id],
+                            content_size,
+                            uv_rect_kind,
+                            SvgFilterInfo::ComponentTransfer(filter_data.clone()),
+                            rg_builder,
+                        )
+                    }
+                }
+                FilterPrimitiveKind::Offset(ref info) => {
+                    let input_task_id = get_task_input(
+                        &info.input,
+                        filter_primitives,
+                        rg_builder,
+                        cur_index,
+                        &outputs,
+                        original_task_id,
+                        primitive.color_space
+                    );
+
+                    let offset = info.offset * LayoutToWorldScale::new(1.0) * device_pixel_scale;
+                    RenderTask::new_svg_filter_primitive(
+                        smallvec![input_task_id],
+                        content_size,
+                        uv_rect_kind,
+                        SvgFilterInfo::Offset(offset),
+                        rg_builder,
+                    )
+                }
+                FilterPrimitiveKind::Composite(info) => {
+                    let input_1_task_id = get_task_input(
+                        &info.input1,
+                        filter_primitives,
+                        rg_builder,
+                        cur_index,
+                        &outputs,
+                        original_task_id,
+                        primitive.color_space
+                    );
+                    let input_2_task_id = get_task_input(
+                        &info.input2,
+                        filter_primitives,
+                        rg_builder,
+                        cur_index,
+                        &outputs,
+                        original_task_id,
+                        primitive.color_space
+                    );
+
+                    RenderTask::new_svg_filter_primitive(
+                        smallvec![input_1_task_id, input_2_task_id],
+                        content_size,
+                        uv_rect_kind,
+                        SvgFilterInfo::Composite(info.operator),
+                        rg_builder,
+                    )
+                }
+            };
+            outputs.push(render_task_id);
+        }
+
+        // The output of a filter is the output of the last primitive in the chain.
+        // `filter_primitives` is not empty here and every primitive pushed one
+        // output, so the unwraps below cannot fail.
+        let mut render_task_id = *outputs.last().unwrap();
+
+        // Convert to sRGB if needed
+        if filter_primitives.last().unwrap().color_space == ColorSpace::LinearRgb {
+            render_task_id = RenderTask::new_svg_filter_primitive(
+                smallvec![render_task_id],
+                content_size,
+                uv_rect_kind,
+                SvgFilterInfo::LinearToSrgb,
+                rg_builder,
+            );
+        }
+
+        render_task_id
+    }
+
+    /// Adds a single SVG filter task described by `info`, of size
+    /// `target_size`, and makes it depend on every task in `tasks`.
+    ///
+    /// The task starts without an extra GPU cache handle. Returns the id of
+    /// the new task.
+    pub fn new_svg_filter_primitive(
+        tasks: TaskDependencies,
+        target_size: DeviceIntSize,
+        uv_rect_kind: UvRectKind,
+        info: SvgFilterInfo,
+        rg_builder: &mut RenderTaskGraphBuilder,
+    ) -> RenderTaskId {
+        let kind = RenderTaskKind::SvgFilter(SvgFilterTask {
+            info,
+            extra_gpu_cache_handle: None,
+        });
+
+        let filter_task_id = rg_builder.add().init(
+            Self::new_dynamic(target_size, kind).with_uv_rect_kind(uv_rect_kind),
+        );
+
+        for input_task_id in tasks {
+            rg_builder.add_dependency(filter_task_id, input_task_id);
+        }
+
+        filter_task_id
+    }
+
+    /// Returns the uv rect kind of this task (`UvRectKind::Rect` unless it
+    /// was changed with `with_uv_rect_kind`).
+    pub fn uv_rect_kind(&self) -> UvRectKind {
+        self.uv_rect_kind
+    }
+
+    /// Looks up the address of this task's `uv_rect_handle` in the GPU cache.
+    pub fn get_texture_address(&self, gpu_cache: &GpuCache) -> GpuCacheAddress {
+        gpu_cache.get_address(&self.uv_rect_handle)
+    }
+
+    /// Returns the id of the cache texture this task renders into.
+    ///
+    /// Only valid for tasks with a `Dynamic` location.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the location is not `Dynamic`, or if its texture id is
+    /// `CacheTextureId::INVALID`.
+    pub fn get_target_texture(&self) -> CacheTextureId {
+        match &self.location {
+            RenderTaskLocation::Dynamic { texture_id, .. } => {
+                let texture_id = *texture_id;
+                assert_ne!(texture_id, CacheTextureId::INVALID);
+                texture_id
+            }
+            RenderTaskLocation::Static { .. } |
+            RenderTaskLocation::Existing { .. } |
+            RenderTaskLocation::CacheRequest { .. } |
+            RenderTaskLocation::Unallocated { .. } => unreachable!(),
+        }
+    }
+
+    /// Returns the texture source to sample this task's output from.
+    ///
+    /// `Dynamic` tasks and `Static` tasks backed by the texture cache map to
+    /// a `TextureSource::TextureCache` with the default swizzle; read-only
+    /// `Static` tasks return the source they were given.
+    ///
+    /// # Panics
+    ///
+    /// Panics for every other location, and for a `Dynamic` location whose
+    /// texture id is `CacheTextureId::INVALID`.
+    pub fn get_texture_source(&self) -> TextureSource {
+        match self.location {
+            RenderTaskLocation::Dynamic { texture_id, .. } => {
+                assert_ne!(texture_id, CacheTextureId::INVALID);
+                TextureSource::TextureCache(texture_id, Swizzle::default())
+            }
+            RenderTaskLocation::Static { ref surface, .. } => match *surface {
+                StaticRenderTaskSurface::ReadOnly { source } => source,
+                StaticRenderTaskSurface::TextureCache { texture, .. } => {
+                    TextureSource::TextureCache(texture, Swizzle::default())
+                }
+                // Any other static surface has no texture source.
+                _ => unreachable!(),
+            },
+            RenderTaskLocation::Existing { .. } |
+            RenderTaskLocation::CacheRequest { .. } |
+            RenderTaskLocation::Unallocated { .. } => unreachable!(),
+        }
+    }
+
+    /// Returns the rectangle this task occupies in its target.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the location is neither `Dynamic` nor `Static`, i.e. the
+    /// task has not been allocated.
+    pub fn get_target_rect(&self) -> DeviceIntRect {
+        match self.location {
+            // Previously, we only added render tasks after the entire
+            // primitive chain was determined visible. This meant that
+            // we could assert any render task in the list was also
+            // allocated (assigned to passes). Now, we add render
+            // tasks earlier, and the picture they belong to may be
+            // culled out later, so we can't assert that the task
+            // has been allocated.
+            // Render tasks that are created but not assigned to
+            // passes consume a row in the render task texture, but
+            // don't allocate any space in render targets nor
+            // draw any pixels.
+            // TODO(gw): Consider some kind of tag or other method
+            //           to mark a task as unused explicitly. This
+            //           would allow us to restore this debug check.
+            RenderTaskLocation::Dynamic { rect, .. } |
+            RenderTaskLocation::Static { rect, .. } => rect,
+
+            RenderTaskLocation::Existing { .. } |
+            RenderTaskLocation::CacheRequest { .. } |
+            RenderTaskLocation::Unallocated { .. } => {
+                panic!("bug: get_target_rect called before allocating")
+            }
+        }
+    }
+
+    /// Returns the kind of render target this task draws to, as reported by
+    /// its `RenderTaskKind`.
+    pub fn target_kind(&self) -> RenderTargetKind {
+        self.kind.target_kind()
+    }
+
+    /// Writes the GPU cache data of this task: first whatever its kind
+    /// needs, then, for tasks that are not cached, the uv rect covering
+    /// `target_rect`.
+    pub fn write_gpu_blocks(
+        &mut self,
+        target_rect: DeviceIntRect,
+        gpu_cache: &mut GpuCache,
+    ) {
+        profile_scope!("write_gpu_blocks");
+
+        self.kind.write_gpu_blocks(gpu_cache);
+
+        // The uv rect handle of cached render tasks is requested and set by
+        // the render task cache, so only uncached tasks write it here.
+        if self.cache_handle.is_none() {
+            if let Some(mut request) = gpu_cache.request(&mut self.uv_rect_handle) {
+                let uv_rect = ImageSource {
+                    p0: target_rect.min.to_f32(),
+                    p1: target_rect.max.to_f32(),
+                    user_data: [0.0; 4],
+                    uv_rect_kind: self.uv_rect_kind,
+                };
+                uv_rect.write_gpu_blocks(&mut request);
+            }
+        }
+    }
+
+    /// Called by the render task cache.
+    ///
+    /// Tells the render task that it is cached (which means its gpu cache
+    /// handle is managed by the texture cache). Once marked,
+    /// `write_gpu_blocks` no longer requests the task's `uv_rect_handle`.
+    pub fn mark_cached(&mut self, handle: RenderTaskCacheEntryHandle) {
+        self.cache_handle = Some(handle);
+    }
+}
diff --git a/gfx/wr/webrender/src/render_task_cache.rs b/gfx/wr/webrender/src/render_task_cache.rs
new file mode 100644
index 0000000000..0454c1214f
--- /dev/null
+++ b/gfx/wr/webrender/src/render_task_cache.rs
@@ -0,0 +1,379 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+
+use api::{ImageDescriptor, ImageDescriptorFlags, DirtyRect};
+use api::units::*;
+use crate::border::BorderSegmentCacheKey;
+use crate::box_shadow::{BoxShadowCacheKey};
+use crate::device::TextureFilter;
+use crate::freelist::{FreeList, FreeListHandle, WeakFreeListHandle};
+use crate::gpu_cache::GpuCache;
+use crate::internal_types::FastHashMap;
+use crate::picture::SurfaceIndex;
+use crate::prim_store::image::ImageCacheKey;
+use crate::prim_store::gradient::{
+    FastLinearGradientCacheKey, LinearGradientCacheKey, RadialGradientCacheKey,
+    ConicGradientCacheKey,
+};
+use crate::prim_store::line_dec::LineDecorationCacheKey;
+use crate::resource_cache::CacheItem;
+use std::{mem, usize, f32, i32};
+use crate::surface::SurfaceBuilder;
+use crate::texture_cache::{TextureCache, TextureCacheHandle, Eviction, TargetShader};
+use crate::renderer::GpuBufferBuilder;
+use crate::render_target::RenderTargetKind;
+use crate::render_task::{RenderTask, StaticRenderTaskSurface, RenderTaskLocation, RenderTaskKind, CachedTask};
+use crate::render_task_graph::{RenderTaskGraphBuilder, RenderTaskId};
+use euclid::Scale;
+
+const MAX_CACHE_TASK_SIZE: f32 = 4096.0;
+
+/// Describes a parent dependency for a render task. Render tasks
+/// may depend on a surface (e.g. when a surface uses a cached border)
+/// or an arbitrary render task (e.g. when a clip mask uses a blurred
+/// box-shadow input).
+pub enum RenderTaskParent {
+    /// Parent is a surface
+    Surface(SurfaceIndex),
+    /// Parent is a render task
+    RenderTask(RenderTaskId),
+}
+
+#[derive(Clone, Debug, Hash, PartialEq, Eq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum RenderTaskCacheKeyKind {
+    BoxShadow(BoxShadowCacheKey),
+    Image(ImageCacheKey),
+    BorderSegment(BorderSegmentCacheKey),
+    LineDecoration(LineDecorationCacheKey),
+    FastLinearGradient(FastLinearGradientCacheKey),
+    LinearGradient(LinearGradientCacheKey),
+    RadialGradient(RadialGradientCacheKey),
+    ConicGradient(ConicGradientCacheKey),
+}
+
+#[derive(Clone, Debug, Hash, PartialEq, Eq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct RenderTaskCacheKey {
+    pub size: DeviceIntSize,
+    pub kind: RenderTaskCacheKeyKind,
+}
+
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct RenderTaskCacheEntry {
+    user_data: Option<[f32; 4]>,
+    target_kind: RenderTargetKind,
+    is_opaque: bool,
+    frame_id: u64,
+    pub handle: TextureCacheHandle,
+    /// If a render task was generated for this cache entry on _this_ frame,
+    /// we need to track the task id here. This allows us to hook it up as
+    /// a dependency of any parent tasks that make a request from the render
+    /// task cache.
+    pub render_task_id: Option<RenderTaskId>,
+}
+
+#[derive(Debug, MallocSizeOf)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+pub enum RenderTaskCacheMarker {}
+
+// A cache of render tasks that are stored in the texture
+// cache for usage across frames.
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct RenderTaskCache {
+    map: FastHashMap<RenderTaskCacheKey, FreeListHandle<RenderTaskCacheMarker>>,
+    cache_entries: FreeList<RenderTaskCacheEntry, RenderTaskCacheMarker>,
+    frame_id: u64,
+}
+
+pub type RenderTaskCacheEntryHandle = WeakFreeListHandle<RenderTaskCacheMarker>;
+
+impl RenderTaskCache {
+    pub fn new() -> Self {
+        RenderTaskCache {
+            map: FastHashMap::default(),
+            cache_entries: FreeList::new(),
+            frame_id: 0,
+        }
+    }
+
+    pub fn clear(&mut self) {
+        self.map.clear();
+        self.cache_entries.clear();
+    }
+
+    pub fn begin_frame(
+        &mut self,
+        texture_cache: &mut TextureCache,
+    ) {
+        self.frame_id += 1;
+        profile_scope!("begin_frame");
+        // Drop any items from the cache that have been
+        // evicted from the texture cache.
+        //
+        // This isn't actually necessary for the texture
+        // cache to be able to evict old render tasks.
+        // It will evict render tasks as required, since
+        // the access time in the texture cache entry will
+        // be stale if this task hasn't been requested
+        // for a while.
+        //
+        // Nonetheless, we should remove stale entries
+        // from here so that this hash map doesn't
+        // grow indefinitely!
+        let cache_entries = &mut self.cache_entries;
+        let frame_id = self.frame_id;
+
+        self.map.retain(|_, handle| {
+            let mut retain = texture_cache.is_allocated(
+                &cache_entries.get(handle).handle,
+            );
+            if retain {
+                let entry = cache_entries.get_mut(&handle);
+                if frame_id > entry.frame_id + 10 {
+                    texture_cache.evict_handle(&entry.handle);
+                    retain = false;
+                }
+            }
+
+            if !retain {
+                let handle = mem::replace(handle, FreeListHandle::invalid());
+                cache_entries.free(handle);
+            }
+
+            retain
+        });
+
+        // Clear out the render task ID of any remaining cache entries that were drawn
+        // on the previous frame, so we don't accidentally hook up stale dependencies
+        // when building the frame graph.
+        for (_, handle) in &self.map {
+            let entry = self.cache_entries.get_mut(handle);
+            entry.render_task_id = None;
+        }
+    }
+
+    fn alloc_render_task(
+        size: DeviceIntSize,
+        render_task: &mut RenderTask,
+        entry: &mut RenderTaskCacheEntry,
+        gpu_cache: &mut GpuCache,
+        texture_cache: &mut TextureCache,
+    ) {
+        // Find out what size to alloc in the texture cache.
+        let target_kind = render_task.target_kind();
+
+        // Select the right texture page to allocate from.
+        let image_format = match target_kind {
+            RenderTargetKind::Color => texture_cache.shared_color_expected_format(),
+            RenderTargetKind::Alpha => texture_cache.shared_alpha_expected_format(),
+        };
+
+        let flags = if entry.is_opaque {
+            ImageDescriptorFlags::IS_OPAQUE
+        } else {
+            ImageDescriptorFlags::empty()
+        };
+
+        let descriptor = ImageDescriptor::new(
+            size.width,
+            size.height,
+            image_format,
+            flags,
+        );
+
+        // Allocate space in the texture cache, but don't supply
+        // any CPU-side data to be uploaded.
+        texture_cache.update(
+            &mut entry.handle,
+            descriptor,
+            TextureFilter::Linear,
+            None,
+            entry.user_data.unwrap_or([0.0; 4]),
+            DirtyRect::All,
+            gpu_cache,
+            None,
+            render_task.uv_rect_kind(),
+            Eviction::Auto,
+            TargetShader::Default,
+        );
+
+        // Get the allocation details in the texture cache, and store
+        // this in the render task. The renderer will draw this task
+        // into the appropriate rect of the texture cache on this frame.
+        let (texture_id, uv_rect, _, _, _) =
+            texture_cache.get_cache_location(&entry.handle);
+
+        let surface = StaticRenderTaskSurface::TextureCache {
+            texture: texture_id,
+            target_kind,
+        };
+
+        render_task.location = RenderTaskLocation::Static {
+            surface,
+            rect: uv_rect.to_i32(),
+        };
+    }
+
+    pub fn request_render_task<F>(
+        &mut self,
+        key: RenderTaskCacheKey,
+        texture_cache: &mut TextureCache,
+        gpu_cache: &mut GpuCache,
+        gpu_buffer_builder: &mut GpuBufferBuilder,
+        rg_builder: &mut RenderTaskGraphBuilder,
+        user_data: Option<[f32; 4]>,
+        is_opaque: bool,
+        parent: RenderTaskParent,
+        surface_builder: &mut SurfaceBuilder,
+        f: F,
+    ) -> Result<RenderTaskId, ()>
+    where
+        F: FnOnce(&mut RenderTaskGraphBuilder, &mut GpuBufferBuilder) -> Result<RenderTaskId, ()>,
+    {
+        let frame_id = self.frame_id;
+        let size = key.size;
+        // Get the texture cache handle for this cache key,
+        // or create one.
+        let cache_entries = &mut self.cache_entries;
+        let entry_handle = self.map.entry(key).or_insert_with(|| {
+            let entry = RenderTaskCacheEntry {
+                handle: TextureCacheHandle::invalid(),
+                user_data,
+                target_kind: RenderTargetKind::Color, // will be set below.
+                is_opaque,
+                frame_id,
+                render_task_id: None,
+            };
+            cache_entries.insert(entry)
+        });
+        let cache_entry = cache_entries.get_mut(entry_handle);
+        cache_entry.frame_id = self.frame_id;
+
+        // Check if this texture cache handle is valid.
+        if texture_cache.request(&cache_entry.handle, gpu_cache) {
+            // Invoke user closure to get render task chain
+            // to draw this into the texture cache.
+            let render_task_id = f(rg_builder, gpu_buffer_builder)?;
+
+            cache_entry.user_data = user_data;
+            cache_entry.is_opaque = is_opaque;
+            cache_entry.render_task_id = Some(render_task_id);
+
+            let render_task = rg_builder.get_task_mut(render_task_id);
+            let task_size = render_task.location.size();
+
+            render_task.mark_cached(entry_handle.weak());
+            cache_entry.target_kind = render_task.kind.target_kind();
+
+            RenderTaskCache::alloc_render_task(
+                task_size,
+                render_task,
+                cache_entry,
+                gpu_cache,
+                texture_cache,
+            );
+        }
+
+        // If this render task cache is being drawn this frame, ensure we hook up the
+        // render task for it as a dependency of any render task that uses this as
+        // an input source.
+        if let Some(render_task_id) = cache_entry.render_task_id {
+            match parent {
+                // TODO(gw): Remove surface from here as a follow up patch, as it's now implicit
+                //           due to using SurfaceBuilder
+                RenderTaskParent::Surface(_surface_index) => {
+                    // If parent is a surface, use helper fn to add this dependency,
+                    // which correctly takes account of the render task configuration
+                    // of the surface.
+                    surface_builder.add_child_render_task(
+                        render_task_id,
+                        rg_builder,
+                    );
+                }
+                RenderTaskParent::RenderTask(parent_render_task_id) => {
+                    // For render tasks, just add it as a direct dependency on the
+                    // task graph builder.
+                    rg_builder.add_dependency(
+                        parent_render_task_id,
+                        render_task_id,
+                    );
+                }
+            }
+
+            return Ok(render_task_id);
+        }
+
+        let target_kind = cache_entry.target_kind;
+        let mut task = RenderTask::new(
+            RenderTaskLocation::CacheRequest { size, },
+            RenderTaskKind::Cached(CachedTask {
+                target_kind,
+            }),
+        );
+        task.mark_cached(entry_handle.weak());
+        let render_task_id = rg_builder.add().init(task);
+
+        Ok(render_task_id)
+    }
+
+    pub fn get_cache_entry(
+        &self,
+        handle: &RenderTaskCacheEntryHandle,
+    ) -> &RenderTaskCacheEntry {
+        self.cache_entries
+            .get_opt(handle)
+            .expect("bug: invalid render task cache handle")
+    }
+
+    #[allow(dead_code)]
+    pub fn get_cache_item_for_render_task(&self,
+                                          texture_cache: &TextureCache,
+                                          key: &RenderTaskCacheKey)
+                                          -> CacheItem {
+        // Get the texture cache handle for this cache key.
+        let handle = self.map.get(key).unwrap();
+        let cache_entry = self.cache_entries.get(handle);
+        texture_cache.get(&cache_entry.handle)
+    }
+
+    #[allow(dead_code)]
+    pub fn get_allocated_size_for_render_task(&self,
+                                              texture_cache: &TextureCache,
+                                              key: &RenderTaskCacheKey)
+                                              -> Option<usize> {
+        let handle = self.map.get(key).unwrap();
+        let cache_entry = self.cache_entries.get(handle);
+        texture_cache.get_allocated_size(&cache_entry.handle)
+    }
+}
+
+// TODO(gw): Rounding the content rect here to device pixels is not
+// technically correct. Ideally we should ceil() here, and ensure that
+// the extra part pixel in the case of fractional sizes is correctly
+// handled. For now, just use rounding which passes the existing
+// Gecko tests.
+// Note: zero-area tasks are prohibited in WR task graph, so
+// we ensure each dimension to be at least the length of 1 after rounding.
+pub fn to_cache_size(size: LayoutSize, device_pixel_scale: &mut Scale<f32, LayoutPixel, DevicePixel>) -> DeviceIntSize {
+    let mut device_size = (size * *device_pixel_scale).round();
+
+    if device_size.width > MAX_CACHE_TASK_SIZE || device_size.height > MAX_CACHE_TASK_SIZE {
+        let scale = MAX_CACHE_TASK_SIZE / f32::max(device_size.width, device_size.height);
+        *device_pixel_scale = *device_pixel_scale * Scale::new(scale);
+        device_size = (size * *device_pixel_scale).round();
+    }
+
+    DeviceIntSize::new(
+        1.max(device_size.width as i32),
+        1.max(device_size.height as i32),
+    )
+}
diff --git a/gfx/wr/webrender/src/render_task_graph.rs b/gfx/wr/webrender/src/render_task_graph.rs
new file mode 100644
index 0000000000..ca64339cea
--- /dev/null
+++ b/gfx/wr/webrender/src/render_task_graph.rs
@@ -0,0 +1,1213 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+//! This module contains the render task graph.
+//!
+//! Code associated with creating specific render tasks is in the render_task
+//! module.
+
+use api::units::*;
+use api::ImageFormat;
+use crate::gpu_cache::{GpuCache, GpuCacheAddress};
+use crate::internal_types::{TextureSource, CacheTextureId, FastHashMap, FastHashSet, FrameId};
+use crate::render_task::{StaticRenderTaskSurface, RenderTaskLocation, RenderTask};
+use crate::render_target::RenderTargetKind;
+use crate::render_task::{RenderTaskData, RenderTaskKind};
+use crate::resource_cache::ResourceCache;
+use crate::texture_pack::GuillotineAllocator;
+use crate::prim_store::DeferredResolve;
+use crate::image_source::{resolve_image, resolve_cached_render_task};
+use crate::util::VecHelper;
+use smallvec::SmallVec;
+use std::mem;
+use topological_sort::TopologicalSort;
+
+use crate::render_target::{RenderTargetList, ColorRenderTarget};
+use crate::render_target::{PictureCacheTarget, TextureCacheRenderTarget, AlphaRenderTarget};
+use crate::util::Allocation;
+use std::{usize, f32};
+
+/// According to apitrace, textures larger than 2048 break fast clear
+/// optimizations on some intel drivers. We sometimes need to go larger, but
+/// we try to avoid it.
+const MAX_SHARED_SURFACE_SIZE: i32 = 2048;
+
+/// If we ever need a larger texture than the ideal, we better round it up to a
+/// reasonable number in order to have a bit of leeway in case the size of
+/// this target is changing each frame.
+const TEXTURE_DIMENSION_MASK: i32 = 0xFF;
+
+/// Allows initializing a render task directly into the render task buffer.
+///
+/// See util::VecHelper. RenderTask is fairly large so avoiding the move when
+/// pushing into the vector can save a lot of expensive memcpys on pages with many
+/// render tasks.
+pub struct RenderTaskAllocation<'a> {
+    pub alloc: Allocation<'a, RenderTask>,
+}
+
+impl<'l> RenderTaskAllocation<'l> {
+    #[inline(always)]
+    pub fn init(self, value: RenderTask) -> RenderTaskId {
+        RenderTaskId {
+            index: self.alloc.init(value) as u32,
+        }
+    }
+}
+
+#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
+#[derive(MallocSizeOf)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct RenderTaskId {
+    pub index: u32,
+}
+
+impl RenderTaskId {
+    pub const INVALID: RenderTaskId = RenderTaskId {
+        index: u32::MAX,
+    };
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Copy, Clone, Hash, Eq, PartialEq, PartialOrd, Ord)]
+pub struct PassId(usize);
+
+impl PassId {
+    pub const MIN: PassId = PassId(0);
+    pub const MAX: PassId = PassId(!0 - 1);
+    pub const INVALID: PassId = PassId(!0 - 2);
+}
+
+/// An internal representation of a dynamic surface that tasks can be
+/// allocated into. Maintains some extra metadata about each surface
+/// during the graph build.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct Surface {
+    /// Whether this is a color or alpha render target
+    kind: RenderTargetKind,
+    /// Allocator for this surface texture
+    allocator: GuillotineAllocator,
+    /// We can only allocate into this for reuse if it's a shared surface
+    is_shared: bool,
+    /// The pass that we can free this surface after (guaranteed
+    /// to be the same for all tasks assigned to this surface)
+    free_after: PassId,
+}
+
+impl Surface {
+    /// Allocate a rect within a shared surface. Returns None if the
+    /// format doesn't match, or allocation fails.
+    fn alloc_rect(
+        &mut self,
+        size: DeviceIntSize,
+        kind: RenderTargetKind,
+        is_shared: bool,
+        free_after: PassId,
+    ) -> Option<DeviceIntPoint> {
+        if self.kind == kind && self.is_shared == is_shared && self.free_after == free_after {
+            self.allocator
+                .allocate(&size)
+                .map(|(_slice, origin)| origin)
+        } else {
+            None
+        }
+    }
+}
+
+/// A sub-pass can draw to either a dynamic (temporary render target) surface,
+/// or a persistent surface (texture or picture cache).
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug)]
+pub enum SubPassSurface {
+    /// A temporary (intermediate) surface.
+    Dynamic {
+        /// The renderer texture id
+        texture_id: CacheTextureId,
+        /// Color / alpha render target
+        target_kind: RenderTargetKind,
+        /// The rectangle occupied by tasks in this surface. Used as a clear
+        /// optimization on some GPUs.
+        used_rect: DeviceIntRect,
+    },
+    Persistent {
+        /// Reference to the texture or picture cache surface being drawn to.
+        surface: StaticRenderTaskSurface,
+    },
+}
+
+/// A subpass is a specific render target, and a list of tasks to draw to it.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct SubPass {
+    /// The surface this subpass draws to
+    pub surface: SubPassSurface,
+    /// The tasks assigned to this subpass.
+    pub task_ids: Vec<RenderTaskId>,
+}
+
+/// A pass expresses dependencies between tasks. Each pass consists of a number
+/// of subpasses.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct Pass {
+    /// The tasks assigned to this render pass
+    pub task_ids: Vec<RenderTaskId>,
+    /// The subpasses that make up this dependency pass
+    pub sub_passes: Vec<SubPass>,
+    /// A list of intermediate surfaces that can be invalidated after
+    /// this pass completes.
+    pub textures_to_invalidate: Vec<CacheTextureId>,
+}
+
+/// The RenderTaskGraph is the immutable representation of the render task graph. It is
+/// built by the RenderTaskGraphBuilder, and is constructed once per frame.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct RenderTaskGraph {
+    /// List of tasks added to the graph
+    pub tasks: Vec<RenderTask>,
+
+    /// The passes that were created, based on dependencies between tasks
+    pub passes: Vec<Pass>,
+
+    /// Current frame id, used for debug validation
+    frame_id: FrameId,
+
+    /// GPU specific data for each task that is made available to shaders
+    pub task_data: Vec<RenderTaskData>,
+
+    /// Total number of intermediate surfaces that will be drawn to, used for test validation.
+    #[cfg(test)]
+    surface_count: usize,
+
+    /// Total number of real allocated textures that will be drawn to, used for test validation.
+    #[cfg(test)]
+    unique_surfaces: FastHashSet<CacheTextureId>,
+}
+
+/// The persistent interface that is used during frame building to construct the
+/// frame graph.
+pub struct RenderTaskGraphBuilder {
+    /// List of tasks added to the builder
+    tasks: Vec<RenderTask>,
+
+    /// List of task roots
+    roots: FastHashSet<RenderTaskId>,
+
+    /// Current frame id, used for debug validation
+    frame_id: FrameId,
+
+    /// A list of texture surfaces that can be freed at the end of a pass. Retained
+    /// here to reduce heap allocations.
+    textures_to_free: FastHashSet<CacheTextureId>,
+
+    // Keep a map of `texture_id` to metadata about surfaces that are currently
+    // borrowed from the render target pool.
+    active_surfaces: FastHashMap<CacheTextureId, Surface>,
+}
+
+impl RenderTaskGraphBuilder {
+    /// Construct a new graph builder. Typically constructed once and maintained
+    /// over many frames, to avoid extra heap allocations where possible.
+    pub fn new() -> Self {
+        RenderTaskGraphBuilder {
+            tasks: Vec::new(),
+            roots: FastHashSet::default(),
+            frame_id: FrameId::INVALID,
+            textures_to_free: FastHashSet::default(),
+            active_surfaces: FastHashMap::default(),
+        }
+    }
+
+    pub fn frame_id(&self) -> FrameId {
+        self.frame_id
+    }
+
+    /// Begin a new frame
+    pub fn begin_frame(&mut self, frame_id: FrameId) {
+        self.frame_id = frame_id;
+        self.roots.clear();
+    }
+
+    /// Get immutable access to a task
+    // TODO(gw): There's only a couple of places that existing code needs to access
+    //           a task during the building step. Perhaps we can remove this?
+    pub fn get_task(
+        &self,
+        task_id: RenderTaskId,
+    ) -> &RenderTask {
+        &self.tasks[task_id.index as usize]
+    }
+
+    /// Get mutable access to a task
+    // TODO(gw): There's only a couple of places that existing code needs to access
+    //           a task during the building step. Perhaps we can remove this?
+    pub fn get_task_mut(
+        &mut self,
+        task_id: RenderTaskId,
+    ) -> &mut RenderTask {
+        &mut self.tasks[task_id.index as usize]
+    }
+
+    /// Add a new task to the graph.
+    pub fn add(&mut self) -> RenderTaskAllocation {
+        // Assume every task is a root to start with
+        self.roots.insert(
+            RenderTaskId { index: self.tasks.len() as u32 }
+        );
+
+        RenderTaskAllocation {
+            alloc: self.tasks.alloc(),
+        }
+    }
+
+    /// Express a dependency, such that `task_id` depends on `input` as a texture source.
+    pub fn add_dependency(
+        &mut self,
+        task_id: RenderTaskId,
+        input: RenderTaskId,
+    ) {
+        self.tasks[task_id.index as usize].children.push(input);
+
+        // Once a task is an input, it's no longer a root
+        self.roots.remove(&input);
+    }
+
+    /// End the graph building phase and produce the immutable task graph for this frame
+    pub fn end_frame(
+        &mut self,
+        resource_cache: &mut ResourceCache,
+        gpu_cache: &mut GpuCache,
+        deferred_resolves: &mut Vec<DeferredResolve>,
+    ) -> RenderTaskGraph {
+        // Copy the render tasks over to the immutable graph output
+        let task_count = self.tasks.len();
+        let tasks = mem::replace(
+            &mut self.tasks,
+            Vec::with_capacity(task_count),
+        );
+
+        let mut graph = RenderTaskGraph {
+            tasks,
+            passes: Vec::new(),
+            task_data: Vec::with_capacity(task_count),
+            frame_id: self.frame_id,
+            #[cfg(test)]
+            surface_count: 0,
+            #[cfg(test)]
+            unique_surfaces: FastHashSet::default(),
+        };
+
+        // First, use a topological sort of the dependency graph to split the task set into
+        // a list of passes. This is necessary because when we have a complex graph (e.g. due
+        // to a large number of sibling backdrop-filter primitives) traversing it via a simple
+        // recursion can be too slow. The second pass determines when the last time a render task
+        // is used as an input, and assigns what pass the surface backing that render task can
+        // be freed (the surface is then returned to the render target pool and may be aliased
+        // or reused during subsequent passes).
+
+        let mut pass_count = 0;
+        let mut passes = Vec::new();
+        let mut task_sorter = TopologicalSort::<RenderTaskId>::new();
+
+        // Iterate the task list, and add all the dependencies to the topo sort
+        for (parent_id, task) in graph.tasks.iter().enumerate() {
+            let parent_id = RenderTaskId { index: parent_id as u32 };
+
+            for child_id in &task.children {
+                task_sorter.add_dependency(
+                    parent_id,
+                    *child_id,
+                );
+            }
+        }
+
+        // Pop the sorted passes off the topological sort
+        loop {
+            // Get the next set of tasks that can be drawn
+            let tasks = task_sorter.pop_all();
+
+            // If there are no tasks left, we're done
+            if tasks.is_empty() {
+                // If the task sorter itself isn't empty but we couldn't pop off any
+                // tasks, that implies a circular dependency in the task graph
+                assert!(task_sorter.is_empty());
+                break;
+            } else {
+                // Assign the `render_on` field to the task
+                for task_id in &tasks {
+                    graph.tasks[task_id.index as usize].render_on = PassId(pass_count);
+                }
+
+                // Store the task list for this pass, used later for `assign_free_pass`.
+                passes.push(tasks);
+                pass_count += 1;
+            }
+        }
+
+        // Always create at least one pass for root tasks
+        pass_count = pass_count.max(1);
+
+        // Determine which pass each task can be freed on, which depends on which is
+        // the last task that has this as an input. This must be done in top-down
+        // pass order to ensure that RenderTaskLocation::Existing references are
+        // visited in the correct order
+        for pass in passes {
+            for task_id in pass {
+                assign_free_pass(
+                    task_id,
+                    &mut graph,
+                );
+            }
+        }
+
+        // Construct passes array for tasks to be assigned to below
+        for _ in 0 .. pass_count {
+            graph.passes.push(Pass {
+                task_ids: Vec::new(),
+                sub_passes: Vec::new(),
+                textures_to_invalidate: Vec::new(),
+            });
+        }
+
+        // Assign tasks to each pass based on their `render_on` attribute
+        for (index, task) in graph.tasks.iter().enumerate() {
+            if task.kind.is_a_rendering_operation() {
+                let id = RenderTaskId { index: index as u32 };
+                graph.passes[task.render_on.0].task_ids.push(id);
+            }
+        }
+
+        // At this point, tasks are assigned to each dependency pass. Now we
+        // can go through each pass and create sub-passes, assigning each task
+        // to a target and destination rect.
+        assert!(self.active_surfaces.is_empty());
+
+        for (pass_id, pass) in graph.passes.iter_mut().enumerate().rev() {
+            assert!(self.textures_to_free.is_empty());
+
+            for task_id in &pass.task_ids {
+
+                let task_location = graph.tasks[task_id.index as usize].location.clone();
+
+                match task_location {
+                    RenderTaskLocation::Unallocated { size } => {
+                        let task = &mut graph.tasks[task_id.index as usize];
+
+                        let mut location = None;
+                        let kind = task.kind.target_kind();
+
+                        // If a task is used as part of an existing-chain then we can't
+                        // safely share it (nor would we want to).
+                        let can_use_shared_surface =
+                            task.kind.can_use_shared_surface() &&
+                            task.free_after != PassId::INVALID;
+
+                        if can_use_shared_surface {
+                            // If we can use a shared surface, step through the existing shared
+                            // surfaces for this subpass, and see if we can allocate the task
+                            // to one of these targets.
+                            for sub_pass in &mut pass.sub_passes {
+                                if let SubPassSurface::Dynamic { texture_id, ref mut used_rect, .. } = sub_pass.surface {
+                                    let surface = self.active_surfaces.get_mut(&texture_id).unwrap();
+                                    if let Some(p) = surface.alloc_rect(size, kind, true, task.free_after) {
+                                        location = Some((texture_id, p));
+                                        *used_rect = used_rect.union(&DeviceIntRect::from_origin_and_size(p, size));
+                                        sub_pass.task_ids.push(*task_id);
+                                        break;
+                                    }
+                                }
+                            }
+                        }
+
+                        if location.is_none() {
+                            // If it wasn't possible to allocate the task to a shared surface, get a new
+                            // render target from the resource cache pool.
+
+                            // If this is a really large task, don't bother allocating it as a potential
+                            // shared surface for other tasks.
+
+                            let can_use_shared_surface = can_use_shared_surface &&
+                                size.width <= MAX_SHARED_SURFACE_SIZE &&
+                                size.height <= MAX_SHARED_SURFACE_SIZE;
+
+                            let surface_size = if can_use_shared_surface {
+                                DeviceIntSize::new(
+                                    MAX_SHARED_SURFACE_SIZE,
+                                    MAX_SHARED_SURFACE_SIZE,
+                                )
+                            } else {
+                                // Round up size here to avoid constant re-allocs during resizing
+                                DeviceIntSize::new(
+                                    (size.width + TEXTURE_DIMENSION_MASK) & !TEXTURE_DIMENSION_MASK,
+                                    (size.height + TEXTURE_DIMENSION_MASK) & !TEXTURE_DIMENSION_MASK,
+                                )
+                            };
+
+                            let format = match kind {
+                                RenderTargetKind::Color => ImageFormat::RGBA8,
+                                RenderTargetKind::Alpha => ImageFormat::R8,
+                            };
+
+                            // Get render target of appropriate size and format from resource cache
+                            let texture_id = resource_cache.get_or_create_render_target_from_pool(
+                                surface_size,
+                                format,
+                            );
+
+                            // Allocate metadata we need about this surface while it's active
+                            let mut surface = Surface {
+                                kind,
+                                allocator: GuillotineAllocator::new(Some(surface_size)),
+                                is_shared: can_use_shared_surface,
+                                free_after: task.free_after,
+                            };
+
+                            // Allocation of the task must fit in this new surface!
+                            let p = surface.alloc_rect(
+                                size,
+                                kind,
+                                can_use_shared_surface,
+                                task.free_after,
+                            ).expect("bug: alloc must succeed!");
+
+                            location = Some((texture_id, p));
+
+                            // Store the metadata about this newly active surface. We should never
+                            // get a target surface with the same texture_id as a currently active surface.
+                            let _prev_surface = self.active_surfaces.insert(texture_id, surface);
+                            assert!(_prev_surface.is_none());
+
+                            // Store some information about surface allocations if in test mode
+                            #[cfg(test)]
+                            {
+                                graph.surface_count += 1;
+                                graph.unique_surfaces.insert(texture_id);
+                            }
+
+                            // Add the target as a new subpass for this render pass.
+                            pass.sub_passes.push(SubPass {
+                                surface: SubPassSurface::Dynamic {
+                                    texture_id,
+                                    target_kind: kind,
+                                    used_rect: DeviceIntRect::from_origin_and_size(p, size),
+                                },
+                                task_ids: vec![*task_id],
+                            });
+                        }
+
+                        // By now, we must have allocated a surface and rect for this task, so assign it!
+                        assert!(location.is_some());
+                        task.location = RenderTaskLocation::Dynamic {
+                            texture_id: location.unwrap().0,
+                            rect: DeviceIntRect::from_origin_and_size(location.unwrap().1, size),
+                        };
+                    }
+                    RenderTaskLocation::Existing { parent_task_id, size: existing_size, .. } => {
+                        let parent_task_location = graph.tasks[parent_task_id.index as usize].location.clone();
+
+                        match parent_task_location {
+                            RenderTaskLocation::Unallocated { .. } |
+                            RenderTaskLocation::CacheRequest { .. } |
+                            RenderTaskLocation::Existing { .. } => {
+                                panic!("bug: reference to existing task must be allocated by now");
+                            }
+                            RenderTaskLocation::Dynamic { texture_id, rect, .. } => {
+                                assert_eq!(existing_size, rect.size());
+
+                                let kind = graph.tasks[parent_task_id.index as usize].kind.target_kind();
+
+                                // A sub-pass is always created in this case, as existing tasks by definition can't be shared.
+                                pass.sub_passes.push(SubPass {
+                                    surface: SubPassSurface::Dynamic {
+                                        texture_id,
+                                        target_kind: kind,
+                                        used_rect: rect,        // clear will be skipped due to no-op check anyway
+                                    },
+                                    task_ids: vec![*task_id],
+                                });
+
+                                let task = &mut graph.tasks[task_id.index as usize];
+                                task.location = parent_task_location;
+                            }
+                            RenderTaskLocation::Static { .. } => {
+                                unreachable!("bug: not possible since we don't dup static locations");
+                            }
+                        }
+                    }
+                    RenderTaskLocation::Static { ref surface, .. } => {
+                        // No need to allocate for this surface, since it's a persistent
+                        // target. Instead, just create a new sub-pass for it.
+                        pass.sub_passes.push(SubPass {
+                            surface: SubPassSurface::Persistent {
+                                surface: surface.clone(),
+                            },
+                            task_ids: vec![*task_id],
+                        });
+                    }
+                    RenderTaskLocation::CacheRequest { .. } => {
+                        // No need to allocate nor to create a sub-pass for read-only locations.
+                    }
+                    RenderTaskLocation::Dynamic { .. } => {
+                        // Dynamic tasks shouldn't be allocated by this point
+                        panic!("bug: encountered an already allocated task");
+                    }
+                }
+
+                // Return the shared surfaces from this pass
+                let task = &graph.tasks[task_id.index as usize];
+                for child_id in &task.children {
+                    let child_task = &graph.tasks[child_id.index as usize];
+                    match child_task.location {
+                        RenderTaskLocation::Unallocated { .. } |
+                        RenderTaskLocation::Existing { .. } => panic!("bug: must be allocated"),
+                        RenderTaskLocation::Dynamic { texture_id, .. } => {
+                            // If this task can be freed after this pass, include it in the
+                            // unique set of textures to be returned to the render target pool below.
+                            if child_task.free_after == PassId(pass_id) {
+                                self.textures_to_free.insert(texture_id);
+                            }
+                        }
+                        RenderTaskLocation::Static { .. } => {}
+                        RenderTaskLocation::CacheRequest { .. } => {}
+                    }
+                }
+            }
+
+            // Return no longer used textures to the pool, so that they can be reused / aliased
+            // by later passes.
+            for texture_id in self.textures_to_free.drain() {
+                resource_cache.return_render_target_to_pool(texture_id);
+                self.active_surfaces.remove(&texture_id).unwrap();
+                pass.textures_to_invalidate.push(texture_id);
+            }
+        }
+
+        // By now, all surfaces that were borrowed from the render target pool must
+        // be returned to the resource cache, or we are leaking intermediate surfaces!
+        assert!(self.active_surfaces.is_empty());
+
+        // Each task is now allocated to a surface and target rect. Write that to the
+        // GPU blocks and task_data. After this point, the graph is returned and is
+        // considered to be immutable for the rest of the frame building process.
+
+        for task in &mut graph.tasks {
+            // First check whether the render task texture and uv rects are managed
+            // externally. This is the case for image tasks and cached tasks. In both
+            // cases it results in finding the information in the texture cache.
+            let cache_item = if let Some(ref cache_handle) = task.cache_handle {
+                Some(resolve_cached_render_task(
+                    cache_handle,
+                    resource_cache,
+                ))
+            } else if let RenderTaskKind::Image(request) = &task.kind {
+                Some(resolve_image(
+                    *request,
+                    resource_cache,
+                    gpu_cache,
+                    deferred_resolves,
+                ))
+            } else {
+                // General case (non-cached non-image tasks).
+                None
+            };
+
+            if let Some(cache_item) = cache_item {
+                // Update the render task even if the item is invalid.
+                // We'll handle it later and it's easier to not have to
+                // deal with unexpected location variants like
+                // RenderTaskLocation::CacheRequest when we do.
+                let source = cache_item.texture_id;
+                task.uv_rect_handle = cache_item.uv_rect_handle;
+                task.location = RenderTaskLocation::Static {
+                    surface: StaticRenderTaskSurface::ReadOnly { source },
+                    rect: cache_item.uv_rect,
+                };
+            }
+            // Give the render task an opportunity to add any
+            // information to the GPU cache, if appropriate.
+            let target_rect = task.get_target_rect();
+
+            task.write_gpu_blocks(
+                target_rect,
+                gpu_cache,
+            );
+
+            graph.task_data.push(
+                task.kind.write_task_data(target_rect)
+            );
+        }
+
+        graph
+    }
+}
+
+impl RenderTaskGraph {
+    /// Dump every task, pass and sub-pass of this graph through the `debug!`
+    /// logging macro.
+    #[allow(dead_code)]
+    pub fn print(&self) {
+        debug!("-- RenderTaskGraph --");
+
+        // One line per task, in storage order.
+        for (task_index, task) in self.tasks.iter().enumerate() {
+            debug!("Task {} [{}]: render_on={} free_after={} children={:?}",
+                task_index,
+                task.kind.as_str(),
+                task.render_on.0,
+                task.free_after.0,
+                task.children,
+            );
+        }
+
+        // Then each pass, its sub-passes, and the tasks assigned to them.
+        for (pass_index, pass) in self.passes.iter().enumerate() {
+            debug!("Pass {}:", pass_index);
+
+            for (sub_pass_index, sub_pass) in pass.sub_passes.iter().enumerate() {
+                debug!("\tSubPass {}: {:?}",
+                    sub_pass_index,
+                    sub_pass.surface,
+                );
+
+                for task_id in sub_pass.task_ids.iter() {
+                    debug!("\t\tTask {:?}", task_id.index);
+                }
+            }
+        }
+    }
+
+    /// Look up the GPU cache address and texture source of an optional task.
+    ///
+    /// Returns `None` when no task id is supplied, or when the task's texture
+    /// source is `TextureSource::Invalid`.
+    pub fn resolve_location(
+        &self,
+        task_id: impl Into<Option<RenderTaskId>>,
+        gpu_cache: &GpuCache,
+    ) -> Option<(GpuCacheAddress, TextureSource)> {
+        let task_id: Option<RenderTaskId> = task_id.into();
+        task_id.and_then(|id| self.resolve_impl(id, gpu_cache))
+    }
+
+    /// Non-generic worker behind `resolve_location`.
+    fn resolve_impl(
+        &self,
+        task_id: RenderTaskId,
+        gpu_cache: &GpuCache,
+    ) -> Option<(GpuCacheAddress, TextureSource)> {
+        let task = &self[task_id];
+
+        match task.get_texture_source() {
+            // Nothing to sample from, so there is no location to report.
+            TextureSource::Invalid => None,
+            texture_source => {
+                let uv_address = task.get_texture_address(gpu_cache);
+                Some((uv_address, texture_source))
+            }
+        }
+    }
+
+
+    /// Return `(surface_count, number of unique surfaces)`, used for testing
+    #[cfg(test)]
+    pub fn surface_counts(&self) -> (usize, usize) {
+        let total = self.surface_count;
+        let unique = self.unique_surfaces.len();
+        (total, unique)
+    }
+
+    /// Return the frame id stored in this graph, used for validation
+    #[cfg(debug_assertions)]
+    pub fn frame_id(&self) -> FrameId {
+        self.frame_id
+    }
+}
+
+/// Lets callers (batching in particular) read a task with `graph[task_id]`.
+///
+/// Panics if the id's index is out of range for this graph's task list.
+impl std::ops::Index<RenderTaskId> for RenderTaskGraph {
+    type Output = RenderTask;
+    fn index(&self, id: RenderTaskId) -> &Self::Output {
+        let slot = id.index as usize;
+        &self.tasks[slot]
+    }
+}
+
+/// Clamp the `free_after` pass of each child of task `id` to the pass that
+/// `id` itself renders on.
+///
+/// Panics if a child already has a `Dynamic` location, since children are not
+/// expected to be allocated when this runs.
+fn assign_free_pass(
+    id: RenderTaskId,
+    graph: &mut RenderTaskGraph,
+) {
+    // Copy what is needed out of the parent task up front, so the graph can be
+    // mutated while walking the children.
+    let (render_on, child_task_ids) = {
+        let task = &graph.tasks[id.index as usize];
+        let child_task_ids: SmallVec<[RenderTaskId; 8]> = task.children.iter().copied().collect();
+        (task.render_on, child_task_ids)
+    };
+
+    for child_id in child_task_ids {
+        let child_index = child_id.index as usize;
+        let child_location = graph.tasks[child_index].location.clone();
+
+        // Decide whether this child's `free_after` has to be clamped.
+        let clamp_free_after = match child_location {
+            RenderTaskLocation::CacheRequest { .. } => false,
+            RenderTaskLocation::Static { .. } => {
+                // never get freed anyway, so can leave untouched
+                // (could validate that they remain at PassId::MIN)
+                false
+            }
+            RenderTaskLocation::Dynamic { .. } => {
+                panic!("bug: should not be allocated yet");
+            }
+            RenderTaskLocation::Unallocated { .. } => true,
+            RenderTaskLocation::Existing { parent_task_id, .. } => {
+                // The task whose surface is being referenced is marked with
+                // the invalid pass id before the child itself is updated.
+                graph.tasks[parent_task_id.index as usize].free_after = PassId::INVALID;
+                true
+            }
+        };
+
+        if !clamp_free_after {
+            continue;
+        }
+
+        // A child's backing surface can be freed after the last task that
+        // reads it as an input. Taking the minimum keeps the free time safe
+        // when several paths from the root(s) lead to the same child.
+        let child_task = &mut graph.tasks[child_index];
+        if child_task.free_after != PassId::INVALID {
+            child_task.free_after = child_task.free_after.min(render_on);
+        }
+    }
+}
+
+/// A render pass represents a set of rendering operations that don't depend on one
+/// another.
+///
+/// A render pass can have several render targets if there wasn't enough space in one
+/// target to do all of the rendering for that pass. See `RenderTargetList`.
+///
+/// Serialization / deserialization are only derived when the `capture` / `replay`
+/// features are enabled.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct RenderPass {
+    /// The alpha render targets of this pass (`RenderPass::new` creates this
+    /// list with `ImageFormat::R8`).
+    pub alpha: RenderTargetList<AlphaRenderTarget>,
+    /// The color render targets of this pass (`RenderPass::new` creates this
+    /// list with `ImageFormat::RGBA8`).
+    pub color: RenderTargetList<ColorRenderTarget>,
+    /// Texture cache render targets, keyed by cache texture id.
+    pub texture_cache: FastHashMap<CacheTextureId, TextureCacheRenderTarget>,
+    /// Picture cache targets for this pass; starts out empty.
+    pub picture_cache: Vec<PictureCacheTarget>,
+    /// Cache texture ids copied from the source `Pass` by `RenderPass::new`.
+    // NOTE(review): presumably the textures that can be invalidated once this
+    // pass has been rendered - confirm against the renderer.
+    pub textures_to_invalidate: Vec<CacheTextureId>,
+}
+
+impl RenderPass {
+    /// Creates an intermediate off-screen pass.
+    ///
+    /// The new pass starts with empty target lists and copies the list of
+    /// textures to invalidate from `src`.
+    pub fn new(src: &Pass) -> Self {
+        let color = RenderTargetList::new(ImageFormat::RGBA8);
+        let alpha = RenderTargetList::new(ImageFormat::R8);
+        let textures_to_invalidate = src.textures_to_invalidate.clone();
+
+        Self {
+            alpha,
+            color,
+            texture_cache: FastHashMap::default(),
+            picture_cache: Vec::new(),
+            textures_to_invalidate,
+        }
+    }
+}
+
+/// Dump an SVG visualization of the render graph for debugging purposes.
+///
+/// Each pass becomes a column (passes are walked in reverse order), each task
+/// of a pass becomes a labelled box in that column, and a curve is drawn from
+/// every child task to the task that lists it in `children`.
+///
+/// Errors from writing to `output` are returned, except for the dependency
+/// links, which are written by `dump_task_dependency_link`.
+#[cfg(feature = "capture")]
+pub fn dump_render_tasks_as_svg(
+    render_tasks: &RenderTaskGraph,
+    output: &mut dyn std::io::Write,
+) -> std::io::Result<()> {
+    use svg_fmt::*;
+
+    /// Everything needed to draw the box of a single task.
+    #[derive(Clone)]
+    struct Node {
+        rect: Rectangle,
+        label: Text,
+        size: Text,
+    }
+
+    // Layout constants.
+    let node_width = 80.0;
+    let node_height = 30.0;
+    let vertical_spacing = 8.0;
+    let horizontal_spacing = 20.0;
+    let margin = 10.0;
+    let text_size = 10.0;
+
+    let mut pass_rects = Vec::new();
+    // Indexed by task index; stays `None` for tasks that are in no pass.
+    let mut nodes = vec![None; render_tasks.tasks.len()];
+
+    let mut column_x = margin;
+    let mut max_y: f32 = 0.0;
+
+    // Lay out one column of boxes per pass.
+    for pass in render_tasks.passes.iter().rev() {
+        let mut layout = VerticalLayout::new(column_x, margin, node_width);
+
+        for task_id in pass.task_ids.iter() {
+            let task_index = task_id.index as usize;
+            let task = &render_tasks.tasks[task_index];
+
+            let rect = layout.push_rectangle(node_height);
+
+            let text_x = rect.x + rect.w / 2.0;
+            let text_y = rect.y + 10.0;
+
+            let label = text(text_x, text_y, task.kind.as_str().to_string());
+            let size = text(text_x, text_y + 12.0, format!("{:?}", task.location.size()));
+
+            nodes[task_index] = Some(Node { rect, label, size });
+
+            layout.advance(vertical_spacing);
+        }
+
+        pass_rects.push(layout.total_rectangle());
+
+        column_x += node_width + horizontal_spacing;
+        max_y = max_y.max(layout.y + margin);
+    }
+
+    // Collect the end points of every dependency link: from the right edge of
+    // the child's box to the left edge of the dependent task's box.
+    let mut links = Vec::new();
+    for (node_index, slot) in nodes.iter().enumerate() {
+        let node = match slot {
+            Some(node) => node,
+            None => continue,
+        };
+
+        for dep in render_tasks.tasks[node_index].children.iter() {
+            if let Some(dep_node) = &nodes[dep.index as usize] {
+                links.push((
+                    dep_node.rect.x + dep_node.rect.w,
+                    dep_node.rect.y + dep_node.rect.h / 2.0,
+                    node.rect.x,
+                    node.rect.y + node.rect.h / 2.0,
+                ));
+            }
+        }
+    }
+
+    let svg_w = column_x + margin;
+    let svg_h = max_y + margin;
+    writeln!(output, "{}", BeginSvg { w: svg_w, h: svg_h })?;
+
+    // Background.
+    let background = rectangle(0.0, 0.0, svg_w, svg_h)
+        .inflate(1.0, 1.0)
+        .fill(rgb(50, 50, 50));
+    writeln!(output, "    {}", background)?;
+
+    // Passes.
+    for rect in pass_rects {
+        let pass_box = rect.inflate(3.0, 3.0)
+            .border_radius(4.0)
+            .opacity(0.4)
+            .fill(black());
+        writeln!(output, "    {}", pass_box)?;
+    }
+
+    // Links.
+    for (x1, y1, x2, y2) in links {
+        dump_task_dependency_link(output, x1, y1, x2, y2);
+    }
+
+    // Tasks.
+    for node in nodes.iter().flatten() {
+        // Drop shadow, slightly offset below the box.
+        let shadow = node.rect
+            .clone()
+            .fill(black())
+            .border_radius(3.0)
+            .opacity(0.5)
+            .offset(0.0, 2.0);
+        writeln!(output, "    {}", shadow)?;
+
+        // The box itself.
+        let body = node.rect
+            .clone()
+            .fill(rgb(200, 200, 200))
+            .border_radius(3.0)
+            .opacity(0.8);
+        writeln!(output, "    {}", body)?;
+
+        // Task kind, then the task size in a smaller font.
+        let label = node.label
+            .clone()
+            .size(text_size)
+            .align(Align::Center)
+            .color(rgb(50, 50, 50));
+        writeln!(output, "    {}", label)?;
+
+        let size = node.size
+            .clone()
+            .size(text_size * 0.7)
+            .align(Align::Center)
+            .color(rgb(50, 50, 50));
+        writeln!(output, "    {}", size)?;
+    }
+
+    writeln!(output, "{}", EndSvg)
+}
+
+/// Write the SVG path of one dependency link, going from (`x1`, `y1`) to
+/// (`x2`, `y2`).
+///
+/// Panics if writing to `output` fails.
+#[allow(dead_code)]
+fn dump_task_dependency_link(
+    output: &mut dyn std::io::Write,
+    x1: f32, y1: f32,
+    x2: f32, y2: f32,
+) {
+    use svg_fmt::*;
+
+    // If the link is a straight horizontal line and spans over multiple passes, it
+    // is likely to go straight though unrelated nodes in a way that makes it look like
+    // they are connected, so we bend the line upward a bit to avoid that.
+    let simple_path = (y1 - y2).abs() > 1.0 || (x2 - x1) < 45.0;
+
+    let mid_x = (x1 + x2) / 2.0;
+
+    if !simple_path {
+        // Two curves joined at a point raised 25 units above the start.
+        let ctrl1_x = (mid_x + x1) / 2.0;
+        let ctrl2_x = (mid_x + x2) / 2.0;
+        let ctrl_y = y1 - 25.0;
+        write!(output, "    {}",
+            path().move_to(x1, y1)
+                .cubic_bezier_to(ctrl1_x, y1, ctrl1_x, ctrl_y, mid_x, ctrl_y)
+                .cubic_bezier_to(ctrl2_x, ctrl_y, ctrl2_x, y2, x2, y2)
+                .fill(Fill::None)
+                .stroke(Stroke::Color(rgb(100, 100, 100), 3.0))
+        ).unwrap();
+        return;
+    }
+
+    // Common case: a single curve between the two end points.
+    write!(output, "    {}",
+        path().move_to(x1, y1)
+            .cubic_bezier_to(mid_x, y1, mid_x, y2, x2, y2)
+            .fill(Fill::None)
+            .stroke(Stroke::Color(rgb(100, 100, 100), 3.0))
+    ).unwrap();
+}
+
+/// Build a static picture cache location for tests, backed by the native tile
+/// (`tile_x`, `tile_y`) of surface `surface_id`. The tile is always 512x512.
+#[cfg(test)]
+fn pc_target(
+    surface_id: u64,
+    tile_x: i32,
+    tile_y: i32,
+) -> RenderTaskLocation {
+    use crate::composite::{NativeSurfaceId, NativeTileId};
+    use crate::picture::ResolvedSurfaceTexture;
+
+    let tile_dim = 512;
+
+    let tile_id = NativeTileId {
+        surface_id: NativeSurfaceId(surface_id),
+        x: tile_x,
+        y: tile_y,
+    };
+
+    let surface = ResolvedSurfaceTexture::Native {
+        id: tile_id,
+        size: DeviceIntSize::new(tile_dim, tile_dim),
+    };
+
+    RenderTaskLocation::Static {
+        surface: StaticRenderTaskSurface::PictureCache { surface },
+        rect: DeviceIntSize::new(tile_dim, tile_dim).into(),
+    }
+}
+
+#[cfg(test)]
+impl RenderTaskGraphBuilder {
+    /// Finish the frame against freshly created test caches, then check the
+    /// resulting graph.
+    ///
+    /// * `pass_count` - expected number of passes.
+    /// * `total_surface_count` - expected first value of `surface_counts()`.
+    /// * `unique_surfaces` - expected surfaces, also handed to
+    ///   `ResourceCache::validate_surfaces`.
+    fn test_expect(
+        mut self,
+        pass_count: usize,
+        total_surface_count: usize,
+        unique_surfaces: &[(i32, i32, ImageFormat)],
+    ) {
+        use api::{DocumentId, IdNamespace};
+        use crate::internal_types::FrameStamp;
+
+        let mut resource_cache = ResourceCache::new_for_testing();
+        let mut gpu_cache = GpuCache::new();
+
+        // Start a frame on the GPU cache before the graph is built.
+        let mut frame_stamp = FrameStamp::first(DocumentId::new(IdNamespace(1), 1));
+        frame_stamp.advance();
+        gpu_cache.prepare_for_frames();
+        gpu_cache.begin_frame(frame_stamp);
+
+        let mut deferred_resolves = Vec::new();
+        let graph = self.end_frame(&mut resource_cache, &mut gpu_cache, &mut deferred_resolves);
+        graph.print();
+
+        assert_eq!(graph.passes.len(), pass_count);
+
+        let expected_counts = (total_surface_count, unique_surfaces.len());
+        assert_eq!(graph.surface_counts(), expected_counts);
+
+        resource_cache.validate_surfaces(unique_surfaces);
+    }
+}
+
+/// Build a color-target test task placed at `location`
+#[cfg(test)]
+fn task_location(location: RenderTaskLocation) -> RenderTask {
+    let target_kind = RenderTargetKind::Color;
+    RenderTask::new_test(location, target_kind)
+}
+
+/// Build a color-target test task that still needs a `size` x `size`
+/// allocation (its location is `Unallocated`)
+#[cfg(test)]
+fn task_dynamic(size: i32) -> RenderTask {
+    let location = RenderTaskLocation::Unallocated {
+        size: DeviceIntSize::new(size, size),
+    };
+
+    RenderTask::new_test(location, RenderTargetKind::Color)
+}
+
+#[test]
+fn fg_test_1() {
+    // Test that a root target can be used as an input for readbacks: a dynamic
+    // task reads the root, and is then an input for content drawn to the same
+    // target. This functionality isn't currently used, but will be in future.
+
+    let mut builder = RenderTaskGraphBuilder::new();
+
+    let target = pc_target(0, 0, 0);
+
+    let root_task = builder.add().init(task_location(target.clone()));
+
+    let readback_task = builder.add().init(task_dynamic(100));
+    builder.add_dependency(readback_task, root_task);
+
+    let blend_content_task = builder.add().init(task_dynamic(50));
+
+    let content_task = builder.add().init(task_location(target));
+    builder.add_dependency(content_task, readback_task);
+    builder.add_dependency(content_task, blend_content_task);
+
+    // Expect 3 passes and a single 2048x2048 RGBA8 surface.
+    builder.test_expect(3, 1, &[
+        (2048, 2048, ImageFormat::RGBA8),
+    ]);
+}
+
+#[test]
+fn fg_test_3() {
+    // Test that small targets are allocated in a shared surface, and that large
+    // tasks are allocated in a rounded up texture size.
+
+    let mut builder = RenderTaskGraphBuilder::new();
+
+    let root_task = builder.add().init(task_location(pc_target(0, 0, 0)));
+
+    let small_task = builder.add().init(task_dynamic(128));
+    let large_task = builder.add().init(task_dynamic(3000));
+
+    builder.add_dependency(root_task, small_task);
+    builder.add_dependency(root_task, large_task);
+
+    // Expect 2 passes and 2 surfaces: a 2048x2048 one for the 128 task and a
+    // 3072x3072 one for the 3000 task.
+    builder.test_expect(2, 2, &[
+        (2048, 2048, ImageFormat::RGBA8),
+        (3072, 3072, ImageFormat::RGBA8),
+    ]);
+}
+
+#[test]
+fn fg_test_4() {
+    // Test that for a simple dependency chain of tasks, render target
+    // surfaces are aliased and reused between passes where possible.
+
+    let mut builder = RenderTaskGraphBuilder::new();
+
+    let root_task = builder.add().init(task_location(pc_target(0, 0, 0)));
+
+    let link_0 = builder.add().init(task_dynamic(128));
+    let link_1 = builder.add().init(task_dynamic(128));
+    let link_2 = builder.add().init(task_dynamic(128));
+
+    // root <- link_0 <- link_1 <- link_2
+    builder.add_dependency(root_task, link_0);
+    builder.add_dependency(link_0, link_1);
+    builder.add_dependency(link_1, link_2);
+
+    // Expect 4 passes and 3 surface allocations over 2 unique surfaces.
+    builder.test_expect(4, 3, &[
+        (2048, 2048, ImageFormat::RGBA8),
+        (2048, 2048, ImageFormat::RGBA8),
+    ]);
+}
+
+#[test]
+fn fg_test_5() {
+    // Test that a task that is used as an input by its direct parent and also
+    // by a distant ancestor is scheduled correctly, and allocates the correct
+    // number of passes, taking advantage of surface reuse / aliasing where feasible.
+
+    let mut builder = RenderTaskGraphBuilder::new();
+
+    let root_task = builder.add().init(task_location(pc_target(0, 0, 0)));
+
+    let link_0 = builder.add().init(task_dynamic(128));
+    let link_1 = builder.add().init(task_dynamic(64));
+    let link_2 = builder.add().init(task_dynamic(32));
+    let link_3 = builder.add().init(task_dynamic(16));
+
+    // root <- link_0 <- link_1 <- link_2 <- link_3, plus root <- link_3
+    builder.add_dependency(root_task, link_0);
+    builder.add_dependency(link_0, link_1);
+    builder.add_dependency(link_1, link_2);
+    builder.add_dependency(link_2, link_3);
+    builder.add_dependency(root_task, link_3);
+
+    // Expect 5 passes and 4 surface allocations over 3 unique surfaces.
+    builder.test_expect(5, 4, &[
+        (2048, 2048, ImageFormat::RGBA8),
+        (2048, 2048, ImageFormat::RGBA8),
+        (2048, 2048, ImageFormat::RGBA8),
+    ]);
+}
+
+#[test]
+fn fg_test_6() {
+    // Test that a task that is used as an input dependency by two parent
+    // tasks is correctly allocated and freed.
+
+    let mut builder = RenderTaskGraphBuilder::new();
+
+    let first_root = builder.add().init(task_location(pc_target(0, 0, 0)));
+    let second_root = builder.add().init(task_location(pc_target(0, 1, 0)));
+
+    let shared_child = builder.add().init(task_dynamic(128));
+
+    builder.add_dependency(first_root, shared_child);
+    builder.add_dependency(second_root, shared_child);
+
+    // Expect 2 passes and a single 2048x2048 RGBA8 surface.
+    builder.test_expect(2, 1, &[
+        (2048, 2048, ImageFormat::RGBA8),
+    ]);
+}
+
+#[test]
+fn fg_test_7() {
+    // Test that a standalone surface is not incorrectly used to
+    // allocate subsequent shared task rects.
+
+    let mut builder = RenderTaskGraphBuilder::new();
+
+    let root_task = builder.add().init(task_location(pc_target(0, 0, 0)));
+
+    let first_a = builder.add().init(task_dynamic(16));
+    let first_b = builder.add().init(task_dynamic(16));
+
+    let second_a = builder.add().init(task_dynamic(16));
+    let second_b = builder.add().init(task_dynamic(16));
+
+    // First branch: root <- first_a <- first_b, plus root <- first_b
+    builder.add_dependency(root_task, first_a);
+    builder.add_dependency(first_a, first_b);
+    builder.add_dependency(root_task, first_b);
+
+    // Second branch: root <- second_a <- second_b
+    builder.add_dependency(root_task, second_a);
+    builder.add_dependency(second_a, second_b);
+
+    // Expect 3 passes and 3 surface allocations, all on distinct surfaces.
+    builder.test_expect(3, 3, &[
+        (2048, 2048, ImageFormat::RGBA8),
+        (2048, 2048, ImageFormat::RGBA8),
+        (2048, 2048, ImageFormat::RGBA8),
+    ]);
+}
diff --git a/gfx/wr/webrender/src/renderer/debug.rs b/gfx/wr/webrender/src/renderer/debug.rs
new file mode 100644
index 0000000000..7e16d15d76
--- /dev/null
+++ b/gfx/wr/webrender/src/renderer/debug.rs
@@ -0,0 +1,415 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{ColorU, ImageFormat, ImageBufferKind};
+use api::units::*;
+use crate::debug_font_data;
+use crate::device::{Device, Program, Texture, TextureSlot, VertexDescriptor, ShaderError, VAO};
+use crate::device::{TextureFilter, VertexAttribute, VertexAttributeKind, VertexUsageHint};
+use euclid::{Point2D, Rect, Size2D, Transform3D, default};
+use crate::internal_types::Swizzle;
+use std::f32;
+
+#[derive(Debug, Copy, Clone)]
+enum DebugSampler { // texture samplers used by the debug shaders
+    Font, // bound to the debug font texture; converts to `TextureSlot(0)`
+}
+
+/// Maps each debug sampler to its texture slot. Implemented as `From` so callers
+/// get `Into<TextureSlot>` through the standard blanket impl.
+impl From<DebugSampler> for TextureSlot {
+    fn from(sampler: DebugSampler) -> Self {
+        match sampler { DebugSampler::Font => TextureSlot(0) }
+    }
+}
+
+const DESC_FONT: VertexDescriptor = VertexDescriptor { // vertex layout of the "debug_font" program / font VAO
+    vertex_attributes: &[
+        VertexAttribute {
+            name: "aPosition", // two f32s, same order as `DebugFontVertex::{x, y}`
+            count: 2,
+            kind: VertexAttributeKind::F32,
+        },
+        VertexAttribute {
+            name: "aColor", // four normalized bytes, follows the position like `DebugFontVertex::color`
+            count: 4,
+            kind: VertexAttributeKind::U8Norm,
+        },
+        VertexAttribute {
+            name: "aColorTexCoord", // two f32s, same order as `DebugFontVertex::{u, v}`
+            count: 2,
+            kind: VertexAttributeKind::F32,
+        },
+    ],
+    instance_attributes: &[], // debug geometry is not instanced
+};
+
+const DESC_COLOR: VertexDescriptor = VertexDescriptor { // vertex layout of the "debug_color" program (triangle and line VAOs)
+    vertex_attributes: &[
+        VertexAttribute {
+            name: "aPosition", // two f32s, same order as `DebugColorVertex::{x, y}`
+            count: 2,
+            kind: VertexAttributeKind::F32,
+        },
+        VertexAttribute {
+            name: "aColor", // four normalized bytes, follows the position like `DebugColorVertex::color`
+            count: 4,
+            kind: VertexAttributeKind::U8Norm,
+        },
+    ],
+    instance_attributes: &[], // debug geometry is not instanced
+};
+
+#[repr(C)] // field order is the memory order; it follows the attribute order in `DESC_FONT`
+pub struct DebugFontVertex {
+    pub x: f32, // position, in the coordinate space set up by `DebugRenderer::render`
+    pub y: f32,
+    pub color: ColorU,
+    pub u: f32, // texture coordinate into the debug font texture
+    pub v: f32,
+}
+
+impl DebugFontVertex {
+    pub fn new(x: f32, y: f32, u: f32, v: f32, color: ColorU) -> DebugFontVertex {
+        Self { x, y, u, v, color } // NB: argument order differs from the field (memory) order
+    }
+}
+
+#[repr(C)] // field order is the memory order; it follows the attribute order in `DESC_COLOR`
+pub struct DebugColorVertex {
+    pub x: f32, // position, in the coordinate space set up by `DebugRenderer::render`
+    pub y: f32,
+    pub color: ColorU,
+}
+
+impl DebugColorVertex {
+    pub fn new(x: f32, y: f32, color: ColorU) -> DebugColorVertex {
+        Self { x, y, color } // plain field-by-field construction
+    }
+}
+
+pub struct DebugRenderer { // batches debug text, quads and lines; drawn and cleared by `render`
+    font_vertices: Vec<DebugFontVertex>, // four vertices per glyph, queued by `add_text`
+    font_indices: Vec<u32>, // six indices (two triangles) per glyph
+    font_program: Program, // "debug_font" shader
+    font_vao: VAO,
+    font_texture: Texture, // R8 texture holding `debug_font_data::FONT_BITMAP`
+
+    tri_vertices: Vec<DebugColorVertex>, // four vertices per quad, queued by `add_quad`
+    tri_indices: Vec<u32>, // six indices (two triangles) per quad
+    tri_vao: VAO,
+    line_vertices: Vec<DebugColorVertex>, // two vertices per line, queued by `add_line`
+    line_vao: VAO,
+    color_program: Program, // "debug_color" shader, shared by triangles and lines
+}
+
+impl DebugRenderer {
+    pub fn new(device: &mut Device) -> Result<Self, ShaderError> { // errors if either debug program cannot be created
+        let font_program = device.create_program_linked(
+            "debug_font",
+            &[],
+            &DESC_FONT,
+        )?;
+        device.bind_program(&font_program); // bound before its sampler is assigned on the next line
+        device.bind_shader_samplers(&font_program, &[("sColor0", DebugSampler::Font)]);
+
+        let color_program = device.create_program_linked(
+            "debug_color",
+            &[],
+            &DESC_COLOR,
+        )?;
+
+        let font_vao = device.create_vao(&DESC_FONT, 1);
+        let line_vao = device.create_vao(&DESC_COLOR, 1); // lines and triangles share one vertex layout
+        let tri_vao = device.create_vao(&DESC_COLOR, 1);
+
+        let font_texture = device.create_texture( // single-channel texture for the baked font bitmap
+            ImageBufferKind::Texture2D,
+            ImageFormat::R8,
+            debug_font_data::BMP_WIDTH,
+            debug_font_data::BMP_HEIGHT,
+            TextureFilter::Linear,
+            None,
+        );
+        device.upload_texture_immediate(
+            &font_texture,
+            &debug_font_data::FONT_BITMAP
+        );
+
+        Ok(DebugRenderer { // all geometry queues start empty
+            font_vertices: Vec::new(),
+            font_indices: Vec::new(),
+            line_vertices: Vec::new(),
+            tri_vao,
+            tri_vertices: Vec::new(),
+            tri_indices: Vec::new(),
+            font_program,
+            color_program,
+            font_vao,
+            line_vao,
+            font_texture,
+        })
+    }
+
+    pub fn deinit(self, device: &mut Device) { // consumes the renderer and deletes every GPU object created in `new`
+        device.delete_texture(self.font_texture);
+        device.delete_program(self.font_program);
+        device.delete_program(self.color_program);
+        device.delete_vao(self.tri_vao);
+        device.delete_vao(self.line_vao);
+        device.delete_vao(self.font_vao);
+    }
+
+    pub fn line_height(&self) -> f32 {
+        1.1 * (debug_font_data::FONT_SIZE as f32) // the font size plus 10%
+    }
+
+    /// Queues the glyph quads for one line of `text` starting at (`x`, `y`) and
+    /// returns the bounding rect of the glyphs that were actually queued (a
+    /// degenerate, negative-sized rect if none were). Characters without a baked
+    /// glyph are skipped; if |bounds| is specified, glyphs not fully inside it
+    /// are discarded without advancing the pen. The Y-coordinate is relative to
+    /// the screen top, along with everything else in this file.
+    pub fn add_text(
+        &mut self,
+        x: f32,
+        y: f32,
+        text: &str,
+        color: ColorU,
+        bounds: Option<DeviceRect>,
+    ) -> default::Rect<f32> {
+        let mut x_start = x;
+        let ipw = 1.0 / debug_font_data::BMP_WIDTH as f32;
+        let iph = 1.0 / debug_font_data::BMP_HEIGHT as f32;
+
+        let mut min_x = f32::MAX;
+        let mut max_x = -f32::MAX;
+        let mut min_y = f32::MAX;
+        let mut max_y = -f32::MAX;
+
+        for c in text.chars() {
+            // Wrapping: code points below FIRST_GLYPH_INDEX wrap to a huge index and fail the range check.
+            let c = (c as usize).wrapping_sub(debug_font_data::FIRST_GLYPH_INDEX as usize);
+            if c < debug_font_data::GLYPHS.len() {
+                let glyph = &debug_font_data::GLYPHS[c];
+                let x0 = (x_start + glyph.xo + 0.5).floor();
+                let y0 = (y + glyph.yo + 0.5).floor();
+
+                let x1 = x0 + glyph.x1 as f32 - glyph.x0 as f32;
+                let y1 = y0 + glyph.y1 as f32 - glyph.y0 as f32;
+
+                // If either corner of the glyph will end up out of bounds, drop it.
+                if let Some(b) = bounds {
+                    let rect = DeviceRect {
+                        min: DevicePoint::new(x0, y0),
+                        max: DevicePoint::new(x1, y1),
+                    };
+                    if !b.contains_box(&rect) {
+                        continue;
+                    }
+                }
+
+                let s0 = glyph.x0 as f32 * ipw;
+                let t0 = glyph.y0 as f32 * iph;
+                let s1 = glyph.x1 as f32 * ipw;
+                let t1 = glyph.y1 as f32 * iph;
+
+                x_start += glyph.xa;
+
+                let vertex_count = self.font_vertices.len() as u32;
+
+                self.font_vertices
+                    .push(DebugFontVertex::new(x0, y0, s0, t0, color));
+                self.font_vertices
+                    .push(DebugFontVertex::new(x1, y0, s1, t0, color));
+                self.font_vertices
+                    .push(DebugFontVertex::new(x0, y1, s0, t1, color));
+                self.font_vertices
+                    .push(DebugFontVertex::new(x1, y1, s1, t1, color));
+
+                self.font_indices.push(vertex_count + 0);
+                self.font_indices.push(vertex_count + 1);
+                self.font_indices.push(vertex_count + 2);
+                self.font_indices.push(vertex_count + 2);
+                self.font_indices.push(vertex_count + 1);
+                self.font_indices.push(vertex_count + 3);
+
+                min_x = min_x.min(x0);
+                max_x = max_x.max(x1);
+                min_y = min_y.min(y0);
+                max_y = max_y.max(y1);
+            }
+        }
+
+        Rect::new(
+            Point2D::new(min_x, min_y),
+            Size2D::new(max_x - min_x, max_y - min_y),
+        )
+    }
+
+    pub fn add_quad(
+        &mut self,
+        x0: f32,
+        y0: f32,
+        x1: f32,
+        y1: f32,
+        color_top: ColorU,
+        color_bottom: ColorU,
+    ) {
+        let base = self.tri_vertices.len() as u32;
+
+        for &(x, y, color) in &[
+            (x0, y0, color_top),
+            (x1, y0, color_top),
+            (x0, y1, color_bottom),
+            (x1, y1, color_bottom),
+        ] {
+            self.tri_vertices.push(DebugColorVertex::new(x, y, color));
+        }
+
+        // Two triangles over the four corners just pushed; they share the edge
+        // between corner 1 (x1, y0) and corner 2 (x0, y1).
+        for offset in &[0u32, 1, 2, 2, 1, 3] {
+            self.tri_indices.push(base + offset);
+        }
+    }
+
+    #[allow(dead_code)]
+    pub fn add_line(&mut self, x0: i32, y0: i32, color0: ColorU, x1: i32, y1: i32, color1: ColorU) {
+        let start = DebugColorVertex::new(x0 as f32, y0 as f32, color0);
+        let end = DebugColorVertex::new(x1 as f32, y1 as f32, color1);
+        self.line_vertices.push(start); // each line is stored as two consecutive
+        self.line_vertices.push(end); // vertices: its start, then its end
+    }
+
+
+    pub fn add_rect(&mut self, rect: &DeviceIntRect, color: ColorU) {
+        let (left, top) = (rect.min.x, rect.min.y);
+        let (right, bottom) = (rect.max.x, rect.max.y);
+        self.add_line(left, top, color, right, top, color); // min-y edge
+        self.add_line(right, top, color, right, bottom, color); // max-x edge
+        self.add_line(right, bottom, color, left, bottom, color); // max-y edge
+        self.add_line(left, bottom, color, left, top, color); // min-x edge, closing the outline
+    }
+
+    pub fn render( // draws everything queued since the last call, then empties the queues
+        &mut self,
+        device: &mut Device,
+        viewport_size: Option<DeviceIntSize>, // `None` => nothing is drawn, but the queues are still cleared
+        scale: f32, // multiplies the viewport size used for the projection
+        surface_origin_is_top_left: bool, // selects which way the Y axis of the projection runs
+    ) {
+        if let Some(viewport_size) = viewport_size {
+            device.disable_depth();
+            device.set_blend(true);
+            device.set_blend_mode_premultiplied_alpha();
+
+            let (bottom, top) = if surface_origin_is_top_left {
+                (0.0, viewport_size.height as f32 * scale)
+            } else {
+                (viewport_size.height as f32 * scale, 0.0)
+            };
+
+            let projection = Transform3D::ortho( // one projection shared by all three passes below
+                0.0,
+                viewport_size.width as f32 * scale,
+                bottom,
+                top,
+                device.ortho_near_plane(),
+                device.ortho_far_plane(),
+            );
+
+            // Triangles
+            if !self.tri_vertices.is_empty() {
+                device.bind_program(&self.color_program);
+                device.set_uniforms(&self.color_program, &projection);
+                device.bind_vao(&self.tri_vao);
+                device.update_vao_indices(&self.tri_vao, &self.tri_indices, VertexUsageHint::Dynamic);
+                device.update_vao_main_vertices(
+                    &self.tri_vao,
+                    &self.tri_vertices,
+                    VertexUsageHint::Dynamic,
+                );
+                device.draw_triangles_u32(0, self.tri_indices.len() as i32);
+            }
+
+            // Lines
+            if !self.line_vertices.is_empty() {
+                device.bind_program(&self.color_program);
+                device.set_uniforms(&self.color_program, &projection);
+                device.bind_vao(&self.line_vao);
+                device.update_vao_main_vertices(
+                    &self.line_vao,
+                    &self.line_vertices,
+                    VertexUsageHint::Dynamic,
+                );
+                device.draw_nonindexed_lines(0, self.line_vertices.len() as i32); // no index buffer for lines
+            }
+
+            // Glyph
+            if !self.font_indices.is_empty() {
+                device.bind_program(&self.font_program);
+                device.set_uniforms(&self.font_program, &projection);
+                device.bind_texture(DebugSampler::Font, &self.font_texture, Swizzle::default());
+                device.bind_vao(&self.font_vao);
+                device.update_vao_indices(&self.font_vao, &self.font_indices, VertexUsageHint::Dynamic);
+                device.update_vao_main_vertices(
+                    &self.font_vao,
+                    &self.font_vertices,
+                    VertexUsageHint::Dynamic,
+                );
+                device.draw_triangles_u32(0, self.font_indices.len() as i32);
+            }
+        }
+
+        self.font_indices.clear(); // reset all queues, whether or not anything was drawn
+        self.font_vertices.clear();
+        self.line_vertices.clear();
+        self.tri_vertices.clear();
+        self.tri_indices.clear();
+    }
+}
+
+pub struct LazyInitializedDebugRenderer { // creates its `DebugRenderer` on first use
+    debug_renderer: Option<DebugRenderer>, // `None` until `get_mut` has created it
+    failed: bool, // set once creation has failed; `get_mut` then returns `None` without retrying
+}
+
+impl LazyInitializedDebugRenderer {
+    /// Creates the wrapper without touching the GPU; the real renderer is only
+    /// built by the first `get_mut` call.
+    pub fn new() -> Self {
+        let debug_renderer = None;
+        Self { debug_renderer, failed: false }
+    }
+
+    pub fn get_mut<'a>(&'a mut self, device: &mut Device) -> Option<&'a mut DebugRenderer> {
+        if self.failed {
+            // A previous attempt failed; do not retry.
+            return None;
+        }
+
+        if self.debug_renderer.is_none() {
+            // First use: try to build the renderer. The error value is dropped
+            // because the shader compilation code already logs errors.
+            self.debug_renderer = DebugRenderer::new(device).ok();
+            // Remember a failure so that later calls bail out early.
+            self.failed = self.debug_renderer.is_none();
+        }
+
+        self.debug_renderer.as_mut()
+    }
+
+    /// Returns the `debug::DebugRenderer` only if it already exists; never creates one.
+    pub fn try_get_mut<'a>(&'a mut self) -> Option<&'a mut DebugRenderer> {
+        self.debug_renderer.as_mut()
+    }
+
+    pub fn deinit(self, device: &mut Device) {
+        if let Some(renderer) = self.debug_renderer { // `None`: nothing was created, nothing to free
+            renderer.deinit(device);
+        }
+    }
+}
diff --git a/gfx/wr/webrender/src/renderer/gpu_buffer.rs b/gfx/wr/webrender/src/renderer/gpu_buffer.rs
new file mode 100644
index 0000000000..52a312f89b
--- /dev/null
+++ b/gfx/wr/webrender/src/renderer/gpu_buffer.rs
@@ -0,0 +1,266 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+
+    TODO:
+        Recycle GpuBuffers in a pool (support return from render thread)
+        Efficiently allow writing to buffer (better push interface)
+        Support other texel types (e.g. i32)
+
+ */
+
+use crate::renderer::MAX_VERTEX_TEXTURE_WIDTH;
+use api::units::{DeviceIntRect, DeviceIntSize, LayoutRect};
+use api::{ColorF, PremultipliedColorF};
+use crate::device::Texel;
+use crate::render_task_graph::{RenderTaskGraph, RenderTaskId};
+
+
+unsafe impl Texel for GpuBufferBlock {} // NOTE(review): presumably sound because the block is plain `[f32; 4]` data - confirm against `Texel`'s contract
+
+/// A single texel of an RGBAF32 texture: four `f32` channels, 16 bytes.
+#[derive(Copy, Clone, Debug, MallocSizeOf)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct GpuBufferBlock {
+    data: [f32; 4], // raw channel values; their meaning depends on the type converted into the block
+}
+
+#[derive(Copy, Debug, Clone, MallocSizeOf, Eq, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct GpuBufferAddress { // 2D location of a block in the GPU buffer
+    pub u: u16, // column: block index % MAX_VERTEX_TEXTURE_WIDTH
+    pub v: u16, // row: block index / MAX_VERTEX_TEXTURE_WIDTH
+}
+
+impl GpuBufferAddress {
+    #[allow(dead_code)]
+    pub fn as_int(self) -> i32 {
+        // TODO(gw): Temporary single-int encoding of the address. In the future
+        //           PrimitiveInstanceData can carry 2x u16 instead of an i32.
+        let row_start = self.v as i32 * MAX_VERTEX_TEXTURE_WIDTH as i32;
+        row_start + self.u as i32
+    }
+}
+
+impl GpuBufferBlock {
+    pub const EMPTY: Self = Self { data: [0.0, 0.0, 0.0, 0.0] }; // all-zero block, used for padding and placeholders
+}
+
+/// Packs a layout rect as `[min.x, min.y, max.x, max.y]`. Implemented as `From`,
+/// so the standard blanket impl keeps `rect.into()` working for existing callers.
+impl From<LayoutRect> for GpuBufferBlock {
+    fn from(rect: LayoutRect) -> Self {
+        GpuBufferBlock {
+            data: [
+                rect.min.x, rect.min.y,
+                rect.max.x, rect.max.y,
+            ],
+        }
+    }
+}
+
+/// Packs a color as `[r, g, b, a]`. Implemented as `From`, so the standard
+/// blanket impl keeps `color.into()` working for existing callers.
+impl From<ColorF> for GpuBufferBlock {
+    fn from(color: ColorF) -> Self {
+        GpuBufferBlock {
+            data: [
+                color.r, color.g,
+                color.b, color.a,
+            ],
+        }
+    }
+}
+
+/// Packs a premultiplied color as `[r, g, b, a]`. Implemented as `From`, so the
+/// standard blanket impl keeps `color.into()` working for existing callers.
+impl From<PremultipliedColorF> for GpuBufferBlock {
+    fn from(color: PremultipliedColorF) -> Self {
+        GpuBufferBlock {
+            data: [
+                color.r, color.g,
+                color.b, color.a,
+            ],
+        }
+    }
+}
+
+/// Packs an integer device rect as `[min.x, min.y, max.x, max.y]`, each cast to
+/// `f32`. Implemented as `From`; the blanket impl keeps `rect.into()` working.
+impl From<DeviceIntRect> for GpuBufferBlock {
+    fn from(rect: DeviceIntRect) -> Self {
+        GpuBufferBlock {
+            data: [
+                rect.min.x as f32, rect.min.y as f32,
+                rect.max.x as f32, rect.max.y as f32,
+            ],
+        }
+    }
+}
+
+/// Records a patch to be applied to the GPU buffer for a render task.
+struct DeferredBlock {
+    task_id: RenderTaskId, // task whose target rect is written when the buffer is finalized
+    index: usize, // position in the builder's block vector that gets overwritten
+}
+
+/// Interface to allow writing multiple GPU blocks, possibly of different types
+pub struct GpuBufferWriter<'a> {
+    buffer: &'a mut Vec<GpuBufferBlock>, // the builder's block storage; blocks are appended to it
+    deferred: &'a mut Vec<DeferredBlock>, // the builder's list of render-task patches
+    index: usize, // length of `buffer` when this writer was created (start of its run)
+    block_count: usize, // number of blocks that must have been pushed by `finish` / drop
+}
+
+impl<'a> GpuBufferWriter<'a> {
+    /// Wraps the builder's storage. `index` is where this run of blocks starts
+    /// in `buffer` and `block_count` is how many blocks the caller has promised
+    /// to push (checked by `finish` and on drop).
+    fn new(
+        buffer: &'a mut Vec<GpuBufferBlock>,
+        deferred: &'a mut Vec<DeferredBlock>,
+        index: usize,
+        block_count: usize,
+    ) -> Self {
+        Self {
+            buffer, deferred, index, block_count,
+        }
+    }
+
+    /// Appends one (16 byte) block, converted from `block`, to the buffer.
+    pub fn push_one<B>(&mut self, block: B) where B: Into<GpuBufferBlock> {
+        self.buffer.push(block.into());
+    }
+
+    /// Reserves one block for a render task reference. It holds `EMPTY` for now
+    /// and is recorded so it can be patched once the render task graph is resolved.
+    pub fn push_render_task(&mut self, task_id: RenderTaskId) {
+        // Index the placeholder block is about to occupy.
+        let index = self.buffer.len();
+        self.buffer.push(GpuBufferBlock::EMPTY);
+
+        self.deferred.push(DeferredBlock { task_id, index });
+    }
+
+    /// Consumes the writer and returns the GPU address of its first block.
+    pub fn finish(self) -> GpuBufferAddress {
+        // Exactly the promised number of blocks must have been pushed.
+        assert_eq!(self.buffer.len(), self.index + self.block_count);
+
+        let column = self.index % MAX_VERTEX_TEXTURE_WIDTH;
+        let row = self.index / MAX_VERTEX_TEXTURE_WIDTH;
+        GpuBufferAddress { u: column as u16, v: row as u16 }
+    }
+}
+
+impl<'a> Drop for GpuBufferWriter<'a> { // runs for every writer, including ones consumed by `finish`
+    fn drop(&mut self) { // catches writers dropped without pushing the promised number of blocks
+        assert_eq!(self.buffer.len(), self.index + self.block_count, "Claimed block_count was not written");
+    }
+}
+
+pub struct GpuBufferBuilder { // accumulates blocks on the CPU; `finalize` turns it into a `GpuBuffer`
+    data: Vec<GpuBufferBlock>, // blocks in row-major order, MAX_VERTEX_TEXTURE_WIDTH per row
+    deferred: Vec<DeferredBlock>, // render-task blocks still to be patched in `finalize`
+}
+
+impl GpuBufferBuilder {
+    pub fn new() -> Self {
+        GpuBufferBuilder {
+            data: Vec::new(),
+            deferred: Vec::new(),
+        }
+    }
+
+    /// Reserves room for a run of `block_count` blocks and returns the index of
+    /// its first block. A run never straddles two texture rows: if it does not
+    /// fit in the rest of the current row, that row is padded with empty blocks.
+    fn begin_run(&mut self, block_count: usize) -> usize {
+        assert!(block_count < MAX_VERTEX_TEXTURE_WIDTH);
+
+        // Only the fill level of the *current* row matters; testing the total
+        // length instead would push every run after the first row onto a new row.
+        let used_in_row = self.data.len() % MAX_VERTEX_TEXTURE_WIDTH;
+        if used_in_row + block_count > MAX_VERTEX_TEXTURE_WIDTH {
+            let row_end = self.data.len() - used_in_row + MAX_VERTEX_TEXTURE_WIDTH;
+            self.data.resize(row_end, GpuBufferBlock::EMPTY);
+        }
+
+        self.data.len()
+    }
+
+    /// Appends `blocks` as one run and returns the address of its first block.
+    #[allow(dead_code)]
+    pub fn push(
+        &mut self,
+        blocks: &[GpuBufferBlock],
+    ) -> GpuBufferAddress {
+        let index = self.begin_run(blocks.len());
+        self.data.extend_from_slice(blocks);
+        GpuBufferAddress {
+            u: (index % MAX_VERTEX_TEXTURE_WIDTH) as u16,
+            v: (index / MAX_VERTEX_TEXTURE_WIDTH) as u16,
+        }
+    }
+
+    /// Begin writing a specific number of blocks; exactly `block_count` blocks
+    /// must be pushed into the returned writer.
+    pub fn write_blocks(
+        &mut self,
+        block_count: usize,
+    ) -> GpuBufferWriter {
+        let index = self.begin_run(block_count);
+        GpuBufferWriter::new(
+            &mut self.data,
+            &mut self.deferred,
+            index,
+            block_count,
+        )
+    }
+
+    /// Pads the buffer to a whole number of rows, patches each deferred block
+    /// with its render task's target rect and returns the finished buffer.
+    pub fn finalize(
+        mut self,
+        render_tasks: &RenderTaskGraph,
+    ) -> GpuBuffer {
+        // Round up to a multiple of the row width (the mask needs a power-of-two width).
+        let required_len = (self.data.len() + MAX_VERTEX_TEXTURE_WIDTH-1) & !(MAX_VERTEX_TEXTURE_WIDTH-1);
+        self.data.resize(required_len, GpuBufferBlock::EMPTY);
+
+        let len = self.data.len();
+        assert!(len % MAX_VERTEX_TEXTURE_WIDTH == 0);
+
+        // At this point, we know that the render task graph has been built, and we can
+        // query the location of any dynamic (render target) or static (texture cache)
+        // task. This allows us to patch the UV rects in to the GPU buffer before upload
+        // to the GPU.
+        for block in self.deferred.drain(..) {
+            let render_task = &render_tasks[block.task_id];
+            let target_rect = render_task.get_target_rect();
+            self.data[block.index] = target_rect.into();
+        }
+
+        GpuBuffer {
+            data: self.data,
+            size: DeviceIntSize::new(MAX_VERTEX_TEXTURE_WIDTH as i32, (len / MAX_VERTEX_TEXTURE_WIDTH) as i32),
+        }
+    }
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct GpuBuffer { // finished block data, produced by `GpuBufferBuilder::finalize`
+    pub data: Vec<GpuBufferBlock>, // padded to a whole number of rows
+    pub size: DeviceIntSize, // MAX_VERTEX_TEXTURE_WIDTH wide, one unit of height per row of `data`
+}
+
+impl GpuBuffer {
+    pub fn is_empty(&self) -> bool { // true when the buffer holds no blocks at all
+        self.data.is_empty()
+    }
+}
diff --git a/gfx/wr/webrender/src/renderer/gpu_cache.rs b/gfx/wr/webrender/src/renderer/gpu_cache.rs
new file mode 100644
index 0000000000..fde649cb08
--- /dev/null
+++ b/gfx/wr/webrender/src/renderer/gpu_cache.rs
@@ -0,0 +1,525 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use std::{cmp, mem};
+use api::units::*;
+use malloc_size_of::MallocSizeOfOps;
+use crate::{
+    device::{CustomVAO, Device, DrawTarget, Program, ReadTarget, Texture, TextureFilter, UploadPBOPool, VBO},
+    gpu_cache::{GpuBlockData, GpuCacheUpdate, GpuCacheUpdateList},
+    internal_types::{RenderTargetInfo, Swizzle},
+    prim_store::DeferredResolve,
+    profiler,
+    render_api::MemoryReport,
+    internal_types::FrameId,
+};
+
+/// Enabling this toggle makes `ensure_texture` re-create (resize) the GPU cache
+/// texture on every call, even when the existing one is already large enough,
+/// which enables GPU debuggers to see if the resize is performed correctly.
+const GPU_CACHE_RESIZE_TEST: bool = false;
+
+/// Tracks the state of one row of the GPU cache texture.
+struct CacheRow {
+    /// Mirrored block data on CPU for this row. We store a copy of
+    /// the data on the CPU side to improve upload batching.
+    cpu_blocks: Box<[GpuBlockData; super::MAX_VERTEX_TEXTURE_WIDTH]>,
+    /// The first offset in this row that is dirty.
+    min_dirty: u16,
+    /// One past the last offset in this row that is dirty (exclusive end).
+    max_dirty: u16,
+}
+
+impl CacheRow {
+    fn new() -> Self {
+        let cpu_blocks = Box::new([GpuBlockData::EMPTY; super::MAX_VERTEX_TEXTURE_WIDTH]);
+        // Start clean: an inverted range (min > max) means "nothing dirty".
+        let min_dirty = super::MAX_VERTEX_TEXTURE_WIDTH as u16;
+        let max_dirty = 0;
+        Self { cpu_blocks, min_dirty, max_dirty }
+    }
+
+    fn is_dirty(&self) -> bool {
+        self.max_dirty > self.min_dirty // dirty iff the half-open range is non-empty
+    }
+
+    fn clear_dirty(&mut self) {
+        self.max_dirty = 0;
+        self.min_dirty = super::MAX_VERTEX_TEXTURE_WIDTH as u16;
+    }
+
+    fn add_dirty(&mut self, block_offset: usize, block_count: usize) {
+        self.max_dirty = self.max_dirty.max((block_offset + block_count) as u16);
+        self.min_dirty = self.min_dirty.min(block_offset as u16);
+    }
+
+    fn dirty_blocks(&self) -> &[GpuBlockData] {
+        &self.cpu_blocks[usize::from(self.min_dirty) .. usize::from(self.max_dirty)]
+    }
+}
+
+/// The bus over which the CPU and GPU versions of the GPU cache
+/// get synchronized.
+enum GpuCacheBus {
+    /// PBO-based updates. These currently operate at row granularity
+    /// and are therefore subject to fragmentation issues.
+    PixelBuffer {
+        /// Per-row data.
+        rows: Vec<CacheRow>,
+    },
+    /// Shader-based scattering updates. Currently rendered by a set
+    /// of points into the GPU texture, each carrying a `GpuBlockData`.
+    Scatter {
+        /// Special program to run the scattered update.
+        program: Program,
+        /// VAO containing the source vertex buffers.
+        vao: CustomVAO,
+        /// VBO for positional data, supplied as normalized `u16`.
+        buf_position: VBO<[u16; 2]>,
+        /// VBO for gpu block data.
+        buf_value: VBO<GpuBlockData>,
+        /// Currently stored block count.
+        count: usize,
+    },
+}
+
+/// The device-specific representation of the cache texture in gpu_cache.rs
+pub struct GpuCacheTexture {
+    texture: Option<Texture>, // `None` until `ensure_texture` first creates it
+    bus: GpuCacheBus, // how CPU-side updates reach the texture; chosen once in `new`
+}
+
+impl GpuCacheTexture {
+    /// Ensures that we have a texture at least `height` rows tall, re-creating it (and carrying its contents over) if not.
+    fn ensure_texture(&mut self, device: &mut Device, height: i32) {
+        // If we already have a texture that works, we're done.
+        if self.texture.as_ref().map_or(false, |t| t.get_dimensions().height >= height) {
+            if GPU_CACHE_RESIZE_TEST {
+                // Special debug mode - resize the texture even though it's fine.
+            } else {
+                return;
+            }
+        }
+
+        // Take the old texture, if any.
+        let blit_source = self.texture.take();
+
+        // Create the new texture.
+        assert!(height >= 2, "Height is too small for ANGLE");
+        let new_size = DeviceIntSize::new(super::MAX_VERTEX_TEXTURE_WIDTH as _, height);
+        // GpuCacheBus::Scatter always requires the texture to be a render target. For
+        // GpuCacheBus::PixelBuffer, we only create the texture with a render target if
+        // RGBAF32 render targets are actually supported, and only if glCopyImageSubData
+        // is not. glCopyImageSubData does not require a render target to copy the texture
+        // data, and if neither RGBAF32 render targets nor glCopyImageSubData is supported,
+        // we simply re-upload the entire contents rather than copying upon resize.
+        let supports_copy_image_sub_data = device.get_capabilities().supports_copy_image_sub_data;
+        let supports_color_buffer_float = device.get_capabilities().supports_color_buffer_float;
+        let rt_info = if matches!(self.bus, GpuCacheBus::PixelBuffer { .. })
+            && (supports_copy_image_sub_data || !supports_color_buffer_float)
+        {
+            None
+        } else {
+            Some(RenderTargetInfo { has_depth: false })
+        };
+        let mut texture = device.create_texture(
+            api::ImageBufferKind::Texture2D,
+            api::ImageFormat::RGBAF32,
+            new_size.width,
+            new_size.height,
+            TextureFilter::Nearest,
+            rt_info,
+        );
+
+        // Copy the contents of the previous texture, if applicable.
+        if let Some(blit_source) = blit_source {
+            if !supports_copy_image_sub_data && !supports_color_buffer_float {
+                // Cannot copy texture, so must re-upload everything.
+                match self.bus {
+                    GpuCacheBus::PixelBuffer { ref mut rows } => {
+                        for row in rows {
+                            row.add_dirty(0, super::MAX_VERTEX_TEXTURE_WIDTH); // whole row marked dirty
+                        }
+                    }
+                    GpuCacheBus::Scatter { .. } => {
+                        panic!("Texture must be copyable to use scatter GPU cache bus method");
+                    }
+                }
+            } else {
+                device.copy_entire_texture(&mut texture, &blit_source);
+            }
+            device.delete_texture(blit_source); // the old texture is released in either case
+        }
+
+        self.texture = Some(texture);
+    }
+
+    pub fn new(device: &mut Device, use_scatter: bool) -> Result<Self, super::RendererError> { // picks the update bus; the texture itself is created later
+        use super::desc::GPU_CACHE_UPDATE;
+
+        let bus = if use_scatter {
+            assert!(
+                device.get_capabilities().supports_color_buffer_float,
+                "GpuCache scatter method requires EXT_color_buffer_float",
+            );
+            let program = device.create_program_linked( // a failure here is propagated to the caller by `?`
+                "gpu_cache_update",
+                &[],
+                &GPU_CACHE_UPDATE,
+            )?;
+            let buf_position = device.create_vbo();
+            let buf_value = device.create_vbo();
+            //Note: the vertex attributes have to be supplied in the same order
+            // as for program creation, but each assigned to a different stream.
+            let vao = device.create_custom_vao(&[
+                buf_position.stream_with(&GPU_CACHE_UPDATE.vertex_attributes[0..1]),
+                buf_value   .stream_with(&GPU_CACHE_UPDATE.vertex_attributes[1..2]),
+            ]);
+            GpuCacheBus::Scatter {
+                program,
+                vao,
+                buf_position,
+                buf_value,
+                count: 0,
+            }
+        } else {
+            GpuCacheBus::PixelBuffer {
+                rows: Vec::new(), // rows are added on demand as updates arrive
+            }
+        };
+
+        Ok(GpuCacheTexture {
+            texture: None, // created by the first `ensure_texture` call
+            bus,
+        })
+    }
+
+    pub fn deinit(mut self, device: &mut Device) { // consumes the cache texture and deletes the GPU objects it owns
+        if let Some(t) = self.texture.take() {
+            device.delete_texture(t);
+        }
+        if let GpuCacheBus::Scatter { program, vao, buf_position, buf_value, .. } = self.bus { // only the scatter bus holds GPU objects
+            device.delete_program(program);
+            device.delete_custom_vao(vao);
+            device.delete_vbo(buf_position);
+            device.delete_vbo(buf_value);
+        }
+    }
+
+    pub fn get_height(&self) -> i32 {
+        self.texture.as_ref().map(|t| t.get_dimensions().height).unwrap_or(0) // 0 while no texture exists
+    }
+
+    #[cfg(feature = "capture")]
+    pub fn get_texture(&self) -> &Texture { // panics if the texture has not been created yet
+        self.texture.as_ref().unwrap()
+    }
+
+    fn prepare_for_updates( // sizes the texture and, for the scatter bus, the staging VBOs
+        &mut self,
+        device: &mut Device,
+        total_block_count: usize, // capacity the scatter VBOs must be able to hold
+        max_height: i32, // minimum texture height, passed to `ensure_texture`
+    ) {
+        self.ensure_texture(device, max_height);
+        match self.bus {
+            GpuCacheBus::PixelBuffer { .. } => {}, // nothing else to prepare for this bus
+            GpuCacheBus::Scatter {
+                ref mut buf_position,
+                ref mut buf_value,
+                ref mut count,
+                ..
+            } => {
+                *count = 0; // reset the stored block count
+                if total_block_count > buf_value.allocated_count() { // the VBOs are only ever grown here
+                    device.allocate_vbo(buf_position, total_block_count, super::ONE_TIME_USAGE_HINT);
+                    device.allocate_vbo(buf_value,    total_block_count, super::ONE_TIME_USAGE_HINT);
+                }
+            }
+        }
+    }
+
+    pub fn invalidate(&mut self) {
+        match self.bus {
+            GpuCacheBus::Scatter { .. } => {
+                warn!("Unable to invalidate scattered GPU cache");
+            }
+            GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
+                info!("Invalidating GPU caches");
+                for row in rows.iter_mut() {
+                    row.add_dirty(0, super::MAX_VERTEX_TEXTURE_WIDTH); // mark the full width of the row dirty
+                }
+            }
+        }
+    }
+
+    /// Applies one update list to this cache.
+    ///
+    /// * `PixelBuffer` bus: the blocks are copied into the CPU-side rows
+    ///   (growing the row list as needed) and the touched ranges are marked
+    ///   dirty; `device` is not used on this path.
+    /// * `Scatter` bus: the block values and a parallel array of normalized
+    ///   16-bit positions are written into the two VBOs, and `count` is
+    ///   advanced by the number of blocks in the list.
+    ///
+    /// # Panics
+    /// On the `Scatter` path, panics if no texture is allocated. Indexing
+    /// panics if an update addresses blocks outside the row or block list.
+    fn update(&mut self, device: &mut Device, updates: &GpuCacheUpdateList) {
+        match self.bus {
+            GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
+                for update in &updates.updates {
+                    match *update {
+                        GpuCacheUpdate::Copy {
+                            block_index,
+                            block_count,
+                            address,
+                        } => {
+                            // `address.v` selects the row, `address.u` the first block in it.
+                            let row = address.v as usize;
+
+                            // Ensure that the CPU-side shadow copy of the GPU cache data has enough
+                            // rows to apply this patch.
+                            while rows.len() <= row {
+                                // Add a new row.
+                                rows.push(CacheRow::new());
+                            }
+
+                            // Copy the blocks from the patch array in the shadow CPU copy.
+                            let block_offset = address.u as usize;
+                            let data = &mut rows[row].cpu_blocks;
+                            for i in 0 .. block_count {
+                                data[block_offset + i] = updates.blocks[block_index + i];
+                            }
+
+                            // This row is dirty (needs to be updated in GPU texture).
+                            rows[row].add_dirty(block_offset, block_count);
+                        }
+                    }
+                }
+            }
+            GpuCacheBus::Scatter {
+                ref buf_position,
+                ref buf_value,
+                ref mut count,
+                ..
+            } => {
+                //TODO: re-use this heap allocation
+                // Unused positions will be left as 0xFFFF, which translates to
+                // (1.0, 1.0) in the vertex output position and gets culled out
+                let mut position_data = vec![[!0u16; 2]; updates.blocks.len()];
+                let size = self.texture.as_ref().unwrap().get_dimensions().to_usize();
+
+                for update in &updates.updates {
+                    match *update {
+                        GpuCacheUpdate::Copy {
+                            block_index,
+                            block_count,
+                            address,
+                        } => {
+                            // Convert the absolute texel position into normalized
+                            // 16-bit coordinates: ((2*t + 1) << 15) / size equals
+                            // (t + 0.5) * 65536 / size, i.e. the texel center scaled
+                            // to the u16 range (the result is truncated to u16 below).
+                            let y = ((2*address.v as usize + 1) << 15) / size.height;
+                            for i in 0 .. block_count {
+                                let x = ((2*address.u as usize + 2*i + 1) << 15) / size.width;
+                                position_data[block_index + i] = [x as _, y as _];
+                            }
+                        }
+                    }
+                }
+
+                // Both buffers are filled with the same number of elements, so that
+                // entry `i` of the positions describes entry `i` of the values.
+                // NOTE(review): `*count` is presumably the destination offset within
+                // the VBO — confirm against `Device::fill_vbo`.
+                device.fill_vbo(buf_value, &updates.blocks, *count);
+                device.fill_vbo(buf_position, &position_data, *count);
+                *count += position_data.len();
+            }
+        }
+    }
+
+    /// Pushes the pending updates to the GPU texture.
+    ///
+    /// For the `PixelBuffer` bus, the dirty range of every dirty row is
+    /// uploaded and the number of rows uploaded is returned. For the
+    /// `Scatter` bus, `count` points are drawn into the cache texture and
+    /// zero is returned.
+    ///
+    /// # Panics
+    /// Panics if no texture is currently allocated.
+    fn flush(&mut self, device: &mut Device, pbo_pool: &mut UploadPBOPool) -> usize {
+        let texture = self.texture.as_ref().unwrap();
+        match self.bus {
+            GpuCacheBus::Scatter { ref program, ref vao, count, .. } => {
+                device.disable_depth();
+                device.set_blend(false);
+                device.bind_program(program);
+                device.bind_custom_vao(vao);
+                let target = DrawTarget::from_texture(texture, false);
+                device.bind_draw_target(target);
+                device.draw_nonindexed_points(0, count as _);
+                0
+            }
+            GpuCacheBus::PixelBuffer { ref mut rows } => {
+                let dirty_row_count = rows.iter().filter(|row| row.is_dirty()).count();
+                if dirty_row_count == 0 {
+                    // Nothing changed; avoid creating an uploader at all.
+                    return 0;
+                }
+
+                let mut uploader = device.upload_texture(pbo_pool);
+
+                let dirty_rows = rows
+                    .iter_mut()
+                    .enumerate()
+                    .filter(|(_, row)| row.is_dirty());
+                for (y, row) in dirty_rows {
+                    // Upload only the dirty span of the row, as a 1-texel-high rect.
+                    let blocks = row.dirty_blocks();
+                    let origin = DeviceIntPoint::new(row.min_dirty as i32, y as i32);
+                    let extent = DeviceIntSize::new(blocks.len() as i32, 1);
+                    let rect = DeviceIntRect::from_origin_and_size(origin, extent);
+
+                    uploader.upload(device, texture, rect, None, None, blocks.as_ptr(), blocks.len());
+
+                    row.clear_dirty();
+                }
+
+                uploader.flush(device);
+
+                dirty_row_count
+            }
+        }
+    }
+
+    /// Takes the cache texture out of `self`, if there is one, and deletes it
+    /// through `device`. Only compiled with the `replay` feature.
+    #[cfg(feature = "replay")]
+    pub fn remove_texture(&mut self, device: &mut Device) {
+        let texture = match self.texture.take() {
+            Some(texture) => texture,
+            None => return,
+        };
+        device.delete_texture(texture);
+    }
+
+    /// Installs `texture` as the cache texture and, for the `PixelBuffer`
+    /// bus, rebuilds the CPU-side rows from the raw bytes in `data`.
+    /// Only compiled with the `replay` feature.
+    ///
+    /// `data` is reinterpreted as a sequence of `GpuBlockData`, split into
+    /// rows of `MAX_VERTEX_TEXTURE_WIDTH` blocks. For the `Scatter` bus
+    /// `data` is ignored.
+    ///
+    /// # Panics
+    /// Panics if a texture is already present, or (via `copy_from_slice`)
+    /// if a chunk's length differs from that of a row's `cpu_blocks`.
+    #[cfg(feature = "replay")]
+    pub fn load_from_data(&mut self, texture: Texture, data: Vec<u8>) {
+        assert!(self.texture.is_none());
+        match self.bus {
+            GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
+                let dim = texture.get_dimensions();
+                // View the byte buffer as whole blocks; trailing bytes that do not
+                // make up a full block are dropped by the integer division.
+                // NOTE(review): this assumes `data` is suitably aligned for
+                // `GpuBlockData`; `Vec<u8>` does not guarantee that — confirm.
+                let blocks = unsafe {
+                    std::slice::from_raw_parts(
+                        data.as_ptr() as *const GpuBlockData,
+                        data.len() / mem::size_of::<GpuBlockData>(),
+                    )
+                };
+                // fill up the CPU cache from the contents we just loaded
+                rows.clear();
+                rows.extend((0 .. dim.height).map(|_| CacheRow::new()));
+                let chunks = blocks.chunks(super::MAX_VERTEX_TEXTURE_WIDTH);
+                // One chunk per texture row is expected (checked in debug builds only).
+                debug_assert_eq!(chunks.len(), rows.len());
+                for (row, chunk) in rows.iter_mut().zip(chunks) {
+                    row.cpu_blocks.copy_from_slice(chunk);
+                }
+            }
+            GpuCacheBus::Scatter { .. } => {}
+        }
+        self.texture = Some(texture);
+    }
+
+    /// Adds this cache's memory usage to `report`.
+    ///
+    /// The CPU mirror (`PixelBuffer` bus only) is measured row by row with
+    /// `size_op_funs.size_of_op`; the texture contributes `size_in_bytes()`
+    /// when one is allocated.
+    pub fn report_memory_to(&self, report: &mut MemoryReport, size_op_funs: &MallocSizeOfOps) {
+        // GPU cache CPU memory.
+        match self.bus {
+            GpuCacheBus::PixelBuffer { ref rows, .. } => {
+                for row in rows {
+                    let mirror_ptr = row.cpu_blocks.as_ptr();
+                    report.gpu_cache_cpu_mirror += unsafe { (size_op_funs.size_of_op)(mirror_ptr as *const _) };
+                }
+            }
+            GpuCacheBus::Scatter { .. } => {}
+        }
+
+        // GPU cache GPU memory.
+        let texture_bytes = match self.texture {
+            Some(ref texture) => texture.size_in_bytes(),
+            None => 0,
+        };
+        report.gpu_cache_textures += texture_bytes;
+    }
+}
+
+impl super::Renderer {
+    /// Applies all pending GPU cache update lists and flushes them to the
+    /// cache texture, recording upload time and row/block counters in the
+    /// profile.
+    pub fn update_gpu_cache(&mut self) {
+        let _gm = self.gpu_profiler.start_marker("gpu cache update");
+
+        // For an artificial stress test of GPU cache resizing,
+        // always pass an extra update list with at least one block in it.
+        let current_height = self.gpu_cache_texture.get_height();
+        if GPU_CACHE_RESIZE_TEST && current_height != 0 {
+            let stress_list = GpuCacheUpdateList {
+                frame_id: FrameId::INVALID,
+                clear: false,
+                height: current_height,
+                blocks: vec![[1f32; 4].into()],
+                updates: Vec::new(),
+                debug_commands: Vec::new(),
+            };
+            self.pending_gpu_cache_updates.push(stress_list);
+        }
+
+        // Total number of blocks to upload, and the tallest texture any list asks for
+        // (never smaller than the current texture).
+        let mut updated_blocks = 0;
+        let mut max_requested_height = current_height;
+        for list in &self.pending_gpu_cache_updates {
+            updated_blocks += list.blocks.len();
+            max_requested_height = cmp::max(max_requested_height, list.height);
+        }
+
+        // Report the overflow once; the flag suppresses repeated errors.
+        let exceeds_limit = max_requested_height > self.get_max_texture_size();
+        if exceeds_limit && !self.gpu_cache_overflow {
+            self.gpu_cache_overflow = true;
+            self.renderer_errors.push(super::RendererError::MaxTextureSize);
+        }
+
+        // Note: if we decide to switch to scatter-style GPU cache update
+        // permanently, we can have this code nicer with `BufferUploader` kind
+        // of helper, similarly to how `TextureUploader` API is used.
+        self.gpu_cache_texture.prepare_for_updates(
+            &mut self.device,
+            updated_blocks,
+            max_requested_height,
+        );
+
+        for update_list in self.pending_gpu_cache_updates.drain(..) {
+            assert!(update_list.height <= max_requested_height);
+            // Track the newest frame id seen so far.
+            if update_list.frame_id > self.gpu_cache_frame_id {
+                self.gpu_cache_frame_id = update_list.frame_id;
+            }
+            self.gpu_cache_texture.update(&mut self.device, &update_list);
+        }
+
+        self.profile.start_time(profiler::GPU_CACHE_UPLOAD_TIME);
+        let updated_rows = self.gpu_cache_texture.flush(
+            &mut self.device,
+            &mut self.texture_upload_pbo_pool,
+        );
+        let upload_time = self.profile.end_time(profiler::GPU_CACHE_UPLOAD_TIME);
+        self.gpu_cache_upload_time += upload_time;
+
+        self.profile.set(profiler::GPU_CACHE_ROWS_UPDATED, updated_rows);
+        self.profile.set(profiler::GPU_CACHE_BLOCKS_UPDATED, updated_blocks);
+    }
+
+    /// Brings the GPU cache up to date and binds its texture.
+    ///
+    /// If a clear is pending, the cache is first replaced by a fresh one that
+    /// uses the same bus kind. Deferred resolves are then turned into an update
+    /// list, all pending updates are applied, and the cache texture is bound to
+    /// the `GpuCache` sampler.
+    ///
+    /// # Errors
+    /// Returns the error from `GpuCacheTexture::new` if re-creating the cache
+    /// fails.
+    pub fn prepare_gpu_cache(
+        &mut self,
+        deferred_resolves: &[DeferredResolve],
+    ) -> Result<(), super::RendererError> {
+        if self.pending_gpu_cache_clear {
+            let use_scatter = match self.gpu_cache_texture.bus {
+                GpuCacheBus::Scatter { .. } => true,
+                GpuCacheBus::PixelBuffer { .. } => false,
+            };
+            let fresh_cache = GpuCacheTexture::new(&mut self.device, use_scatter)?;
+            // Swap in the new cache, then release the resources of the old one.
+            let stale_cache = mem::replace(&mut self.gpu_cache_texture, fresh_cache);
+            stale_cache.deinit(&mut self.device);
+            self.pending_gpu_cache_clear = false;
+        }
+
+        let deferred_update_list = self.update_deferred_resolves(deferred_resolves);
+        self.pending_gpu_cache_updates.extend(deferred_update_list);
+
+        self.update_gpu_cache();
+
+        // Note: the texture might have changed during the `update`,
+        // so we need to bind it here.
+        let cache_texture = self.gpu_cache_texture.texture.as_ref().unwrap();
+        self.device.bind_texture(
+            super::TextureSampler::GpuCache,
+            cache_texture,
+            Swizzle::default(),
+        );
+
+        Ok(())
+    }
+
+    /// Reads the GPU cache texture back as `RGBAF32` bytes.
+    ///
+    /// Returns the texture dimensions together with the texel bytes
+    /// (16 bytes per texel).
+    ///
+    /// # Panics
+    /// Panics if no cache texture is currently allocated.
+    pub fn read_gpu_cache(&mut self) -> (DeviceIntSize, Vec<u8>) {
+        let texture = self.gpu_cache_texture.texture.as_ref().unwrap();
+        let size = device_size_as_framebuffer_size(texture.get_dimensions());
+        let byte_count = (size.width * size.height * 16) as usize;
+        let mut texels = vec![0; byte_count];
+
+        self.device.begin_frame();
+        self.device.bind_read_target(ReadTarget::from_texture(texture));
+        self.device.read_pixels_into(
+            size.into(),
+            api::ImageFormat::RGBAF32,
+            &mut texels,
+        );
+        self.device.reset_read_target();
+        self.device.end_frame();
+
+        (texture.get_dimensions(), texels)
+    }
+}
diff --git a/gfx/wr/webrender/src/renderer/init.rs b/gfx/wr/webrender/src/renderer/init.rs
new file mode 100644
index 0000000000..38274b20b9
--- /dev/null
+++ b/gfx/wr/webrender/src/renderer/init.rs
@@ -0,0 +1,783 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{BlobImageHandler, ColorF, IdNamespace, DocumentId, CrashAnnotator};
+use api::{VoidPtrToSizeFn, FontRenderMode, ImageFormat};
+use api::{RenderNotifier, ImageBufferKind};
+use api::units::*;
+use api::channel::unbounded_channel;
+pub use api::DebugFlags;
+
+use crate::render_api::{RenderApiSender, FrameMsg};
+use crate::composite::{CompositorKind, CompositorConfig};
+use crate::device::{
+    UploadMethod, UploadPBOPool, VertexUsageHint, Device, ProgramCache, TextureFilter
+};
+use crate::frame_builder::FrameBuilderConfig;
+use crate::glyph_cache::GlyphCache;
+use glyph_rasterizer::{GlyphRasterizer, SharedFontResources};
+use crate::gpu_types::PrimitiveInstanceData;
+use crate::internal_types::{FastHashMap, FastHashSet, FrameId};
+use crate::picture;
+use crate::profiler::{self, Profiler, TransactionProfile};
+use crate::device::query::{GpuProfiler, GpuDebugMethod};
+use crate::render_backend::RenderBackend;
+use crate::resource_cache::ResourceCache;
+use crate::scene_builder_thread::{SceneBuilderThread, SceneBuilderThreadChannels, LowPrioritySceneBuilderThread};
+use crate::texture_cache::{TextureCache, TextureCacheConfig};
+use crate::picture_textures::PictureTextures;
+use crate::renderer::{
+    debug, gpu_cache, vertex, gl,
+    Renderer, DebugOverlayState, BufferDamageTracker, PipelineInfo, TextureResolver,
+    RendererError, ShaderPrecacheFlags, VERTEX_DATA_TEXTURE_COUNT,
+    upload::UploadTexturePool,
+    shade::{Shaders, SharedShaders},
+};
+
+use std::{
+    mem,
+    thread,
+    cell::RefCell,
+    collections::VecDeque,
+    rc::Rc,
+    sync::{Arc, atomic::{AtomicBool, Ordering}},
+    num::NonZeroUsize,
+    path::PathBuf,
+};
+
+use tracy_rs::register_thread_with_profiler;
+use rayon::{ThreadPool, ThreadPoolBuilder};
+use malloc_size_of::MallocSizeOfOps;
+
+/// Use this hint for all vertex data re-initialization. This allows
+/// the driver to better re-use RBOs internally.
+///
+/// It is also the usage hint of the default `UploadMethod::PixelBuffer`
+/// upload method in `WebRenderOptions::default()`.
+pub const ONE_TIME_USAGE_HINT: VertexUsageHint = VertexUsageHint::Stream;
+
+/// Is only false if no WR instances have ever been created.
+///
+/// Set to true by `create_webrender_instance` and never reset afterwards.
+static HAS_BEEN_INITIALIZED: AtomicBool = AtomicBool::new(false);
+
+/// Returns true if a WR instance has ever been initialized in this process.
+///
+/// The flag is read with sequentially consistent ordering.
+pub fn wr_has_been_initialized() -> bool {
+    HAS_BEEN_INITIALIZED.load(Ordering::SeqCst)
+}
+
+/// Allows callers to hook in at certain points of the async scene build. These
+/// functions are all called from the scene builder thread.
+///
+/// An implementation is supplied through `WebRenderOptions::scene_builder_hooks`,
+/// which requires it to also be `Send`.
+pub trait SceneBuilderHooks {
+    /// This is called exactly once, when the scene builder thread is started
+    /// and before it processes anything.
+    fn register(&self);
+    /// This is called before each scene build starts.
+    fn pre_scene_build(&self);
+    /// This is called before each scene swap occurs.
+    fn pre_scene_swap(&self);
+    /// This is called after each scene swap occurs. The PipelineInfo contains
+    /// the updated epochs and pipelines removed in the new scene compared to
+    /// the old scene.
+    ///
+    /// Note that `document_id` is a list of document ids despite its
+    /// singular name.
+    fn post_scene_swap(&self, document_id: &Vec<DocumentId>, info: PipelineInfo);
+    /// This is called after a resource update operation on the scene builder
+    /// thread, in the case where resource updates were applied without a scene
+    /// build.
+    fn post_resource_update(&self, document_ids: &Vec<DocumentId>);
+    /// This is called after a scene build completes without any changes being
+    /// made. We guarantee that each pre_scene_build call will be matched with
+    /// exactly one of post_scene_swap, post_resource_update or
+    /// post_empty_scene_build.
+    fn post_empty_scene_build(&self);
+    /// This is a generic callback which provides an opportunity to run code
+    /// on the scene builder thread. This is called as part of the main message
+    /// loop of the scene builder thread, but outside of any specific message
+    /// handler.
+    fn poke(&self);
+    /// This is called exactly once, when the scene builder thread is about to
+    /// terminate.
+    fn deregister(&self);
+}
+
+/// Allows callers to hook into the main render_backend loop and provide
+/// additional frame ops for generate_frame transactions. These functions
+/// are all called from the render backend thread.
+///
+/// An implementation is supplied through `WebRenderOptions::sampler`,
+/// which requires it to also be `Send`.
+pub trait AsyncPropertySampler {
+    /// This is called exactly once, when the render backend thread is started
+    /// and before it processes anything.
+    fn register(&self);
+    /// This is called for each transaction with the generate_frame flag set
+    /// (i.e. that will trigger a render). The list of frame messages returned
+    /// are processed as though they were part of the original transaction.
+    ///
+    /// `document_id` identifies the document the transaction applies to.
+    /// NOTE(review): `generated_frame_id` is presumably the id attached to the
+    /// generate_frame request, when there is one — confirm against the caller.
+    fn sample(&self, document_id: DocumentId, generated_frame_id: Option<u64>) -> Vec<FrameMsg>;
+    /// This is called exactly once, when the render backend thread is about to
+    /// terminate.
+    fn deregister(&self);
+}
+
+/// Configuration consumed by `create_webrender_instance` when setting up a
+/// renderer. `WebRenderOptions::default()` provides a baseline that can be
+/// adjusted with struct update syntax.
+pub struct WebRenderOptions {
+    /// Forwarded to `Device::new`.
+    pub resource_override_path: Option<PathBuf>,
+    /// Whether to use shaders that have been optimized at build time.
+    pub use_optimized_shaders: bool,
+    /// When false, the default font render mode is `FontRenderMode::Mono`.
+    pub enable_aa: bool,
+    /// When true, an 8x8 dither matrix texture is created at startup.
+    pub enable_dithering: bool,
+    pub max_recorded_profiles: usize,
+    pub precache_flags: ShaderPrecacheFlags,
+    /// Enable sub-pixel anti-aliasing if a fast implementation is available.
+    /// Only takes effect when dual source blending is in use.
+    pub enable_subpixel_aa: bool,
+    /// Used as the background color of the frame builder configuration.
+    pub clear_color: ColorF,
+    /// `None` defers to the device's `prefers_clear_scissor` capability.
+    pub enable_clear_scissor: Option<bool>,
+    /// Optional cap applied on top of the device's max texture size.
+    /// Must be at least 2048 (asserted at startup).
+    pub max_internal_texture_size: Option<i32>,
+    /// Clamped to the effective max internal texture size at startup.
+    pub image_tiling_threshold: i32,
+    /// Forwarded to `Device::new`.
+    pub upload_method: UploadMethod,
+    /// The default size in bytes for PBOs used to upload texture data.
+    pub upload_pbo_default_size: usize,
+    /// Forwarded to `Device::new`.
+    pub batched_upload_threshold: i32,
+    /// Worker thread pool to use. When `None`, a pool whose threads are named
+    /// `WRWorker#<index>` is created at startup.
+    pub workers: Option<Arc<ThreadPool>>,
+    pub enable_multithreading: bool,
+    pub blob_image_handler: Option<Box<dyn BlobImageHandler>>,
+    /// Forwarded to `Device::new`.
+    pub crash_annotator: Option<Box<dyn CrashAnnotator>>,
+    /// Together with `enclosing_size_of_op`, used to build the
+    /// `MallocSizeOfOps` for memory reporting; no ops are built when `None`.
+    pub size_of_op: Option<VoidPtrToSizeFn>,
+    pub enclosing_size_of_op: Option<VoidPtrToSizeFn>,
+    /// Forwarded to `Device::new`.
+    pub cached_programs: Option<Rc<ProgramCache>>,
+    pub debug_flags: DebugFlags,
+    pub renderer_id: Option<u64>,
+    pub scene_builder_hooks: Option<Box<dyn SceneBuilderHooks + Send>>,
+    pub sampler: Option<Box<dyn AsyncPropertySampler + Send>>,
+    pub support_low_priority_transactions: bool,
+    pub namespace_alloc_by_client: bool,
+    /// If namespaces are allocated by the client, then the namespace for fonts
+    /// must also be allocated by the client to avoid namespace collisions with
+    /// the backend.
+    pub shared_font_namespace: Option<IdNamespace>,
+    pub testing: bool,
+    /// Set to true if this GPU supports hardware fast clears as a performance
+    /// optimization. Likely requires benchmarking on various GPUs to see if
+    /// it is a performance win. The default is false, which tends to be best
+    /// performance on lower end / integrated GPUs.
+    pub gpu_supports_fast_clears: bool,
+    /// Dual source blending is used only if this is true and the device
+    /// reports support for it.
+    pub allow_dual_source_blending: bool,
+    /// Advanced blend equations are used only if this is true and the device
+    /// reports support for them.
+    pub allow_advanced_blend_equation: bool,
+    /// If true, allow textures to be initialized with glTexStorage.
+    /// This affects VRAM consumption and data upload paths.
+    pub allow_texture_storage_support: bool,
+    /// If true, we allow the data uploaded in a different format from the
+    /// one expected by the driver, pretending the format is matching, and
+    /// swizzling the components on all the shader sampling.
+    pub allow_texture_swizzling: bool,
+    /// Use `ps_clear` shader with batched quad rendering to clear the rects
+    /// in texture cache and picture cache tasks.
+    /// This helps to work around some Intel drivers
+    /// that incorrectly synchronize clears to following draws.
+    pub clear_caches_with_quads: bool,
+    /// Output the source of the shader with the given name.
+    pub dump_shader_source: Option<String>,
+    /// Forwarded to `Device::new`.
+    pub surface_origin_is_top_left: bool,
+    /// The configuration options defining how WR composites the final scene.
+    pub compositor_config: CompositorConfig,
+    pub enable_gpu_markers: bool,
+    /// If true, panic whenever a GL error occurs. This has a significant
+    /// performance impact, so only use when debugging specific problems!
+    pub panic_on_gl_error: bool,
+    pub picture_tile_size: Option<DeviceIntSize>,
+    pub texture_cache_config: TextureCacheConfig,
+    /// If true, we'll use instanced vertex attributes. Each instance is a quad.
+    /// If false, we'll duplicate the instance attributes per vertex and issue
+    /// regular indexed draws instead.
+    pub enable_instancing: bool,
+    /// If true, we'll reject contexts backed by a software rasterizer, except
+    /// Software WebRender. Detection is by renderer name ("llvmpipe",
+    /// "softpipe" or "software rasterizer", case-insensitively).
+    pub reject_software_rasterizer: bool,
+    /// If enabled, pinch-zoom will apply the zoom factor during compositing
+    /// of picture cache tiles. This is higher performance (tiles are not
+    /// re-rasterized during zoom) but lower quality result. For most display
+    /// items, if the zoom factor is relatively small, bilinear filtering should
+    /// make the result look quite close to the high-quality zoom, except for glyphs.
+    pub low_quality_pinch_zoom: bool,
+}
+
+/// Internal tuning constants used while creating a WebRender instance.
+impl WebRenderOptions {
+    /// Number of batches to look back in history for adding the current
+    /// transparent instance into.
+    ///
+    /// Passed to the frame builder as `batch_lookback_count`.
+    const BATCH_LOOKBACK_COUNT: usize = 10;
+
+    /// Since we are re-initializing the instance buffers on every draw call,
+    /// the driver has to internally manage PBOs in flight.
+    /// It's typically done by bucketing up to a specific limit, and then
+    /// just individually managing the largest buffers.
+    /// Having a limit here allows the drivers to more easily manage
+    /// the PBOs for us.
+    ///
+    /// The value is in bytes: it is divided by the size of
+    /// `PrimitiveInstanceData` to obtain the maximum primitive instance count.
+    const MAX_INSTANCE_BUFFER_SIZE: usize = 0x20000; // actual threshold in macOS GL drivers
+}
+
+impl Default for WebRenderOptions {
+    /// Builds the baseline option set. Fields are listed in the same order
+    /// as they are declared on the struct.
+    fn default() -> Self {
+        Self {
+            resource_override_path: None,
+            use_optimized_shaders: false,
+            enable_aa: true,
+            enable_dithering: false,
+            max_recorded_profiles: 0,
+            precache_flags: ShaderPrecacheFlags::empty(),
+            enable_subpixel_aa: false,
+            clear_color: ColorF::new(1.0, 1.0, 1.0, 1.0),
+            enable_clear_scissor: None,
+            max_internal_texture_size: None,
+            image_tiling_threshold: 4096,
+
+            // This is best as `Immediate` on Angle, or `Pixelbuffer(Dynamic)` on GL,
+            // but we are unable to make this decision here, so picking the reasonable medium.
+            upload_method: UploadMethod::PixelBuffer(ONE_TIME_USAGE_HINT),
+            upload_pbo_default_size: 512 * 512 * 4,
+            batched_upload_threshold: 512 * 512,
+
+            workers: None,
+            enable_multithreading: true,
+            blob_image_handler: None,
+            crash_annotator: None,
+            size_of_op: None,
+            enclosing_size_of_op: None,
+            cached_programs: None,
+            debug_flags: DebugFlags::empty(),
+            renderer_id: None,
+            scene_builder_hooks: None,
+            sampler: None,
+            support_low_priority_transactions: false,
+            namespace_alloc_by_client: false,
+            shared_font_namespace: None,
+            testing: false,
+
+            gpu_supports_fast_clears: false,
+            allow_dual_source_blending: true,
+            allow_advanced_blend_equation: false,
+            allow_texture_storage_support: true,
+            allow_texture_swizzling: true,
+            clear_caches_with_quads: true,
+            dump_shader_source: None,
+            surface_origin_is_top_left: false,
+            compositor_config: CompositorConfig::default(),
+            enable_gpu_markers: true,
+            panic_on_gl_error: false,
+            picture_tile_size: None,
+            texture_cache_config: TextureCacheConfig::DEFAULT,
+
+            // Disabling instancing means more vertex data to upload and potentially
+            // process by the vertex shaders.
+            enable_instancing: true,
+            reject_software_rasterizer: false,
+            low_quality_pinch_zoom: false,
+        }
+    }
+}
+
+/// Initializes WebRender and creates a `Renderer` and `RenderApiSender`.
+///
+/// # Examples
+/// Initializes a `Renderer` with some reasonable values. For more information see
+/// [`WebRenderOptions`][WebRenderOptions].
+///
+/// ```rust,ignore
+/// # use webrender::renderer::Renderer;
+/// # use std::path::PathBuf;
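+/// let opts = webrender::WebRenderOptions {
+///    resource_override_path: None,
+///    enable_aa: false,
+///    ..Default::default()
+/// };
+/// // `gl` is an `Rc<dyn gl::Gl>` and `notifier` a `Box<dyn RenderNotifier>`.
+/// let (renderer, sender) = webrender::create_webrender_instance(gl, notifier, opts, None)?;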
+/// ```
+/// [WebRenderOptions]: struct.WebRenderOptions.html
+pub fn create_webrender_instance(
+    gl: Rc<dyn gl::Gl>,
+    notifier: Box<dyn RenderNotifier>,
+    mut options: WebRenderOptions,
+    shaders: Option<&SharedShaders>,
+) -> Result<(Renderer, RenderApiSender), RendererError> {
+    if !wr_has_been_initialized() {
+        // If the profiler feature is enabled, try to load the profiler shared library
+        // if the path was provided.
+        #[cfg(feature = "profiler")]
+        unsafe {
+            if let Ok(ref tracy_path) = std::env::var("WR_TRACY_PATH") {
+                let ok = tracy_rs::load(tracy_path);
+                info!("Load tracy from {} -> {}", tracy_path, ok);
+            }
+        }
+
+        register_thread_with_profiler("Compositor".to_owned());
+    }
+
+    HAS_BEEN_INITIALIZED.store(true, Ordering::SeqCst);
+
+    let (api_tx, api_rx) = unbounded_channel();
+    let (result_tx, result_rx) = unbounded_channel();
+    let gl_type = gl.get_type();
+
+    let mut device = Device::new(
+        gl,
+        options.crash_annotator.clone(),
+        options.resource_override_path.clone(),
+        options.use_optimized_shaders,
+        options.upload_method.clone(),
+        options.batched_upload_threshold,
+        options.cached_programs.take(),
+        options.allow_texture_storage_support,
+        options.allow_texture_swizzling,
+        options.dump_shader_source.take(),
+        options.surface_origin_is_top_left,
+        options.panic_on_gl_error,
+    );
+
+    let color_cache_formats = device.preferred_color_formats();
+    let swizzle_settings = device.swizzle_settings();
+    let use_dual_source_blending =
+        device.get_capabilities().supports_dual_source_blending &&
+        options.allow_dual_source_blending;
+    let ext_blend_equation_advanced =
+        options.allow_advanced_blend_equation &&
+        device.get_capabilities().supports_advanced_blend_equation;
+    let ext_blend_equation_advanced_coherent =
+        device.supports_extension("GL_KHR_blend_equation_advanced_coherent");
+
+    let enable_clear_scissor = options
+        .enable_clear_scissor
+        .unwrap_or(device.get_capabilities().prefers_clear_scissor);
+
+    // 2048 is the minimum that the texture cache can work with.
+    const MIN_TEXTURE_SIZE: i32 = 2048;
+    let mut max_internal_texture_size = device.max_texture_size();
+    if max_internal_texture_size < MIN_TEXTURE_SIZE {
+        // Broken GL contexts can return a max texture size of zero (See #1260).
+        // Better to gracefully fail now than panic as soon as a texture is allocated.
+        error!(
+            "Device reporting insufficient max texture size ({})",
+            max_internal_texture_size
+        );
+        return Err(RendererError::MaxTextureSize);
+    }
+    if let Some(internal_limit) = options.max_internal_texture_size {
+        assert!(internal_limit >= MIN_TEXTURE_SIZE);
+        max_internal_texture_size = max_internal_texture_size.min(internal_limit);
+    }
+
+    if options.reject_software_rasterizer {
+        let renderer_name_lc = device.get_capabilities().renderer_name.to_lowercase();
+        if renderer_name_lc.contains("llvmpipe") || renderer_name_lc.contains("softpipe") || renderer_name_lc.contains("software rasterizer") {
+        return Err(RendererError::SoftwareRasterizer);
+        }
+    }
+
+    let image_tiling_threshold = options.image_tiling_threshold
+        .min(max_internal_texture_size);
+
+    device.begin_frame();
+
+    let shaders = match shaders {
+        Some(shaders) => Rc::clone(shaders),
+        None => Rc::new(RefCell::new(Shaders::new(&mut device, gl_type, &options)?)),
+    };
+
+    let dither_matrix_texture = if options.enable_dithering {
+        let dither_matrix: [u8; 64] = [
+            0,
+            48,
+            12,
+            60,
+            3,
+            51,
+            15,
+            63,
+            32,
+            16,
+            44,
+            28,
+            35,
+            19,
+            47,
+            31,
+            8,
+            56,
+            4,
+            52,
+            11,
+            59,
+            7,
+            55,
+            40,
+            24,
+            36,
+            20,
+            43,
+            27,
+            39,
+            23,
+            2,
+            50,
+            14,
+            62,
+            1,
+            49,
+            13,
+            61,
+            34,
+            18,
+            46,
+            30,
+            33,
+            17,
+            45,
+            29,
+            10,
+            58,
+            6,
+            54,
+            9,
+            57,
+            5,
+            53,
+            42,
+            26,
+            38,
+            22,
+            41,
+            25,
+            37,
+            21,
+        ];
+
+        let texture = device.create_texture(
+            ImageBufferKind::Texture2D,
+            ImageFormat::R8,
+            8,
+            8,
+            TextureFilter::Nearest,
+            None,
+        );
+        device.upload_texture_immediate(&texture, &dither_matrix);
+
+        Some(texture)
+    } else {
+        None
+    };
+
+    let max_primitive_instance_count =
+        WebRenderOptions::MAX_INSTANCE_BUFFER_SIZE / mem::size_of::<PrimitiveInstanceData>();
+    let vaos = vertex::RendererVAOs::new(
+        &mut device,
+        if options.enable_instancing { None } else { NonZeroUsize::new(max_primitive_instance_count) },
+    );
+
+    let texture_upload_pbo_pool = UploadPBOPool::new(&mut device, options.upload_pbo_default_size);
+    let staging_texture_pool = UploadTexturePool::new();
+    let texture_resolver = TextureResolver::new(&mut device);
+
+    let mut vertex_data_textures = Vec::new();
+    for _ in 0 .. VERTEX_DATA_TEXTURE_COUNT {
+        vertex_data_textures.push(vertex::VertexDataTextures::new());
+    }
+
+    // On some (mostly older, integrated) GPUs, the normal GPU texture cache update path
+    // doesn't work well when running on ANGLE, causing CPU stalls inside D3D and/or the
+    // GPU driver. See https://bugzilla.mozilla.org/show_bug.cgi?id=1576637 for much
+    // more detail. To reduce the number of code paths we have active that require testing,
+    // we will enable the GPU cache scatter update path on all devices running with ANGLE.
+    // We want a better solution long-term, but for now this is a significant performance
+    // improvement on HD4600 era GPUs, and shouldn't hurt performance in a noticeable
+    // way on other systems running under ANGLE.
+    let is_software = device.get_capabilities().renderer_name.starts_with("Software");
+
+    // On other GL platforms, like macOS or Android, creating many PBOs is very inefficient.
+    // This is what happens in GPU cache updates in PBO path. Instead, we switch everything
+    // except software GL to use the GPU scattered updates.
+    let supports_scatter = device.get_capabilities().supports_color_buffer_float;
+    let gpu_cache_texture = gpu_cache::GpuCacheTexture::new(
+        &mut device,
+        supports_scatter && !is_software,
+    )?;
+
+    device.end_frame();
+
+    let backend_notifier = notifier.clone();
+
+    let clear_alpha_targets_with_quads = !device.get_capabilities().supports_alpha_target_clears;
+
+    let prefer_subpixel_aa = options.enable_subpixel_aa && use_dual_source_blending;
+    let default_font_render_mode = match (options.enable_aa, prefer_subpixel_aa) {
+        (true, true) => FontRenderMode::Subpixel,
+        (true, false) => FontRenderMode::Alpha,
+        (false, _) => FontRenderMode::Mono,
+    };
+
+    let compositor_kind = match options.compositor_config {
+        CompositorConfig::Draw { max_partial_present_rects, draw_previous_partial_present_regions, .. } => {
+            CompositorKind::Draw { max_partial_present_rects, draw_previous_partial_present_regions }
+        }
+        CompositorConfig::Native { ref compositor } => {
+            let capabilities = compositor.get_capabilities();
+
+            CompositorKind::Native {
+                capabilities,
+            }
+        }
+    };
+
+    let config = FrameBuilderConfig {
+        default_font_render_mode,
+        dual_source_blending_is_supported: use_dual_source_blending,
+        testing: options.testing,
+        gpu_supports_fast_clears: options.gpu_supports_fast_clears,
+        gpu_supports_advanced_blend: ext_blend_equation_advanced,
+        advanced_blend_is_coherent: ext_blend_equation_advanced_coherent,
+        gpu_supports_render_target_partial_update: device.get_capabilities().supports_render_target_partial_update,
+        external_images_require_copy: !device.get_capabilities().supports_image_external_essl3,
+        batch_lookback_count: WebRenderOptions::BATCH_LOOKBACK_COUNT,
+        background_color: Some(options.clear_color),
+        compositor_kind,
+        tile_size_override: None,
+        max_surface_override: None,
+        max_depth_ids: device.max_depth_ids(),
+        max_target_size: max_internal_texture_size,
+        force_invalidation: false,
+        is_software,
+        low_quality_pinch_zoom: options.low_quality_pinch_zoom,
+    };
+    info!("WR {:?}", config);
+
+    let debug_flags = options.debug_flags;
+    let size_of_op = options.size_of_op;
+    let enclosing_size_of_op = options.enclosing_size_of_op;
+    let make_size_of_ops =
+        move || size_of_op.map(|o| MallocSizeOfOps::new(o, enclosing_size_of_op));
+    let workers = options
+        .workers
+        .take()
+        .unwrap_or_else(|| {
+            let worker = ThreadPoolBuilder::new()
+                .thread_name(|idx|{ format!("WRWorker#{}", idx) })
+                .start_handler(move |idx| {
+                    register_thread_with_profiler(format!("WRWorker#{}", idx));
+                    profiler::register_thread(&format!("WRWorker#{}", idx));
+                })
+                .exit_handler(move |_idx| {
+                    profiler::unregister_thread();
+                })
+                .build();
+            Arc::new(worker.unwrap())
+        });
+    let sampler = options.sampler;
+    let namespace_alloc_by_client = options.namespace_alloc_by_client;
+
+    // Ensure shared font keys exist within their own unique namespace so
+    // that they don't accidentally collide across Renderer instances.
+    let font_namespace = if namespace_alloc_by_client {
+        options.shared_font_namespace.expect("Shared font namespace must be allocated by client")
+    } else {
+        RenderBackend::next_namespace_id()
+    };
+    let fonts = SharedFontResources::new(font_namespace);
+
+    let blob_image_handler = options.blob_image_handler.take();
+    let scene_builder_hooks = options.scene_builder_hooks;
+    let rb_thread_name = format!("WRRenderBackend#{}", options.renderer_id.unwrap_or(0));
+    let scene_thread_name = format!("WRSceneBuilder#{}", options.renderer_id.unwrap_or(0));
+    let lp_scene_thread_name = format!("WRSceneBuilderLP#{}", options.renderer_id.unwrap_or(0));
+    let glyph_rasterizer = GlyphRasterizer::new(workers, device.get_capabilities().supports_r8_texture_upload);
+
+    let (scene_builder_channels, scene_tx) =
+        SceneBuilderThreadChannels::new(api_tx.clone());
+
+    let sb_fonts = fonts.clone();
+
+    thread::Builder::new().name(scene_thread_name.clone()).spawn(move || {
+        register_thread_with_profiler(scene_thread_name.clone());
+        profiler::register_thread(&scene_thread_name);
+
+        let mut scene_builder = SceneBuilderThread::new(
+            config,
+            sb_fonts,
+            make_size_of_ops(),
+            scene_builder_hooks,
+            scene_builder_channels,
+        );
+        scene_builder.run();
+
+        profiler::unregister_thread();
+    })?;
+
+    let low_priority_scene_tx = if options.support_low_priority_transactions {
+        let (low_priority_scene_tx, low_priority_scene_rx) = unbounded_channel();
+        let lp_builder = LowPrioritySceneBuilderThread {
+            rx: low_priority_scene_rx,
+            tx: scene_tx.clone(),
+        };
+
+        thread::Builder::new().name(lp_scene_thread_name.clone()).spawn(move || {
+            register_thread_with_profiler(lp_scene_thread_name.clone());
+            profiler::register_thread(&lp_scene_thread_name);
+
+            let mut scene_builder = lp_builder;
+            scene_builder.run();
+
+            profiler::unregister_thread();
+        })?;
+
+        low_priority_scene_tx
+    } else {
+        scene_tx.clone()
+    };
+
+    let rb_blob_handler = blob_image_handler
+        .as_ref()
+        .map(|handler| handler.create_similar());
+
+    let texture_cache_config = options.texture_cache_config.clone();
+    let mut picture_tile_size = options.picture_tile_size.unwrap_or(picture::TILE_SIZE_DEFAULT);
+    // Clamp the picture tile size to reasonable values.
+    picture_tile_size.width = picture_tile_size.width.max(128).min(4096);
+    picture_tile_size.height = picture_tile_size.height.max(128).min(4096);
+
+    let picture_texture_filter = if options.low_quality_pinch_zoom {
+        TextureFilter::Linear
+    } else {
+        TextureFilter::Nearest
+    };
+
+    let rb_scene_tx = scene_tx.clone();
+    let rb_fonts = fonts.clone();
+    let enable_multithreading = options.enable_multithreading;
+    thread::Builder::new().name(rb_thread_name.clone()).spawn(move || {
+        register_thread_with_profiler(rb_thread_name.clone());
+        profiler::register_thread(&rb_thread_name);
+
+        let texture_cache = TextureCache::new(
+            max_internal_texture_size,
+            image_tiling_threshold,
+            color_cache_formats,
+            swizzle_settings,
+            &texture_cache_config,
+        );
+
+        let picture_textures = PictureTextures::new(
+            picture_tile_size,
+            picture_texture_filter,
+        );
+
+        let glyph_cache = GlyphCache::new();
+
+        let mut resource_cache = ResourceCache::new(
+            texture_cache,
+            picture_textures,
+            glyph_rasterizer,
+            glyph_cache,
+            rb_fonts,
+            rb_blob_handler,
+        );
+
+        resource_cache.enable_multithreading(enable_multithreading);
+
+        let mut backend = RenderBackend::new(
+            api_rx,
+            result_tx,
+            rb_scene_tx,
+            resource_cache,
+            backend_notifier,
+            config,
+            sampler,
+            make_size_of_ops(),
+            debug_flags,
+            namespace_alloc_by_client,
+        );
+        backend.run();
+        profiler::unregister_thread();
+    })?;
+
+    let debug_method = if !options.enable_gpu_markers {
+        // The GPU markers are disabled.
+        GpuDebugMethod::None
+    } else if device.supports_extension("GL_KHR_debug") {
+        GpuDebugMethod::KHR
+    } else if device.supports_extension("GL_EXT_debug_marker") {
+        GpuDebugMethod::MarkerEXT
+    } else {
+        warn!("asking to enable_gpu_markers but no supporting extension was found");
+        GpuDebugMethod::None
+    };
+
+    info!("using {:?}", debug_method);
+
+    let gpu_profiler = GpuProfiler::new(Rc::clone(device.rc_gl()), debug_method);
+    #[cfg(feature = "capture")]
+    let read_fbo = device.create_fbo();
+
+    let mut renderer = Renderer {
+        result_rx,
+        api_tx: api_tx.clone(),
+        device,
+        active_documents: FastHashMap::default(),
+        pending_texture_updates: Vec::new(),
+        pending_texture_cache_updates: false,
+        pending_native_surface_updates: Vec::new(),
+        pending_gpu_cache_updates: Vec::new(),
+        pending_gpu_cache_clear: false,
+        pending_shader_updates: Vec::new(),
+        shaders,
+        debug: debug::LazyInitializedDebugRenderer::new(),
+        debug_flags: DebugFlags::empty(),
+        profile: TransactionProfile::new(),
+        frame_counter: 0,
+        resource_upload_time: 0.0,
+        gpu_cache_upload_time: 0.0,
+        profiler: Profiler::new(),
+        max_recorded_profiles: options.max_recorded_profiles,
+        clear_color: options.clear_color,
+        enable_clear_scissor,
+        enable_advanced_blend_barriers: !ext_blend_equation_advanced_coherent,
+        clear_caches_with_quads: options.clear_caches_with_quads,
+        clear_alpha_targets_with_quads,
+        last_time: 0,
+        gpu_profiler,
+        vaos,
+        vertex_data_textures,
+        current_vertex_data_textures: 0,
+        pipeline_info: PipelineInfo::default(),
+        dither_matrix_texture,
+        external_image_handler: None,
+        size_of_ops: make_size_of_ops(),
+        cpu_profiles: VecDeque::new(),
+        gpu_profiles: VecDeque::new(),
+        gpu_cache_texture,
+        gpu_cache_debug_chunks: Vec::new(),
+        gpu_cache_frame_id: FrameId::INVALID,
+        gpu_cache_overflow: false,
+        texture_upload_pbo_pool,
+        staging_texture_pool,
+        texture_resolver,
+        renderer_errors: Vec::new(),
+        async_frame_recorder: None,
+        async_screenshots: None,
+        #[cfg(feature = "capture")]
+        read_fbo,
+        #[cfg(feature = "replay")]
+        owned_external_images: FastHashMap::default(),
+        notifications: Vec::new(),
+        device_size: None,
+        zoom_debug_texture: None,
+        cursor_position: DeviceIntPoint::zero(),
+        shared_texture_cache_cleared: false,
+        documents_seen: FastHashSet::default(),
+        force_redraw: true,
+        compositor_config: options.compositor_config,
+        current_compositor_kind: compositor_kind,
+        allocated_native_surfaces: FastHashSet::default(),
+        debug_overlay_state: DebugOverlayState::new(),
+        buffer_damage_tracker: BufferDamageTracker::default(),
+        max_primitive_instance_count,
+        enable_instancing: options.enable_instancing,
+        consecutive_oom_frames: 0,
+    };
+
+    // We initially set the flags to default and then now call set_debug_flags
+    // to ensure any potential transition when enabling a flag is run.
+    renderer.set_debug_flags(debug_flags);
+
+    let sender = RenderApiSender::new(
+        api_tx,
+        scene_tx,
+        low_priority_scene_tx,
+        blob_image_handler,
+        fonts,
+    );
+    Ok((renderer, sender))
+}
diff --git a/gfx/wr/webrender/src/renderer/mod.rs b/gfx/wr/webrender/src/renderer/mod.rs
new file mode 100644
index 0000000000..012a6a58cf
--- /dev/null
+++ b/gfx/wr/webrender/src/renderer/mod.rs
@@ -0,0 +1,5752 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! The high-level module responsible for interfacing with the GPU.
+//!
+//! Much of WebRender's design is driven by separating work into different
+//! threads. To avoid the complexities of multi-threaded GPU access, we restrict
+//! all communication with the GPU to one thread, the render thread. But since
+//! issuing GPU commands is often a bottleneck, we move everything else (i.e.
+//! the computation of what commands to issue) to another thread, the
+//! RenderBackend thread. The RenderBackend, in turn, may delegate work to other
+//! threads (like the SceneBuilder threads or Rayon workers), but the
+//! Render-vs-RenderBackend distinction is the most important.
+//!
+//! The consumer is responsible for initializing the render thread before
+//! calling into WebRender, which means that this module also serves as the
+//! initial entry point into WebRender, and is responsible for spawning the
+//! various other threads discussed above. That said, WebRender initialization
+//! returns both the `Renderer` instance as well as a channel for communicating
+//! directly with the `RenderBackend`. Aside from a few high-level operations
+//! like 'render now', most of the interesting commands from the consumer go over
+//! that channel and operate on the `RenderBackend`.
+//!
+//! ## Space conversion guidelines
+//! At this stage, we should be operating with `DevicePixel` and `FramebufferPixel` only.
+//! "Framebuffer" space represents the final destination of our rendering,
+//! and it happens to be Y-flipped on OpenGL. The conversion is done as follows:
+//!   - for rasterized primitives, the orthographic projection transforms
+//! the content rectangle to -1 to 1
+//!   - the viewport transformation is setup to map the whole range to
+//! the framebuffer rectangle provided by the document view, stored in `DrawTarget`
+//!   - all the direct framebuffer operations, like blitting, reading pixels, and setting
+//! up the scissor, are accepting already transformed coordinates, which we can get by
+//! calling `DrawTarget::to_framebuffer_rect`
+
+use api::{ColorF, ColorU, MixBlendMode};
+use api::{DocumentId, Epoch, ExternalImageHandler, RenderReasons};
+#[cfg(feature = "replay")]
+use api::ExternalImageId;
+use api::{ExternalImageSource, ExternalImageType, ImageFormat, PremultipliedColorF};
+use api::{PipelineId, ImageRendering, Checkpoint, NotificationRequest, ImageBufferKind};
+#[cfg(feature = "replay")]
+use api::ExternalImage;
+use api::units::*;
+use api::channel::{Sender, Receiver};
+pub use api::DebugFlags;
+use core::time::Duration;
+
+use crate::render_api::{DebugCommand, ApiMsg, MemoryReport};
+use crate::batch::{AlphaBatchContainer, BatchKind, BatchFeatures, BatchTextures, BrushBatchKind, ClipBatchList};
+#[cfg(any(feature = "capture", feature = "replay"))]
+use crate::capture::{CaptureConfig, ExternalCaptureImage, PlainExternalImage};
+use crate::composite::{CompositeState, CompositeTileSurface, ResolvedExternalSurface, CompositorSurfaceTransform};
+use crate::composite::{CompositorKind, Compositor, NativeTileId, CompositeFeatures, CompositeSurfaceFormat, ResolvedExternalSurfaceColorData};
+use crate::composite::{CompositorConfig, NativeSurfaceOperationDetails, NativeSurfaceId, NativeSurfaceOperation};
+use crate::composite::{TileKind};
+use crate::debug_colors;
+use crate::device::{DepthFunction, Device, DrawTarget, ExternalTexture, GpuFrameId, UploadPBOPool};
+use crate::device::{ReadTarget, ShaderError, Texture, TextureFilter, TextureFlags, TextureSlot};
+use crate::device::query::{GpuSampler, GpuTimer};
+#[cfg(feature = "capture")]
+use crate::device::FBOId;
+use crate::debug_item::DebugItem;
+use crate::frame_builder::Frame;
+use glyph_rasterizer::GlyphFormat;
+use crate::gpu_cache::{GpuCacheUpdate, GpuCacheUpdateList};
+use crate::gpu_cache::{GpuCacheDebugChunk, GpuCacheDebugCmd};
+use crate::gpu_types::{ScalingInstance, SvgFilterInstance, CopyInstance};
+use crate::gpu_types::{BlurInstance, ClearInstance, CompositeInstance, ZBufferId, CompositorTransform};
+use crate::internal_types::{TextureSource, TextureCacheCategory, FrameId};
+#[cfg(any(feature = "capture", feature = "replay"))]
+use crate::internal_types::DebugOutput;
+use crate::internal_types::{CacheTextureId, FastHashMap, FastHashSet, RenderedDocument, ResultMsg};
+use crate::internal_types::{TextureCacheAllocInfo, TextureCacheAllocationKind, TextureUpdateList};
+use crate::internal_types::{RenderTargetInfo, Swizzle, DeferredResolveIndex};
+use crate::picture::ResolvedSurfaceTexture;
+use crate::prim_store::DeferredResolve;
+use crate::profiler::{self, GpuProfileTag, TransactionProfile};
+use crate::profiler::{Profiler, add_event_marker, add_text_marker, thread_is_being_profiled};
+use crate::device::query::GpuProfiler;
+use crate::render_target::ResolveOp;
+use crate::render_task_graph::RenderTaskGraph;
+use crate::render_task::{RenderTask, RenderTaskKind, ReadbackTask};
+use crate::screen_capture::AsyncScreenshotGrabber;
+use crate::render_target::{AlphaRenderTarget, ColorRenderTarget, PictureCacheTarget, PictureCacheTargetKind};
+use crate::render_target::{RenderTarget, TextureCacheRenderTarget};
+use crate::render_target::{RenderTargetKind, BlitJob};
+use crate::telemetry::Telemetry;
+use crate::tile_cache::PictureCacheDebugInfo;
+use crate::util::drain_filter;
+use crate::rectangle_occlusion as occlusion;
+use upload::{upload_to_texture_cache, UploadTexturePool};
+use init::*;
+
+use euclid::{rect, Transform3D, Scale, default};
+use gleam::gl;
+use malloc_size_of::MallocSizeOfOps;
+
+#[cfg(feature = "replay")]
+use std::sync::Arc;
+
+use std::{
+    cell::RefCell,
+    collections::VecDeque,
+    f32,
+    ffi::c_void,
+    mem,
+    num::NonZeroUsize,
+    path::PathBuf,
+    rc::Rc,
+};
+#[cfg(any(feature = "capture", feature = "replay"))]
+use std::collections::hash_map::Entry;
+use time::precise_time_ns;
+
+mod debug;
+mod gpu_buffer;
+mod gpu_cache;
+mod shade;
+mod vertex;
+mod upload;
+pub(crate) mod init;
+
+pub use debug::DebugRenderer;
+pub use shade::{Shaders, SharedShaders};
+pub use vertex::{desc, VertexArrayKind, MAX_VERTEX_TEXTURE_WIDTH};
+pub use gpu_buffer::{GpuBuffer, GpuBufferBuilder, GpuBufferAddress};
+
+/// The size of the array of each type of vertex data texture that
+/// is round-robin-ed each frame during bind_frame_data. Doing this
+/// helps avoid driver stalls while updating the texture in some
+/// drivers. The size of these textures is typically very small
+/// (e.g. < 16 kB) so it's not a huge waste of memory. Despite that,
+/// this is a short-term solution - we want to find a better way
+/// to provide this frame data, which will likely involve some
+/// combination of UBO/SSBO usage. Although this only affects some
+/// platforms, it's enabled on all platforms to reduce testing
+/// differences between platforms.
+pub const VERTEX_DATA_TEXTURE_COUNT: usize = 3;
+
+/// Number of GPU blocks per UV rectangle provided for an image.
+pub const BLOCKS_PER_UV_RECT: usize = 2;
+
+// Profiler tags. Each tag pairs a text label with a debug colour.
+//
+// Tags labelled `B_*` are the ones `BatchKind::sampler_tag` returns for
+// brush batches.
+const GPU_TAG_BRUSH_OPACITY: GpuProfileTag = GpuProfileTag {
+    label: "B_Opacity",
+    color: debug_colors::DARKMAGENTA,
+};
+const GPU_TAG_BRUSH_LINEAR_GRADIENT: GpuProfileTag = GpuProfileTag {
+    label: "B_LinearGradient",
+    color: debug_colors::POWDERBLUE,
+};
+const GPU_TAG_BRUSH_YUV_IMAGE: GpuProfileTag = GpuProfileTag {
+    label: "B_YuvImage",
+    color: debug_colors::DARKGREEN,
+};
+const GPU_TAG_BRUSH_MIXBLEND: GpuProfileTag = GpuProfileTag {
+    label: "B_MixBlend",
+    color: debug_colors::MAGENTA,
+};
+const GPU_TAG_BRUSH_BLEND: GpuProfileTag = GpuProfileTag {
+    label: "B_Blend",
+    color: debug_colors::ORANGE,
+};
+const GPU_TAG_BRUSH_IMAGE: GpuProfileTag = GpuProfileTag {
+    label: "B_Image",
+    color: debug_colors::SPRINGGREEN,
+};
+const GPU_TAG_BRUSH_SOLID: GpuProfileTag = GpuProfileTag {
+    label: "B_Solid",
+    color: debug_colors::RED,
+};
+// Tags labelled `C_*` (the `GPU_TAG_CACHE_*` constants). Note that the four
+// gradient tags below all share the same colour and differ only by label.
+const GPU_TAG_CACHE_CLIP: GpuProfileTag = GpuProfileTag {
+    label: "C_Clip",
+    color: debug_colors::PURPLE,
+};
+const GPU_TAG_CACHE_BORDER: GpuProfileTag = GpuProfileTag {
+    label: "C_Border",
+    color: debug_colors::CORNSILK,
+};
+const GPU_TAG_CACHE_LINE_DECORATION: GpuProfileTag = GpuProfileTag {
+    label: "C_LineDecoration",
+    color: debug_colors::YELLOWGREEN,
+};
+const GPU_TAG_CACHE_FAST_LINEAR_GRADIENT: GpuProfileTag = GpuProfileTag {
+    label: "C_FastLinearGradient",
+    color: debug_colors::BROWN,
+};
+const GPU_TAG_CACHE_LINEAR_GRADIENT: GpuProfileTag = GpuProfileTag {
+    label: "C_LinearGradient",
+    color: debug_colors::BROWN,
+};
+const GPU_TAG_CACHE_RADIAL_GRADIENT: GpuProfileTag = GpuProfileTag {
+    label: "C_RadialGradient",
+    color: debug_colors::BROWN,
+};
+const GPU_TAG_CACHE_CONIC_GRADIENT: GpuProfileTag = GpuProfileTag {
+    label: "C_ConicGradient",
+    color: debug_colors::BROWN,
+};
+// Setup tags ("target init" / "data init").
+const GPU_TAG_SETUP_TARGET: GpuProfileTag = GpuProfileTag {
+    label: "target init",
+    color: debug_colors::SLATEGREY,
+};
+const GPU_TAG_SETUP_DATA: GpuProfileTag = GpuProfileTag {
+    label: "data init",
+    color: debug_colors::LIGHTGREY,
+};
+// `BatchKind::sampler_tag` returns the next two tags for split-composite
+// and text-run batches respectively.
+const GPU_TAG_PRIM_SPLIT_COMPOSITE: GpuProfileTag = GpuProfileTag {
+    label: "SplitComposite",
+    color: debug_colors::DARKBLUE,
+};
+const GPU_TAG_PRIM_TEXT_RUN: GpuProfileTag = GpuProfileTag {
+    label: "TextRun",
+    color: debug_colors::BLUE,
+};
+const GPU_TAG_BLUR: GpuProfileTag = GpuProfileTag {
+    label: "Blur",
+    color: debug_colors::VIOLET,
+};
+const GPU_TAG_BLIT: GpuProfileTag = GpuProfileTag {
+    label: "Blit",
+    color: debug_colors::LIME,
+};
+const GPU_TAG_SCALE: GpuProfileTag = GpuProfileTag {
+    label: "Scale",
+    color: debug_colors::GHOSTWHITE,
+};
+// The `GPU_SAMPLER_TAG_*` constants all use the same colour and are
+// distinguished only by their label.
+const GPU_SAMPLER_TAG_ALPHA: GpuProfileTag = GpuProfileTag {
+    label: "Alpha targets",
+    color: debug_colors::BLACK,
+};
+const GPU_SAMPLER_TAG_OPAQUE: GpuProfileTag = GpuProfileTag {
+    label: "Opaque pass",
+    color: debug_colors::BLACK,
+};
+const GPU_SAMPLER_TAG_TRANSPARENT: GpuProfileTag = GpuProfileTag {
+    label: "Transparent pass",
+    color: debug_colors::BLACK,
+};
+const GPU_TAG_SVG_FILTER: GpuProfileTag = GpuProfileTag {
+    label: "SvgFilter",
+    color: debug_colors::LEMONCHIFFON,
+};
+const GPU_TAG_COMPOSITE: GpuProfileTag = GpuProfileTag {
+    label: "Composite",
+    color: debug_colors::TOMATO,
+};
+const GPU_TAG_CLEAR: GpuProfileTag = GpuProfileTag {
+    label: "Clear",
+    color: debug_colors::CHOCOLATE,
+};
+
+/// The clear color used for the texture cache when the debug display is enabled.
+/// We use a shade of blue so that we can still identify completely blue items in
+/// the texture cache.
+///
+/// NOTE(review): the four components are presumably ordered R, G, B, A
+/// (0.8 in the third slot matches "a shade of blue") - confirm at the use site.
+pub const TEXTURE_CACHE_DBG_CLEAR_COLOR: [f32; 4] = [0.0, 0.0, 0.8, 1.0];
+
+impl BatchKind {
+    /// Returns the profiler tag associated with this kind of batch.
+    ///
+    /// Brush batches are further distinguished by their `BrushBatchKind`.
+    fn sampler_tag(&self) -> GpuProfileTag {
+        match *self {
+            BatchKind::TextRun(..) => GPU_TAG_PRIM_TEXT_RUN,
+            BatchKind::SplitComposite => GPU_TAG_PRIM_SPLIT_COMPOSITE,
+            BatchKind::Brush(BrushBatchKind::Opacity) => GPU_TAG_BRUSH_OPACITY,
+            BatchKind::Brush(BrushBatchKind::LinearGradient) => GPU_TAG_BRUSH_LINEAR_GRADIENT,
+            BatchKind::Brush(BrushBatchKind::YuvImage(..)) => GPU_TAG_BRUSH_YUV_IMAGE,
+            BatchKind::Brush(BrushBatchKind::MixBlend { .. }) => GPU_TAG_BRUSH_MIXBLEND,
+            BatchKind::Brush(BrushBatchKind::Blend) => GPU_TAG_BRUSH_BLEND,
+            BatchKind::Brush(BrushBatchKind::Image(..)) => GPU_TAG_BRUSH_IMAGE,
+            BatchKind::Brush(BrushBatchKind::Solid) => GPU_TAG_BRUSH_SOLID,
+        }
+    }
+}
+
+/// Reports whether the bits in `select` differ between `before` and `after`.
+///
+/// Returns `None` when the selected bits are identical in both flag sets.
+/// Otherwise returns `Some(enabled)`, where `enabled` is whether `after`
+/// contains all of `select`.
+fn flag_changed(before: DebugFlags, after: DebugFlags, select: DebugFlags) -> Option<bool> {
+    let previous = before & select;
+    let current = after & select;
+
+    if previous == current {
+        return None;
+    }
+
+    Some(after.contains(select))
+}
+
+/// Color modes, each with an explicit integer discriminant.
+///
+/// NOTE(review): the `#[repr(C)]` layout and hand-assigned values suggest
+/// these numbers are shared with code outside Rust (presumably the shaders);
+/// confirm before renumbering or reordering variants.
+#[repr(C)]
+#[derive(Copy, Clone, Debug)]
+pub enum ShaderColorMode {
+    FromRenderPassMode = 0,
+    Alpha = 1,
+    SubpixelWithBgColorPass0 = 2,
+    SubpixelWithBgColorPass1 = 3,
+    SubpixelWithBgColorPass2 = 4,
+    SubpixelDualSource = 5,
+    BitmapShadow = 6,
+    ColorBitmap = 7,
+    Image = 8,
+    MultiplyDualSource = 9,
+}
+
+impl From<GlyphFormat> for ShaderColorMode {
+    /// Picks the color mode matching a glyph format.
+    ///
+    /// # Panics
+    ///
+    /// Panics for the subpixel formats (`Subpixel`, `TransformedSubpixel`),
+    /// which this conversion does not cover.
+    fn from(format: GlyphFormat) -> ShaderColorMode {
+        match format {
+            GlyphFormat::ColorBitmap => ShaderColorMode::ColorBitmap,
+            GlyphFormat::Alpha | GlyphFormat::TransformedAlpha | GlyphFormat::Bitmap => {
+                ShaderColorMode::Alpha
+            }
+            GlyphFormat::Subpixel | GlyphFormat::TransformedSubpixel => {
+                panic!("Subpixel glyph formats must be handled separately.")
+            }
+        }
+    }
+}
+
+/// Enumeration of the texture samplers used across the various WebRender shaders.
+///
+/// Each variant corresponds to a uniform declared in shader source. We only bind
+/// the variants we need for a given shader, so not every variant is bound for every
+/// batch.
+///
+/// The conversion to `TextureSlot` defined for this type assigns every variant
+/// a fixed slot index, and `TextureSampler::color` maps the indices 0, 1 and 2
+/// to the three `Color*` variants.
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub(crate) enum TextureSampler {
+    Color0,
+    Color1,
+    Color2,
+    GpuCache,
+    TransformPalette,
+    RenderTasks,
+    Dither,
+    PrimitiveHeadersF,
+    PrimitiveHeadersI,
+    ClipMask,
+    GpuBuffer,
+}
+
+impl TextureSampler {
+    /// Returns the color sampler with index `n` (0, 1 or 2).
+    ///
+    /// # Panics
+    ///
+    /// Panics when `n` is 3 or greater, since only three color samplers exist.
+    pub(crate) fn color(n: usize) -> TextureSampler {
+        const COLOR_SAMPLERS: [TextureSampler; 3] = [
+            TextureSampler::Color0,
+            TextureSampler::Color1,
+            TextureSampler::Color2,
+        ];
+
+        match COLOR_SAMPLERS.get(n) {
+            Some(&sampler) => sampler,
+            None => panic!("There are only 3 color samplers."),
+        }
+    }
+}
+
+/// Assigns each sampler its fixed texture slot index.
+///
+/// Implemented as `From` rather than a hand-written `Into`: the standard
+/// library's blanket impl still provides `Into<TextureSlot>` for
+/// `TextureSampler`, so existing `.into()` calls and `Into<TextureSlot>`
+/// bounds keep working, and `TextureSlot::from(sampler)` becomes available.
+impl From<TextureSampler> for TextureSlot {
+    fn from(sampler: TextureSampler) -> TextureSlot {
+        match sampler {
+            TextureSampler::Color0 => TextureSlot(0),
+            TextureSampler::Color1 => TextureSlot(1),
+            TextureSampler::Color2 => TextureSlot(2),
+            TextureSampler::GpuCache => TextureSlot(3),
+            TextureSampler::TransformPalette => TextureSlot(4),
+            TextureSampler::RenderTasks => TextureSlot(5),
+            TextureSampler::Dither => TextureSlot(6),
+            TextureSampler::PrimitiveHeadersF => TextureSlot(7),
+            TextureSampler::PrimitiveHeadersI => TextureSlot(8),
+            TextureSampler::ClipMask => TextureSlot(9),
+            TextureSampler::GpuBuffer => TextureSlot(10),
+        }
+    }
+}
+
+/// The kind of graphics API in use. OpenGL is currently the only variant.
+#[derive(Clone, Debug, PartialEq)]
+pub enum GraphicsApi {
+    OpenGL,
+}
+
+/// Descriptive information about the graphics API in use.
+#[derive(Clone, Debug)]
+pub struct GraphicsApiInfo {
+    /// Which graphics API this information describes.
+    pub kind: GraphicsApi,
+    /// Renderer description string.
+    /// NOTE(review): presumably the driver-reported renderer name - confirm.
+    pub renderer: String,
+    /// Version description string.
+    /// NOTE(review): presumably the driver-reported API version - confirm.
+    pub version: String,
+}
+
+/// GPU timing summary for a single frame.
+#[derive(Debug)]
+pub struct GpuProfile {
+    /// The frame these timings belong to.
+    pub frame_id: GpuFrameId,
+    /// Sum of `time_ns` over all GPU timers passed to `GpuProfile::new`.
+    pub paint_time_ns: u64,
+}
+
+impl GpuProfile {
+    /// Builds the profile for `frame_id`, with the paint time set to the
+    /// total of `time_ns` across all of `timers` (zero when `timers` is empty).
+    fn new(frame_id: GpuFrameId, timers: &[GpuTimer]) -> GpuProfile {
+        let paint_time_ns: u64 = timers.iter().map(|timer| timer.time_ns).sum();
+
+        GpuProfile { frame_id, paint_time_ns }
+    }
+}
+
+/// CPU-side timing and draw-call summary for a single frame.
+#[derive(Debug)]
+pub struct CpuProfile {
+    /// The frame these numbers belong to.
+    pub frame_id: GpuFrameId,
+    /// Backend time, in nanoseconds.
+    pub backend_time_ns: u64,
+    /// Composite time, in nanoseconds.
+    pub composite_time_ns: u64,
+    /// Number of draw calls.
+    pub draw_calls: usize,
+}
+
+impl CpuProfile {
+    /// Bundles the given per-frame values into a `CpuProfile`, unchanged.
+    fn new(
+        frame_id: GpuFrameId,
+        backend_time_ns: u64,
+        composite_time_ns: u64,
+        draw_calls: usize,
+    ) -> CpuProfile {
+        Self { frame_id, backend_time_ns, composite_time_ns, draw_calls }
+    }
+}
+
+/// The selected partial present mode for a given frame.
+#[derive(Debug, Copy, Clone)]
+enum PartialPresentMode {
+    /// The device supports fewer dirty rects than the number of dirty rects
+    /// that WR produced. In this case, the WR dirty rects are union'ed into
+    /// a single dirty rect, that is provided to the caller.
+    Single {
+        /// The single, union'ed dirty rect.
+        dirty_rect: DeviceRect,
+    },
+}
+
+/// A device texture stored in `TextureResolver::texture_cache_map`, paired
+/// with the category its size is reported under.
+struct CacheTexture {
+    /// The device texture itself.
+    texture: Texture,
+    /// Selects which `MemoryReport` counter `TextureResolver::report_memory`
+    /// adds this texture's size to.
+    category: TextureCacheCategory,
+}
+
+/// Helper struct for resolving device Textures for use during rendering passes.
+///
+/// Manages the mapping between the at-a-distance texture handles used by the
+/// `RenderBackend` (which does not directly interface with the GPU) and actual
+/// device texture handles.
+struct TextureResolver {
+    /// A map to resolve texture cache IDs to native textures.
+    texture_cache_map: FastHashMap<CacheTextureId, CacheTexture>,
+
+    /// Map of external image IDs to native textures.
+    external_images: FastHashMap<DeferredResolveIndex, ExternalTexture>,
+
+    /// A special 1x1 dummy texture used for shaders that expect to work with
+    /// the output of the previous pass but are actually running in the first
+    /// pass. `TextureResolver::new` creates it as RGBA8 and uploads a single
+    /// pixel whose four bytes are all `0xff`.
+    dummy_cache_texture: Texture,
+}
+
+impl TextureResolver {
+    fn new(device: &mut Device) -> TextureResolver {
+        let dummy_cache_texture = device
+            .create_texture(
+                ImageBufferKind::Texture2D,
+                ImageFormat::RGBA8,
+                1,
+                1,
+                TextureFilter::Linear,
+                None,
+            );
+        device.upload_texture_immediate(
+            &dummy_cache_texture,
+            &[0xff, 0xff, 0xff, 0xff],
+        );
+
+        TextureResolver {
+            texture_cache_map: FastHashMap::default(),
+            external_images: FastHashMap::default(),
+            dummy_cache_texture,
+        }
+    }
+
+    fn deinit(self, device: &mut Device) {
+        device.delete_texture(self.dummy_cache_texture);
+
+        for (_id, item) in self.texture_cache_map {
+            device.delete_texture(item.texture);
+        }
+    }
+
+    fn begin_frame(&mut self) {
+    }
+
+    fn end_pass(
+        &mut self,
+        device: &mut Device,
+        textures_to_invalidate: &[CacheTextureId],
+    ) {
+        // For any texture that is no longer needed, immediately
+        // invalidate it so that tiled GPUs don't need to resolve it
+        // back to memory.
+        for texture_id in textures_to_invalidate {
+            let render_target = &self.texture_cache_map[texture_id].texture;
+            device.invalidate_render_target(render_target);
+        }
+    }
+
+    // Bind a source texture to the device.
+    fn bind(&self, texture_id: &TextureSource, sampler: TextureSampler, device: &mut Device) -> Swizzle {
+        match *texture_id {
+            TextureSource::Invalid => {
+                Swizzle::default()
+            }
+            TextureSource::Dummy => {
+                let swizzle = Swizzle::default();
+                device.bind_texture(sampler, &self.dummy_cache_texture, swizzle);
+                swizzle
+            }
+            TextureSource::External(ref index, _) => {
+                let texture = self.external_images
+                    .get(index)
+                    .expect("BUG: External image should be resolved by now");
+                device.bind_external_texture(sampler, texture);
+                Swizzle::default()
+            }
+            TextureSource::TextureCache(index, swizzle) => {
+                let texture = &self.texture_cache_map[&index].texture;
+                device.bind_texture(sampler, texture, swizzle);
+                swizzle
+            }
+        }
+    }
+
+    // Get the real (OpenGL) texture ID for a given source texture.
+    // For a texture cache texture, the IDs are stored in a vector
+    // map for fast access.
+    fn resolve(&self, texture_id: &TextureSource) -> Option<(&Texture, Swizzle)> {
+        match *texture_id {
+            TextureSource::Invalid => None,
+            TextureSource::Dummy => {
+                Some((&self.dummy_cache_texture, Swizzle::default()))
+            }
+            TextureSource::External(..) => {
+                panic!("BUG: External textures cannot be resolved, they can only be bound.");
+            }
+            TextureSource::TextureCache(index, swizzle) => {
+                Some((&self.texture_cache_map[&index].texture, swizzle))
+            }
+        }
+    }
+
+    // Retrieve the deferred / resolved UV rect if an external texture, otherwise
+    // return the default supplied UV rect.
+    fn get_uv_rect(
+        &self,
+        source: &TextureSource,
+        default_value: TexelRect,
+    ) -> TexelRect {
+        match source {
+            TextureSource::External(ref index, _) => {
+                let texture = self.external_images
+                    .get(index)
+                    .expect("BUG: External image should be resolved by now");
+                texture.get_uv_rect()
+            }
+            _ => {
+                default_value
+            }
+        }
+    }
+
+    /// Returns the size of `texture` in pixels.
+    ///
+    /// Invalid sources report a zero size and the dummy source reports 1x1.
+    /// For external images the size is derived from the extent of the stored
+    /// UV rect; for texture cache entries it is the texture's dimensions.
+    fn get_texture_size(&self, texture: &TextureSource) -> DeviceIntSize {
+        match *texture {
+            TextureSource::Dummy => DeviceIntSize::new(1, 1),
+            TextureSource::Invalid => DeviceIntSize::zero(),
+            TextureSource::External(index, _) => {
+                let rect = self.external_images[&index].get_uv_rect();
+                let extent = rect.uv1 - rect.uv0;
+                extent.abs().to_size().to_i32()
+            }
+            TextureSource::TextureCache(id, _) => {
+                let entry = &self.texture_cache_map[&id];
+                entry.texture.get_dimensions()
+            }
+        }
+    }
+
+    /// Builds a `MemoryReport` holding the byte size of every texture in
+    /// `texture_cache_map`, accumulated per texture cache category.
+    fn report_memory(&self) -> MemoryReport {
+        let mut report = MemoryReport::default();
+
+        // This is GPU memory rather than heap allocations, so the sizes come
+        // from the textures themselves instead of size_of_op.
+        for entry in self.texture_cache_map.values() {
+            let bytes = entry.texture.size_in_bytes();
+            match entry.category {
+                TextureCacheCategory::Atlas => report.atlas_textures += bytes,
+                TextureCacheCategory::Standalone => report.standalone_textures += bytes,
+                TextureCacheCategory::PictureTile => report.picture_tile_textures += bytes,
+                TextureCacheCategory::RenderTarget => report.render_target_textures += bytes,
+            }
+        }
+
+        report
+    }
+
+    /// Records an estimate of the memory used by external images in the
+    /// `EXTERNAL_IMAGE_BYTES` profiler counter (converted with `bytes_to_mb`).
+    ///
+    /// The size of each image is taken from the extent of its UV rect.
+    fn update_profile(&self, profile: &mut TransactionProfile) {
+        // Assume 4 bytes per pixel, which is true most of the time but not
+        // always.
+        let bpp = 4;
+
+        let external_image_bytes: usize = self.external_images
+            .values()
+            .map(|img| {
+                let rect = img.get_uv_rect();
+                let extent = (rect.uv1 - rect.uv0).abs().to_size().to_i32();
+                extent.area() as usize * bpp
+            })
+            .sum();
+
+        profile.set(profiler::EXTERNAL_IMAGE_BYTES, profiler::bytes_to_mb(external_image_bytes));
+    }
+
+    /// Returns a mutable reference to the texture stored for `id` in
+    /// `texture_cache_map`.
+    ///
+    /// Panics if there is no entry for `id`.
+    fn get_cache_texture_mut(&mut self, id: &CacheTextureId) -> &mut Texture {
+        let entry = self.texture_cache_map
+            .get_mut(id)
+            .expect("bug: texture not allocated");
+
+        &mut entry.texture
+    }
+}
+
+/// A blend mode.
+///
+/// `BlendMode::from_mix_blend_mode` maps a `MixBlendMode` onto the
+/// `Advanced`, `Screen`, `Exclusion`, `PlusLighter` and `MultiplyDualSource`
+/// variants, depending on the available capabilities.
+#[derive(Debug, Copy, Clone, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum BlendMode {
+    None,
+    Alpha,
+    PremultipliedAlpha,
+    PremultipliedDestOut,
+    SubpixelDualSource,
+    SubpixelWithBgColor,
+    /// The wrapped mix-blend-mode, implemented with advanced blending.
+    Advanced(MixBlendMode),
+    /// Multiply, implemented with dual-source blending.
+    MultiplyDualSource,
+    /// Screen, implemented as Cs + Cd*(1-Cs).
+    Screen,
+    /// Exclusion, implemented as Cs*(1-Cd) + Cd*(1-Cs).
+    Exclusion,
+    /// PlusLighter, which is basically a clamped add.
+    PlusLighter,
+}
+
+impl BlendMode {
+    /// Picks the blend mode used to implement `mode`, given the available
+    /// capabilities.
+    ///
+    /// The candidates are tried in this order: coherent advanced blending,
+    /// simple blending, dual-source blending, then advanced blending without
+    /// coherency. Returns `None` when none of them can implement `mode`.
+    pub fn from_mix_blend_mode(
+        mode: MixBlendMode,
+        advanced_blend: bool,
+        coherent: bool,
+        dual_source: bool,
+    ) -> Option<BlendMode> {
+        // When a mix-blend-mode is emulated via simple or dual-source
+        // blending, the alpha output must still be As + Ad*(1-As) whatever the
+        // RGB output is, to comply with the mix-blend-mode spec.
+
+        // Coherent advanced blend handles every mode, so prefer it outright.
+        if advanced_blend && coherent {
+            return Some(BlendMode::Advanced(mode));
+        }
+
+        match mode {
+            // Screen: Cs + Cd - Cs*Cd => Cs + Cd*(1-Cs)
+            MixBlendMode::Screen => Some(BlendMode::Screen),
+            // Exclusion: Cs + Cd - 2*Cs*Cd => Cs*(1-Cd) + Cd*(1-Cs)
+            MixBlendMode::Exclusion => Some(BlendMode::Exclusion),
+            // PlusLighter is basically a clamped add.
+            MixBlendMode::PlusLighter => Some(BlendMode::PlusLighter),
+            // Multiply: Cs*Cd + Cs*(1-Ad) + Cd*(1-As) => Cs*(1-Ad) + Cd*(1 - SRC1=(As-Cs))
+            MixBlendMode::Multiply if dual_source => Some(BlendMode::MultiplyDualSource),
+            // Fall back to advanced blend without coherency when available.
+            _ if advanced_blend => Some(BlendMode::Advanced(mode)),
+            // Without advanced blend, brush_mix_blend has to be used instead.
+            _ => None,
+        }
+    }
+}
+
+/// State of the debugging / profiler overlay when running in native
+/// compositing mode.
+struct DebugOverlayState {
+    /// Whether any of the current debug flags cause a debug overlay to be
+    /// drawn.
+    is_enabled: bool,
+
+    /// Size of the currently allocated debug overlay surface, or `None` when
+    /// no debug surface is allocated.
+    current_size: Option<DeviceIntSize>,
+}
+
+impl DebugOverlayState {
+    /// Creates the initial state: overlay disabled, no surface allocated.
+    fn new() -> Self {
+        Self {
+            current_size: None,
+            is_enabled: false,
+        }
+    }
+}
+
+/// Remembers the damage rects of the most recent frames so that the damage
+/// of an older backbuffer can be reconstructed.
+#[derive(Debug, Default)]
+pub(crate) struct BufferDamageTracker {
+    /// Ring of damage rects, one slot per tracked frame.
+    damage_rects: [DeviceRect; 2],
+    /// Slot of `damage_rects` that the next pushed rect is written to.
+    current_offset: usize,
+}
+
+impl BufferDamageTracker {
+    /// Records the damage rect of the current frame. Call this only *after*
+    /// get_damage_rect() has been used to fetch the current backbuffer's
+    /// damage rect.
+    fn push_dirty_rect(&mut self, rect: &DeviceRect) {
+        let slot_count = self.damage_rects.len();
+        self.damage_rects[self.current_offset] = rect.clone();
+
+        // Step backwards through the ring, wrapping from slot 0 to the last slot.
+        self.current_offset = if self.current_offset == 0 {
+            slot_count - 1
+        } else {
+            self.current_offset - 1
+        };
+    }
+
+    /// Returns the damage rect of the current backbuffer for the given buffer
+    /// age (the number of frames since it was previously the backbuffer).
+    ///
+    /// Yields an empty rect when the buffer is still valid, and `None` when
+    /// the whole buffer has to be treated as invalid.
+    fn get_damage_rect(&self, buffer_age: usize) -> Option<DeviceRect> {
+        // Age 0 is a brand new buffer, which is completely invalid. A buffer
+        // older than the number of frames we track is treated the same way.
+        if buffer_age == 0 || buffer_age > self.damage_rects.len() + 1 {
+            return None;
+        }
+
+        // Age 1: this backbuffer was also the previous frame's backbuffer (so
+        // it must have been copied to the frontbuffer) and is entirely valid.
+        if buffer_age == 1 {
+            return Some(DeviceRect::zero());
+        }
+
+        // Otherwise accumulate the damage of every frame rendered since this
+        // buffer was last the backbuffer, starting with the most recent one.
+        let recent_rects = self.damage_rects.iter()
+            .cycle()
+            .skip(self.current_offset + 1)
+            .take(buffer_age - 1);
+
+        let mut damage = DeviceRect::zero();
+        for rect in recent_rects {
+            damage = damage.union(rect);
+        }
+
+        Some(damage)
+    }
+}
+
+/// The renderer is responsible for submitting to the GPU the work prepared by the
+/// RenderBackend.
+///
+/// We have a separate `Renderer` instance for each instance of WebRender (generally
+/// one per OS window), and all instances share the same thread.
+pub struct Renderer {
+    /// Receiving end of the result channel; drained by `update()`.
+    result_rx: Receiver<ResultMsg>,
+    /// Sender for `ApiMsg`s. `render()` uses it to report memory pressure
+    /// after an out-of-memory frame.
+    api_tx: Sender<ApiMsg>,
+    pub device: Device,
+    /// Texture update lists queued by `update()`.
+    pending_texture_updates: Vec<TextureUpdateList>,
+    /// True if there are any TextureCacheUpdate pending.
+    pending_texture_cache_updates: bool,
+    /// Native surface operations queued by `update()`.
+    pending_native_surface_updates: Vec<NativeSurfaceOperation>,
+    /// GPU cache update lists queued by `update()`.
+    pending_gpu_cache_updates: Vec<GpuCacheUpdateList>,
+    /// Set by `update()` when a received GPU cache update list has its
+    /// `clear` flag set.
+    pending_gpu_cache_clear: bool,
+    /// Shader paths received via `ResultMsg::RefreshShader`.
+    pending_shader_updates: Vec<PathBuf>,
+    /// The most recently published document for each document id.
+    active_documents: FastHashMap<DocumentId, RenderedDocument>,
+
+    shaders: Rc<RefCell<Shaders>>,
+
+    max_recorded_profiles: usize,
+
+    /// Set through `set_clear_color()`.
+    clear_color: ColorF,
+    enable_clear_scissor: bool,
+    enable_advanced_blend_barriers: bool,
+    clear_caches_with_quads: bool,
+    clear_alpha_targets_with_quads: bool,
+
+    debug: debug::LazyInitializedDebugRenderer,
+    debug_flags: DebugFlags,
+    profile: TransactionProfile,
+    frame_counter: u64,
+    resource_upload_time: f64,
+    gpu_cache_upload_time: f64,
+    profiler: Profiler,
+
+    // NOTE(review): `render()` stores `precise_time_ns()` here when there is
+    // no document to render - presumably a timestamp in nanoseconds.
+    last_time: u64,
+
+    pub gpu_profiler: GpuProfiler,
+    vaos: vertex::RendererVAOs,
+
+    gpu_cache_texture: gpu_cache::GpuCacheTexture,
+    vertex_data_textures: Vec<vertex::VertexDataTextures>,
+    current_vertex_data_textures: usize,
+
+    /// When the GPU cache debugger is enabled, we keep track of the live blocks
+    /// in the GPU cache so that we can use them for the debug display. This
+    /// member stores those live blocks, indexed by row.
+    gpu_cache_debug_chunks: Vec<Vec<GpuCacheDebugChunk>>,
+
+    gpu_cache_frame_id: FrameId,
+    gpu_cache_overflow: bool,
+
+    /// Pipeline epochs and removed pipelines accumulated by `update()`;
+    /// handed out (and reset) by `flush_pipeline_info()`.
+    pipeline_info: PipelineInfo,
+
+    // Manages and resolves source texture IDs to real texture IDs.
+    texture_resolver: TextureResolver,
+
+    texture_upload_pbo_pool: UploadPBOPool,
+    staging_texture_pool: UploadTexturePool,
+
+    dither_matrix_texture: Option<Texture>,
+
+    /// Optional trait object that allows the client
+    /// application to provide external buffers for image data.
+    external_image_handler: Option<Box<dyn ExternalImageHandler>>,
+
+    /// Optional function pointers for measuring memory used by a given
+    /// heap-allocated pointer.
+    size_of_ops: Option<MallocSizeOfOps>,
+
+    pub renderer_errors: Vec<RendererError>,
+
+    pub(in crate) async_frame_recorder: Option<AsyncScreenshotGrabber>,
+    pub(in crate) async_screenshots: Option<AsyncScreenshotGrabber>,
+
+    /// List of profile results from previous frames. Can be retrieved
+    /// via get_frame_profiles().
+    cpu_profiles: VecDeque<CpuProfile>,
+    gpu_profiles: VecDeque<GpuProfile>,
+
+    /// Notification requests to be fulfilled after rendering.
+    notifications: Vec<NotificationRequest>,
+
+    /// The device size passed to the most recent `render()` call.
+    device_size: Option<DeviceIntSize>,
+
+    /// A lazily created texture for the zoom debugging widget.
+    zoom_debug_texture: Option<Texture>,
+
+    /// The current mouse position. This is used for debugging
+    /// functionality only, such as the debug zoom widget.
+    cursor_position: DeviceIntPoint,
+
+    /// Guards to check if we might be rendering a frame with expired texture
+    /// cache entries.
+    shared_texture_cache_cleared: bool,
+
+    /// The set of documents which we've seen a publish for since last render.
+    documents_seen: FastHashSet<DocumentId>,
+
+    #[cfg(feature = "capture")]
+    read_fbo: FBOId,
+    #[cfg(feature = "replay")]
+    owned_external_images: FastHashMap<(ExternalImageId, u8), ExternalTexture>,
+
+    /// The compositing config, affecting how WR composites into the final scene.
+    compositor_config: CompositorConfig,
+
+    current_compositor_kind: CompositorKind,
+
+    /// Maintains a set of allocated native composite surfaces. This allows any
+    /// currently allocated surfaces to be cleaned up as soon as deinit() is
+    /// called (the normal bookkeeping for native surfaces exists in the
+    /// render backend thread).
+    allocated_native_surfaces: FastHashSet<NativeSurfaceId>,
+
+    /// If true, partial present state has been reset and everything needs to
+    /// be drawn on the next render.
+    force_redraw: bool,
+
+    /// State related to the debug / profiling overlays
+    debug_overlay_state: DebugOverlayState,
+
+    /// Tracks the dirty rectangles from previous frames. Used on platforms
+    /// that require keeping the front buffer fully correct when doing
+    /// partial present (e.g. unix desktop with EGL_EXT_buffer_age).
+    buffer_damage_tracker: BufferDamageTracker,
+
+    max_primitive_instance_count: usize,
+    enable_instancing: bool,
+
+    /// Counts consecutive out-of-memory frames to detect if we are stuck,
+    /// unable to render, in a loop.
+    consecutive_oom_frames: u32,
+}
+
+/// Errors reported by the renderer.
+#[derive(Debug)]
+pub enum RendererError {
+    /// Wraps a `ShaderError` (see the `From<ShaderError>` impl).
+    Shader(ShaderError),
+    /// Wraps a `std::io::Error` (see the `From<std::io::Error>` impl).
+    Thread(std::io::Error),
+    MaxTextureSize,
+    SoftwareRasterizer,
+    /// When `Renderer::render` sees this error it sends
+    /// `ApiMsg::MemoryPressure` and counts the frame as an out-of-memory frame.
+    OutOfMemory,
+}
+
+/// Lets a `ShaderError` be converted into `RendererError::Shader`.
+impl From<ShaderError> for RendererError {
+    fn from(shader_error: ShaderError) -> Self {
+        Self::Shader(shader_error)
+    }
+}
+
+impl From<std::io::Error> for RendererError {
+    fn from(err: std::io::Error) -> Self {
+        RendererError::Thread(err)
+    }
+}
+
+impl Renderer {
+    /// Returns the stored device size. `render()` sets it to the size it was
+    /// called with.
+    pub fn device_size(&self) -> Option<DeviceIntSize> {
+        self.device_size
+    }
+
+    /// Update the current position of the debug cursor.
+    ///
+    /// The position is only stored here; nothing is redrawn by this call.
+    pub fn set_cursor_position(
+        &mut self,
+        position: DeviceIntPoint,
+    ) {
+        self.cursor_position = position;
+    }
+
+    /// Returns the maximum texture size reported by the device.
+    pub fn get_max_texture_size(&self) -> i32 {
+        self.device.max_texture_size()
+    }
+
+    /// Describes the graphics API in use: always OpenGL, together with the
+    /// `VERSION` and `RENDERER` strings queried from the device's GL context.
+    pub fn get_graphics_api_info(&self) -> GraphicsApiInfo {
+        let version = self.device.gl().get_string(gl::VERSION);
+        let renderer = self.device.gl().get_string(gl::RENDERER);
+
+        GraphicsApiInfo {
+            kind: GraphicsApi::OpenGL,
+            version,
+            renderer,
+        }
+    }
+
+    /// Returns the `external` entry of the device's preferred color formats.
+    pub fn preferred_color_format(&self) -> ImageFormat {
+        self.device.preferred_color_formats().external
+    }
+
+    /// Returns the device's required PBO stride, expressed as a number of
+    /// bytes for the given image `format`.
+    pub fn required_texture_stride_alignment(&self, format: ImageFormat) -> usize {
+        self.device.required_pbo_stride().num_bytes(format).get()
+    }
+
+    /// Stores `color` as the renderer's clear color.
+    // NOTE(review): presumably used when clearing the main framebuffer -
+    // confirm against the code that reads `clear_color`.
+    pub fn set_clear_color(&mut self, color: ColorF) {
+        self.clear_color = color;
+    }
+
+    /// Hands the accumulated pipeline info to the caller and leaves a default
+    /// (empty) `PipelineInfo` in its place.
+    pub fn flush_pipeline_info(&mut self) -> PipelineInfo {
+        mem::take(&mut self.pipeline_info)
+    }
+
+    /// Returns the Epoch of the current frame in a pipeline.
+    ///
+    /// Yields `None` when no epoch is recorded for the given pipeline and
+    /// document pair.
+    pub fn current_epoch(&self, document_id: DocumentId, pipeline_id: PipelineId) -> Option<Epoch> {
+        self.pipeline_info.epochs.get(&(pipeline_id, document_id)).cloned()
+    }
+
+    /// Processes the result queue.
+    ///
+    /// Should be called before `render()`, as texture cache updates are done here.
+    ///
+    /// Every message currently waiting on the result channel is handled; the
+    /// call does not block waiting for further messages.
+    pub fn update(&mut self) {
+        profile_scope!("update");
+
+        // Handle every pending result message without blocking.
+        while let Ok(msg) = self.result_rx.try_recv() {
+            match msg {
+                ResultMsg::PublishPipelineInfo(mut pipeline_info) => {
+                    // Merge the published epochs and removed pipelines into
+                    // the info accumulated so far.
+                    for ((pipeline_id, document_id), epoch) in pipeline_info.epochs {
+                        self.pipeline_info.epochs.insert((pipeline_id, document_id), epoch);
+                    }
+                    self.pipeline_info.removed_pipelines.extend(pipeline_info.removed_pipelines.drain(..));
+                }
+                ResultMsg::PublishDocument(
+                    document_id,
+                    mut doc,
+                    resource_update_list,
+                ) => {
+                    // Add a new document to the active set
+
+                    // If the document we are replacing must be drawn (in order to
+                    // update the texture cache), issue a render just to
+                    // off-screen targets, ie pass None to render_impl. We do this
+                    // because a) we don't need to render to the main framebuffer
+                    // so it is cheaper not to, and b) doing so without a
+                    // subsequent present would break partial present.
+                    if let Some(mut prev_doc) = self.active_documents.remove(&document_id) {
+                        doc.profile.merge(&mut prev_doc.profile);
+
+                        if prev_doc.frame.must_be_drawn() {
+                            prev_doc.render_reasons |= RenderReasons::TEXTURE_CACHE_FLUSH;
+                            // Errors from this off-screen render are discarded.
+                            self.render_impl(
+                                document_id,
+                                &mut prev_doc,
+                                None,
+                                0,
+                            ).ok();
+                        }
+                    }
+
+                    self.active_documents.insert(document_id, doc);
+
+                    // IMPORTANT: The pending texture cache updates must be applied
+                    //            *after* the previous frame has been rendered above
+                    //            (if necessary for a texture cache update). For
+                    //            an example of why this is required:
+                    //            1) Previous frame contains a render task that
+                    //               targets Texture X.
+                    //            2) New frame contains a texture cache update which
+                    //               frees Texture X.
+                    //            3) bad stuff happens.
+
+                    //TODO: associate `document_id` with target window
+                    self.pending_texture_cache_updates |= !resource_update_list.texture_updates.updates.is_empty();
+                    self.pending_texture_updates.push(resource_update_list.texture_updates);
+                    self.pending_native_surface_updates.extend(resource_update_list.native_surface_updates);
+                    self.documents_seen.insert(document_id);
+                }
+                ResultMsg::UpdateGpuCache(mut list) => {
+                    // A clearing update both schedules a GPU cache clear and
+                    // drops the debug chunks recorded so far.
+                    if list.clear {
+                        self.pending_gpu_cache_clear = true;
+                    }
+                    if list.clear {
+                        self.gpu_cache_debug_chunks = Vec::new();
+                    }
+                    // Replay the alloc / free debug commands to keep the
+                    // per-row list of live debug chunks up to date.
+                    for cmd in mem::replace(&mut list.debug_commands, Vec::new()) {
+                        match cmd {
+                            GpuCacheDebugCmd::Alloc(chunk) => {
+                                let row = chunk.address.v as usize;
+                                if row >= self.gpu_cache_debug_chunks.len() {
+                                    self.gpu_cache_debug_chunks.resize(row + 1, Vec::new());
+                                }
+                                self.gpu_cache_debug_chunks[row].push(chunk);
+                            },
+                            GpuCacheDebugCmd::Free(address) => {
+                                // Panics if the row or the chunk was never
+                                // recorded by an earlier Alloc command.
+                                let chunks = &mut self.gpu_cache_debug_chunks[address.v as usize];
+                                let pos = chunks.iter()
+                                    .position(|x| x.address == address).unwrap();
+                                chunks.remove(pos);
+                            },
+                        }
+                    }
+                    self.pending_gpu_cache_updates.push(list);
+                }
+                ResultMsg::UpdateResources {
+                    resource_updates,
+                    memory_pressure,
+                } => {
+                    if memory_pressure {
+                        // If a memory pressure event arrives _after_ a new scene has
+                        // been published that writes persistent targets (i.e. cached
+                        // render tasks to the texture cache, or picture cache tiles)
+                        // but _before_ the next update/render loop, those targets
+                        // will not be updated due to the active_documents list being
+                        // cleared at the end of this message. To work around that,
+                        // if any of the existing documents have not rendered yet, and
+                        // have picture/texture cache targets, force a render so that
+                        // those targets are updated.
+                        let active_documents = mem::replace(
+                            &mut self.active_documents,
+                            FastHashMap::default(),
+                        );
+                        for (doc_id, mut doc) in active_documents {
+                            if doc.frame.must_be_drawn() {
+                                // As this render will not be presented, we must pass None to
+                                // render_impl. This avoids interfering with partial present
+                                // logic, as well as being more efficient.
+                                self.render_impl(
+                                    doc_id,
+                                    &mut doc,
+                                    None,
+                                    0,
+                                ).ok();
+                            }
+                        }
+                    }
+
+                    // Unlike PublishDocument, the queued updates are applied
+                    // right away, inside their own device frame.
+                    self.pending_texture_cache_updates |= !resource_updates.texture_updates.updates.is_empty();
+                    self.pending_texture_updates.push(resource_updates.texture_updates);
+                    self.pending_native_surface_updates.extend(resource_updates.native_surface_updates);
+                    self.device.begin_frame();
+
+                    self.update_texture_cache();
+                    self.update_native_surfaces();
+
+                    // Release the pooled upload PBOs and staging textures on
+                    // memory pressure.
+                    //
+                    // This is kept separate from the memory pressure handling
+                    // above because the device module asserts if we delete
+                    // textures while not in a frame.
+                    if memory_pressure {
+                        self.texture_upload_pbo_pool.on_memory_pressure(&mut self.device);
+                        self.staging_texture_pool.delete_textures(&mut self.device);
+                    }
+
+                    self.device.end_frame();
+                }
+                ResultMsg::AppendNotificationRequests(mut notifications) => {
+                    // We need to know specifically if there are any pending
+                    // TextureCacheUpdate updates in any of the entries in
+                    // pending_texture_updates. They may simply be nops, which do not
+                    // need to prevent issuing the notification, and if so, may not
+                    // cause a timely frame render to occur to wake up any listeners.
+                    if !self.pending_texture_cache_updates {
+                        drain_filter(
+                            &mut notifications,
+                            |n| { n.when() == Checkpoint::FrameTexturesUpdated },
+                            |n| { n.notify(); },
+                        );
+                    }
+                    self.notifications.append(&mut notifications);
+                }
+                ResultMsg::ForceRedraw => {
+                    self.force_redraw = true;
+                }
+                ResultMsg::RefreshShader(path) => {
+                    self.pending_shader_updates.push(path);
+                }
+                ResultMsg::SetParameter(ref param) => {
+                    self.device.set_parameter(param);
+                }
+                ResultMsg::DebugOutput(output) => match output {
+                    #[cfg(feature = "capture")]
+                    DebugOutput::SaveCapture(config, deferred) => {
+                        self.save_capture(config, deferred);
+                    }
+                    #[cfg(feature = "replay")]
+                    DebugOutput::LoadCapture(config, plain_externals) => {
+                        // Documents published before the capture was loaded
+                        // are dropped.
+                        self.active_documents.clear();
+                        self.load_capture(config, plain_externals);
+                    }
+                },
+                ResultMsg::DebugCommand(command) => {
+                    self.handle_debug_command(command);
+                }
+            }
+        }
+    }
+
+    /// Handles a debug command that reached the renderer.
+    ///
+    /// Only `SetFlags` and `InvalidateGpuCache` do any work here. A handful of
+    /// commands are accepted and ignored, and the rest panic because they are
+    /// not expected to arrive at the renderer.
+    fn handle_debug_command(&mut self, command: DebugCommand) {
+        match command {
+            DebugCommand::SetFlags(flags) => {
+                self.set_debug_flags(flags);
+            }
+            DebugCommand::InvalidateGpuCache => {
+                self.gpu_cache_texture.invalidate();
+            }
+            // Nothing to do for these commands in the renderer.
+            DebugCommand::ClearCaches(_) |
+            DebugCommand::SimulateLongSceneBuild(_) |
+            DebugCommand::EnableNativeCompositor(_) |
+            DebugCommand::SetBatchingLookback(_) => {}
+            DebugCommand::SetPictureTileSize(_) |
+            DebugCommand::SetMaximumSurfaceSize(_) => {
+                panic!("Should be handled by render backend");
+            }
+            DebugCommand::SaveCapture(..) |
+            DebugCommand::LoadCapture(..) |
+            DebugCommand::StartCaptureSequence(..) |
+            DebugCommand::StopCaptureSequence => {
+                panic!("Capture commands are not welcome here! Did you build with 'capture' feature?")
+            }
+        }
+    }
+
+    /// Set a callback for handling external images.
+    ///
+    /// Any previously installed handler is replaced.
+    pub fn set_external_image_handler(&mut self, handler: Box<dyn ExternalImageHandler>) {
+        self.external_image_handler = Some(handler);
+    }
+
+    /// Takes the recorded CPU and GPU frame profiles, leaving both internal
+    /// lists empty.
+    pub fn get_frame_profiles(&mut self) -> (Vec<CpuProfile>, Vec<GpuProfile>) {
+        (
+            self.cpu_profiles.drain(..).collect(),
+            self.gpu_profiles.drain(..).collect(),
+        )
+    }
+
+    /// Reset the current partial present state. This forces the entire framebuffer
+    /// to be refreshed next time `render` is called.
+    ///
+    /// This only sets the `force_redraw` flag, which is the same thing that
+    /// `update()` does on receiving `ResultMsg::ForceRedraw`.
+    pub fn force_redraw(&mut self) {
+        self.force_redraw = true;
+    }
+
+    /// Renders the current frame.
+    ///
+    /// A Frame is supplied by calling [`generate_frame()`][webrender_api::Transaction::generate_frame].
+    /// `buffer_age` is the age of the current backbuffer. It only matters when partial
+    /// present is active; pass 0 otherwise.
+    ///
+    /// When the frame fails with an out-of-memory error, a memory pressure message is
+    /// sent to the API channel. Panics once five consecutive frames have run out of
+    /// memory.
+    pub fn render(
+        &mut self,
+        device_size: DeviceIntSize,
+        buffer_age: usize,
+    ) -> Result<RenderResults, Vec<RendererError>> {
+        self.device_size = Some(device_size);
+
+        // TODO(gw): We want to make the active document that is
+        //           being rendered configurable via the public
+        //           API in future. For now, just select the last
+        //           added document as the active one to render
+        //           (Gecko only ever creates a single document
+        //           per renderer right now).
+        let doc_id = self.active_documents.keys().last().cloned();
+
+        let result = if let Some(doc_id) = doc_id {
+            // The document is taken out of the map while it is rendered to
+            // appease the borrow checker, and put back afterwards.
+            let mut doc = self.active_documents
+                .remove(&doc_id)
+                .unwrap();
+
+            let render_result = self.render_impl(
+                doc_id,
+                &mut doc,
+                Some(device_size),
+                buffer_age,
+            );
+
+            self.active_documents.insert(doc_id, doc);
+
+            render_result
+        } else {
+            self.last_time = precise_time_ns();
+            Ok(RenderResults::default())
+        };
+
+        drain_filter(
+            &mut self.notifications,
+            |n| { n.when() == Checkpoint::FrameRendered },
+            |n| { n.notify(); },
+        );
+
+        // Did any of the reported errors signal an out-of-memory condition?
+        let oom = match result {
+            Err(ref errors) => errors
+                .iter()
+                .any(|error| matches!(error, &RendererError::OutOfMemory)),
+            Ok(_) => false,
+        };
+
+        if !oom {
+            self.consecutive_oom_frames = 0;
+        } else {
+            let _ = self.api_tx.send(ApiMsg::MemoryPressure);
+            // Ensure we don't get stuck in a loop.
+            self.consecutive_oom_frames += 1;
+            assert!(self.consecutive_oom_frames < 5, "Renderer out of memory");
+        }
+
+        // This is the end of the rendering pipeline. If some notifications are still there,
+        // just clear them and they will automatically fire the Checkpoint::TransactionDropped
+        // event. Otherwise they would just pile up in this vector forever.
+        self.notifications.clear();
+
+        tracy_frame_marker!();
+
+        result
+    }
+
+    /// Refreshes the debug / profiler overlay state for this frame.
+    ///
+    /// The enabled flag is always recomputed. The overlay surface itself is only
+    /// managed when running with the native compositor: it is destroyed when it
+    /// is no longer needed or its size changed, and (re)created on demand.
+    fn update_debug_overlay(
+        &mut self,
+        framebuffer_size: DeviceIntSize,
+        has_debug_items: bool,
+    ) {
+        // Any of these debug flags results in something being drawn on the debug overlay.
+        let overlay_flags =
+            DebugFlags::PROFILER_DBG |
+            DebugFlags::RENDER_TARGET_DBG |
+            DebugFlags::TEXTURE_CACHE_DBG |
+            DebugFlags::EPOCHS |
+            DebugFlags::GPU_CACHE_DBG |
+            DebugFlags::PICTURE_CACHING_DBG |
+            DebugFlags::PRIMITIVE_DBG |
+            DebugFlags::ZOOM_DBG |
+            DebugFlags::WINDOW_VISIBILITY_DBG;
+        self.debug_overlay_state.is_enabled =
+            has_debug_items || self.debug_flags.intersects(overlay_flags);
+
+        // The overlay surface only exists in native compositor mode.
+        if !matches!(self.current_compositor_kind, CompositorKind::Native { .. }) {
+            return;
+        }
+
+        let compositor = self.compositor_config.compositor().unwrap();
+        let overlay = &mut self.debug_overlay_state;
+
+        // An existing surface is stale if it isn't needed for this frame, or if
+        // the framebuffer size has changed since it was created.
+        let stale = match overlay.current_size {
+            Some(current_size) => !overlay.is_enabled || current_size != framebuffer_size,
+            None => false,
+        };
+        if stale {
+            compositor.destroy_surface(NativeSurfaceId::DEBUG_OVERLAY);
+            overlay.current_size = None;
+        }
+
+        // Allocate a new surface, if we need one and there isn't one.
+        if overlay.is_enabled && overlay.current_size.is_none() {
+            compositor.create_surface(
+                NativeSurfaceId::DEBUG_OVERLAY,
+                DeviceIntPoint::zero(),
+                framebuffer_size,
+                false,
+            );
+            compositor.create_tile(
+                NativeTileId::DEBUG_OVERLAY,
+            );
+            overlay.current_size = Some(framebuffer_size);
+        }
+    }
+
+    /// Binds a draw target for the debug / profiler overlays, if one is needed.
+    ///
+    /// Returns `None` when the overlay is disabled. With the native compositor,
+    /// the overlay tile is invalidated, bound and cleared, and the resulting
+    /// native surface target is returned. Otherwise a draw target describing
+    /// the default framebuffer is returned.
+    fn bind_debug_overlay(&mut self, device_size: DeviceIntSize) -> Option<DrawTarget> {
+        if !self.debug_overlay_state.is_enabled {
+            return None;
+        }
+
+        // Overlay surface setup is only required in native compositing mode.
+        if !matches!(self.current_compositor_kind, CompositorKind::Native { .. }) {
+            // If we're not using the native compositor, then the default
+            // frame buffer is already bound. Create a DrawTarget for it and
+            // return it.
+            return Some(DrawTarget::new_default(
+                device_size,
+                self.device.surface_origin_is_top_left(),
+            ));
+        }
+
+        let compositor = self.compositor_config.compositor().unwrap();
+        let surface_size = self.debug_overlay_state.current_size.unwrap();
+
+        // Ensure the old surface contents are invalidated before binding.
+        compositor.invalidate_tile(
+            NativeTileId::DEBUG_OVERLAY,
+            DeviceIntRect::from_size(surface_size),
+        );
+        // Bind the native surface.
+        let surface_info = compositor.bind(
+            NativeTileId::DEBUG_OVERLAY,
+            DeviceIntRect::from_size(surface_size),
+            DeviceIntRect::from_size(surface_size),
+        );
+
+        // Make the native surface the current FBO target.
+        let draw_target = DrawTarget::NativeSurface {
+            offset: surface_info.origin,
+            external_fbo_id: surface_info.fbo_id,
+            dimensions: surface_size,
+        };
+        self.device.bind_draw_target(draw_target);
+
+        // When native compositing, clear the debug overlay each frame.
+        self.device.clear_target(
+            Some([0.0, 0.0, 0.0, 0.0]),
+            None, // debug renderer does not use depth
+            None,
+        );
+
+        Some(draw_target)
+    }
+
+    /// Unbinds the draw target used for the debug / profiler overlays, if one
+    /// was required.
+    ///
+    /// Does nothing unless the overlay is enabled and the native compositor is
+    /// in use.
+    fn unbind_debug_overlay(&mut self) {
+        if !self.debug_overlay_state.is_enabled {
+            return;
+        }
+
+        // Overlay surface setup is only required in native compositing mode.
+        if !matches!(self.current_compositor_kind, CompositorKind::Native { .. }) {
+            return;
+        }
+
+        let compositor = self.compositor_config.compositor().unwrap();
+
+        // Unbind the draw target and add it to the visual tree to be composited.
+        compositor.unbind();
+        compositor.add_surface(
+            NativeSurfaceId::DEBUG_OVERLAY,
+            CompositorSurfaceTransform::identity(),
+            DeviceIntRect::from_size(
+                self.debug_overlay_state.current_size.unwrap(),
+            ),
+            ImageRendering::Auto,
+        );
+    }
+
+    /// Renders `active_doc`'s frame and returns its stats, or the errors collected on the way.
+    /// If `device_size` is `None`, don't render to the main frame buffer: texture cache render
+    /// tasks are still updated, but no full frame render happens. It must be `None` if the render
+    /// will not be presented, as a composite without a present will confuse partial present.
+    fn render_impl(
+        &mut self,
+        doc_id: DocumentId,
+        active_doc: &mut RenderedDocument,
+        device_size: Option<DeviceIntSize>,
+        buffer_age: usize,
+    ) -> Result<RenderResults, Vec<RendererError>> {
+        profile_scope!("render");
+        let mut results = RenderResults::default();
+        self.profile.start_time(profiler::RENDERER_TIME);
+
+        self.staging_texture_pool.begin_frame();
+
+        let compositor_kind = active_doc.frame.composite_state.compositor_kind;
+        // The frame's compositor kind differs from the current one: switch native compositing.
+        if self.current_compositor_kind != compositor_kind {
+            let enable = match (self.current_compositor_kind, compositor_kind) {
+                (CompositorKind::Native { .. }, CompositorKind::Draw { .. }) => {
+                    if self.debug_overlay_state.current_size.is_some() {
+                        self.compositor_config
+                            .compositor()
+                            .unwrap()
+                            .destroy_surface(NativeSurfaceId::DEBUG_OVERLAY);
+                        self.debug_overlay_state.current_size = None;
+                    }
+                    false
+                }
+                (CompositorKind::Draw { .. }, CompositorKind::Native { .. }) => {
+                    true
+                }
+                (current_compositor_kind, active_doc_compositor_kind) => {
+                    warn!("Compositor mismatch, assuming this is Wrench running. Current {:?}, active {:?}",
+                        current_compositor_kind, active_doc_compositor_kind);
+                    false
+                }
+            };
+
+            if let Some(config) = self.compositor_config.compositor() {
+                config.enable_native_compositor(enable);
+            }
+            self.current_compositor_kind = compositor_kind;
+        }
+
+        // The texture resolver scope should be outside of any rendering, including
+        // debug rendering. This ensures that when we return render targets to the
+        // pool via glInvalidateFramebuffer, we don't do any debug rendering after
+        // that point. Otherwise, the bind / invalidate / bind logic trips up the
+        // render pass logic in tiled / mobile GPUs, resulting in an extra copy /
+        // resolve step when the debug overlay is enabled.
+        self.texture_resolver.begin_frame();
+
+        if let Some(device_size) = device_size {
+            self.update_gpu_profile(device_size);
+        }
+
+        let cpu_frame_id = {
+            let _gm = self.gpu_profiler.start_marker("begin frame");
+            let frame_id = self.device.begin_frame();
+            self.gpu_profiler.begin_frame(frame_id);
+
+            self.device.disable_scissor();
+            self.device.disable_depth();
+            self.set_blend(false, FramebufferKind::Main);
+            //self.update_shaders();
+
+            self.update_texture_cache();
+            self.update_native_surfaces();
+
+            frame_id
+        };
+
+        if let Some(device_size) = device_size {
+            // Inform the client that we are starting a composition transaction if native
+            // compositing is enabled. This needs to be done early in the frame, so that
+            // we can create debug overlays after drawing the main surfaces.
+            if let CompositorKind::Native { .. } = self.current_compositor_kind {
+                let compositor = self.compositor_config.compositor().unwrap();
+                compositor.begin_frame();
+            }
+
+            // Update the state of the debug overlay surface, ensuring that
+            // the compositor mode has a suitable surface to draw to, if required.
+            self.update_debug_overlay(device_size, !active_doc.frame.debug_items.is_empty());
+        }
+
+        let frame = &mut active_doc.frame;
+        let profile = &mut active_doc.profile;
+        assert!(self.current_compositor_kind == frame.composite_state.compositor_kind);
+
+        if self.shared_texture_cache_cleared {
+            assert!(self.documents_seen.contains(&doc_id),
+                    "Cleared texture cache without sending new document frame.");
+        }
+
+        match self.prepare_gpu_cache(&frame.deferred_resolves) {
+            Ok(..) => {
+                assert!(frame.gpu_cache_frame_id <= self.gpu_cache_frame_id,
+                    "Received frame depends on a later GPU cache epoch ({:?}) than one we received last via `UpdateGpuCache` ({:?})",
+                    frame.gpu_cache_frame_id, self.gpu_cache_frame_id);
+
+                {
+                    profile_scope!("gl.flush");
+                    self.device.gl().flush();  // early start on gpu cache updates
+                }
+
+                self.draw_frame(
+                    frame,
+                    device_size,
+                    buffer_age,
+                    &mut results,
+                );
+
+                // TODO(nical): do this automatically by selecting counters in the wr profiler
+                // Profile marker for the number of invalidated picture cache
+                if thread_is_being_profiled() {
+                    let duration = Duration::new(0,0);
+                    if let Some(n) = self.profile.get(profiler::RENDERED_PICTURE_TILES) {
+                        let message = (n as usize).to_string();
+                        add_text_marker("NumPictureCacheInvalidated", &message, duration);
+                    }
+                }
+
+                if device_size.is_some() {
+                    self.draw_frame_debug_items(&frame.debug_items);
+                }
+
+                self.profile.merge(profile);
+            }
+            Err(e) => {
+                self.renderer_errors.push(e);
+            }
+        }
+
+        self.unlock_external_images(&frame.deferred_resolves);
+
+        let _gm = self.gpu_profiler.start_marker("end frame");
+        self.gpu_profiler.end_frame();
+
+        let debug_overlay = device_size.and_then(|device_size| {
+            // Bind a surface to draw the debug / profiler information to.
+            self.bind_debug_overlay(device_size).map(|draw_target| {
+                self.draw_render_target_debug(&draw_target);
+                self.draw_texture_cache_debug(&draw_target);
+                self.draw_gpu_cache_debug(device_size);
+                self.draw_zoom_debug(device_size);
+                self.draw_epoch_debug();
+                self.draw_window_visibility_debug();
+                draw_target
+            })
+        });
+
+        let t = self.profile.end_time(profiler::RENDERER_TIME);
+        self.profile.end_time_if_started(profiler::TOTAL_FRAME_CPU_TIME);
+        Telemetry::record_renderer_time(Duration::from_micros((t * 1000.00) as u64));
+        if self.profile.get(profiler::SHADER_BUILD_TIME).is_none() {
+          Telemetry::record_renderer_time_no_sc(Duration::from_micros((t * 1000.00) as u64));
+        }
+        // NOTE(review): `last_time` is only refreshed below when a debug overlay was bound.
+        let current_time = precise_time_ns();
+        if device_size.is_some() {
+            let time = profiler::ns_to_ms(current_time - self.last_time);
+            self.profile.set(profiler::FRAME_TIME, time);
+        }
+
+        if self.max_recorded_profiles > 0 {
+            while self.cpu_profiles.len() >= self.max_recorded_profiles {
+                self.cpu_profiles.pop_front();
+            }
+            let cpu_profile = CpuProfile::new(
+                cpu_frame_id,
+                (self.profile.get_or(profiler::FRAME_BUILDING_TIME, 0.0) * 1000000.0) as u64,
+                (self.profile.get_or(profiler::RENDERER_TIME, 0.0) * 1000000.0) as u64,
+                self.profile.get_or(profiler::DRAW_CALLS, 0.0) as usize,
+            );
+            self.cpu_profiles.push_back(cpu_profile);
+        }
+
+        if thread_is_being_profiled() {
+            let duration = Duration::new(0,0);
+            let message = (self.profile.get_or(profiler::DRAW_CALLS, 0.0) as usize).to_string();
+            add_text_marker("NumDrawCalls", &message, duration);
+        }
+
+        let report = self.texture_resolver.report_memory();
+        self.profile.set(profiler::RENDER_TARGET_MEM, profiler::bytes_to_mb(report.render_target_textures));
+        self.profile.set(profiler::PICTURE_TILES_MEM, profiler::bytes_to_mb(report.picture_tile_textures));
+        self.profile.set(profiler::ATLAS_TEXTURES_MEM, profiler::bytes_to_mb(report.atlas_textures));
+        self.profile.set(profiler::STANDALONE_TEXTURES_MEM, profiler::bytes_to_mb(report.standalone_textures));
+
+        self.profile.set(profiler::DEPTH_TARGETS_MEM, profiler::bytes_to_mb(self.device.depth_targets_memory()));
+
+        self.profile.set(profiler::TEXTURES_CREATED, self.device.textures_created);
+        self.profile.set(profiler::TEXTURES_DELETED, self.device.textures_deleted);
+
+        results.stats.texture_upload_mb = self.profile.get_or(profiler::TEXTURE_UPLOADS_MEM, 0.0);
+        self.frame_counter += 1;
+        results.stats.resource_upload_time = self.resource_upload_time;
+        self.resource_upload_time = 0.0;
+        results.stats.gpu_cache_upload_time = self.gpu_cache_upload_time;
+        self.gpu_cache_upload_time = 0.0;
+
+        if let Some(stats) = active_doc.frame_stats.take() {
+          // Copy the full frame stats to RendererStats
+          results.stats.merge(&stats);
+
+          self.profiler.update_frame_stats(stats);
+        }
+
+        // Turn the render reasons bitflags into something we can see in the profiler.
+        // For now this is just a binary yes/no for each bit, which means that when looking
+        // at "Render reasons" in the profiler HUD the average view indicates the proportion
+        // of frames that had the bit set over a half second window whereas max shows whether
+        // the bit has been set at least once during that time window.
+        // We could implement better ways to visualize this information.
+        let add_markers = thread_is_being_profiled();
+        for i in 0..RenderReasons::NUM_BITS {
+            let counter = profiler::RENDER_REASON_FIRST + i as usize;
+            let mut val = 0.0;
+            let reason_bit = RenderReasons::from_bits_truncate(1 << i);
+            if active_doc.render_reasons.contains(reason_bit) {
+                val = 1.0;
+                if add_markers {
+                    let event_str = format!("Render reason {:?}", reason_bit);
+                    add_event_marker(&event_str);
+                }
+            }
+            self.profile.set(counter, val);
+        }
+        active_doc.render_reasons = RenderReasons::empty();
+
+
+        self.texture_resolver.update_profile(&mut self.profile);
+
+        // Note: this clears the values in self.profile.
+        self.profiler.set_counters(&mut self.profile);
+
+        // Note: profile counters must be set before this or they will count for next frame.
+        self.profiler.update();
+
+        if self.debug_flags.intersects(DebugFlags::PROFILER_DBG | DebugFlags::PROFILER_CAPTURE) {
+            if let Some(device_size) = device_size {
+                //TODO: take device/pixel ratio into equation?
+                if let Some(debug_renderer) = self.debug.get_mut(&mut self.device) {
+                    self.profiler.draw_profile(
+                        self.frame_counter,
+                        debug_renderer,
+                        device_size,
+                    );
+                }
+            }
+        }
+
+        if self.debug_flags.contains(DebugFlags::ECHO_DRIVER_MESSAGES) {
+            self.device.echo_driver_messages();
+        }
+
+        if let Some(debug_renderer) = self.debug.try_get_mut() {
+            let small_screen = self.debug_flags.contains(DebugFlags::SMALL_SCREEN);
+            let scale = if small_screen { 1.6 } else { 1.0 };
+            // TODO(gw): Tidy this up so that compositor config integrates better
+            //           with the (non-compositor) surface y-flip options.
+            let surface_origin_is_top_left = match self.current_compositor_kind {
+                CompositorKind::Native { .. } => true,
+                CompositorKind::Draw { .. } => self.device.surface_origin_is_top_left(),
+            };
+            // If there is a debug overlay, render it. Otherwise, just clear
+            // the debug renderer.
+            debug_renderer.render(
+                &mut self.device,
+                debug_overlay.and(device_size),
+                scale,
+                surface_origin_is_top_left,
+            );
+        }
+
+        self.staging_texture_pool.end_frame(&mut self.device);
+        self.texture_upload_pbo_pool.end_frame(&mut self.device);
+        self.device.end_frame();
+
+        if debug_overlay.is_some() {
+            self.last_time = current_time;
+
+            // Unbind the target for the debug overlay. No debug or profiler drawing
+            // can occur after this point.
+            self.unbind_debug_overlay();
+        }
+
+        if device_size.is_some() {
+            // Inform the client that we are finished this composition transaction if native
+            // compositing is enabled. This must be called after any debug / profiling compositor
+            // surfaces have been drawn and added to the visual tree.
+            if let CompositorKind::Native { .. } = self.current_compositor_kind {
+                profile_scope!("compositor.end_frame");
+                let compositor = self.compositor_config.compositor().unwrap();
+                compositor.end_frame();
+            }
+        }
+
+        self.documents_seen.clear();
+        self.shared_texture_cache_cleared = false;
+
+        self.check_gl_errors();
+
+        if self.renderer_errors.is_empty() {
+            Ok(results)
+        } else {
+            Err(mem::replace(&mut self.renderer_errors, Vec::new()))
+        }
+    }
+
+    /// Pulls the GPU profiler's finished samples and publishes them: optionally
+    /// archives a `GpuProfile`, hands the timers to the profiler, and sets the sampler
+    /// counters to sample counts scaled by one over `device_size`'s area.
+    fn update_gpu_profile(&mut self, device_size: DeviceIntSize) {
+        let _gm = self.gpu_profiler.start_marker("build samples");
+        // Block CPU waiting for last frame's GPU profiles to arrive.
+        // In general this shouldn't block unless heavily GPU limited.
+        let (gpu_frame_id, timers, samplers) = self.gpu_profiler.build_samples();
+
+        let history_cap = self.max_recorded_profiles;
+        if history_cap > 0 {
+            while self.gpu_profiles.len() >= history_cap {
+                self.gpu_profiles.pop_front();
+            }
+            let newest = GpuProfile::new(gpu_frame_id, &timers);
+            self.gpu_profiles.push_back(newest);
+        }
+
+        self.profiler.set_gpu_time_queries(timers);
+
+        if samplers.is_empty() {
+            return;
+        }
+
+        let screen_fraction = 1.0 / device_size.to_f32().area();
+        // Sums `count` over the samplers labelled `label`, scaled by `screen_fraction`.
+        let scaled_count = |label: &str| -> f32 {
+            let total = samplers.iter()
+                .filter(|sampler| sampler.tag.label == label)
+                .fold(0.0_f32, |accum, sampler| accum + sampler.count as f32);
+            total * screen_fraction
+        };
+
+        let alpha_targets = scaled_count("Alpha targets");
+        let transparent_pass = scaled_count("Transparent pass");
+        let opaque_pass = scaled_count("Opaque pass");
+        self.profile.set(profiler::ALPHA_TARGETS_SAMPLERS, alpha_targets);
+        self.profile.set(profiler::TRANSPARENT_PASS_SAMPLERS, transparent_pass);
+        self.profile.set(profiler::OPAQUE_PASS_SAMPLERS, opaque_pass);
+        self.profile.set(profiler::TOTAL_SAMPLERS, alpha_targets + transparent_pass + opaque_pass);
+    }
+
+    /// Applies every pending texture cache update list in order: texture-to-texture
+    /// copies, then texture frees / allocations / resets (reusing freed textures whose
+    /// allocation info matches), then uploads. Finally fires `FrameTexturesUpdated` notifications.
+    fn update_texture_cache(&mut self) {
+        profile_scope!("update_texture_cache");
+        let _gm = self.gpu_profiler.start_marker("texture cache update");
+        let mut pending_texture_updates = mem::replace(&mut self.pending_texture_updates, vec![]);
+        self.pending_texture_cache_updates = false;
+
+        self.profile.start_time(profiler::TEXTURE_CACHE_UPDATE_TIME);
+        let mut create_cache_texture_time = 0;
+        let mut delete_cache_texture_time = 0;
+
+        for update_list in pending_texture_updates.drain(..) {
+            // Handle copies from one texture to another.
+            for ((src_tex, dst_tex), copies) in &update_list.copies {
+                let dest_texture = &self.texture_resolver.texture_cache_map[&dst_tex].texture;
+                let dst_texture_size = dest_texture.get_dimensions().to_f32();
+
+                let mut copy_instances = Vec::new();
+                for copy in copies {
+                    copy_instances.push(CopyInstance {
+                        src_rect: copy.src_rect.to_f32(),
+                        dst_rect: copy.dst_rect.to_f32(),
+                        dst_texture_size,
+                    });
+                }
+
+                let draw_target = DrawTarget::from_texture(dest_texture, false);
+                self.device.bind_draw_target(draw_target);
+
+                self.shaders
+                    .borrow_mut()
+                    .ps_copy
+                    .bind(
+                        &mut self.device,
+                        &Transform3D::identity(),
+                        None,
+                        &mut self.renderer_errors,
+                        &mut self.profile,
+                    );
+
+                self.draw_instanced_batch(
+                    &copy_instances,
+                    VertexArrayKind::Copy,
+                    &BatchTextures::composite_rgb(
+                        TextureSource::TextureCache(*src_tex, Swizzle::default())
+                    ),
+                    &mut RendererStats::default(),
+                );
+            }
+
+            // Find any textures that will need to be deleted in this group of allocations.
+            let mut pending_deletes = Vec::new();
+            for allocation in &update_list.allocations {
+                let old = self.texture_resolver.texture_cache_map.remove(&allocation.id);
+                match allocation.kind {
+                    TextureCacheAllocationKind::Alloc(_) => {
+                        assert!(old.is_none(), "Renderer and backend disagree!");
+                    }
+                    TextureCacheAllocationKind::Reset(_) |
+                    TextureCacheAllocationKind::Free => {
+                        assert!(old.is_some(), "Renderer and backend disagree!");
+                    }
+                }
+                if let Some(old) = old {
+                    // Regenerate the cache allocation info so we can search through deletes for reuse.
+                    let size = old.texture.get_dimensions();
+                    let info = TextureCacheAllocInfo {
+                        width: size.width,
+                        height: size.height,
+                        format: old.texture.get_format(),
+                        filter: old.texture.get_filter(),
+                        target: old.texture.get_target(),
+                        is_shared_cache: old.texture.flags().contains(TextureFlags::IS_SHARED_TEXTURE_CACHE),
+                        has_depth: old.texture.supports_depth(),
+                        category: old.category,
+                    };
+                    pending_deletes.push((old.texture, info));
+                }
+            }
+            // Look for any alloc or reset that has matching alloc info and save it from being deleted.
+            // One entry is queued per alloc/reset, in order; `None` means nothing could be reused.
+            let mut reused_textures = VecDeque::with_capacity(pending_deletes.len());
+            for allocation in &update_list.allocations {
+                match allocation.kind {
+                    TextureCacheAllocationKind::Alloc(ref info) |
+                    TextureCacheAllocationKind::Reset(ref info) => {
+                        reused_textures.push_back(
+                            pending_deletes.iter()
+                                .position(|(_, old_info)| *old_info == *info)
+                                .map(|index| pending_deletes.swap_remove(index).0)
+                        );
+                    }
+                    TextureCacheAllocationKind::Free => {}
+                }
+            }
+
+            // Now that we've saved as many deletions for reuse as we can, actually delete whatever is left.
+            if !pending_deletes.is_empty() {
+                let delete_texture_start = precise_time_ns();
+                for (texture, _) in pending_deletes {
+                    add_event_marker("TextureCacheFree");
+                    self.device.delete_texture(texture);
+                }
+                delete_cache_texture_time += precise_time_ns() - delete_texture_start;
+            }
+
+            for allocation in update_list.allocations {
+                match allocation.kind {
+                    TextureCacheAllocationKind::Alloc(_) => add_event_marker("TextureCacheAlloc"),
+                    TextureCacheAllocationKind::Reset(_) => add_event_marker("TextureCacheReset"),
+                    TextureCacheAllocationKind::Free => {}
+                };
+                match allocation.kind {
+                    TextureCacheAllocationKind::Alloc(ref info) |
+                    TextureCacheAllocationKind::Reset(ref info) => {
+                        let create_cache_texture_start = precise_time_ns();
+                        // Create a new native texture, as requested by the texture cache.
+                        // If we managed to reuse a deleted texture, then prefer that instead.
+                        //
+                        // Ensure no PBO is bound when creating the texture storage,
+                        // or GL will attempt to read data from there.
+                        let mut texture = reused_textures.pop_front().unwrap_or(None).unwrap_or_else(|| {
+                            self.device.create_texture(
+                                info.target,
+                                info.format,
+                                info.width,
+                                info.height,
+                                info.filter,
+                                // This needs to be a render target because some render
+                                // tasks get rendered into the texture cache.
+                                Some(RenderTargetInfo { has_depth: info.has_depth }),
+                            )
+                        });
+
+                        if info.is_shared_cache {
+                            texture.flags_mut()
+                                .insert(TextureFlags::IS_SHARED_TEXTURE_CACHE);
+
+                            // On Mali-Gxx devices we use batched texture uploads as it performs much better.
+                            // However, due to another driver bug we must ensure the textures are fully cleared,
+                            // otherwise we get visual artefacts when blitting to the texture cache.
+                            if self.device.use_batched_texture_uploads() &&
+                                !self.device.get_capabilities().supports_render_target_partial_update
+                            {
+                                self.clear_texture(&texture, [0.0; 4]);
+                            }
+
+                            // Textures in the cache generally don't need to be cleared,
+                            // but we do so if the debug display is active to make it
+                            // easier to identify unallocated regions.
+                            if self.debug_flags.contains(DebugFlags::TEXTURE_CACHE_DBG) {
+                                self.clear_texture(&texture, TEXTURE_CACHE_DBG_CLEAR_COLOR);
+                            }
+                        }
+
+                        create_cache_texture_time += precise_time_ns() - create_cache_texture_start;
+
+                        self.texture_resolver.texture_cache_map.insert(allocation.id, CacheTexture {
+                            texture,
+                            category: info.category,
+                        });
+                    }
+                    TextureCacheAllocationKind::Free => {}
+                };
+            }
+
+            upload_to_texture_cache(self, update_list.updates);
+
+            self.check_gl_errors();
+        }
+
+        if create_cache_texture_time > 0 {
+            self.profile.set(
+                profiler::CREATE_CACHE_TEXTURE_TIME,
+                profiler::ns_to_ms(create_cache_texture_time)
+            );
+        }
+        if delete_cache_texture_time > 0 {
+            self.profile.set(
+                profiler::DELETE_CACHE_TEXTURE_TIME,
+                profiler::ns_to_ms(delete_cache_texture_time)
+            )
+        }
+
+        let t = self.profile.end_time(profiler::TEXTURE_CACHE_UPDATE_TIME);
+        self.resource_upload_time += t;
+        Telemetry::record_texture_cache_update_time(Duration::from_micros((t * 1000.00) as u64));
+
+        drain_filter(
+            &mut self.notifications,
+            |n| { n.when() == Checkpoint::FrameTexturesUpdated },
+            |n| { n.notify(); },
+        );
+    }
+
+    fn check_gl_errors(&mut self) {
+        let err = self.device.gl().get_error();
+        if err == gl::OUT_OF_MEMORY {
+            self.renderer_errors.push(RendererError::OutOfMemory);
+        }
+
+        // Probably should check for other errors?
+    }
+
+    /// Binds every texture a batch samples from: its first three color inputs, its
+    /// clip mask and, when one is present, the dither matrix texture. Each goes to
+    /// its own `TextureSampler` slot.
+    fn bind_textures(&mut self, textures: &BatchTextures) {
+        let color_sources = &textures.input.colors[.. 3];
+        for (slot, source) in color_sources.iter().enumerate() {
+            let sampler = TextureSampler::color(slot);
+            self.texture_resolver.bind(source, sampler, &mut self.device);
+        }
+
+        // The clip mask goes through the resolver as well, on its dedicated sampler.
+        let clip_sampler = TextureSampler::ClipMask;
+        self.texture_resolver.bind(&textures.clip_mask, clip_sampler, &mut self.device);
+
+        // TODO: this probably isn't the best place for this.
+        if let Some(texture) = self.dither_matrix_texture.as_ref() {
+            let swizzle = Swizzle::default();
+            self.device.bind_texture(TextureSampler::Dither, texture, swizzle);
+        }
+    }
+
+    /// Binds `textures` and the VAO for `vertex_array_kind`, then uploads `data` and draws
+    /// it in chunks, counting one draw call per chunk in both the profile and `stats`.
+    /// An empty `data` issues no draw calls (debug builds assert against it instead).
+    fn draw_instanced_batch<T: Clone>(
+        &mut self,
+        data: &[T],
+        vertex_array_kind: VertexArrayKind,
+        textures: &BatchTextures,
+        stats: &mut RendererStats,
+    ) {
+        self.bind_textures(textures);
+
+        // If we end up with an empty draw call here, that means we have probably
+        // introduced unnecessary batch breaks during frame building - so we should
+        // be catching this earlier and removing the batch.
+        debug_assert!(!data.is_empty());
+
+        let vao = &self.vaos[vertex_array_kind];
+        self.device.bind_vao(vao);
+
+        let chunk_size = if self.debug_flags.contains(DebugFlags::DISABLE_BATCHING) {
+            1
+        } else if vertex_array_kind == VertexArrayKind::Primitive {
+            self.max_primitive_instance_count
+        } else {
+            // `chunks` panics on a chunk size of zero, and the assertion above is
+            // compiled out of release builds, so an empty batch must not yield 0.
+            data.len().max(1)
+        };
+
+        for chunk in data.chunks(chunk_size) {
+            if self.enable_instancing {
+                self.device.update_vao_instances(vao, chunk, ONE_TIME_USAGE_HINT, None);
+                self.device.draw_indexed_triangles_instanced_u16(6, chunk.len() as i32);
+            } else {
+                self.device.update_vao_instances(vao, chunk, ONE_TIME_USAGE_HINT, NonZeroUsize::new(4));
+                self.device.draw_indexed_triangles(6 * chunk.len() as i32);
+            }
+            self.profile.inc(profiler::DRAW_CALLS);
+            stats.total_draw_calls += 1;
+        }
+
+        self.profile.add(profiler::VERTICES, 6 * data.len());
+    }
+
+    /// Fills a readback task's rect in its cache texture with the backdrop pixels
+    /// that lie behind it, by blitting from `draw_target`.
+    ///
+    /// Dummy readbacks (those without a `readback_origin`) return immediately. For
+    /// all others, on return `draw_target` is bound again, the read target is reset
+    /// and scissoring is re-enabled if `uses_scissor` is set.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `readback` is not a readback task, if its target texture cannot be
+    /// resolved, or if `backdrop` is not a picture task.
+    fn handle_readback_composite(
+        &mut self,
+        draw_target: DrawTarget,
+        uses_scissor: bool,
+        backdrop: &RenderTask,
+        readback: &RenderTask,
+    ) {
+        // Where, in the backdrop surface's device space, the readback wants to
+        // start reading from.
+        let readback_origin = match readback.kind {
+            RenderTaskKind::Readback(ref task) => match task.readback_origin {
+                Some(origin) => origin,
+                // A dummy readback: we know that the clear of the target will ensure
+                // the task rect is already zero alpha, so skipping it won't affect
+                // the rendering output.
+                None => return,
+            },
+            _ => unreachable!(),
+        };
+
+        // Scissoring is switched off for the duration of the blit and restored at
+        // the end of this function.
+        if uses_scissor {
+            self.device.disable_scissor();
+        }
+
+        // Look up the texture that backs the readback task's target.
+        let readback_texture_id = readback.get_target_texture();
+        let texture_source = TextureSource::TextureCache(readback_texture_id, Swizzle::default());
+        let (cache_texture, _) = self.texture_resolver
+            .resolve(&texture_source)
+            .expect("bug: no source texture");
+
+        // Target rects of both tasks, plus the content origin of the backdrop
+        // picture.
+        let readback_rect = readback.get_target_rect();
+        let backdrop_rect = backdrop.get_target_rect();
+        let backdrop_screen_origin = match backdrop.kind {
+            RenderTaskKind::Picture(ref picture) => picture.content_origin,
+            _ => panic!("bug: composite on non-picture?"),
+        };
+
+        // The blit destination: a draw target wrapping the readback's cache
+        // texture.
+        let cache_draw_target = DrawTarget::from_texture(cache_texture, false);
+
+        // The rect that we ideally want, in the space of the parent surface, sized
+        // like the readback's target rect.
+        let wanted_size = readback_rect.size().to_f32();
+        let wanted_rect = DeviceRect::from_origin_and_size(readback_origin, wanted_size);
+
+        // The rect that is available on the parent surface. It may be smaller than
+        // desired because this is a picture cache tile covering only part of the
+        // wanted rect and/or because the parent surface was clipped.
+        let avail_size = backdrop_rect.size().to_f32();
+        let avail_rect = DeviceRect::from_origin_and_size(backdrop_screen_origin, avail_size);
+
+        // Only the overlap of the two rects can be copied. If they don't
+        // intersect, no blit is issued at all.
+        if let Some(overlap) = wanted_rect.intersection(&avail_rect) {
+            let copy_size = overlap.size().to_i32();
+            let overlap_start = overlap.min.to_vector();
+
+            // Re-express the start of the overlap relative to each task's own
+            // target rect: first the source (backdrop), then the destination.
+            let src_origin =
+                backdrop_rect.min.to_f32() + overlap_start - backdrop_screen_origin.to_vector();
+            let dest_origin =
+                readback_rect.min.to_f32() + overlap_start - readback_origin.to_vector();
+
+            let src = DeviceIntRect::from_origin_and_size(src_origin.to_i32(), copy_size);
+            let dest = DeviceIntRect::from_origin_and_size(dest_origin.to_i32(), copy_size);
+
+            // Should always be drawing to picture cache tiles or off-screen surface!
+            debug_assert!(!draw_target.is_default());
+
+            // A scale of one: multiplying by it leaves the coordinates unchanged.
+            let device_to_framebuffer = Scale::new(1i32);
+
+            self.device.blit_render_target(
+                draw_target.into(),
+                src * device_to_framebuffer,
+                cache_draw_target,
+                dest * device_to_framebuffer,
+                TextureFilter::Linear,
+            );
+        }
+
+        // Restore draw target to current pass render target, and reset
+        // the read target.
+        self.device.bind_draw_target(draw_target);
+        self.device.reset_read_target();
+
+        if uses_scissor {
+            self.device.enable_scissor();
+        }
+    }
+
+    /// Run every resolve operation in `resolve_ops` against `draw_target`.
+    ///
+    /// When the list is empty nothing happens at all: no GPU timer is started
+    /// and the device read target is left untouched. Otherwise the read target
+    /// is reset once after all operations have been processed.
+    fn handle_resolves(
+        &mut self,
+        resolve_ops: &[ResolveOp],
+        render_tasks: &RenderTaskGraph,
+        draw_target: DrawTarget,
+    ) {
+        if !resolve_ops.is_empty() {
+            let _timer = self.gpu_profiler.start_timer(GPU_TAG_BLIT);
+
+            for op in resolve_ops.iter() {
+                self.handle_resolve(op, render_tasks, draw_target);
+            }
+
+            self.device.reset_read_target();
+        }
+    }
+
+
+    /// Copy the output of each blit job's source render task into
+    /// `draw_target`, at the job's target rect.
+    ///
+    /// Panics if the texture of a source task cannot be resolved. A source
+    /// texture with a non-default swizzle is reported via `error!` but the
+    /// blit is still issued.
+    fn handle_blits(
+        &mut self,
+        blits: &[BlitJob],
+        render_tasks: &RenderTaskGraph,
+        draw_target: DrawTarget,
+    ) {
+        if blits.is_empty() {
+            return;
+        }
+
+        let _timer = self.gpu_profiler.start_timer(GPU_TAG_BLIT);
+
+        // TODO(gw): For now, we don't bother batching these by source texture.
+        //           If it ever shows up as an issue, we can easily batch them.
+        for job in blits.iter() {
+            // A blit from the child render task into this target.
+            // TODO(gw): Support R8 format here once we start
+            //           creating mips for alpha masks.
+            let src_task = &render_tasks[job.source];
+            let src_rect = src_task.get_target_rect();
+            let src_texture = src_task.get_texture_source();
+
+            // Source and destination are expected to have the same size.
+            debug_assert_eq!(src_rect.size(), job.target_rect.size());
+
+            let (texture, swizzle) = self.texture_resolver
+                .resolve(&src_texture)
+                .expect("BUG: invalid source texture");
+
+            if swizzle != Swizzle::default() {
+                error!("Swizzle {:?} can't be handled by a blit", swizzle);
+            }
+
+            let read_target = DrawTarget::from_texture(texture, false);
+            let src_fb_rect = read_target.to_framebuffer_rect(src_rect);
+            let dest_fb_rect = draw_target.to_framebuffer_rect(job.target_rect);
+
+            self.device.blit_render_target(
+                read_target.into(),
+                src_fb_rect,
+                draw_target,
+                dest_fb_rect,
+                TextureFilter::Linear,
+            );
+        }
+    }
+
+    /// Draw every pending scaling instance, issuing one instanced batch per
+    /// source texture.
+    ///
+    /// For each source the scale shader matching the source's image buffer
+    /// kind is bound with the source texture size, then all instances that
+    /// sample from that source are drawn together.
+    fn handle_scaling(
+        &mut self,
+        scalings: &FastHashMap<TextureSource, Vec<ScalingInstance>>,
+        projection: &default::Transform3D<f32>,
+        stats: &mut RendererStats,
+    ) {
+        if scalings.is_empty() {
+            return;
+        }
+
+        let _timer = self.gpu_profiler.start_timer(GPU_TAG_SCALE);
+
+        for (source, queued) in scalings.iter() {
+            // When the source texture is an external texture, the UV rect is not known
+            // when the external surface descriptor is created, because external textures
+            // are not resolved until the lock() callback is invoked at the start of the
+            // frame render. We must therefore override the source rects now.
+            let resolved: Vec<ScalingInstance>;
+            let batch = if let TextureSource::External(..) = source {
+                resolved = queued
+                    .iter()
+                    .map(|instance| {
+                        let texel_rect: TexelRect = self.texture_resolver.get_uv_rect(
+                            source,
+                            instance.source_rect.cast().into()
+                        ).into();
+                        ScalingInstance {
+                            target_rect: instance.target_rect,
+                            source_rect: DeviceRect::new(texel_rect.uv0, texel_rect.uv1),
+                        }
+                    })
+                    .collect();
+                &resolved
+            } else {
+                queued
+            };
+
+            let texture_size = self.texture_resolver.get_texture_size(source).to_f32();
+
+            self.shaders
+                .borrow_mut()
+                .get_scale_shader(source.image_buffer_kind())
+                .bind(
+                    &mut self.device,
+                    projection,
+                    Some(texture_size),
+                    &mut self.renderer_errors,
+                    &mut self.profile,
+                );
+
+            self.draw_instanced_batch(
+                batch,
+                VertexArrayKind::Scale,
+                &BatchTextures::composite_rgb(*source),
+                stats,
+            );
+        }
+    }
+
+    /// Bind the SVG filter shader and draw `svg_filters` as one instanced
+    /// batch that samples from `textures`.
+    ///
+    /// Returns immediately, without starting a GPU timer, when there are no
+    /// filter instances.
+    fn handle_svg_filters(
+        &mut self,
+        textures: &BatchTextures,
+        svg_filters: &[SvgFilterInstance],
+        projection: &default::Transform3D<f32>,
+        stats: &mut RendererStats,
+    ) {
+        if svg_filters.is_empty() {
+            return;
+        }
+
+        let _timer = self.gpu_profiler.start_timer(GPU_TAG_SVG_FILTER);
+
+        {
+            // Keep the shader borrow scoped so it is released before drawing.
+            let mut shaders = self.shaders.borrow_mut();
+            shaders.cs_svg_filter.bind(
+                &mut self.device,
+                projection,
+                None,
+                &mut self.renderer_errors,
+                &mut self.profile,
+            );
+        }
+
+        self.draw_instanced_batch(
+            svg_filters,
+            VertexArrayKind::SvgFilter,
+            textures,
+            stats,
+        );
+    }
+
+    /// Blit the overlapping area of every source picture task of `resolve_op`
+    /// into the part of `draw_target` occupied by the destination picture task.
+    ///
+    /// Source tasks whose area does not intersect the destination are skipped.
+    ///
+    /// Panics if a source task or the destination task is not a picture task,
+    /// or if the target texture of an overlapping source cannot be resolved.
+    fn handle_resolve(
+        &mut self,
+        resolve_op: &ResolveOp,
+        render_tasks: &RenderTaskGraph,
+        draw_target: DrawTarget,
+    ) {
+        for &src_task_id in resolve_op.src_task_ids.iter() {
+            let src_task = &render_tasks[src_task_id];
+            let src_info = if let RenderTaskKind::Picture(ref info) = src_task.kind {
+                info
+            } else {
+                panic!("bug: not a picture")
+            };
+            let src_task_rect = src_task.get_target_rect().to_f32();
+
+            let dest_task = &render_tasks[resolve_op.dest_task_id];
+            let dest_info = if let RenderTaskKind::Picture(ref info) = dest_task.kind {
+                info
+            } else {
+                panic!("bug: not a picture")
+            };
+            let dest_task_rect = dest_task.get_target_rect().to_f32();
+
+            // Get the rect that we ideally want, in space of the parent surface
+            let wanted_rect = DeviceRect::from_origin_and_size(
+                dest_info.content_origin,
+                dest_task_rect.size(),
+            ).cast_unit() * dest_info.device_pixel_scale.inverse();
+
+            // Get the rect that is available on the parent surface. It may be smaller
+            // than desired because this is a picture cache tile covering only part of
+            // the wanted rect and/or because the parent surface was clipped.
+            let avail_rect = DeviceRect::from_origin_and_size(
+                src_info.content_origin,
+                src_task_rect.size(),
+            ).cast_unit() * src_info.device_pixel_scale.inverse();
+
+            // Nothing to copy from this source if the two areas do not overlap.
+            let overlap = match wanted_rect.intersection(&avail_rect) {
+                Some(rect) => rect,
+                None => continue,
+            };
+
+            // Express the overlap at the scale of the source and of the
+            // destination task respectively.
+            let src_overlap = (overlap * src_info.device_pixel_scale).cast_unit();
+            let dest_overlap = (overlap * dest_info.device_pixel_scale).cast_unit();
+
+            // Work out the correct origins and sizes of the copy rects, and
+            // do the blit.
+            let src_origin = src_task_rect.min +
+                src_overlap.min.to_vector() -
+                src_info.content_origin.to_vector();
+
+            let src = DeviceIntRect::from_origin_and_size(
+                src_origin.to_i32(),
+                src_overlap.size().round().to_i32(),
+            );
+
+            let dest_origin = dest_task_rect.min +
+                dest_overlap.min.to_vector() -
+                dest_info.content_origin.to_vector();
+
+            let dest = DeviceIntRect::from_origin_and_size(
+                dest_origin.to_i32(),
+                dest_overlap.size().round().to_i32(),
+            );
+
+            let texture_source = TextureSource::TextureCache(
+                src_task.get_target_texture(),
+                Swizzle::default(),
+            );
+            let (cache_texture, _) = self.texture_resolver
+                .resolve(&texture_source)
+                .expect("bug: no source texture");
+
+            let read_target = ReadTarget::from_texture(cache_texture);
+
+            // Should always be drawing to picture cache tiles or off-screen surface!
+            debug_assert!(!draw_target.is_default());
+            let to_framebuffer = Scale::new(1i32);
+
+            self.device.blit_render_target(
+                read_target,
+                src * to_framebuffer,
+                draw_target,
+                dest * to_framebuffer,
+                TextureFilter::Linear,
+            );
+        }
+    }
+
+    /// Render one picture cache target into `draw_target`.
+    ///
+    /// The target is bound and cleared first; the clear is restricted to the
+    /// target's dirty rect when the device reports support for partial render
+    /// target updates. The content is then produced either by drawing the
+    /// target's alpha batch container or by blitting from another render task,
+    /// depending on `target.kind`. The depth target is invalidated at the end.
+    ///
+    /// * `target` - the picture cache target to render.
+    /// * `draw_target` - the destination that is bound and drawn/blitted into.
+    /// * `projection` - projection used for the clear quad and the batches.
+    /// * `render_tasks` - task graph used to look up the blit source task and
+    ///   handed on when drawing the batches.
+    /// * `stats` - renderer statistics handed to the draw calls.
+    ///
+    /// Panics if, for a `Blit` target, the source task's texture cannot be
+    /// resolved.
+    fn draw_picture_cache_target(
+        &mut self,
+        target: &PictureCacheTarget,
+        draw_target: DrawTarget,
+        projection: &default::Transform3D<f32>,
+        render_tasks: &RenderTaskGraph,
+        stats: &mut RendererStats,
+    ) {
+        profile_scope!("draw_picture_cache_target");
+
+        self.profile.inc(profiler::RENDERED_PICTURE_TILES);
+        let _gm = self.gpu_profiler.start_marker("picture cache target");
+        let framebuffer_kind = FramebufferKind::Other;
+
+        {
+            // Target setup: bind, then clear color and depth.
+            let _timer = self.gpu_profiler.start_timer(GPU_TAG_SETUP_TARGET);
+            self.device.bind_draw_target(draw_target);
+            // Depth writes are enabled for the duration of the clear and
+            // switched off again at the end of this scope.
+            self.device.enable_depth_write();
+            self.set_blend(false, framebuffer_kind);
+
+            let clear_color = target.clear_color.map(|c| c.to_array());
+            // Only restrict the clear to the dirty rect when the device says
+            // it supports partially updating a render target.
+            let scissor_rect = if self.device.get_capabilities().supports_render_target_partial_update {
+                Some(target.dirty_rect)
+            } else {
+                None
+            };
+            match scissor_rect {
+                // If updating only a dirty rect within a picture cache target, the
+                // clear must also be scissored to that dirty region.
+                Some(r) if self.clear_caches_with_quads => {
+                    // Clear by drawing a quad over the dirty rect with the
+                    // clear shader; the depth test always passes.
+                    self.device.enable_depth(DepthFunction::Always);
+                    // Save the draw call count so that our reftests don't get confused...
+                    let old_draw_call_count = stats.total_draw_calls;
+                    // Without a clear color the quad is drawn with color
+                    // writes disabled.
+                    if clear_color.is_none() {
+                        self.device.disable_color_write();
+                    }
+                    let instance = ClearInstance {
+                        rect: [
+                            r.min.x as f32, r.min.y as f32,
+                            r.max.x as f32, r.max.y as f32,
+                        ],
+                        color: clear_color.unwrap_or([0.0; 4]),
+                    };
+                    self.shaders.borrow_mut().ps_clear.bind(
+                        &mut self.device,
+                        &projection,
+                        None,
+                        &mut self.renderer_errors,
+                        &mut self.profile,
+                    );
+                    self.draw_instanced_batch(
+                        &[instance],
+                        VertexArrayKind::Clear,
+                        &BatchTextures::empty(),
+                        stats,
+                    );
+                    if clear_color.is_none() {
+                        self.device.enable_color_write();
+                    }
+                    // Restore the count saved above so the clear quad is not
+                    // reported as a draw call.
+                    stats.total_draw_calls = old_draw_call_count;
+                    self.device.disable_depth();
+                }
+                // Either there is no dirty-rect restriction or quad clears
+                // are not in use: clear through the device, passing the
+                // scissor rect (if any) converted for this draw target.
+                other => {
+                    let scissor_rect = other.map(|rect| {
+                        draw_target.build_scissor_rect(Some(rect))
+                    });
+                    self.device.clear_target(clear_color, Some(1.0), scissor_rect);
+                }
+            };
+            self.device.disable_depth_write();
+        }
+
+        match target.kind {
+            // The tile content is drawn from batches.
+            PictureCacheTargetKind::Draw { ref alpha_batch_container } => {
+                self.draw_alpha_batch_container(
+                    alpha_batch_container,
+                    draw_target,
+                    framebuffer_kind,
+                    projection,
+                    render_tasks,
+                    stats,
+                );
+            }
+            // The tile content is copied from the output of another task.
+            PictureCacheTargetKind::Blit { task_id, sub_rect_offset } => {
+                let src_task = &render_tasks[task_id];
+                let (texture, _swizzle) = self.texture_resolver
+                    .resolve(&src_task.get_texture_source())
+                    .expect("BUG: invalid source texture");
+
+                let src_task_rect = src_task.get_target_rect();
+
+                // Source rect: starts `sub_rect_offset` into the source
+                // task's rect and has the size of the dirty rect.
+                let p0 = src_task_rect.min + sub_rect_offset;
+                let p1 = p0 + target.dirty_rect.size();
+                let src_rect = DeviceIntRect::new(p0, p1);
+
+                // TODO(gw): In future, it'd be tidier to have the draw target offset
+                //           for DC surfaces handled by `blit_render_target`. However,
+                //           for now they are only ever written to here.
+                let target_rect = target
+                    .dirty_rect
+                    .translate(draw_target.offset().to_vector())
+                    .cast_unit();
+
+                self.device.blit_render_target(
+                    ReadTarget::from_texture(texture),
+                    src_rect.cast_unit(),
+                    draw_target,
+                    target_rect,
+                    TextureFilter::Nearest,
+                );
+            }
+        }
+
+        self.device.invalidate_depth_target();
+    }
+
+    /// Draw an alpha batch container into a given draw target. This is used
+    /// by both color and picture cache target kinds.
+    ///
+    /// Opaque batches are drawn first, in reverse order, with depth testing
+    /// and depth writes enabled. Alpha batches follow in order with blending
+    /// enabled. If the container has a task scissor rect, scissoring is
+    /// enabled on entry and disabled again before returning. Depth testing is
+    /// always disabled on return.
+    ///
+    /// * `alpha_batch_container` - the opaque and alpha batches to draw.
+    /// * `draw_target` - used to build the scissor rect and handed to the
+    ///   mix-blend readback.
+    /// * `framebuffer_kind` - passed to the blend state helpers; overdraw
+    ///   visualisation only applies when this is `FramebufferKind::Main`.
+    /// * `projection` - projection bound with every shader.
+    /// * `render_tasks` - task graph used to look up mix-blend readback and
+    ///   backdrop tasks.
+    /// * `stats` - renderer statistics handed to the draw calls.
+    fn draw_alpha_batch_container(
+        &mut self,
+        alpha_batch_container: &AlphaBatchContainer,
+        draw_target: DrawTarget,
+        framebuffer_kind: FramebufferKind,
+        projection: &default::Transform3D<f32>,
+        render_tasks: &RenderTaskGraph,
+        stats: &mut RendererStats,
+    ) {
+        let uses_scissor = alpha_batch_container.task_scissor_rect.is_some();
+
+        if uses_scissor {
+            self.device.enable_scissor();
+            let scissor_rect = draw_target.build_scissor_rect(
+                alpha_batch_container.task_scissor_rect,
+            );
+            self.device.set_scissor_rect(scissor_rect)
+        }
+
+        // Opaque pass. It is skipped when there is nothing to draw or when
+        // the DISABLE_OPAQUE_PASS debug flag is set.
+        if !alpha_batch_container.opaque_batches.is_empty()
+            && !self.debug_flags.contains(DebugFlags::DISABLE_OPAQUE_PASS) {
+            let _gl = self.gpu_profiler.start_marker("opaque batches");
+            let opaque_sampler = self.gpu_profiler.start_sampler(GPU_SAMPLER_TAG_OPAQUE);
+            self.set_blend(false, framebuffer_kind);
+            //Note: depth equality is needed for split planes
+            self.device.enable_depth(DepthFunction::LessEqual);
+            self.device.enable_depth_write();
+
+            // Draw opaque batches front-to-back for maximum
+            // z-buffer efficiency!
+            for batch in alpha_batch_container
+                .opaque_batches
+                .iter()
+                .rev()
+                {
+                    if should_skip_batch(&batch.key.kind, self.debug_flags) {
+                        continue;
+                    }
+
+                    self.shaders.borrow_mut()
+                        .get(&batch.key, batch.features, self.debug_flags, &self.device)
+                        .bind(
+                            &mut self.device, projection, None,
+                            &mut self.renderer_errors,
+                            &mut self.profile,
+                        );
+
+                    let _timer = self.gpu_profiler.start_timer(batch.key.kind.sampler_tag());
+                    self.draw_instanced_batch(
+                        &batch.instances,
+                        VertexArrayKind::Primitive,
+                        &batch.key.textures,
+                        stats
+                    );
+                }
+
+            // Depth testing stays enabled for the alpha pass below; only
+            // depth writes are switched off.
+            self.device.disable_depth_write();
+            self.gpu_profiler.finish_sampler(opaque_sampler);
+        } else {
+            // No opaque pass was drawn, so depth testing is turned off.
+            self.device.disable_depth();
+        }
+
+        // Alpha pass. It is skipped when there is nothing to draw or when
+        // the DISABLE_ALPHA_PASS debug flag is set.
+        if !alpha_batch_container.alpha_batches.is_empty()
+            && !self.debug_flags.contains(DebugFlags::DISABLE_ALPHA_PASS) {
+            let _gl = self.gpu_profiler.start_marker("alpha batches");
+            let transparent_sampler = self.gpu_profiler.start_sampler(GPU_SAMPLER_TAG_TRANSPARENT);
+            self.set_blend(true, framebuffer_kind);
+
+            // Blend mode most recently applied to the device; the blend state
+            // is only changed when a batch's mode differs from it.
+            let mut prev_blend_mode = BlendMode::None;
+            // NOTE(review): presumably a separate handle is cloned so that a
+            // shader can stay borrowed while `&mut self` methods are called
+            // further down - confirm.
+            let shaders_rc = self.shaders.clone();
+
+            for batch in &alpha_batch_container.alpha_batches {
+                if should_skip_batch(&batch.key.kind, self.debug_flags) {
+                    continue;
+                }
+
+                let mut shaders = shaders_rc.borrow_mut();
+                let shader = shaders.get(
+                    &batch.key,
+                    batch.features | BatchFeatures::ALPHA_PASS,
+                    self.debug_flags,
+                    &self.device,
+                );
+
+                if batch.key.blend_mode != prev_blend_mode {
+                    match batch.key.blend_mode {
+                        // Overdraw visualisation takes precedence over the
+                        // batch's own blend mode, but only when drawing to
+                        // the main framebuffer.
+                        _ if self.debug_flags.contains(DebugFlags::SHOW_OVERDRAW) &&
+                            framebuffer_kind == FramebufferKind::Main => {
+                            self.device.set_blend_mode_show_overdraw();
+                        }
+                        BlendMode::None => {
+                            unreachable!("bug: opaque blend in alpha pass");
+                        }
+                        BlendMode::Alpha => {
+                            self.device.set_blend_mode_alpha();
+                        }
+                        BlendMode::PremultipliedAlpha => {
+                            self.device.set_blend_mode_premultiplied_alpha();
+                        }
+                        BlendMode::PremultipliedDestOut => {
+                            self.device.set_blend_mode_premultiplied_dest_out();
+                        }
+                        BlendMode::SubpixelDualSource => {
+                            self.device.set_blend_mode_subpixel_dual_source();
+                        }
+                        BlendMode::SubpixelWithBgColor => {
+                            // Using the three pass "component alpha with font smoothing
+                            // background color" rendering technique:
+                            //
+                            // /webrender/doc/text-rendering.md
+                            //
+                            self.device.set_blend_mode_subpixel_with_bg_color_pass0();
+                            // need to make sure the shader is bound
+                            shader.bind(
+                                &mut self.device,
+                                projection,
+                                None,
+                                &mut self.renderer_errors,
+                                &mut self.profile,
+                            );
+                            self.device.switch_mode(ShaderColorMode::SubpixelWithBgColorPass0 as _);
+                        }
+                        BlendMode::Advanced(mode) => {
+                            // A blend barrier is issued first when advanced
+                            // blend barriers are enabled on this renderer.
+                            if self.enable_advanced_blend_barriers {
+                                self.device.gl().blend_barrier_khr();
+                            }
+                            self.device.set_blend_mode_advanced(mode);
+                        }
+                        BlendMode::MultiplyDualSource => {
+                            self.device.set_blend_mode_multiply_dual_source();
+                        }
+                        BlendMode::Screen => {
+                            self.device.set_blend_mode_screen();
+                        }
+                        BlendMode::Exclusion => {
+                            self.device.set_blend_mode_exclusion();
+                        }
+                        BlendMode::PlusLighter => {
+                            self.device.set_blend_mode_plus_lighter();
+                        }
+                    }
+                    prev_blend_mode = batch.key.blend_mode;
+                }
+
+                // Handle special case readback for composites.
+                if let BatchKind::Brush(BrushBatchKind::MixBlend { task_id, backdrop_id }) = batch.key.kind {
+                    // composites can't be grouped together because
+                    // they may overlap and affect each other.
+                    debug_assert_eq!(batch.instances.len(), 1);
+                    self.handle_readback_composite(
+                        draw_target,
+                        uses_scissor,
+                        &render_tasks[task_id],
+                        &render_tasks[backdrop_id],
+                    );
+                }
+
+                let _timer = self.gpu_profiler.start_timer(batch.key.kind.sampler_tag());
+                shader.bind(
+                    &mut self.device,
+                    projection,
+                    None,
+                    &mut self.renderer_errors,
+                    &mut self.profile,
+                );
+
+                self.draw_instanced_batch(
+                    &batch.instances,
+                    VertexArrayKind::Primitive,
+                    &batch.key.textures,
+                    stats
+                );
+
+                // The draw above was pass 0 of the three-pass technique;
+                // passes 1 and 2 follow here.
+                if batch.key.blend_mode == BlendMode::SubpixelWithBgColor {
+                    self.set_blend_mode_subpixel_with_bg_color_pass1(framebuffer_kind);
+                    // re-binding the shader after the blend mode change
+                    shader.bind(
+                        &mut self.device,
+                        projection,
+                        None,
+                        &mut self.renderer_errors,
+                        &mut self.profile,
+                    );
+                    self.device.switch_mode(ShaderColorMode::SubpixelWithBgColorPass1 as _);
+
+                    // When drawing the 2nd and 3rd passes, we know that the VAO, textures etc
+                    // are all set up from the previous draw_instanced_batch call,
+                    // so just issue a draw call here to avoid re-uploading the
+                    // instances and re-binding textures etc.
+                    self.device
+                        .draw_indexed_triangles_instanced_u16(6, batch.instances.len() as i32);
+
+                    self.set_blend_mode_subpixel_with_bg_color_pass2(framebuffer_kind);
+                    // re-binding the shader after the blend mode change
+                    shader.bind(
+                        &mut self.device,
+                        projection,
+                        None,
+                        &mut self.renderer_errors,
+                        &mut self.profile,
+                    );
+                    self.device.switch_mode(ShaderColorMode::SubpixelWithBgColorPass2 as _);
+
+                    self.device
+                        .draw_indexed_triangles_instanced_u16(6, batch.instances.len() as i32);
+                }
+
+                // The multi-pass sequence left the device in the pass 2 blend
+                // state, so forget the tracked mode: the next batch will then
+                // always set its blend mode again, even if it is another
+                // SubpixelWithBgColor batch.
+                if batch.key.blend_mode == BlendMode::SubpixelWithBgColor {
+                    prev_blend_mode = BlendMode::None;
+                }
+            }
+
+            self.set_blend(false, framebuffer_kind);
+            self.gpu_profiler.finish_sampler(transparent_sampler);
+        }
+
+        self.device.disable_depth();
+        if uses_scissor {
+            self.device.disable_scissor();
+        }
+    }
+
+    /// Rasterize any external compositor surfaces that require updating.
+    ///
+    /// Surfaces whose `update_params` is `None` are skipped. Every other
+    /// surface is bound through the native compositor, drawn with a single
+    /// composite instance that covers the whole surface, and unbound again.
+    ///
+    /// Panics if `compositor_config.compositor()` returns `None` while at
+    /// least one surface needs updating.
+    fn update_external_native_surfaces(
+        &mut self,
+        external_surfaces: &[ResolvedExternalSurface],
+        results: &mut RenderResults,
+    ) {
+        if external_surfaces.is_empty() {
+            return;
+        }
+
+        let opaque_sampler = self.gpu_profiler.start_sampler(GPU_SAMPLER_TAG_OPAQUE);
+
+        self.device.disable_depth();
+        self.set_blend(false, FramebufferKind::Main);
+
+        // Only surfaces that carry update parameters need to be redrawn.
+        let pending = external_surfaces
+            .iter()
+            .filter_map(|surface| surface.update_params.map(|params| (surface, params)));
+
+        for (surface, (native_surface_id, surface_size)) in pending {
+            // When updating an external surface, the entire surface rect is used
+            // for all of the draw, dirty, valid and clip rect parameters.
+            let surface_rect = surface_size.into();
+
+            // Bind the native compositor surface to update
+            let tile_id = NativeTileId {
+                surface_id: native_surface_id,
+                x: 0,
+                y: 0,
+            };
+            let surface_info = self.compositor_config
+                .compositor()
+                .unwrap()
+                .bind(tile_id, surface_rect, surface_rect);
+
+            // Bind the native surface to current FBO target
+            let draw_target = DrawTarget::NativeSurface {
+                offset: surface_info.origin,
+                external_fbo_id: surface_info.fbo_id,
+                dimensions: surface_size,
+            };
+            self.device.bind_draw_target(draw_target);
+
+            let projection = Transform3D::ortho(
+                0.0,
+                surface_size.width as f32,
+                0.0,
+                surface_size.height as f32,
+                self.device.ortho_near_plane(),
+                self.device.ortho_far_plane(),
+            );
+
+            // Work out which composite shader format is needed, together with
+            // the textures and the single instance to draw.
+            let (shader_format, textures, instance) = match surface.color_data {
+                ResolvedExternalSurfaceColorData::Yuv {
+                    ref planes, color_space, format, channel_bit_depth, ..
+                } => {
+                    let textures = BatchTextures::composite_yuv(
+                        planes[0].texture,
+                        planes[1].texture,
+                        planes[2].texture,
+                    );
+
+                    // When the texture is an external texture, the UV rect is not known when
+                    // the external surface descriptor is created, because external textures
+                    // are not resolved until the lock() callback is invoked at the start of
+                    // the frame render. To handle this, query the texture resolver for the
+                    // UV rect if it's an external texture, otherwise use the default UV rect.
+                    let uv_rects = [
+                        self.texture_resolver.get_uv_rect(&textures.input.colors[0], planes[0].uv_rect),
+                        self.texture_resolver.get_uv_rect(&textures.input.colors[1], planes[1].uv_rect),
+                        self.texture_resolver.get_uv_rect(&textures.input.colors[2], planes[2].uv_rect),
+                    ];
+
+                    let instance = CompositeInstance::new_yuv(
+                        surface_rect.cast_unit().to_f32(),
+                        surface_rect.to_f32(),
+                        // z-id is not relevant when updating a native compositor surface.
+                        // TODO(gw): Support compositor surfaces without z-buffer, for memory / perf win here.
+                        ZBufferId(0),
+                        color_space,
+                        format,
+                        channel_bit_depth,
+                        uv_rects,
+                        CompositorTransform::identity(),
+                    );
+
+                    (CompositeSurfaceFormat::Yuv, textures, instance)
+                }
+                ResolvedExternalSurfaceColorData::Rgb { ref plane, .. } => {
+                    let textures = BatchTextures::composite_rgb(plane.texture);
+                    let uv_rect = self.texture_resolver.get_uv_rect(
+                        &textures.input.colors[0],
+                        plane.uv_rect,
+                    );
+                    let instance = CompositeInstance::new_rgb(
+                        surface_rect.cast_unit().to_f32(),
+                        surface_rect.to_f32(),
+                        PremultipliedColorF::WHITE,
+                        ZBufferId(0),
+                        uv_rect,
+                        CompositorTransform::identity(),
+                    );
+
+                    (CompositeSurfaceFormat::Rgba, textures, instance)
+                }
+            };
+
+            // Bind the composite shader appropriate for the surface format
+            // and texture kind.
+            self.shaders
+                .borrow_mut()
+                .get_composite_shader(
+                    shader_format,
+                    surface.image_buffer_kind,
+                    CompositeFeatures::empty(),
+                )
+                .bind(
+                    &mut self.device,
+                    &projection,
+                    None,
+                    &mut self.renderer_errors,
+                    &mut self.profile,
+                );
+
+            self.draw_instanced_batch(
+                &[instance],
+                VertexArrayKind::Composite,
+                &textures,
+                &mut results.stats,
+            );
+
+            self.compositor_config
+                .compositor()
+                .unwrap()
+                .unbind();
+        }
+
+        self.gpu_profiler.finish_sampler(opaque_sampler);
+    }
+
+    /// Draw a list of tiles to the framebuffer, batching consecutive tiles that can share a draw call.
+    fn draw_tile_list<'a, I: Iterator<Item = &'a occlusion::Item>>(
+        &mut self,
+        tiles_iter: I,
+        composite_state: &CompositeState,
+        external_surfaces: &[ResolvedExternalSurface],
+        projection: &default::Transform3D<f32>,
+        stats: &mut RendererStats,
+    ) {
+        let mut current_shader_params = ( // (format, buffer kind, features, texture-size argument for bind())
+            CompositeSurfaceFormat::Rgba,
+            ImageBufferKind::Texture2D,
+            CompositeFeatures::empty(),
+            None,
+        );
+        let mut current_textures = BatchTextures::empty();
+        let mut instances = Vec::new(); // instances accumulated for the batch currently being built
+
+        self.shaders // Bind the shader matching the initial params before iterating the tiles.
+            .borrow_mut()
+            .get_composite_shader(
+                current_shader_params.0,
+                current_shader_params.1,
+                current_shader_params.2,
+            ).bind(
+                &mut self.device,
+                projection,
+                None,
+                &mut self.renderer_errors,
+                &mut self.profile,
+            );
+
+        for item in tiles_iter {
+            let tile = &composite_state.tiles[item.key];
+
+            let clip_rect = item.rectangle; // rectangle stored on the occlusion item, used as the instance clip
+            let tile_rect = tile.local_rect;
+            let transform = composite_state.get_device_transform(tile.transform_index).into();
+
+            // Work out the draw params (instance, textures, shader params) based on the tile surface
+            let (instance, textures, shader_params) = match tile.surface {
+                CompositeTileSurface::Color { color } => {
+                    let dummy = TextureSource::Dummy; // solid color tiles bind the dummy texture source
+                    let image_buffer_kind = dummy.image_buffer_kind();
+                    let instance = CompositeInstance::new(
+                        tile_rect,
+                        clip_rect,
+                        color.premultiplied(),
+                        tile.z_id,
+                        transform,
+                    );
+                    let features = instance.get_rgb_features();
+                    (
+                        instance,
+                        BatchTextures::composite_rgb(dummy),
+                        (CompositeSurfaceFormat::Rgba, image_buffer_kind, features, None),
+                    )
+                }
+                CompositeTileSurface::Texture { surface: ResolvedSurfaceTexture::TextureCache { texture } } => {
+                    let instance = CompositeInstance::new(
+                        tile_rect,
+                        clip_rect,
+                        PremultipliedColorF::WHITE,
+                        tile.z_id,
+                        transform,
+                    );
+                    let features = instance.get_rgb_features();
+                    (
+                        instance,
+                        BatchTextures::composite_rgb(texture),
+                        (
+                            CompositeSurfaceFormat::Rgba,
+                            ImageBufferKind::Texture2D,
+                            features,
+                            None,
+                        ),
+                    )
+                }
+                CompositeTileSurface::ExternalSurface { external_surface_index } => {
+                    let surface = &external_surfaces[external_surface_index.0];
+
+                    match surface.color_data {
+                        ResolvedExternalSurfaceColorData::Yuv{ ref planes, color_space, format, channel_bit_depth, .. } => {
+                            let textures = BatchTextures::composite_yuv(
+                                planes[0].texture,
+                                planes[1].texture,
+                                planes[2].texture,
+                            );
+
+                            // When the texture is an external texture, the UV rect is not known when
+                            // the external surface descriptor is created, because external textures
+                            // are not resolved until the lock() callback is invoked at the start of
+                            // the frame render. To handle this, query the texture resolver for the
+                            // UV rect if it's an external texture, otherwise use the default UV rect.
+                            let uv_rects = [
+                                self.texture_resolver.get_uv_rect(&textures.input.colors[0], planes[0].uv_rect),
+                                self.texture_resolver.get_uv_rect(&textures.input.colors[1], planes[1].uv_rect),
+                                self.texture_resolver.get_uv_rect(&textures.input.colors[2], planes[2].uv_rect),
+                            ];
+
+                            (
+                                CompositeInstance::new_yuv(
+                                    tile_rect,
+                                    clip_rect,
+                                    tile.z_id,
+                                    color_space,
+                                    format,
+                                    channel_bit_depth,
+                                    uv_rects,
+                                    transform,
+                                ),
+                                textures,
+                                (
+                                    CompositeSurfaceFormat::Yuv,
+                                    surface.image_buffer_kind,
+                                    CompositeFeatures::empty(),
+                                    None
+                                ),
+                            )
+                        },
+                        ResolvedExternalSurfaceColorData::Rgb { ref plane, .. } => {
+                            let uv_rect = self.texture_resolver.get_uv_rect(&plane.texture, plane.uv_rect);
+                            let instance = CompositeInstance::new_rgb(
+                                tile_rect,
+                                clip_rect,
+                                PremultipliedColorF::WHITE,
+                                tile.z_id,
+                                uv_rect,
+                                transform,
+                            );
+                            let features = instance.get_rgb_features();
+                            (
+                                instance,
+                                BatchTextures::composite_rgb(plane.texture),
+                                (
+                                    CompositeSurfaceFormat::Rgba,
+                                    surface.image_buffer_kind,
+                                    features,
+                                    Some(self.texture_resolver.get_texture_size(&plane.texture).to_f32()), // only this path passes a texture size to bind()
+                                ),
+                            )
+                        },
+                    }
+                }
+                CompositeTileSurface::Clear => {
+                    let dummy = TextureSource::Dummy; // clear tiles also bind the dummy texture source
+                    let image_buffer_kind = dummy.image_buffer_kind();
+                    let instance = CompositeInstance::new(
+                        tile_rect,
+                        clip_rect,
+                        PremultipliedColorF::BLACK,
+                        tile.z_id,
+                        transform,
+                    );
+                    let features = instance.get_rgb_features();
+                    (
+                        instance,
+                        BatchTextures::composite_rgb(dummy),
+                        (CompositeSurfaceFormat::Rgba, image_buffer_kind, features, None),
+                    )
+                }
+                CompositeTileSurface::Texture { surface: ResolvedSurfaceTexture::Native { .. } } => {
+                    unreachable!("bug: found native surface in simple composite path");
+                }
+            };
+
+            // Flush batch if shader params or textures changed
+            let flush_batch = !current_textures.is_compatible_with(&textures) ||
+                shader_params != current_shader_params;
+
+            if flush_batch { // pending instances are drawn with the previous tile's textures
+                if !instances.is_empty() {
+                    self.draw_instanced_batch(
+                        &instances,
+                        VertexArrayKind::Composite,
+                        &current_textures,
+                        stats,
+                    );
+                    instances.clear();
+                }
+            }
+
+            if shader_params != current_shader_params { // rebind only when the shader params changed
+                self.shaders
+                    .borrow_mut()
+                    .get_composite_shader(shader_params.0, shader_params.1, shader_params.2)
+                    .bind(
+                        &mut self.device,
+                        projection,
+                        shader_params.3,
+                        &mut self.renderer_errors,
+                        &mut self.profile,
+                    );
+
+                current_shader_params = shader_params;
+            }
+
+            current_textures = textures; // always track the most recent tile's textures
+
+            // Add instance to current batch
+            instances.push(instance);
+        }
+
+        // Flush the last batch
+        if !instances.is_empty() {
+            self.draw_instanced_batch(
+                &instances,
+                VertexArrayKind::Composite,
+                &current_textures,
+                stats,
+            );
+        }
+    }
+
+    /// Composite picture cache tiles into the framebuffer. This is currently
+    /// the only way that picture cache tiles get drawn. In future, the tiles
+    /// will often be handed to the OS compositor, and this method will be
+    /// rarely used.
+    fn composite_simple(
+        &mut self,
+        composite_state: &CompositeState,
+        draw_target: DrawTarget,
+        projection: &default::Transform3D<f32>,
+        results: &mut RenderResults,
+        partial_present_mode: Option<PartialPresentMode>,
+    ) {
+        let _gm = self.gpu_profiler.start_marker("framebuffer");
+        let _timer = self.gpu_profiler.start_timer(GPU_TAG_COMPOSITE);
+
+        self.device.bind_draw_target(draw_target);
+        self.device.disable_depth_write();
+        self.device.disable_depth();
+
+        // If using KHR_partial_update, call eglSetDamageRegion.
+        // This must be called exactly once per frame, and prior to any rendering to the main
+        // framebuffer. Additionally, on Mali-G77 we encountered rendering issues when calling
+        // this earlier in the frame, during offscreen render passes. So call it now, immediately
+        // before rendering to the main framebuffer. See bug 1685276 for details.
+        if let Some(partial_present) = self.compositor_config.partial_present() {
+            if let Some(PartialPresentMode::Single { dirty_rect }) = partial_present_mode {
+                partial_present.set_buffer_damage_region(&[dirty_rect.to_i32()]);
+            }
+        }
+
+        let cap = composite_state.tiles.len(); // capacity hint: one entry per tile
+
+        let mut occlusion = occlusion::FrontToBackBuilder::with_capacity(cap, cap);
+        let mut clear_tiles = Vec::new(); // clear tiles bypass the occlusion builder and get their own pass
+
+        for (idx, tile) in composite_state.tiles.iter().enumerate() {
+            // Clear tiles overwrite whatever is under them, so they are treated as opaque.
+            let is_opaque = tile.kind != TileKind::Alpha;
+
+            let device_tile_box = composite_state.get_device_rect(
+                &tile.local_rect,
+                tile.transform_index
+            );
+
+            // Determine a clip rect to apply to this tile, depending on what
+            // the partial present mode is.
+            let partial_clip_rect = match partial_present_mode {
+                Some(PartialPresentMode::Single { dirty_rect }) => dirty_rect,
+                None => device_tile_box,
+            };
+
+            // Simple compositor needs the valid rect in device space to match clip rect
+            let device_valid_rect = composite_state
+                .get_device_rect(&tile.local_valid_rect, tile.transform_index);
+
+            let rect = device_tile_box // intersection of tile box, tile clip, partial-present clip and valid rect
+                .intersection_unchecked(&tile.device_clip_rect)
+                .intersection_unchecked(&partial_clip_rect)
+                .intersection_unchecked(&device_valid_rect);
+
+            if rect.is_empty() { // nothing of this tile would be drawn
+                continue;
+            }
+
+            if tile.kind == TileKind::Clear {
+                // Clear tiles are specific to how we render the window buttons on
+                // Windows 8. They clobber what's under them so they can be treated as opaque,
+                // but require a different blend state so they will be rendered after the opaque
+                // tiles and before transparent ones.
+                clear_tiles.push(occlusion::Item { rectangle: rect, key: idx });
+                continue;
+            }
+
+            occlusion.add(&rect, is_opaque, idx);
+        }
+
+        // Clear the framebuffer
+        let clear_color = Some(self.clear_color.to_array());
+
+        match partial_present_mode {
+            Some(PartialPresentMode::Single { dirty_rect }) => {
+                // There is no need to clear if the dirty rect is occluded. Additionally,
+                // on Mali-G77 we have observed artefacts when calling glClear (even with
+                // the empty scissor rect set) after calling eglSetDamageRegion with an
+                // empty damage region. So avoid clearing in that case. See bug 1709548.
+                if !dirty_rect.is_empty() && occlusion.test(&dirty_rect) { // NOTE(review): presumably test() is true when the rect is not fully occluded - confirm
+                    // We have a single dirty rect, so clear only that
+                    self.device.clear_target(clear_color,
+                                             None,
+                                             Some(draw_target.to_framebuffer_rect(dirty_rect.to_i32())));
+                }
+            }
+            None => {
+                // Partial present is disabled, so clear the entire framebuffer
+                self.device.clear_target(clear_color,
+                                         None,
+                                         None);
+            }
+        }
+
+        // We are only interested in tiles backed with actual cached pixels so we don't
+        // count clear tiles here.
+        let num_tiles = composite_state.tiles
+            .iter()
+            .filter(|tile| tile.kind != TileKind::Clear).count();
+        self.profile.set(profiler::PICTURE_TILES, num_tiles);
+
+        if !occlusion.opaque_items().is_empty() { // Draw opaque tiles with blending disabled
+            let opaque_sampler = self.gpu_profiler.start_sampler(GPU_SAMPLER_TAG_OPAQUE);
+            self.set_blend(false, FramebufferKind::Main);
+            self.draw_tile_list(
+                occlusion.opaque_items().iter(),
+                &composite_state,
+                &composite_state.external_surfaces,
+                projection,
+                &mut results.stats,
+            );
+            self.gpu_profiler.finish_sampler(opaque_sampler);
+        }
+
+        if !clear_tiles.is_empty() { // Draw clear tiles with the premultiplied dest-out blend mode
+            let transparent_sampler = self.gpu_profiler.start_sampler(GPU_SAMPLER_TAG_TRANSPARENT);
+            self.set_blend(true, FramebufferKind::Main);
+            self.device.set_blend_mode_premultiplied_dest_out();
+            self.draw_tile_list(
+                clear_tiles.iter(),
+                &composite_state,
+                &composite_state.external_surfaces,
+                projection,
+                &mut results.stats,
+            );
+            self.gpu_profiler.finish_sampler(transparent_sampler);
+        }
+
+        // Draw alpha tiles
+        if !occlusion.alpha_items().is_empty() {
+            let transparent_sampler = self.gpu_profiler.start_sampler(GPU_SAMPLER_TAG_TRANSPARENT);
+            self.set_blend(true, FramebufferKind::Main);
+            self.set_blend_mode_premultiplied_alpha(FramebufferKind::Main);
+            self.draw_tile_list(
+                occlusion.alpha_items().iter().rev(), // iterate in reverse of the builder's stored order
+                &composite_state,
+                &composite_state.external_surfaces,
+                projection,
+                &mut results.stats,
+            );
+            self.gpu_profiler.finish_sampler(transparent_sampler);
+        }
+    }
+
+    /// Render one color target: bind and clear it, then run resolves, blits,
+    /// blurs, scalings, SVG filters and alpha batch containers in that order.
+    /// The depth target is invalidated at the end when `clear_depth` is set.
+    fn draw_color_target(
+        &mut self,
+        draw_target: DrawTarget,
+        target: &ColorRenderTarget,
+        clear_depth: Option<f32>,
+        render_tasks: &RenderTaskGraph,
+        projection: &default::Transform3D<f32>,
+        stats: &mut RendererStats,
+    ) {
+        profile_scope!("draw_color_target");
+
+        self.profile.inc(profiler::COLOR_PASSES);
+        let _gm = self.gpu_profiler.start_marker("color target");
+
+        // sanity check for the depth buffer
+        if let DrawTarget::Texture { with_depth, .. } = draw_target {
+            assert!(with_depth >= target.needs_depth()); // i.e. needs_depth() implies with_depth (assuming both are bool)
+        }
+
+        let framebuffer_kind = if draw_target.is_default() {
+            FramebufferKind::Main
+        } else {
+            FramebufferKind::Other
+        };
+
+        {
+            let _timer = self.gpu_profiler.start_timer(GPU_TAG_SETUP_TARGET);
+            self.device.bind_draw_target(draw_target);
+            self.device.disable_depth();
+            self.set_blend(false, framebuffer_kind);
+
+            if clear_depth.is_some() { // depth writes are enabled only around the clear below
+                self.device.enable_depth_write();
+            }
+
+            let clear_color = target
+                .clear_color
+                .map(|color| color.to_array());
+
+            let clear_rect = match draw_target { // None means the clear is issued without a rect
+                DrawTarget::NativeSurface { .. } => {
+                    unreachable!("bug: native compositor surface in child target");
+                }
+                DrawTarget::Default { rect, total_size, .. } if rect.min == FramebufferIntPoint::zero() && rect.size() == total_size => {
+                    // whole screen is covered, no need for scissor
+                    None
+                }
+                DrawTarget::Default { rect, .. } => {
+                    Some(rect)
+                }
+                DrawTarget::Texture { .. } if self.enable_clear_scissor => {
+                    // TODO(gw): Applying a scissor rect and minimal clear here
+                    // is a very large performance win on the Intel and nVidia
+                    // GPUs that I have tested with. It's possible it may be a
+                    // performance penalty on other GPU types - we should test this
+                    // and consider different code paths.
+                    //
+                    // Note: The above measurements were taken when render
+                    // target slices were minimum 2048x2048. Now that we size
+                    // them adaptively, this may be less of a win (except perhaps
+                    // on a mostly-unused last slice of a large texture array).
+                    Some(draw_target.to_framebuffer_rect(target.used_rect))
+                }
+                DrawTarget::Texture { .. } | DrawTarget::External { .. } => {
+                    None
+                }
+            };
+
+            self.device.clear_target(
+                clear_color,
+                clear_depth,
+                clear_rect,
+            );
+
+            if clear_depth.is_some() {
+                self.device.disable_depth_write();
+            }
+        }
+
+        // Handle any resolves from parent pictures to this target
+        self.handle_resolves(
+            &target.resolve_ops,
+            render_tasks,
+            draw_target,
+        );
+
+        // Handle any blits from the texture cache to this target.
+        self.handle_blits(
+            &target.blits,
+            render_tasks,
+            draw_target,
+        );
+
+        // Draw any blurs for this target. Blurs are rendered as a standard
+        // 2-pass separable implementation. TODO(gw): In the future, consider
+        // having fast path blur shaders for common blur radii with fixed weights.
+        if !target.vertical_blurs.is_empty() || !target.horizontal_blurs.is_empty() {
+            let _timer = self.gpu_profiler.start_timer(GPU_TAG_BLUR);
+
+            self.set_blend(false, framebuffer_kind);
+            self.shaders.borrow_mut().cs_blur_rgba8
+                .bind(&mut self.device, projection, None, &mut self.renderer_errors, &mut self.profile);
+
+            if !target.vertical_blurs.is_empty() {
+                self.draw_blurs(
+                    &target.vertical_blurs,
+                    stats,
+                );
+            }
+
+            if !target.horizontal_blurs.is_empty() {
+                self.draw_blurs(
+                    &target.horizontal_blurs,
+                    stats,
+                );
+            }
+        }
+
+        self.handle_scaling(
+            &target.scalings,
+            projection,
+            stats,
+        );
+
+        for (ref textures, ref filters) in &target.svg_filters {
+            self.handle_svg_filters(
+                textures,
+                filters,
+                projection,
+                stats,
+            );
+        }
+
+        for alpha_batch_container in &target.alpha_batch_containers {
+            self.draw_alpha_batch_container(
+                alpha_batch_container,
+                draw_target,
+                framebuffer_kind,
+                projection,
+                render_tasks,
+                stats,
+            );
+        }
+
+        if clear_depth.is_some() { // depth was used by this target, so it can be invalidated now
+            self.device.invalidate_depth_target();
+        }
+    }
+
+    /// Submit one instanced blur draw call per source texture.
+    ///
+    /// Each map key is bound as the batch's color texture for its instance list.
+    fn draw_blurs(
+        &mut self,
+        blurs: &FastHashMap<TextureSource, Vec<BlurInstance>>,
+        stats: &mut RendererStats,
+    ) {
+        for (source, instances) in blurs.iter() {
+            let batch_textures = BatchTextures::composite_rgb(*source);
+            self.draw_instanced_batch(
+                instances,
+                VertexArrayKind::Blur,
+                &batch_textures,
+                stats,
+            );
+        }
+    }
+
+    /// Draw all the instances in a clip batcher list to the current target.
+    fn draw_clip_batch_list(
+        &mut self,
+        list: &ClipBatchList,
+        draw_target: &DrawTarget,
+        projection: &default::Transform3D<f32>,
+        stats: &mut RendererStats,
+    ) {
+        if self.debug_flags.contains(DebugFlags::DISABLE_CLIP_MASKS) { // debug flag: skip all clip mask drawing
+            return;
+        }
+
+        // draw rounded cornered rectangles (slow-path shader first, then fast-path shader)
+        if !list.slow_rectangles.is_empty() {
+            let _gm2 = self.gpu_profiler.start_marker("slow clip rectangles");
+            self.shaders.borrow_mut().cs_clip_rectangle_slow.bind(
+                &mut self.device,
+                projection,
+                None,
+                &mut self.renderer_errors,
+                &mut self.profile,
+            );
+            self.draw_instanced_batch(
+                &list.slow_rectangles,
+                VertexArrayKind::ClipRect,
+                &BatchTextures::empty(),
+                stats,
+            );
+        }
+        if !list.fast_rectangles.is_empty() { // same vertex array kind as above, different shader
+            let _gm2 = self.gpu_profiler.start_marker("fast clip rectangles");
+            self.shaders.borrow_mut().cs_clip_rectangle_fast.bind(
+                &mut self.device,
+                projection,
+                None,
+                &mut self.renderer_errors,
+                &mut self.profile,
+            );
+            self.draw_instanced_batch(
+                &list.fast_rectangles,
+                VertexArrayKind::ClipRect,
+                &BatchTextures::empty(),
+                stats,
+            );
+        }
+
+        // draw box-shadow clips, one batch per mask texture
+        for (mask_texture_id, items) in list.box_shadows.iter() {
+            let _gm2 = self.gpu_profiler.start_marker("box-shadows");
+            let textures = BatchTextures::composite_rgb(*mask_texture_id);
+            self.shaders.borrow_mut().cs_clip_box_shadow
+                .bind(&mut self.device, projection, None, &mut self.renderer_errors, &mut self.profile);
+            self.draw_instanced_batch(
+                items,
+                VertexArrayKind::ClipBoxShadow,
+                &textures,
+                stats,
+            );
+        }
+
+        // draw image masks, one batch per (mask texture, optional clip rect) key
+        let mut using_scissor = false; // tracks whether this loop currently has the scissor test enabled
+        for ((mask_texture_id, clip_rect), items) in list.images.iter() {
+            let _gm2 = self.gpu_profiler.start_marker("clip images");
+            // Some image masks may require scissoring to ensure they don't draw
+            // outside their task's target bounds. Axis-aligned primitives will
+            // be clamped inside the shader and should not require scissoring.
+            // TODO: We currently assume scissor state is off by default for
+            // alpha targets here, but in the future we may want to track the
+            // current scissor state so that this can be properly saved and
+            // restored here.
+            if let Some(clip_rect) = clip_rect {
+                if !using_scissor {
+                    self.device.enable_scissor();
+                    using_scissor = true;
+                }
+                let scissor_rect = draw_target.build_scissor_rect(Some(*clip_rect));
+                self.device.set_scissor_rect(scissor_rect);
+            } else if using_scissor { // this batch has no clip rect: turn scissoring back off
+                self.device.disable_scissor();
+                using_scissor = false;
+            }
+            let textures = BatchTextures::composite_rgb(*mask_texture_id);
+            self.shaders.borrow_mut().cs_clip_image
+                .bind(&mut self.device, projection, None, &mut self.renderer_errors, &mut self.profile);
+            self.draw_instanced_batch(
+                items,
+                VertexArrayKind::ClipImage,
+                &textures,
+                stats,
+            );
+        }
+        if using_scissor { // leave scissoring disabled on exit
+            self.device.disable_scissor();
+        }
+    }
+
+    fn draw_alpha_target(
+        &mut self,
+        draw_target: DrawTarget,
+        target: &AlphaRenderTarget,
+        projection: &default::Transform3D<f32>,
+        render_tasks: &RenderTaskGraph,
+        stats: &mut RendererStats,
+    ) {
+        profile_scope!("draw_alpha_target");
+
+        self.profile.inc(profiler::ALPHA_PASSES);
+        let _gm = self.gpu_profiler.start_marker("alpha target");
+        let alpha_sampler = self.gpu_profiler.start_sampler(GPU_SAMPLER_TAG_ALPHA);
+
+        {
+            let _timer = self.gpu_profiler.start_timer(GPU_TAG_SETUP_TARGET);
+            self.device.bind_draw_target(draw_target);
+            self.device.disable_depth();
+            self.device.disable_depth_write();
+            self.set_blend(false, FramebufferKind::Other);
+
+            let zero_color = [0.0, 0.0, 0.0, 0.0];
+            let one_color = [1.0, 1.0, 1.0, 1.0];
+
+            // On some Adreno 4xx devices we have seen render tasks to alpha targets have no
+            // effect unless the target is fully cleared prior to rendering. See bug 1714227.
+            if self.device.get_capabilities().requires_alpha_target_full_clear {
+                self.device.clear_target(
+                    Some(zero_color),
+                    None,
+                    None,
+                );
+            }
+
+            // On some Mali-T devices we have observed crashes in subsequent draw calls
+            // immediately after clearing the alpha render target regions with glClear().
+            // Using the shader to clear the regions avoids the crash. See bug 1638593.
+            if self.clear_alpha_targets_with_quads
+                && !(target.zero_clears.is_empty() && target.one_clears.is_empty())
+            {
+                let zeroes = target.zero_clears
+                    .iter()
+                    .map(|task_id| {
+                        let rect = render_tasks[*task_id].get_target_rect().to_f32();
+                        ClearInstance {
+                            rect: [
+                                rect.min.x, rect.min.y,
+                                rect.max.x, rect.max.y,
+                            ],
+                            color: zero_color,
+                        }
+                    });
+
+                let ones = target.one_clears
+                    .iter()
+                    .map(|task_id| {
+                        let rect = render_tasks[*task_id].get_target_rect().to_f32();
+                        ClearInstance {
+                            rect: [
+                                rect.min.x, rect.min.y,
+                                rect.max.x, rect.max.y,
+                            ],
+                            color: one_color,
+                        }
+                    });
+
+                let instances = zeroes.chain(ones).collect::<Vec<_>>();
+                self.shaders.borrow_mut().ps_clear.bind(
+                    &mut self.device,
+                    &projection,
+                    None,
+                    &mut self.renderer_errors,
+                    &mut self.profile,
+                );
+                self.draw_instanced_batch(
+                    &instances,
+                    VertexArrayKind::Clear,
+                    &BatchTextures::empty(),
+                    stats,
+                );
+            } else {
+                // TODO(gw): Applying a scissor rect and minimal clear here
+                // is a very large performance win on the Intel and nVidia
+                // GPUs that I have tested with. It's possible it may be a
+                // performance penalty on other GPU types - we should test this
+                // and consider different code paths.
+                for &task_id in &target.zero_clears {
+                    let rect = render_tasks[task_id].get_target_rect();
+                    self.device.clear_target(
+                        Some(zero_color),
+                        None,
+                        Some(draw_target.to_framebuffer_rect(rect)),
+                    );
+                }
+
+                for &task_id in &target.one_clears {
+                    let rect = render_tasks[task_id].get_target_rect();
+                    self.device.clear_target(
+                        Some(one_color),
+                        None,
+                        Some(draw_target.to_framebuffer_rect(rect)),
+                    );
+                }
+            }
+        }
+
+        // Draw any blurs for this target.
+        // Blurs are rendered as a standard 2-pass
+        // separable implementation.
+        // TODO(gw): In the future, consider having
+        //           fast path blur shaders for common
+        //           blur radii with fixed weights.
+        if !target.vertical_blurs.is_empty() || !target.horizontal_blurs.is_empty() {
+            let _timer = self.gpu_profiler.start_timer(GPU_TAG_BLUR);
+
+            self.shaders.borrow_mut().cs_blur_a8
+                .bind(&mut self.device, projection, None, &mut self.renderer_errors, &mut self.profile);
+
+            if !target.vertical_blurs.is_empty() {
+                self.draw_blurs(
+                    &target.vertical_blurs,
+                    stats,
+                );
+            }
+
+            if !target.horizontal_blurs.is_empty() {
+                self.draw_blurs(
+                    &target.horizontal_blurs,
+                    stats,
+                );
+            }
+        }
+
+        self.handle_scaling(
+            &target.scalings,
+            projection,
+            stats,
+        );
+
+        // Draw the clip items into the tiled alpha mask.
+        {
+            let _timer = self.gpu_profiler.start_timer(GPU_TAG_CACHE_CLIP);
+
+            // TODO(gw): Consider grouping multiple clip masks per shader
+            //           invocation here to reduce memory bandwidth further?
+
+            // Draw the primary clip mask - since this is the first mask
+            // for the task, we can disable blending, knowing that it will
+            // overwrite every pixel in the mask area.
+            self.set_blend(false, FramebufferKind::Other);
+            self.draw_clip_batch_list(
+                &target.clip_batcher.primary_clips,
+                &draw_target,
+                projection,
+                stats,
+            );
+
+            // switch to multiplicative blending for secondary masks, using
+            // multiplicative blending to accumulate clips into the mask.
+            self.set_blend(true, FramebufferKind::Other);
+            self.set_blend_mode_multiply(FramebufferKind::Other);
+            self.draw_clip_batch_list(
+                &target.clip_batcher.secondary_clips,
+                &draw_target,
+                projection,
+                stats,
+            );
+        }
+
+        self.gpu_profiler.finish_sampler(alpha_sampler);
+    }
+
+    /// Renders all work queued for a single texture cache render target.
+    ///
+    /// The texture identified by `texture` is looked up in the texture
+    /// resolver's cache map and bound as the draw target. The following
+    /// steps then run in order, each skipped when its list is empty:
+    ///
+    /// 1. clear the rects in `target.clears` to transparent black,
+    /// 2. perform the blits in `target.blits`,
+    /// 3. draw solid and complex border segments (premultiplied alpha blend),
+    /// 4. draw line decorations (premultiplied alpha blend),
+    /// 5. draw fast-linear, linear, radial and conic gradients (no blend),
+    /// 6. draw horizontal blurs with the shader matching `target.target_kind`.
+    ///
+    /// `render_tasks` is only forwarded to `handle_blits`; `stats` is forwarded
+    /// to every draw call.
+    fn draw_texture_cache_target(
+        &mut self,
+        texture: &CacheTextureId,
+        target: &TextureCacheRenderTarget,
+        render_tasks: &RenderTaskGraph,
+        stats: &mut RendererStats,
+    ) {
+        profile_scope!("draw_texture_cache_target");
+
+        // Cache targets are drawn without depth testing/writing, and start
+        // with blending disabled.
+        self.device.disable_depth();
+        self.device.disable_depth_write();
+
+        self.set_blend(false, FramebufferKind::Other);
+
+        let texture = &self.texture_resolver.texture_cache_map[texture].texture;
+        let target_size = texture.get_dimensions();
+
+        // Orthographic projection covering the full dimensions of the texture.
+        let projection = Transform3D::ortho(
+            0.0,
+            target_size.width as f32,
+            0.0,
+            target_size.height as f32,
+            self.device.ortho_near_plane(),
+            self.device.ortho_far_plane(),
+        );
+
+        // NOTE(review): the `false` argument presumably means "no depth
+        // attachment" - confirm against DrawTarget::from_texture.
+        let draw_target = DrawTarget::from_texture(
+            texture,
+            false,
+        );
+        self.device.bind_draw_target(draw_target);
+
+        {
+            let _timer = self.gpu_profiler.start_timer(GPU_TAG_CLEAR);
+
+            // NOTE(review): this repeats the depth/blend state set at the top
+            // of the function; presumably redundant unless bind_draw_target
+            // alters that state - confirm before removing.
+            self.device.disable_depth();
+            self.device.disable_depth_write();
+            self.set_blend(false, FramebufferKind::Other);
+
+            // Clear color: transparent black.
+            let color = [0.0, 0.0, 0.0, 0.0];
+            if self.clear_caches_with_quads && !target.clears.is_empty() {
+                // Clear by drawing one quad instance per rect with the
+                // ps_clear shader instead of issuing device clears.
+                let instances = target.clears
+                    .iter()
+                    .map(|r| ClearInstance {
+                        rect: [
+                            r.min.x as f32, r.min.y as f32,
+                            r.max.x as f32, r.max.y as f32,
+                        ],
+                        color,
+                    })
+                    .collect::<Vec<_>>();
+                self.shaders.borrow_mut().ps_clear.bind(
+                    &mut self.device,
+                    &projection,
+                    None,
+                    &mut self.renderer_errors,
+                    &mut self.profile,
+                );
+                self.draw_instanced_batch(
+                    &instances,
+                    VertexArrayKind::Clear,
+                    &BatchTextures::empty(),
+                    stats,
+                );
+            } else {
+                // Clear each rect individually through the device.
+                for rect in &target.clears {
+                    self.device.clear_target(
+                        Some(color),
+                        None,
+                        Some(draw_target.to_framebuffer_rect(*rect)),
+                    );
+                }
+            }
+
+            // Handle any blits to this texture from child tasks.
+            self.handle_blits(
+                &target.blits,
+                render_tasks,
+                draw_target,
+            );
+        }
+
+        // Draw any borders for this target.
+        if !target.border_segments_solid.is_empty() ||
+           !target.border_segments_complex.is_empty()
+        {
+            let _timer = self.gpu_profiler.start_timer(GPU_TAG_CACHE_BORDER);
+
+            // Both border kinds are drawn with premultiplied alpha blending.
+            self.set_blend(true, FramebufferKind::Other);
+            self.set_blend_mode_premultiplied_alpha(FramebufferKind::Other);
+
+            if !target.border_segments_solid.is_empty() {
+                self.shaders.borrow_mut().cs_border_solid.bind(
+                    &mut self.device,
+                    &projection,
+                    None,
+                    &mut self.renderer_errors,
+                    &mut self.profile,
+                );
+
+                self.draw_instanced_batch(
+                    &target.border_segments_solid,
+                    VertexArrayKind::Border,
+                    &BatchTextures::empty(),
+                    stats,
+                );
+            }
+
+            if !target.border_segments_complex.is_empty() {
+                self.shaders.borrow_mut().cs_border_segment.bind(
+                    &mut self.device,
+                    &projection,
+                    None,
+                    &mut self.renderer_errors,
+                    &mut self.profile,
+                );
+
+                self.draw_instanced_batch(
+                    &target.border_segments_complex,
+                    VertexArrayKind::Border,
+                    &BatchTextures::empty(),
+                    stats,
+                );
+            }
+
+            // Restore the blending-disabled state for the following stages.
+            self.set_blend(false, FramebufferKind::Other);
+        }
+
+        // Draw any line decorations for this target.
+        if !target.line_decorations.is_empty() {
+            let _timer = self.gpu_profiler.start_timer(GPU_TAG_CACHE_LINE_DECORATION);
+
+            self.set_blend(true, FramebufferKind::Other);
+            self.set_blend_mode_premultiplied_alpha(FramebufferKind::Other);
+
+            self.shaders.borrow_mut().cs_line_decoration.bind(
+                &mut self.device,
+                &projection,
+                None,
+                &mut self.renderer_errors,
+                &mut self.profile,
+            );
+
+            self.draw_instanced_batch(
+                &target.line_decorations,
+                VertexArrayKind::LineDecoration,
+                &BatchTextures::empty(),
+                stats,
+            );
+
+            // Restore the blending-disabled state for the following stages.
+            self.set_blend(false, FramebufferKind::Other);
+        }
+
+        // Draw any fast path linear gradients for this target.
+        if !target.fast_linear_gradients.is_empty() {
+            let _timer = self.gpu_profiler.start_timer(GPU_TAG_CACHE_FAST_LINEAR_GRADIENT);
+
+            self.set_blend(false, FramebufferKind::Other);
+
+            self.shaders.borrow_mut().cs_fast_linear_gradient.bind(
+                &mut self.device,
+                &projection,
+                None,
+                &mut self.renderer_errors,
+                &mut self.profile,
+            );
+
+            self.draw_instanced_batch(
+                &target.fast_linear_gradients,
+                VertexArrayKind::FastLinearGradient,
+                &BatchTextures::empty(),
+                stats,
+            );
+        }
+
+        // Draw any linear gradients for this target.
+        if !target.linear_gradients.is_empty() {
+            let _timer = self.gpu_profiler.start_timer(GPU_TAG_CACHE_LINEAR_GRADIENT);
+
+            self.set_blend(false, FramebufferKind::Other);
+
+            self.shaders.borrow_mut().cs_linear_gradient.bind(
+                &mut self.device,
+                &projection,
+                None,
+                &mut self.renderer_errors,
+                &mut self.profile,
+            );
+
+            // The dither matrix is optional; bind it only when one exists.
+            if let Some(ref texture) = self.dither_matrix_texture {
+                self.device.bind_texture(TextureSampler::Dither, texture, Swizzle::default());
+            }
+
+            self.draw_instanced_batch(
+                &target.linear_gradients,
+                VertexArrayKind::LinearGradient,
+                &BatchTextures::empty(),
+                stats,
+            );
+        }
+
+        // Draw any radial gradients for this target.
+        if !target.radial_gradients.is_empty() {
+            let _timer = self.gpu_profiler.start_timer(GPU_TAG_CACHE_RADIAL_GRADIENT);
+
+            self.set_blend(false, FramebufferKind::Other);
+
+            self.shaders.borrow_mut().cs_radial_gradient.bind(
+                &mut self.device,
+                &projection,
+                None,
+                &mut self.renderer_errors,
+                &mut self.profile,
+            );
+
+            // The dither matrix is optional; bind it only when one exists.
+            if let Some(ref texture) = self.dither_matrix_texture {
+                self.device.bind_texture(TextureSampler::Dither, texture, Swizzle::default());
+            }
+
+            self.draw_instanced_batch(
+                &target.radial_gradients,
+                VertexArrayKind::RadialGradient,
+                &BatchTextures::empty(),
+                stats,
+            );
+        }
+
+        // Draw any conic gradients for this target.
+        if !target.conic_gradients.is_empty() {
+            let _timer = self.gpu_profiler.start_timer(GPU_TAG_CACHE_CONIC_GRADIENT);
+
+            self.set_blend(false, FramebufferKind::Other);
+
+            self.shaders.borrow_mut().cs_conic_gradient.bind(
+                &mut self.device,
+                &projection,
+                None,
+                &mut self.renderer_errors,
+                &mut self.profile,
+            );
+
+            // The dither matrix is optional; bind it only when one exists.
+            if let Some(ref texture) = self.dither_matrix_texture {
+                self.device.bind_texture(TextureSampler::Dither, texture, Swizzle::default());
+            }
+
+            self.draw_instanced_batch(
+                &target.conic_gradients,
+                VertexArrayKind::ConicGradient,
+                &BatchTextures::empty(),
+                stats,
+            );
+        }
+
+        // Draw any blurs for this target.
+        // Only horizontal blurs are drawn into texture cache targets here.
+        if !target.horizontal_blurs.is_empty() {
+            let _timer = self.gpu_profiler.start_timer(GPU_TAG_BLUR);
+
+            {
+                // Scope the RefCell borrow of the shaders so that it is
+                // released before draw_blurs runs.
+                let mut shaders = self.shaders.borrow_mut();
+                // Pick the blur shader matching the target's kind.
+                match target.target_kind {
+                    RenderTargetKind::Alpha => &mut shaders.cs_blur_a8,
+                    RenderTargetKind::Color => &mut shaders.cs_blur_rgba8,
+                }.bind(&mut self.device, &projection, None, &mut self.renderer_errors, &mut self.profile);
+            }
+
+            self.draw_blurs(
+                &target.horizontal_blurs,
+                stats,
+            );
+        }
+    }
+
+    /// Locks every deferred external image and records its UV rect.
+    ///
+    /// For each entry the external image handler is asked to lock the image,
+    /// the resulting texture is registered with the texture resolver under the
+    /// entry's index, and a GPU cache copy of the UV rect (plus one zeroed
+    /// block) is queued at the entry's address.
+    ///
+    /// Returns `None` when `deferred_resolves` is empty, otherwise the list of
+    /// GPU cache updates to apply.
+    ///
+    /// # Panics
+    ///
+    /// Panics if no external image handler is set, if an entry is not an
+    /// external image, if its image type is `Buffer`, or if the handler
+    /// returns raw data.
+    fn update_deferred_resolves(&mut self, deferred_resolves: &[DeferredResolve]) -> Option<GpuCacheUpdateList> {
+        // Nothing to patch if no external images were deferred this frame.
+        if deferred_resolves.is_empty() {
+            return None;
+        }
+
+        let handler = self.external_image_handler
+            .as_mut()
+            .expect("Found external image, but no handler set!");
+
+        let mut update_list = GpuCacheUpdateList {
+            frame_id: FrameId::INVALID,
+            clear: false,
+            height: self.gpu_cache_texture.get_height(),
+            blocks: Vec::new(),
+            updates: Vec::new(),
+            debug_commands: Vec::new(),
+        };
+
+        for (resolve_index, resolve) in deferred_resolves.iter().enumerate() {
+            self.gpu_profiler.place_marker("deferred resolve");
+
+            let external = resolve
+                .image_properties
+                .external_image
+                .expect("BUG: Deferred resolves must be external images!");
+
+            // Provide rendering information for NativeTexture external images.
+            let locked = handler.lock(external.id, external.channel_index);
+
+            let texture_target = match external.image_type {
+                ExternalImageType::TextureHandle(target) => target,
+                ExternalImageType::Buffer => {
+                    panic!("not a suitable image type in update_deferred_resolves()");
+                }
+            };
+
+            // The handler may have called into the GL context while producing
+            // the handle, so the device's cached state has to be reset.
+            self.device.reset_state();
+
+            // Work out which GL handle to wrap; the wrapper itself is built
+            // identically for the valid and the invalid case.
+            let gl_handle = match locked.source {
+                ExternalImageSource::NativeTexture(texture_id) => texture_id,
+                ExternalImageSource::Invalid => {
+                    warn!("Invalid ext-image");
+                    debug!(
+                        "For ext_id:{:?}, channel:{}.",
+                        external.id,
+                        external.channel_index
+                    );
+                    // Just use 0 as the gl handle for this failed case.
+                    0
+                }
+                ExternalImageSource::RawData(_) => {
+                    panic!("Raw external data is not expected for deferred resolves!");
+                }
+            };
+            let external_texture = ExternalTexture::new(
+                gl_handle,
+                texture_target,
+                locked.uv,
+                resolve.rendering,
+            );
+
+            self.texture_resolver
+                .external_images
+                .insert(DeferredResolveIndex(resolve_index as u32), external_texture);
+
+            // Queue a copy of the blocks pushed just below into the GPU cache
+            // at the address reserved for this resolve.
+            let first_block = update_list.blocks.len();
+            update_list.updates.push(GpuCacheUpdate::Copy {
+                block_index: first_block,
+                block_count: BLOCKS_PER_UV_RECT,
+                address: resolve.address,
+            });
+            update_list.blocks.push(locked.uv.into());
+            update_list.blocks.push([0f32; 4].into());
+        }
+
+        Some(update_list)
+    }
+
+    /// Unlocks every external image currently registered with the texture
+    /// resolver, emptying that registry in the process.
+    ///
+    /// Each registered index is used to look up its entry in
+    /// `deferred_resolves`, so this presumably has to be the same slice the
+    /// images were locked from.
+    ///
+    /// # Panics
+    ///
+    /// Panics if images are registered but no external image handler is set,
+    /// if an index is out of range for `deferred_resolves`, or if the entry
+    /// is not an external image.
+    fn unlock_external_images(
+        &mut self,
+        deferred_resolves: &[DeferredResolve],
+    ) {
+        // Nothing was locked, so the handler is not required at all.
+        if self.texture_resolver.external_images.is_empty() {
+            return;
+        }
+
+        let handler = self.external_image_handler
+            .as_mut()
+            .expect("Found external image, but no handler set!");
+
+        for (resolve_index, _) in self.texture_resolver.external_images.drain() {
+            let resolve = &deferred_resolves[resolve_index.0 as usize];
+            let external = resolve
+                .image_properties
+                .external_image
+                .expect("BUG: Deferred resolves must be external images!");
+            handler.unlock(external.id, external.channel_index);
+        }
+    }
+
+    /// Update the dirty rects based on current compositing mode and config.
+    ///
+    /// When the current compositor reports support for at least one partial
+    /// present rect, this pushes the region the client needs to present into
+    /// `results.dirty_rects`: either the union of this frame's tile dirty
+    /// rects (nothing is pushed if that union is empty), or the whole
+    /// framebuffer when partial present cannot be used for this frame. It also
+    /// records the region in the buffer damage tracker when previous regions
+    /// have to be redrawn manually, and clears `force_redraw`.
+    ///
+    /// * `buffer_age` - forwarded to the buffer damage tracker to look up the
+    ///   damage accumulated by previous frames.
+    /// * `composite_state` - source of the tiles and their dirty rects.
+    /// * `draw_target_dimensions` - size of the framebuffer, used both for
+    ///   clamping and as the full-redraw rect.
+    /// * `results` - receives the dirty rects reported to the client.
+    ///
+    /// Returns the partial present mode to draw with, or `None` when partial
+    /// present is unsupported or cannot be used for this frame.
+    // TODO(gw): This can be tidied up significantly once the Draw compositor
+    //           is implemented in terms of the compositor trait.
+    fn calculate_dirty_rects(
+        &mut self,
+        buffer_age: usize,
+        composite_state: &CompositeState,
+        draw_target_dimensions: DeviceIntSize,
+        results: &mut RenderResults,
+    ) -> Option<PartialPresentMode> {
+        let mut partial_present_mode = None;
+
+        let (max_partial_present_rects, draw_previous_partial_present_regions) = match self.current_compositor_kind {
+            CompositorKind::Native { .. } => {
+                // Assume that we can return a single dirty rect for native
+                // compositor for now, and that there is no buffer-age functionality.
+                // These params can be exposed by the compositor capabilities struct
+                // as the Draw compositor is ported to use it.
+                (1, false)
+            }
+            CompositorKind::Draw { draw_previous_partial_present_regions, max_partial_present_rects } => {
+                (max_partial_present_rects, draw_previous_partial_present_regions)
+            }
+        };
+
+        if max_partial_present_rects > 0 {
+            // Damage accumulated by previous frames for the buffer we are about
+            // to draw into. If the tracker has no answer for this buffer age,
+            // fall back to the whole framebuffer. Only available when a partial
+            // present compositor is configured.
+            let prev_frames_damage_rect = if self.compositor_config.partial_present().is_some() {
+                self.buffer_damage_tracker
+                    .get_damage_rect(buffer_age)
+                    .or_else(|| Some(DeviceRect::from_size(draw_target_dimensions.to_f32())))
+            } else {
+                None
+            };
+
+            let can_use_partial_present =
+                composite_state.dirty_rects_are_valid &&
+                !self.force_redraw &&
+                !(prev_frames_damage_rect.is_none() && draw_previous_partial_present_regions) &&
+                !self.debug_overlay_state.is_enabled;
+
+            if can_use_partial_present {
+                let mut combined_dirty_rect = DeviceRect::zero();
+                let fb_rect = DeviceRect::from_size(draw_target_dimensions.to_f32());
+
+                // Accumulate the dirty rects of all non-clear tiles into a
+                // single rect, since only one dirty rect is reported.
+                for tile in &composite_state.tiles {
+                    if tile.kind == TileKind::Clear {
+                        continue;
+                    }
+                    let dirty_rect = composite_state.get_device_rect(
+                        &tile.local_dirty_rect,
+                        tile.transform_index,
+                    );
+
+                    // In pathological cases where a tile is extremely zoomed, it
+                    // may end up with device coords outside the range of an i32,
+                    // so clamp it to the frame buffer rect here, before it gets
+                    // casted to an i32 rect below.
+                    if let Some(dirty_rect) = dirty_rect.intersection(&fb_rect) {
+                        combined_dirty_rect = combined_dirty_rect.union(&dirty_rect);
+                    }
+                }
+
+                let combined_dirty_rect = combined_dirty_rect.round();
+                let combined_dirty_rect_i32 = combined_dirty_rect.to_i32();
+                // Return this frame's dirty region. If nothing has changed, don't return any dirty
+                // rects at all (the client can use this as a signal to skip present completely).
+                if !combined_dirty_rect.is_empty() {
+                    results.dirty_rects.push(combined_dirty_rect_i32);
+                }
+
+                // Track this frame's dirty region, for calculating subsequent frames' damage.
+                if draw_previous_partial_present_regions {
+                    self.buffer_damage_tracker.push_dirty_rect(&combined_dirty_rect);
+                }
+
+                // If the implementation requires manually keeping the buffer consistent,
+                // then we must combine this frame's dirty region with that of previous frames
+                // to determine the total_dirty_rect. This is used to determine what region we
+                // render to, and is what we send to the compositor as the buffer damage region
+                // (eg for KHR_partial_update).
+                let total_dirty_rect = if draw_previous_partial_present_regions {
+                    // `can_use_partial_present` is false whenever the previous
+                    // damage is unknown and previous regions must be redrawn,
+                    // so the damage rect is always present on this path.
+                    let prev_damage = prev_frames_damage_rect
+                        .expect("bug: previous damage rect required when redrawing previous regions");
+                    combined_dirty_rect.union(&prev_damage)
+                } else {
+                    combined_dirty_rect
+                };
+
+                partial_present_mode = Some(PartialPresentMode::Single {
+                    dirty_rect: total_dirty_rect,
+                });
+            } else {
+                // If we don't have a valid partial present scenario, return a single
+                // dirty rect to the client that covers the entire framebuffer.
+                let fb_rect = DeviceIntRect::from_size(
+                    draw_target_dimensions,
+                );
+                results.dirty_rects.push(fb_rect);
+
+                if draw_previous_partial_present_regions {
+                    self.buffer_damage_tracker.push_dirty_rect(&fb_rect.to_f32());
+                }
+            }
+
+            // The redraw request (if any) has been honoured by this frame.
+            self.force_redraw = false;
+        }
+
+        partial_present_mode
+    }
+
+    /// Updates the current set of vertex data textures from `frame`, then
+    /// advances to the next set (wrapping at `VERTEX_DATA_TEXTURE_COUNT`) so
+    /// that the following call uses a different set.
+    fn bind_frame_data(&mut self, frame: &mut Frame) {
+        profile_scope!("bind_frame_data");
+
+        let _timer = self.gpu_profiler.start_timer(GPU_TAG_SETUP_DATA);
+
+        let slot = self.current_vertex_data_textures;
+        let textures = &mut self.vertex_data_textures[slot];
+        textures.update(
+            &mut self.device,
+            &mut self.texture_upload_pbo_pool,
+            frame,
+        );
+
+        // Rotate to the next set of textures.
+        self.current_vertex_data_textures = (slot + 1) % VERTEX_DATA_TEXTURE_COUNT;
+    }
+
+    /// Applies all queued native surface operations to the native compositor.
+    ///
+    /// Surface creation and destruction is mirrored in
+    /// `allocated_native_surfaces`; debug builds assert that a surface is
+    /// never created twice and never destroyed without having been created.
+    /// With the Draw compositor config the queue is expected to be empty.
+    fn update_native_surfaces(&mut self) {
+        profile_scope!("update_native_surfaces");
+
+        let compositor = match self.compositor_config {
+            CompositorConfig::Native { ref mut compositor, .. } => compositor,
+            CompositorConfig::Draw { .. } => {
+                // Ensure nothing is added in simple composite mode, since otherwise
+                // memory will leak as this doesn't get drained
+                debug_assert!(self.pending_native_surface_updates.is_empty());
+                return;
+            }
+        };
+
+        for update in self.pending_native_surface_updates.drain(..) {
+            match update.details {
+                NativeSurfaceOperationDetails::CreateSurface { id, virtual_offset, tile_size, is_opaque } => {
+                    let _newly_added = self.allocated_native_surfaces.insert(id);
+                    debug_assert!(_newly_added, "bug: creating existing surface");
+                    compositor.create_surface(id, virtual_offset, tile_size, is_opaque);
+                }
+                NativeSurfaceOperationDetails::CreateExternalSurface { id, is_opaque } => {
+                    let _newly_added = self.allocated_native_surfaces.insert(id);
+                    debug_assert!(_newly_added, "bug: creating existing surface");
+                    compositor.create_external_surface(id, is_opaque);
+                }
+                NativeSurfaceOperationDetails::CreateBackdropSurface { id, color } => {
+                    let _newly_added = self.allocated_native_surfaces.insert(id);
+                    debug_assert!(_newly_added, "bug: creating existing surface");
+                    compositor.create_backdrop_surface(id, color);
+                }
+                NativeSurfaceOperationDetails::DestroySurface { id } => {
+                    let _was_present = self.allocated_native_surfaces.remove(&id);
+                    debug_assert!(_was_present, "bug: removing unknown surface");
+                    compositor.destroy_surface(id);
+                }
+                // Tile and external image operations are forwarded as-is;
+                // they do not affect the set of allocated surfaces.
+                NativeSurfaceOperationDetails::CreateTile { id } => {
+                    compositor.create_tile(id);
+                }
+                NativeSurfaceOperationDetails::DestroyTile { id } => {
+                    compositor.destroy_tile(id);
+                }
+                NativeSurfaceOperationDetails::AttachExternalImage { id, external_image } => {
+                    compositor.attach_external_image(id, external_image);
+                }
+            }
+        }
+    }
+
+    fn draw_frame(
+        &mut self,
+        frame: &mut Frame,
+        device_size: Option<DeviceIntSize>,
+        buffer_age: usize,
+        results: &mut RenderResults,
+    ) {
+        profile_scope!("draw_frame");
+
+        // These markers seem to crash a lot on Android, see bug 1559834
+        #[cfg(not(target_os = "android"))]
+        let _gm = self.gpu_profiler.start_marker("draw frame");
+
+        if frame.passes.is_empty() {
+            frame.has_been_rendered = true;
+            return;
+        }
+
+        self.device.disable_depth_write();
+        self.set_blend(false, FramebufferKind::Other);
+        self.device.disable_stencil();
+
+        self.bind_frame_data(frame);
+
+        // Upload experimental GPU buffer texture if there is any data present
+        // TODO: Recycle these textures, upload via PBO or best approach for platform
+        let gpu_buffer_texture = if frame.gpu_buffer.is_empty() {
+            None
+        } else {
+            let gpu_buffer_texture = self.device.create_texture(
+                ImageBufferKind::Texture2D,
+                ImageFormat::RGBAF32,
+                frame.gpu_buffer.size.width,
+                frame.gpu_buffer.size.height,
+                TextureFilter::Nearest,
+                None,
+            );
+
+            self.device.bind_texture(
+                TextureSampler::GpuBuffer,
+                &gpu_buffer_texture,
+                Swizzle::default(),
+            );
+
+            self.device.upload_texture_immediate(
+                &gpu_buffer_texture,
+                &frame.gpu_buffer.data,
+            );
+
+            Some(gpu_buffer_texture)
+        };
+
+        // Determine the present mode and dirty rects, if device_size
+        // is Some(..). If it's None, no composite will occur and only
+        // picture cache and texture cache targets will be updated.
+        // TODO(gw): Split Frame so that it's clearer when a composite
+        //           is occurring.
+        let present_mode = device_size.and_then(|device_size| {
+            self.calculate_dirty_rects(
+                buffer_age,
+                &frame.composite_state,
+                device_size,
+                results,
+            )
+        });
+
+        // If we have a native OS compositor, then make use of that interface to
+        // specify how to composite each of the picture cache surfaces. First, we
+        // need to find each tile that may be bound and updated later in the frame
+        // and invalidate it so that the native render compositor knows that these
+        // tiles can't be composited early. Next, after all such tiles have been
+        // invalidated, then we queue surfaces for native composition by the render
+        // compositor before we actually update the tiles. This allows the render
+        // compositor to start early composition while the tiles are updating.
+        if let CompositorKind::Native { .. } = self.current_compositor_kind {
+            let compositor = self.compositor_config.compositor().unwrap();
+            // Invalidate any native surface tiles that might be updated by passes.
+            if !frame.has_been_rendered {
+                for tile in &frame.composite_state.tiles {
+                    if tile.kind == TileKind::Clear {
+                        continue;
+                    }
+                    if !tile.local_dirty_rect.is_empty() {
+                        if let CompositeTileSurface::Texture { surface: ResolvedSurfaceTexture::Native { id, .. } } = tile.surface {
+                            let valid_rect = frame.composite_state.get_surface_rect(
+                                &tile.local_valid_rect,
+                                &tile.local_rect,
+                                tile.transform_index,
+                            ).to_i32();
+
+                            compositor.invalidate_tile(id, valid_rect);
+                        }
+                    }
+                }
+            }
+            // Ensure any external surfaces that might be used during early composition
+            // are invalidated first so that the native compositor can properly schedule
+            // composition to happen only when the external surface is updated.
+            // See update_external_native_surfaces for more details.
+            for surface in &frame.composite_state.external_surfaces {
+                if let Some((native_surface_id, size)) = surface.update_params {
+                    let surface_rect = size.into();
+                    compositor.invalidate_tile(NativeTileId { surface_id: native_surface_id, x: 0, y: 0 }, surface_rect);
+                }
+            }
+            // Finally queue native surfaces for early composition, if applicable. By now,
+            // we have already invalidated any tiles that such surfaces may depend upon, so
+            // the native render compositor can keep track of when to actually schedule
+            // composition as surfaces are updated.
+            if device_size.is_some() {
+                frame.composite_state.composite_native(
+                    self.clear_color,
+                    &results.dirty_rects,
+                    &mut **compositor,
+                );
+            }
+        }
+
+        for (_pass_index, pass) in frame.passes.iter_mut().enumerate() {
+            #[cfg(not(target_os = "android"))]
+            let _gm = self.gpu_profiler.start_marker(&format!("pass {}", _pass_index));
+
+            profile_scope!("offscreen target");
+
+            // If this frame has already been drawn, then any texture
+            // cache targets have already been updated and can be
+            // skipped this time.
+            if !frame.has_been_rendered {
+                for (&texture_id, target) in &pass.texture_cache {
+                    self.draw_texture_cache_target(
+                        &texture_id,
+                        target,
+                        &frame.render_tasks,
+                        &mut results.stats,
+                    );
+                }
+
+                if !pass.picture_cache.is_empty() {
+                    self.profile.inc(profiler::COLOR_PASSES);
+                }
+
+                // Draw picture caching tiles for this pass.
+                for picture_target in &pass.picture_cache {
+                    results.stats.color_target_count += 1;
+
+                    let draw_target = match picture_target.surface {
+                        ResolvedSurfaceTexture::TextureCache { ref texture } => {
+                            let (texture, _) = self.texture_resolver
+                                .resolve(texture)
+                                .expect("bug");
+
+                            DrawTarget::from_texture(
+                                texture,
+                                true,
+                            )
+                        }
+                        ResolvedSurfaceTexture::Native { id, size } => {
+                            let surface_info = match self.current_compositor_kind {
+                                CompositorKind::Native { .. } => {
+                                    let compositor = self.compositor_config.compositor().unwrap();
+                                    compositor.bind(
+                                        id,
+                                        picture_target.dirty_rect,
+                                        picture_target.valid_rect,
+                                    )
+                                }
+                                CompositorKind::Draw { .. } => {
+                                    unreachable!();
+                                }
+                            };
+
+                            DrawTarget::NativeSurface {
+                                offset: surface_info.origin,
+                                external_fbo_id: surface_info.fbo_id,
+                                dimensions: size,
+                            }
+                        }
+                    };
+
+                    let projection = Transform3D::ortho(
+                        0.0,
+                        draw_target.dimensions().width as f32,
+                        0.0,
+                        draw_target.dimensions().height as f32,
+                        self.device.ortho_near_plane(),
+                        self.device.ortho_far_plane(),
+                    );
+
+                    self.draw_picture_cache_target(
+                        picture_target,
+                        draw_target,
+                        &projection,
+                        &frame.render_tasks,
+                        &mut results.stats,
+                    );
+
+                    // Native OS surfaces must be unbound at the end of drawing to them
+                    if let ResolvedSurfaceTexture::Native { .. } = picture_target.surface {
+                        match self.current_compositor_kind {
+                            CompositorKind::Native { .. } => {
+                                let compositor = self.compositor_config.compositor().unwrap();
+                                compositor.unbind();
+                            }
+                            CompositorKind::Draw { .. } => {
+                                unreachable!();
+                            }
+                        }
+                    }
+                }
+            }
+
+            for target in &pass.alpha.targets {
+                results.stats.alpha_target_count += 1;
+
+                let texture_id = target.texture_id();
+
+                let alpha_tex = self.texture_resolver.get_cache_texture_mut(&texture_id);
+
+                let draw_target = DrawTarget::from_texture(
+                    alpha_tex,
+                    false,
+                );
+
+                let projection = Transform3D::ortho(
+                    0.0,
+                    draw_target.dimensions().width as f32,
+                    0.0,
+                    draw_target.dimensions().height as f32,
+                    self.device.ortho_near_plane(),
+                    self.device.ortho_far_plane(),
+                );
+
+                self.draw_alpha_target(
+                    draw_target,
+                    target,
+                    &projection,
+                    &frame.render_tasks,
+                    &mut results.stats,
+                );
+            }
+
+            let color_rt_info = RenderTargetInfo { has_depth: pass.color.needs_depth() };
+
+            for target in &pass.color.targets {
+                results.stats.color_target_count += 1;
+
+                let texture_id = target.texture_id();
+
+                let color_tex = self.texture_resolver.get_cache_texture_mut(&texture_id);
+
+                self.device.reuse_render_target::<u8>(
+                    color_tex,
+                    color_rt_info,
+                );
+
+                let draw_target = DrawTarget::from_texture(
+                    color_tex,
+                    target.needs_depth(),
+                );
+
+                let projection = Transform3D::ortho(
+                    0.0,
+                    draw_target.dimensions().width as f32,
+                    0.0,
+                    draw_target.dimensions().height as f32,
+                    self.device.ortho_near_plane(),
+                    self.device.ortho_far_plane(),
+                );
+
+                let clear_depth = if target.needs_depth() {
+                    Some(1.0)
+                } else {
+                    None
+                };
+
+                self.draw_color_target(
+                    draw_target,
+                    target,
+                    clear_depth,
+                    &frame.render_tasks,
+                    &projection,
+                    &mut results.stats,
+                );
+            }
+
+            // Only end the pass here and invalidate previous textures for
+            // off-screen targets. Deferring return of the inputs to the
+            // frame buffer until the implicit end_pass in end_frame allows
+            // debug draw overlays to be added without triggering a copy
+            // resolve stage in mobile / tiled GPUs.
+            self.texture_resolver.end_pass(
+                &mut self.device,
+                &pass.textures_to_invalidate,
+            );
+            {
+                profile_scope!("gl.flush");
+                self.device.gl().flush();
+            }
+        }
+
+        self.composite_frame(
+            frame,
+            device_size,
+            results,
+            present_mode,
+        );
+
+        if let Some(gpu_buffer_texture) = gpu_buffer_texture {
+            self.device.delete_texture(gpu_buffer_texture);
+        }
+
+        frame.has_been_rendered = true;
+    }
+
+    /// Composites the frame onto the main framebuffer, or records that no
+    /// presentation happened.
+    ///
+    /// With `device_size == None` nothing is drawn and a full redraw is forced
+    /// for the next frame. Otherwise the picture cache debug info is moved from
+    /// the frame into `results`, a projection and default draw target are built
+    /// for the frame's device rect, and compositing is dispatched according to
+    /// the current compositor kind.
+    fn composite_frame(
+        &mut self,
+        frame: &mut Frame,
+        device_size: Option<DeviceIntSize>,
+        results: &mut RenderResults,
+        present_mode: Option<PartialPresentMode>,
+    ) {
+        profile_scope!("main target");
+
+        let device_size = match device_size {
+            Some(size) => size,
+            None => {
+                // Rendering a frame without presenting it will confuse the partial
+                // present logic, so force a full present for the next frame.
+                self.force_redraw();
+                return;
+            }
+        };
+
+        results.stats.color_target_count += 1;
+        results.picture_cache_debug = mem::replace(
+            &mut frame.composite_state.picture_cache_debug,
+            PictureCacheDebugInfo::new(),
+        );
+
+        let frame_size = frame.device_rect.size().to_f32();
+        let origin_is_top_left = self.device.surface_origin_is_top_left();
+
+        // The vertical extents of the projection are swapped depending on
+        // where the surface origin lives.
+        let (bottom, top) = match origin_is_top_left {
+            true => (0.0, frame_size.height),
+            false => (frame_size.height, 0.0),
+        };
+
+        let projection = Transform3D::ortho(
+            0.0,
+            frame_size.width,
+            bottom,
+            top,
+            self.device.ortho_near_plane(),
+            self.device.ortho_far_plane(),
+        );
+
+        let fb_scale = Scale::<_, _, FramebufferPixel>::new(1i32);
+        let mut fb_rect = frame.device_rect * fb_scale;
+
+        // Mirror the rect vertically within the device when the surface
+        // origin is not at the top-left.
+        if !origin_is_top_left {
+            let rect_height = fb_rect.height();
+            let flipped_min_y = device_size.height - fb_rect.max.y;
+            fb_rect.min.y = flipped_min_y;
+            fb_rect.max.y = flipped_min_y + rect_height;
+        }
+
+        let draw_target = DrawTarget::Default {
+            rect: fb_rect,
+            total_size: device_size * fb_scale,
+            surface_origin_is_top_left: origin_is_top_left,
+        };
+
+        match self.current_compositor_kind {
+            CompositorKind::Draw { .. } => {
+                self.composite_simple(
+                    &frame.composite_state,
+                    draw_target,
+                    &projection,
+                    results,
+                    present_mode,
+                );
+            }
+            CompositorKind::Native { .. } => {
+                // With a native OS compositor the surfaces have already been
+                // queued for early native composition by this point. All that
+                // is left is to update any external native surfaces that were
+                // invalidated so that composition can complete.
+                self.update_external_native_surfaces(
+                    &frame.composite_state.external_surfaces,
+                    results,
+                );
+            }
+        }
+    }
+
+    /// Returns the debug renderer obtained from `self.debug` for the current
+    /// device, or `None` when `self.debug.get_mut` yields none.
+    pub fn debug_renderer(&mut self) -> Option<&mut DebugRenderer> {
+        let device = &mut self.device;
+        self.debug.get_mut(device)
+    }
+
+    /// Returns a copy of the debug flags currently stored on the renderer.
+    pub fn get_debug_flags(&self) -> DebugFlags {
+        self.debug_flags
+    }
+
+    /// Replaces the renderer's debug flags with `flags`.
+    ///
+    /// Before storing the new value, the GPU profiler's timers and samplers
+    /// are switched on or off if the corresponding flag differs between the
+    /// old and the new flag set.
+    pub fn set_debug_flags(&mut self, flags: DebugFlags) {
+        let previous = self.debug_flags;
+
+        match flag_changed(previous, flags, DebugFlags::GPU_TIME_QUERIES) {
+            Some(true) => {
+                self.gpu_profiler.enable_timers();
+            }
+            Some(false) => {
+                self.gpu_profiler.disable_timers();
+            }
+            None => {}
+        }
+
+        match flag_changed(previous, flags, DebugFlags::GPU_SAMPLE_QUERIES) {
+            Some(true) => {
+                self.gpu_profiler.enable_samplers();
+            }
+            Some(false) => {
+                self.gpu_profiler.disable_samplers();
+            }
+            None => {}
+        }
+
+        self.debug_flags = flags;
+    }
+
+    /// Forwards `ui_str` to the profiler's `set_ui`.
+    ///
+    /// NOTE(review): the accepted syntax of `ui_str` is defined by the
+    /// profiler and is not visible here.
+    pub fn set_profiler_ui(&mut self, ui_str: &str) {
+        self.profiler.set_ui(ui_str);
+    }
+
+    /// Queues the frame's debug items (rectangles and text) on the debug
+    /// renderer. Does nothing when `items` is empty or no debug renderer is
+    /// available.
+    fn draw_frame_debug_items(&mut self, items: &[DebugItem]) {
+        if items.is_empty() {
+            return;
+        }
+
+        if let Some(renderer) = self.debug.get_mut(&mut self.device) {
+            for item in items.iter() {
+                match item {
+                    DebugItem::Text { ref msg, position, color } => {
+                        renderer.add_text(
+                            position.x,
+                            position.y,
+                            msg,
+                            (*color).into(),
+                            None,
+                        );
+                    }
+                    DebugItem::Rect { rect, outer_color, inner_color } => {
+                        // The filled quad uses the inner color for both of
+                        // its color arguments; the outline uses the outer one.
+                        let fill = *inner_color;
+                        renderer.add_quad(
+                            rect.min.x,
+                            rect.min.y,
+                            rect.max.x,
+                            rect.max.y,
+                            fill.into(),
+                            fill.into(),
+                        );
+
+                        let outline = *outer_color;
+                        renderer.add_rect(&rect.to_i32(), outline.into());
+                    }
+                }
+            }
+        }
+    }
+
+    /// When `DebugFlags::RENDER_TARGET_DBG` is set, blits every cached texture
+    /// in the render-target category onto `draw_target` as a debug overlay.
+    fn draw_render_target_debug(&mut self, draw_target: &DrawTarget) {
+        let enabled = self.debug_flags.contains(DebugFlags::RENDER_TARGET_DBG);
+        if !enabled {
+            return;
+        }
+
+        let debug_renderer = if let Some(renderer) = self.debug.get_mut(&mut self.device) {
+            renderer
+        } else {
+            return;
+        };
+
+        let mut render_targets: Vec<&Texture> = Vec::new();
+        for entry in self.texture_resolver.texture_cache_map.values() {
+            if entry.category == TextureCacheCategory::RenderTarget {
+                render_targets.push(&entry.texture);
+            }
+        }
+
+        // Use green for all RTs.
+        fn green(_texture: &Texture) -> [f32; 4] {
+            [0.0, 1.0, 0.0, 1.0]
+        }
+
+        Self::do_debug_blit(
+            &mut self.device,
+            debug_renderer,
+            render_targets,
+            draw_target,
+            0,
+            &green,
+        );
+    }
+
+    /// When `DebugFlags::ZOOM_DBG` is set, copies a 64x64 region of the
+    /// framebuffer around the cursor into a dedicated texture and blits it
+    /// back, enlarged to 1024x1024, 64 pixels in from the far corner of the
+    /// device. The zoom texture is created on first use.
+    fn draw_zoom_debug(
+        &mut self,
+        device_size: DeviceIntSize,
+    ) {
+        if !self.debug_flags.contains(DebugFlags::ZOOM_DBG) {
+            return;
+        }
+
+        let debug_renderer = if let Some(renderer) = self.debug.get_mut(&mut self.device) {
+            renderer
+        } else {
+            return;
+        };
+
+        let source_size = DeviceIntSize::new(64, 64);
+        let target_size = DeviceIntSize::new(1024, 1024);
+
+        // Center the sampled region on the cursor, then clamp it so that it
+        // stays inside the device bounds.
+        let max_origin_x = device_size.width - source_size.width;
+        let max_origin_y = device_size.height - source_size.height;
+        let origin_x = (self.cursor_position.x - source_size.width / 2)
+            .min(max_origin_x)
+            .max(0);
+        let origin_y = (self.cursor_position.y - source_size.height / 2)
+            .min(max_origin_y)
+            .max(0);
+
+        let source_rect = DeviceIntRect::from_origin_and_size(
+            DeviceIntPoint::new(origin_x, origin_y),
+            source_size,
+        );
+
+        let target_origin = DeviceIntPoint::new(
+            device_size.width - target_size.width - 64,
+            device_size.height - target_size.height - 64,
+        );
+        let target_rect = DeviceIntRect::from_origin_and_size(target_origin, target_size);
+
+        let texture_rect = FramebufferIntRect::from_size(
+            source_rect.size().cast_unit(),
+        );
+
+        // Red outline, one pixel outside the magnified view.
+        debug_renderer.add_rect(
+            &target_rect.inflate(1, 1),
+            debug_colors::RED.into(),
+        );
+
+        if self.zoom_debug_texture.is_none() {
+            self.zoom_debug_texture = Some(self.device.create_texture(
+                ImageBufferKind::Texture2D,
+                ImageFormat::BGRA8,
+                source_rect.width(),
+                source_rect.height(),
+                TextureFilter::Nearest,
+                Some(RenderTargetInfo { has_depth: false }),
+            ));
+        }
+
+        let zoom_texture = self.zoom_debug_texture.as_ref().unwrap();
+        let framebuffer = DrawTarget::new_default(
+            device_size,
+            self.device.surface_origin_is_top_left(),
+        );
+
+        // Copy frame buffer into the zoom texture
+        self.device.blit_render_target(
+            framebuffer.into(),
+            framebuffer.to_framebuffer_rect(source_rect),
+            DrawTarget::from_texture(zoom_texture, false),
+            texture_rect,
+            TextureFilter::Nearest,
+        );
+
+        // Draw the zoom texture back to the framebuffer
+        self.device.blit_render_target(
+            ReadTarget::from_texture(zoom_texture),
+            texture_rect,
+            framebuffer,
+            framebuffer.to_framebuffer_rect(target_rect),
+            TextureFilter::Nearest,
+        );
+    }
+
+    /// When `DebugFlags::TEXTURE_CACHE_DBG` is set, blits every cached texture
+    /// in the atlas category onto `draw_target`, tagging shared textures in
+    /// orange and standalone ones in fuchsia.
+    fn draw_texture_cache_debug(&mut self, draw_target: &DrawTarget) {
+        let enabled = self.debug_flags.contains(DebugFlags::TEXTURE_CACHE_DBG);
+        if !enabled {
+            return;
+        }
+
+        let debug_renderer = if let Some(renderer) = self.debug.get_mut(&mut self.device) {
+            renderer
+        } else {
+            return;
+        };
+
+        let mut atlases: Vec<&Texture> = Vec::new();
+        for entry in self.texture_resolver.texture_cache_map.values() {
+            if entry.category == TextureCacheCategory::Atlas {
+                atlases.push(&entry.texture);
+            }
+        }
+
+        fn select_color(texture: &Texture) -> [f32; 4] {
+            let shared = texture.flags().contains(TextureFlags::IS_SHARED_TEXTURE_CACHE);
+            match shared {
+                true => [1.0, 0.5, 0.0, 1.0],  // Orange for shared.
+                false => [1.0, 0.0, 1.0, 1.0], // Fuchsia for standalone.
+            }
+        }
+
+        // Leave room for the render target row when that overlay is also
+        // enabled. NOTE(review): 544 appears to be the 512 + 16 + 16 used by
+        // `do_debug_blit` for image size, spacing and tag height - confirm.
+        let bottom = if self.debug_flags.contains(DebugFlags::RENDER_TARGET_DBG) {
+            544
+        } else {
+            0
+        };
+
+        Self::do_debug_blit(
+            &mut self.device,
+            debug_renderer,
+            atlases,
+            draw_target,
+            bottom,
+            &select_color,
+        );
+    }
+
+    /// Draws a row of texture thumbnails onto `draw_target` for debugging.
+    ///
+    /// Each texture gets a square image with a colored tag below it showing
+    /// the texture dimensions. Thumbnails are laid out right-to-left in order
+    /// of increasing byte size, so the largest texture ends up leftmost.
+    ///
+    /// * `textures` - textures to display; sorted in place by `size_in_bytes`.
+    /// * `bottom` - extra vertical offset subtracted from the target height
+    ///   when positioning the row, which lets several rows be stacked.
+    /// * `select_color` - picks the tag color for a texture.
+    fn do_debug_blit(
+        device: &mut Device,
+        debug_renderer: &mut DebugRenderer,
+        mut textures: Vec<&Texture>,
+        draw_target: &DrawTarget,
+        bottom: i32,
+        select_color: &dyn Fn(&Texture) -> [f32; 4],
+    ) {
+        let mut spacing = 16;
+        let mut size = 512;
+
+        let device_size = draw_target.dimensions();
+        let fb_width = device_size.width;
+        let fb_height = device_size.height;
+        let surface_origin_is_top_left = draw_target.surface_origin_is_top_left();
+
+        let num_textures = textures.len() as i32;
+
+        // Shrink images and gaps uniformly when the full row would be wider
+        // than the target.
+        if num_textures * (size + spacing) > fb_width {
+            let factor = fb_width as f32 / (num_textures * (size + spacing)) as f32;
+            size = (size as f32 * factor) as i32;
+            spacing = (spacing as f32 * factor) as i32;
+        }
+
+        let text_height = 14; // Visually approximated.
+        let text_margin = 1;
+        let tag_height = text_height + text_margin * 2;
+        let tag_y = fb_height - (bottom + spacing + tag_height);
+        let image_y = tag_y - size;
+
+        // Sort the display by size (in bytes), so that left-to-right is
+        // largest-to-smallest.
+        //
+        // Note that the vec here is in increasing order, because the elements
+        // get drawn right-to-left.
+        textures.sort_by_key(|t| t.size_in_bytes());
+
+        for (i, texture) in textures.iter().enumerate() {
+            let dimensions = texture.get_dimensions();
+            let src_rect = FramebufferIntRect::from_size(
+                FramebufferIntSize::new(dimensions.width as i32, dimensions.height as i32),
+            );
+
+            let x = fb_width - (spacing + size) * (i as i32 + 1);
+
+            // If we have more targets than fit on one row in screen, just early exit.
+            // Slots are placed right-to-left, so running out of room shows up
+            // as the left edge going negative (`x` can never exceed `fb_width`).
+            if x < 0 {
+                return;
+            }
+
+            // Draw the info tag.
+            let tag_rect = rect(x, tag_y, size, tag_height).to_box2d();
+            let tag_color = select_color(texture);
+            device.clear_target(
+                Some(tag_color),
+                None,
+                Some(draw_target.to_framebuffer_rect(tag_rect)),
+            );
+
+            // Draw the dimensions onto the tag.
+            let text_rect = tag_rect.inflate(-text_margin, -text_margin);
+            debug_renderer.add_text(
+                text_rect.min.x as f32,
+                text_rect.max.y as f32, // Top-relative.
+                &format!("{}x{}", dimensions.width, dimensions.height),
+                ColorU::new(0, 0, 0, 255),
+                Some(tag_rect.to_f32())
+            );
+
+            // Blit the contents of the texture.
+            let dest_rect = draw_target.to_framebuffer_rect(rect(x, image_y, size, size).to_box2d());
+            let read_target = ReadTarget::from_texture(texture);
+
+            if surface_origin_is_top_left {
+                device.blit_render_target(
+                    read_target,
+                    src_rect,
+                    *draw_target,
+                    dest_rect,
+                    TextureFilter::Linear,
+                );
+            } else {
+                // Invert y.
+                device.blit_render_target_invert_y(
+                    read_target,
+                    src_rect,
+                    *draw_target,
+                    dest_rect,
+                );
+            }
+        }
+    }
+
+    /// When `DebugFlags::EPOCHS` is set, lists every `(pipeline, document)`
+    /// entry of `pipeline_info.epochs` with its epoch as yellow text, then
+    /// queues a dark quad sized to enclose the list.
+    fn draw_epoch_debug(&mut self) {
+        if !self.debug_flags.contains(DebugFlags::EPOCHS) {
+            return;
+        }
+
+        let debug_renderer = if let Some(renderer) = self.debug.get_mut(&mut self.device) {
+            renderer
+        } else {
+            return;
+        };
+
+        let line_step = debug_renderer.line_height();
+        let x0: f32 = 30.0;
+        let y0: f32 = 30.0;
+
+        // Track the running baseline and the widest line seen so far.
+        let mut y = y0;
+        let mut text_width = 0.0;
+        for ((pipeline, document_id), epoch) in &self.pipeline_info.epochs {
+            y += line_step;
+
+            let label = format!("({:?}, {:?}): {:?}", pipeline, document_id, epoch);
+            let text_bounds = debug_renderer.add_text(
+                x0,
+                y,
+                &label,
+                ColorU::new(255, 255, 0, 255),
+                None,
+            );
+            text_width = f32::max(text_width, text_bounds.size.width);
+        }
+
+        let margin = 10.0;
+        let top_color = ColorU::new(25, 25, 25, 200);
+        let bottom_color = ColorU::new(51, 51, 51, 200);
+        debug_renderer.add_quad(
+            x0 - margin,
+            y0 - margin,
+            x0 + text_width + margin,
+            y + margin,
+            top_color,
+            bottom_color,
+        );
+    }
+
+    /// When `DebugFlags::WINDOW_VISIBILITY_DBG` is set and a native compositor
+    /// is configured, prints the compositor's window visibility as debug text:
+    /// red when the window is fully occluded, blue otherwise.
+    fn draw_window_visibility_debug(&mut self) {
+        if !self.debug_flags.contains(DebugFlags::WINDOW_VISIBILITY_DBG) {
+            return;
+        }
+
+        let debug_renderer = if let Some(renderer) = self.debug.get_mut(&mut self.device) {
+            renderer
+        } else {
+            return;
+        };
+
+        let compositor = match self.compositor_config {
+            CompositorConfig::Native { ref mut compositor, .. } => compositor,
+            _ => return,
+        };
+
+        let visibility = compositor.get_window_visibility();
+        let text_color = match visibility.is_fully_occluded {
+            true => ColorU::new(255, 0, 0, 255),
+            false => ColorU::new(0, 0, 255, 255),
+        };
+
+        let x: f32 = 30.0;
+        let y: f32 = 40.0;
+        let label = format!("{:?}", visibility);
+        debug_renderer.add_text(x, y, &label, text_color, None);
+    }
+
+    /// When `DebugFlags::GPU_CACHE_DBG` is set, draws a grey backdrop the
+    /// width of the vertex texture and, on top of it, one red strip per
+    /// recorded GPU cache debug chunk at the chunk's address.
+    fn draw_gpu_cache_debug(&mut self, device_size: DeviceIntSize) {
+        if !self.debug_flags.contains(DebugFlags::GPU_CACHE_DBG) {
+            return;
+        }
+
+        let debug_renderer = if let Some(renderer) = self.debug.get_mut(&mut self.device) {
+            renderer
+        } else {
+            return;
+        };
+
+        let x_off = 30f32;
+        let y_off = 30f32;
+
+        // Limit the overlay so that it keeps a `y_off` margin above and below.
+        let max_height = device_size.height - (y_off as i32) * 2;
+        let height = self.gpu_cache_texture.get_height().min(max_height) as usize;
+
+        let backdrop = ColorU::new(80, 80, 80, 80);
+        debug_renderer.add_quad(
+            x_off,
+            y_off,
+            x_off + MAX_VERTEX_TEXTURE_WIDTH as f32,
+            y_off + height as f32,
+            backdrop,
+            backdrop,
+        );
+
+        let chunk_color = ColorU::new(250, 0, 0, 200);
+        let upper = self.gpu_cache_debug_chunks.len().min(height);
+        for row in &self.gpu_cache_debug_chunks[..upper] {
+            for chunk in row {
+                let left = x_off + chunk.address.u as f32;
+                let top = y_off + chunk.address.v as f32;
+                debug_renderer.add_quad(
+                    left,
+                    top,
+                    left + chunk.size as f32,
+                    top + 1.0,
+                    chunk_color,
+                    chunk_color,
+                );
+            }
+        }
+    }
+
+    /// Pass-through to `Device::read_pixels_into`, used by Gecko's WR bindings.
+    ///
+    /// `rect`, `format` and `output` are forwarded unchanged.
+    /// NOTE(review): the required length of `output` for a given `rect` and
+    /// `format` is decided by the device and is not checked here.
+    pub fn read_pixels_into(&mut self, rect: FramebufferIntRect, format: ImageFormat, output: &mut [u8]) {
+        self.device.read_pixels_into(rect, format, output);
+    }
+
+    /// Reads the pixels of `rect` as RGBA8 and returns them in a freshly
+    /// allocated buffer of four bytes per pixel.
+    pub fn read_pixels_rgba8(&mut self, rect: FramebufferIntRect) -> Vec<u8> {
+        // Widen to `usize` before multiplying by the bytes-per-pixel count, so
+        // that the product cannot overflow the rect's `i32` coordinate type.
+        let byte_len = rect.area() as usize * 4;
+        let mut pixels = vec![0; byte_len];
+        self.device.read_pixels_into(rect, ImageFormat::RGBA8, &mut pixels);
+        pixels
+    }
+
+    /// De-initialize the Renderer safely, assuming the GL is still alive and active.
+    ///
+    /// Consumes the renderer. All GPU resources owned by it are released
+    /// between a `begin_frame` / `end_frame` pair on the device.
+    pub fn deinit(mut self) {
+        // Note: this is a fake frame, only needed because texture deletion is
+        // required to happen inside a frame.
+        self.device.begin_frame();
+        // If we are using a native compositor, ensure that any remaining native
+        // surfaces are freed.
+        if let CompositorConfig::Native { mut compositor, .. } = self.compositor_config {
+            for id in self.allocated_native_surfaces.drain() {
+                compositor.destroy_surface(id);
+            }
+            // Destroy the debug overlay surface, if currently allocated.
+            if self.debug_overlay_state.current_size.is_some() {
+                compositor.destroy_surface(NativeSurfaceId::DEBUG_OVERLAY);
+            }
+            compositor.deinit();
+        }
+        self.gpu_cache_texture.deinit(&mut self.device);
+        // Optional textures only exist if they were created at some point.
+        if let Some(dither_matrix_texture) = self.dither_matrix_texture {
+            self.device.delete_texture(dither_matrix_texture);
+        }
+        if let Some(zoom_debug_texture) = self.zoom_debug_texture {
+            self.device.delete_texture(zoom_debug_texture);
+        }
+        for textures in self.vertex_data_textures.drain(..) {
+            textures.deinit(&mut self.device);
+        }
+        self.texture_upload_pbo_pool.deinit(&mut self.device);
+        self.staging_texture_pool.delete_textures(&mut self.device);
+        self.texture_resolver.deinit(&mut self.device);
+        self.vaos.deinit(&mut self.device);
+        self.debug.deinit(&mut self.device);
+
+        // The shaders are only torn down here when this renderer holds the
+        // last reference to them; otherwise they are left to the other owners.
+        if let Ok(shaders) = Rc::try_unwrap(self.shaders) {
+            shaders.into_inner().deinit(&mut self.device);
+        }
+
+        if let Some(async_screenshots) = self.async_screenshots.take() {
+            async_screenshots.deinit(&mut self.device);
+        }
+
+        if let Some(async_frame_recorder) = self.async_frame_recorder.take() {
+            async_frame_recorder.deinit(&mut self.device);
+        }
+
+        // Resources that only exist in capture / replay builds.
+        #[cfg(feature = "capture")]
+        self.device.delete_fbo(self.read_fbo);
+        #[cfg(feature = "replay")]
+        for (_, ext) in self.owned_external_images {
+            self.device.delete_external_texture(ext);
+        }
+        self.device.end_frame();
+    }
+
+    /// Returns the size reported by the configured `size_of_ops` for the
+    /// allocation at `ptr`.
+    ///
+    /// Panics if no `size_of_ops` has been configured.
+    fn size_of<T>(&self, ptr: *const T) -> usize {
+        let ops = self.size_of_ops.as_ref().unwrap();
+        // NOTE(review): soundness depends on the contract of `malloc_size_of`,
+        // which is defined outside this view - presumably `ptr` must point at
+        // the start of a live heap allocation; confirm against its docs.
+        unsafe { ops.malloc_size_of(ptr) }
+    }
+
+    /// Collects a memory report.
+    ///
+    /// Sums the contributions of the GPU cache, the staging texture pool, the
+    /// render task storage of every active document, the vertex data
+    /// textures, the texture resolver, the upload PBO pool and the device
+    /// layer (which also receives `swgl`).
+    ///
+    /// Panics if no `size_of_ops` has been configured.
+    pub fn report_memory(&self, swgl: *mut c_void) -> MemoryReport {
+        let mut report = MemoryReport::default();
+        let size_of_ops = self.size_of_ops.as_ref().unwrap();
+
+        // GPU cache CPU memory.
+        self.gpu_cache_texture.report_memory_to(&mut report, size_of_ops);
+
+        self.staging_texture_pool.report_memory_to(&mut report, size_of_ops);
+
+        // Render task CPU memory.
+        for (_id, doc) in &self.active_documents {
+            let render_tasks = &doc.frame.render_tasks;
+            report.render_tasks += self.size_of(render_tasks.tasks.as_ptr());
+            report.render_tasks += self.size_of(render_tasks.task_data.as_ptr());
+        }
+
+        // Vertex data GPU memory.
+        for vertex_textures in self.vertex_data_textures.iter() {
+            report.vertex_data_textures += vertex_textures.size_in_bytes();
+        }
+
+        // Texture cache and render target GPU memory.
+        report += self.texture_resolver.report_memory();
+
+        // Texture upload PBO memory.
+        report += self.texture_upload_pbo_pool.report_memory();
+
+        // Textures held internally within the device layer.
+        report += self.device.report_memory(size_of_ops, swgl);
+
+        report
+    }
+
+    // Enables or disables blending on the device. Blending is always turned on
+    // when drawing to the main framebuffer with the "show overdraw" debugging
+    // mode enabled, regardless of `blend`.
+    fn set_blend(&mut self, blend: bool, framebuffer_kind: FramebufferKind) {
+        let show_overdraw = framebuffer_kind == FramebufferKind::Main
+            && self.debug_flags.contains(DebugFlags::SHOW_OVERDRAW);
+        self.device.set_blend(blend || show_overdraw)
+    }
+
+    /// Selects the multiply blend mode, or the overdraw visualisation mode
+    /// when drawing to the main framebuffer with `SHOW_OVERDRAW` enabled.
+    fn set_blend_mode_multiply(&mut self, framebuffer_kind: FramebufferKind) {
+        let show_overdraw = framebuffer_kind == FramebufferKind::Main
+            && self.debug_flags.contains(DebugFlags::SHOW_OVERDRAW);
+        if show_overdraw {
+            self.device.set_blend_mode_show_overdraw();
+            return;
+        }
+        self.device.set_blend_mode_multiply();
+    }
+
+    /// Selects the premultiplied-alpha blend mode, or the overdraw
+    /// visualisation mode when drawing to the main framebuffer with
+    /// `SHOW_OVERDRAW` enabled.
+    fn set_blend_mode_premultiplied_alpha(&mut self, framebuffer_kind: FramebufferKind) {
+        let show_overdraw = framebuffer_kind == FramebufferKind::Main
+            && self.debug_flags.contains(DebugFlags::SHOW_OVERDRAW);
+        if show_overdraw {
+            self.device.set_blend_mode_show_overdraw();
+            return;
+        }
+        self.device.set_blend_mode_premultiplied_alpha();
+    }
+
+    /// Selects the first-pass blend mode for subpixel text with a background
+    /// color, or the overdraw visualisation mode when drawing to the main
+    /// framebuffer with `SHOW_OVERDRAW` enabled.
+    fn set_blend_mode_subpixel_with_bg_color_pass1(&mut self, framebuffer_kind: FramebufferKind) {
+        let show_overdraw = framebuffer_kind == FramebufferKind::Main
+            && self.debug_flags.contains(DebugFlags::SHOW_OVERDRAW);
+        if show_overdraw {
+            self.device.set_blend_mode_show_overdraw();
+            return;
+        }
+        self.device.set_blend_mode_subpixel_with_bg_color_pass1();
+    }
+
+    /// Selects the second-pass blend mode for subpixel text with a background
+    /// color, or the overdraw visualisation mode when drawing to the main
+    /// framebuffer with `SHOW_OVERDRAW` enabled.
+    fn set_blend_mode_subpixel_with_bg_color_pass2(&mut self, framebuffer_kind: FramebufferKind) {
+        let show_overdraw = framebuffer_kind == FramebufferKind::Main
+            && self.debug_flags.contains(DebugFlags::SHOW_OVERDRAW);
+        if show_overdraw {
+            self.device.set_blend_mode_show_overdraw();
+            return;
+        }
+        self.device.set_blend_mode_subpixel_with_bg_color_pass2();
+    }
+
+    /// Binds `texture` as the draw target (without depth) and clears its
+    /// color to `color`. The texture is left bound afterwards.
+    fn clear_texture(&mut self, texture: &Texture, color: [f32; 4]) {
+        let target = DrawTarget::from_texture(texture, false);
+        self.device.bind_draw_target(target);
+        self.device.clear_target(Some(color), None, None);
+    }
+}
+
+bitflags! {
+    /// Flags that control how shaders are pre-cached, if at all.
+    ///
+    /// The default value is the empty set.
+    #[derive(Default)]
+    pub struct ShaderPrecacheFlags: u32 {
+        /// Needed for const initialization
+        const EMPTY                 = 0;
+
+        /// Only start async compile
+        // Bits 0 and 1 are not assigned to any flag in this definition.
+        const ASYNC_COMPILE         = 1 << 2;
+
+        /// Do a full compile/link during startup
+        const FULL_COMPILE          = 1 << 3;
+    }
+}
+
+/// The cumulative times spent in each painting phase to generate this frame.
+#[derive(Debug, Default)]
+pub struct FullFrameStats {
+    /// Whether a full display list was involved; combined with logical OR
+    /// when two stats are merged.
+    pub full_display_list: bool,
+    /// Time attributed to Gecko display list building.
+    pub gecko_display_list_time: f64,
+    /// Time attributed to WebRender display list building.
+    pub wr_display_list_time: f64,
+    /// Time attributed to scene building.
+    pub scene_build_time: f64,
+    /// Time attributed to frame building.
+    pub frame_build_time: f64,
+}
+
+impl FullFrameStats {
+    /// Returns a new value combining `self` and `other`: every time is the
+    /// sum of both inputs and `full_display_list` is set if either had it set.
+    pub fn merge(&self, other: &FullFrameStats) -> Self {
+        let gecko_display_list_time =
+            self.gecko_display_list_time + other.gecko_display_list_time;
+        let wr_display_list_time = self.wr_display_list_time + other.wr_display_list_time;
+        let scene_build_time = self.scene_build_time + other.scene_build_time;
+        let frame_build_time = self.frame_build_time + other.frame_build_time;
+
+        FullFrameStats {
+            full_display_list: self.full_display_list || other.full_display_list,
+            gecko_display_list_time,
+            wr_display_list_time,
+            scene_build_time,
+            frame_build_time,
+        }
+    }
+
+    /// Returns the sum of all four phase times.
+    pub fn total(&self) -> f64 {
+        let display_list_time = self.gecko_display_list_time + self.wr_display_list_time;
+        display_list_time + self.scene_build_time + self.frame_build_time
+    }
+}
+
+/// Some basic statistics about the rendered scene, used in Gecko, as
+/// well as in wrench reftests to ensure that tests are batching and/or
+/// allocating on render targets as we expect them to.
+///
+/// The struct is `#[repr(C)]`, so the field order is part of its layout.
+#[repr(C)]
+#[derive(Debug, Default)]
+pub struct RendererStats {
+    pub total_draw_calls: usize,
+    /// Number of alpha render targets drawn.
+    pub alpha_target_count: usize,
+    /// Number of color targets drawn; picture cache targets and the main
+    /// framebuffer are counted here as well.
+    pub color_target_count: usize,
+    pub texture_upload_mb: f64,
+    pub resource_upload_time: f64,
+    pub gpu_cache_upload_time: f64,
+    // The following five fields are copied from a `FullFrameStats` by `merge`.
+    pub gecko_display_list_time: f64,
+    pub wr_display_list_time: f64,
+    pub scene_build_time: f64,
+    pub frame_build_time: f64,
+    pub full_display_list: bool,
+    /// Set to `true` by `merge`.
+    pub full_paint: bool,
+}
+
+impl RendererStats {
+    /// Overwrites the display list, scene build and frame build fields with
+    /// the values from `stats` and marks this as a full paint. The remaining
+    /// fields are left untouched.
+    pub fn merge(&mut self, stats: &FullFrameStats) {
+        let FullFrameStats {
+            full_display_list,
+            gecko_display_list_time,
+            wr_display_list_time,
+            scene_build_time,
+            frame_build_time,
+        } = *stats;
+
+        self.full_paint = true;
+        self.full_display_list = full_display_list;
+        self.gecko_display_list_time = gecko_display_list_time;
+        self.wr_display_list_time = wr_display_list_time;
+        self.scene_build_time = scene_build_time;
+        self.frame_build_time = frame_build_time;
+    }
+}
+
+/// Return type from render(), which contains some repr(C) statistics as well as
+/// some non-repr(C) data.
+#[derive(Debug, Default)]
+pub struct RenderResults {
+    /// Statistics about the frame that was rendered.
+    pub stats: RendererStats,
+
+    /// A list of the device dirty rects that were updated
+    /// this frame.
+    /// TODO(gw): This is an initial interface, likely to change in future.
+    /// TODO(gw): The dirty rects here are currently only useful when scrolling
+    ///           is not occurring. They are still correct in the case of
+    ///           scrolling, but will be very large (until we expose proper
+    ///           OS compositor support where the dirty rects apply to a
+    ///           specific picture cache slice / OS compositor surface).
+    pub dirty_rects: Vec<DeviceIntRect>,
+
+    /// Information about the state of picture cache tiles. This is only
+    /// allocated and stored if config.testing is true (such as wrench).
+    /// It is moved out of the frame's composite state when the frame is
+    /// composited.
+    pub picture_cache_debug: PictureCacheDebugInfo,
+}
+
+#[cfg(any(feature = "capture", feature = "replay"))]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct PlainTexture {
+    data: String, // path of the raw texel dump, relative to the capture's resource root
+    size: DeviceIntSize, // texture dimensions
+    format: ImageFormat,
+    filter: TextureFilter,
+    has_depth: bool, // `Texture::supports_depth()` when written by `save_texture`
+    category: Option<TextureCacheCategory>, // `None` for the GPU cache and external textures
+}
+
+
+#[cfg(any(feature = "capture", feature = "replay"))]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct PlainRenderer {
+    device_size: Option<DeviceIntSize>, // `Renderer::device_size` at capture time
+    gpu_cache: PlainTexture, // dump of the GPU cache texture
+    gpu_cache_frame_id: FrameId,
+    textures: FastHashMap<CacheTextureId, PlainTexture>, // dumps of the texture cache entries, by id
+}
+
+#[cfg(any(feature = "capture", feature = "replay"))]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct PlainExternalResources {
+    images: Vec<ExternalCaptureImage> // the deferred external images passed to `save_capture`
+}
+
+#[cfg(feature = "replay")]
+enum CapturedExternalImageData {
+    NativeTexture(gl::GLuint), // GL texture id, served as `ExternalImageSource::NativeTexture`
+    Buffer(Arc<Vec<u8>>), // CPU-side bytes, served as `ExternalImageSource::RawData`
+}
+
+#[cfg(feature = "replay")]
+struct DummyExternalImageHandler {
+    data: FastHashMap<(ExternalImageId, u8), (CapturedExternalImageData, TexelRect)>, // (id, channel) -> (data, uv)
+}
+
+#[cfg(feature = "replay")]
+impl ExternalImageHandler for DummyExternalImageHandler {
+    /// Serves the captured image stored for `(key, channel_index)`; the map lookup panics if absent.
+    fn lock(&mut self, key: ExternalImageId, channel_index: u8) -> ExternalImage {
+        let (captured, texel_rect) = &self.data[&(key, channel_index)];
+        let source = match captured {
+            CapturedExternalImageData::NativeTexture(gl_id) => ExternalImageSource::NativeTexture(*gl_id),
+            CapturedExternalImageData::Buffer(bytes) => ExternalImageSource::RawData(&bytes[..]),
+        };
+        ExternalImage { uv: *texel_rect, source }
+    }
+    /// Nothing to release: `lock` only lends out data that stays in `self.data`.
+    fn unlock(&mut self, _key: ExternalImageId, _channel_index: u8) {}
+}
+
+#[derive(Default)]
+pub struct PipelineInfo {
+    pub epochs: FastHashMap<(PipelineId, DocumentId), Epoch>, // one epoch per (pipeline, document) pair
+    pub removed_pipelines: Vec<(PipelineId, DocumentId)>, // NOTE(review): presumably removed since the last report — confirm
+}
+
+impl Renderer {
+    /// Reads back `texture` through the read FBO, writes its texels to
+    /// `textures/{name}.raw` under `root` and returns the `PlainTexture` record
+    /// describing the dump. With the `png` feature a PNG preview is saved too.
+    #[cfg(feature = "capture")]
+    fn save_texture(
+        texture: &Texture, category: Option<TextureCacheCategory>, name: &str, root: &PathBuf, device: &mut Device
+    ) -> PlainTexture {
+        use std::fs;
+        use std::io::Write;
+
+        let short_path = format!("textures/{}.raw", name);
+        let read_format = texture.get_format();
+        let bytes_per_pixel = read_format.bytes_per_pixel();
+        let rect_size = texture.get_dimensions();
+
+        let mut file = fs::File::create(root.join(&short_path))
+            .expect(&format!("Unable to create {}", short_path));
+        let bytes_per_texture = (rect_size.width * rect_size.height * bytes_per_pixel) as usize;
+        let mut data = vec![0; bytes_per_texture];
+
+        //TODO: instead of reading from an FBO with `read_pixels*`, we could
+        // read from textures directly with `get_tex_image*`.
+        let rect = device_size_as_framebuffer_size(rect_size).into();
+        device.attach_read_texture(texture);
+        // Read back first: for non-RGBAF32 formats the PNG below is encoded from `data`.
+        device.read_pixels_into(rect, read_format, &mut data);
+        #[cfg(feature = "png")]
+        {
+            let mut png_data;
+            let (data_ref, format) = match read_format {
+                ImageFormat::RGBAF32 => {
+                    png_data = vec![0; (rect_size.width * rect_size.height * 4) as usize];
+                    device.read_pixels_into(rect, ImageFormat::RGBA8, &mut png_data);
+                    (&png_data, ImageFormat::RGBA8)
+                }
+                fm => (&data, fm),
+            };
+            CaptureConfig::save_png(
+                root.join(format!("textures/{}-{}.png", name, 0)),
+                rect_size, format,
+                None,
+                data_ref,
+            );
+        }
+        file.write_all(&data).unwrap();
+
+        PlainTexture {
+            data: short_path,
+            size: rect_size,
+            format: read_format,
+            filter: texture.get_filter(),
+            has_depth: texture.supports_depth(),
+            category,
+        }
+    }
+
+    /// Rebuilds a GPU texture from a capture: loads the raw texel file named by
+    /// `plain.data` (relative to `root`), creates a texture with the recorded size,
+    /// format and filter, and uploads the texels. Returns the texture and its bytes.
+    #[cfg(feature = "replay")]
+    fn load_texture(
+        target: ImageBufferKind,
+        plain: &PlainTexture,
+        rt_info: Option<RenderTargetInfo>,
+        root: &PathBuf,
+        device: &mut Device
+    ) -> (Texture, Vec<u8>)
+    {
+        use std::fs::File;
+        use std::io::Read;
+
+        // Read the entire dump; the bytes are handed back to the caller as well.
+        let mut texels = Vec::new();
+        let mut raw_file = File::open(root.join(&plain.data))
+            .expect(&format!("Unable to open texture at {}", plain.data));
+        raw_file.read_to_end(&mut texels).unwrap();
+
+        let (width, height) = (plain.size.width, plain.size.height);
+        let texture = device.create_texture(
+            target, plain.format, width, height, plain.filter, rt_info,
+        );
+        // Upload right away so the returned texture already holds the captured texels.
+        device.upload_texture_immediate(&texture, &texels);
+
+        (texture, texels)
+    }
+
+    /// Saves the renderer side of a capture under `config.resource_root()`: the
+    /// deferred external images (with `CaptureBits::EXTERNAL_RESOURCES`) and the GPU
+    /// cache plus texture cache dumps (with `CaptureBits::FRAME`), then profiler stats.
+    #[cfg(feature = "capture")]
+    fn save_capture(
+        &mut self,
+        config: CaptureConfig,
+        deferred_images: Vec<ExternalCaptureImage>,
+    ) {
+        use std::fs;
+        use std::io::Write;
+        use api::ExternalImageData;
+        use crate::render_api::CaptureBits;
+
+        let root = config.resource_root();
+        self.device.begin_frame();
+        let _gm = self.gpu_profiler.start_marker("read GPU data");
+        self.device.bind_read_target_impl(self.read_fbo, DeviceIntPoint::zero());
+
+        if config.bits.contains(CaptureBits::EXTERNAL_RESOURCES) && !deferred_images.is_empty() {
+            info!("saving external images");
+            let mut arc_map = FastHashMap::<*const u8, String>::default(); // buffer pointer -> file it was saved to
+            let mut tex_map = FastHashMap::<u32, String>::default(); // GL texture id -> file it was saved to
+            let handler = self.external_image_handler
+                .as_mut()
+                .expect("Unable to lock the external image handler!");
+            for def in &deferred_images {
+                info!("\t{}", def.short_path);
+                let ExternalImageData { id, channel_index, image_type } = def.external;
+                // The image rendering parameter is irrelevant because no filtering happens during capturing.
+                let ext_image = handler.lock(id, channel_index);
+                let (data, short_path) = match ext_image.source {
+                    ExternalImageSource::RawData(data) => {
+                        let arc_id = arc_map.len() + 1; // 1-based index used in the file name
+                        match arc_map.entry(data.as_ptr()) {
+                            Entry::Occupied(e) => {
+                                (None, e.get().clone()) // already saved: reuse the path, write nothing
+                            }
+                            Entry::Vacant(e) => {
+                                let short_path = format!("externals/d{}.raw", arc_id);
+                                (Some(data.to_vec()), e.insert(short_path).clone())
+                            }
+                        }
+                    }
+                    ExternalImageSource::NativeTexture(gl_id) => {
+                        let tex_id = tex_map.len() + 1; // 1-based index used in the file name
+                        match tex_map.entry(gl_id) {
+                            Entry::Occupied(e) => {
+                                (None, e.get().clone()) // already saved: reuse the path, write nothing
+                            }
+                            Entry::Vacant(e) => {
+                                let target = match image_type {
+                                    ExternalImageType::TextureHandle(target) => target,
+                                    ExternalImageType::Buffer => unreachable!(),
+                                };
+                                info!("\t\tnative texture of target {:?}", target);
+                                self.device.attach_read_texture_external(gl_id, target);
+                                let data = self.device.read_pixels(&def.descriptor);
+                                let short_path = format!("externals/t{}.raw", tex_id);
+                                (Some(data), e.insert(short_path).clone())
+                            }
+                        }
+                    }
+                    ExternalImageSource::Invalid => {
+                        info!("\t\tinvalid source!");
+                        (None, String::new()) // recorded with an empty data path
+                    }
+                };
+                if let Some(bytes) = data {
+                    fs::File::create(root.join(&short_path))
+                        .expect(&format!("Unable to create {}", short_path))
+                        .write_all(&bytes)
+                        .unwrap();
+                    #[cfg(feature = "png")]
+                    CaptureConfig::save_png(
+                        root.join(&short_path).with_extension("png"),
+                        def.descriptor.size,
+                        def.descriptor.format,
+                        def.descriptor.stride,
+                        &bytes,
+                    );
+                }
+                let plain = PlainExternalImage {
+                    data: short_path,
+                    external: def.external,
+                    uv: ext_image.uv,
+                };
+                config.serialize_for_resource(&plain, &def.short_path);
+            }
+            for def in &deferred_images { // every image locked above is unlocked again
+                handler.unlock(def.external.id, def.external.channel_index);
+            }
+            let plain_external = PlainExternalResources {
+                images: deferred_images,
+            };
+            config.serialize_for_resource(&plain_external, "external_resources");
+        }
+
+        if config.bits.contains(CaptureBits::FRAME) {
+            let path_textures = root.join("textures");
+            if !path_textures.is_dir() {
+                fs::create_dir(&path_textures).unwrap();
+            }
+            info!("saving GPU cache");
+            self.update_gpu_cache(); // flush pending updates
+            let mut plain_self = PlainRenderer {
+                device_size: self.device_size,
+                gpu_cache: Self::save_texture(
+                    self.gpu_cache_texture.get_texture(),
+                    None, "gpu", &root, &mut self.device,
+                ),
+                gpu_cache_frame_id: self.gpu_cache_frame_id,
+                textures: FastHashMap::default(),
+            };
+
+            info!("saving cached textures");
+            for (id, item) in &self.texture_resolver.texture_cache_map {
+                let file_name = format!("cache-{}", plain_self.textures.len() + 1); // 1-based, in iteration order
+                info!("\t{}", file_name);
+                let plain = Self::save_texture(&item.texture, Some(item.category), &file_name, &root, &mut self.device);
+                plain_self.textures.insert(*id, plain);
+            }
+            config.serialize_for_resource(&plain_self, "renderer");
+        }
+
+        self.device.reset_read_target();
+        self.device.end_frame();
+
+        let mut stats_file = fs::File::create(config.root.join("profiler-stats.txt"))
+            .expect(&format!("Unable to create profiler-stats.txt"));
+        if self.debug_flags.intersects(DebugFlags::PROFILER_DBG | DebugFlags::PROFILER_CAPTURE) {
+            self.profiler.dump_stats(&mut stats_file).unwrap();
+        } else {
+            writeln!(stats_file, "Turn on PROFILER_DBG or PROFILER_CAPTURE to get stats here!").unwrap();
+        }
+
+        info!("done.");
+    }
+
+    /// Restores renderer state from a capture: registers the captured external
+    /// images with a `DummyExternalImageHandler` and, when a `renderer` resource is
+    /// present, reloads the texture cache and the GPU cache texture from disk.
+    #[cfg(feature = "replay")]
+    fn load_capture(
+        &mut self,
+        config: CaptureConfig,
+        plain_externals: Vec<PlainExternalImage>,
+    ) {
+        use std::{fs::File, io::Read};
+
+        info!("loading external buffer-backed images");
+        assert!(self.texture_resolver.external_images.is_empty());
+        let mut raw_map = FastHashMap::<String, Arc<Vec<u8>>>::default();
+        let mut image_handler = DummyExternalImageHandler {
+            data: FastHashMap::default(),
+        };
+        let root = config.resource_root();
+
+        // Note: this is a `SCENE` level population of the external image handlers
+        // It would put both external buffers and texture into the map.
+        // But latter are going to be overwritten later in this function
+        // if we are in the `FRAME` level.
+        for plain_ext in plain_externals {
+            let data = match raw_map.entry(plain_ext.data) {
+                Entry::Occupied(e) => e.get().clone(),
+                Entry::Vacant(e) => {
+                    let mut buffer = Vec::new();
+                    File::open(root.join(e.key()))
+                        .expect(&format!("Unable to open {}", e.key()))
+                        .read_to_end(&mut buffer)
+                        .unwrap();
+                    e.insert(Arc::new(buffer)).clone()
+                }
+            };
+            let ext = plain_ext.external;
+            let value = (CapturedExternalImageData::Buffer(data), plain_ext.uv);
+            image_handler.data.insert((ext.id, ext.channel_index), value);
+        }
+
+        if let Some(external_resources) = config.deserialize_for_resource::<PlainExternalResources, _>("external_resources") {
+            info!("loading external texture-backed images");
+            let mut native_map = FastHashMap::<String, gl::GLuint>::default();
+            for ExternalCaptureImage { short_path, external, descriptor } in external_resources.images {
+                let target = match external.image_type {
+                    ExternalImageType::TextureHandle(target) => target,
+                    ExternalImageType::Buffer => continue,
+                };
+                let plain_ext = config.deserialize_for_resource::<PlainExternalImage, _>(&short_path)
+                    .expect(&format!("Unable to read {}.ron", short_path));
+                let key = (external.id, external.channel_index);
+
+                let tid = match native_map.entry(plain_ext.data) {
+                    Entry::Occupied(e) => e.get().clone(),
+                    Entry::Vacant(e) => {
+                        let plain_tex = PlainTexture {
+                            data: e.key().clone(),
+                            size: descriptor.size,
+                            format: descriptor.format,
+                            filter: TextureFilter::Linear,
+                            has_depth: false,
+                            category: None,
+                        };
+                        let t = Self::load_texture(
+                            target,
+                            &plain_tex,
+                            None,
+                            &root,
+                            &mut self.device
+                        );
+                        let extex = t.0.into_external();
+                        self.owned_external_images.insert(key, extex.clone());
+                        e.insert(extex.internal_id()).clone()
+                    }
+                };
+
+                let value = (CapturedExternalImageData::NativeTexture(tid), plain_ext.uv);
+                image_handler.data.insert(key, value);
+            }
+        }
+
+        self.device.begin_frame();
+        self.gpu_cache_texture.remove_texture(&mut self.device);
+
+        if let Some(renderer) = config.deserialize_for_resource::<PlainRenderer, _>("renderer") {
+            info!("loading cached textures");
+            self.device_size = renderer.device_size;
+            for (_id, item) in self.texture_resolver.texture_cache_map.drain() {
+                self.device.delete_texture(item.texture);
+            }
+            for (id, texture) in renderer.textures {
+                info!("\t{}", texture.data);
+                let target = ImageBufferKind::Texture2D;
+                let t = Self::load_texture(
+                    target,
+                    &texture,
+                    Some(RenderTargetInfo { has_depth: texture.has_depth }),
+                    &root,
+                    &mut self.device
+                );
+                self.texture_resolver.texture_cache_map.insert(id, CacheTexture {
+                    texture: t.0,
+                    category: texture.category.unwrap_or(TextureCacheCategory::Standalone),
+                });
+            }
+
+            info!("loading gpu cache");
+            let (t, gpu_cache_data) = Self::load_texture(
+                ImageBufferKind::Texture2D,
+                &renderer.gpu_cache,
+                Some(RenderTargetInfo { has_depth: false }),
+                &root,
+                &mut self.device,
+            );
+            self.gpu_cache_texture.load_from_data(t, gpu_cache_data);
+            self.gpu_cache_frame_id = renderer.gpu_cache_frame_id;
+        } else {
+            info!("loading cached textures");
+            // The frame was already begun above, so only the stale cache textures are dropped here.
+            for (_id, item) in self.texture_resolver.texture_cache_map.drain() {
+                self.device.delete_texture(item.texture);
+            }
+        }
+        self.device.end_frame();
+        self.external_image_handler = Some(Box::new(image_handler) as Box<_>);
+        info!("done.");
+    }
+}
+
+#[derive(Clone, Copy, PartialEq)]
+enum FramebufferKind {
+    Main, // NOTE(review): presumably the renderer's main output framebuffer — confirm at use sites
+    Other,
+}
+
+/// Returns `true` when a debug flag asks for batches of this `kind` to be skipped.
+fn should_skip_batch(kind: &BatchKind, flags: DebugFlags) -> bool {
+    // Only text runs and linear-gradient brushes have a disabling flag.
+    let disabling_flag = match kind {
+        BatchKind::TextRun(..) => DebugFlags::DISABLE_TEXT_PRIMS,
+        BatchKind::Brush(BrushBatchKind::LinearGradient) => DebugFlags::DISABLE_GRADIENT_PRIMS,
+        _ => return false,
+    };
+
+    flags.contains(disabling_flag)
+}
+
+impl CompositeState {
+    /// Registers every picture cache surface with the client-provided native
+    /// (OS) compositor, then asks it to start compositing the frame.
+    fn composite_native(
+        &self,
+        clear_color: ColorF,
+        dirty_rects: &[DeviceIntRect],
+        compositor: &mut dyn Compositor,
+    ) {
+        // Surfaces are added in list order, which implicitly defines their
+        // z-order in the visual tree. The transform and clip rect passed here
+        // apply to all tiles within the surface.
+        for surface in self.descriptor.surfaces.iter() {
+            let native_id = surface.surface_id.expect("bug: no native surface allocated");
+            let clip_rect = surface.clip_rect.to_i32();
+            compositor.add_surface(
+                native_id, surface.transform, clip_rect, surface.image_rendering,
+            );
+        }
+
+        compositor.start_compositing(clear_color, dirty_rects, &[]);
+    }
+}
+
+#[cfg(test)] // only compile the unit-test module for test builds
+mod tests {
+    #[test]
+    fn test_buffer_damage_tracker() {
+        use super::BufferDamageTracker;
+        use api::units::{DevicePoint, DeviceRect, DeviceSize};
+        let mut tracker = BufferDamageTracker::default();
+        assert_eq!(tracker.get_damage_rect(0), None);
+        assert_eq!(tracker.get_damage_rect(1), Some(DeviceRect::zero()));
+        assert_eq!(tracker.get_damage_rect(2), Some(DeviceRect::zero()));
+        assert_eq!(tracker.get_damage_rect(3), Some(DeviceRect::zero()));
+        assert_eq!(tracker.get_damage_rect(4), None);
+
+        let damage1 = DeviceRect::from_origin_and_size(DevicePoint::new(10.0, 10.0), DeviceSize::new(10.0, 10.0));
+        let damage2 = DeviceRect::from_origin_and_size(DevicePoint::new(20.0, 20.0), DeviceSize::new(10.0, 10.0));
+        let combined = damage1.union(&damage2);
+
+        tracker.push_dirty_rect(&damage1);
+        assert_eq!(tracker.get_damage_rect(0), None);
+        assert_eq!(tracker.get_damage_rect(1), Some(DeviceRect::zero()));
+        assert_eq!(tracker.get_damage_rect(2), Some(damage1));
+        assert_eq!(tracker.get_damage_rect(3), Some(damage1));
+        assert_eq!(tracker.get_damage_rect(4), None);
+
+        tracker.push_dirty_rect(&damage2);
+        assert_eq!(tracker.get_damage_rect(0), None);
+        assert_eq!(tracker.get_damage_rect(1), Some(DeviceRect::zero()));
+        assert_eq!(tracker.get_damage_rect(2), Some(damage2));
+        assert_eq!(tracker.get_damage_rect(3), Some(combined));
+        assert_eq!(tracker.get_damage_rect(4), None);
+    }
+}
diff --git a/gfx/wr/webrender/src/renderer/shade.rs b/gfx/wr/webrender/src/renderer/shade.rs
new file mode 100644
index 0000000000..7476d2aa90
--- /dev/null
+++ b/gfx/wr/webrender/src/renderer/shade.rs
@@ -0,0 +1,1372 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{ImageBufferKind, units::DeviceSize};
+use crate::batch::{BatchKey, BatchKind, BrushBatchKind, BatchFeatures};
+use crate::composite::{CompositeFeatures, CompositeSurfaceFormat};
+use crate::device::{Device, Program, ShaderError};
+use euclid::default::Transform3D;
+use glyph_rasterizer::GlyphFormat;
+use crate::renderer::{
+    desc,
+    BlendMode, DebugFlags, RendererError, WebRenderOptions,
+    TextureSampler, VertexArrayKind, ShaderPrecacheFlags,
+};
+use crate::profiler::{self, TransactionProfile, ns_to_ms};
+
+use gleam::gl::GlType;
+use time::precise_time_ns;
+
+use std::cell::RefCell;
+use std::rc::Rc;
+
+use webrender_build::shader::{ShaderFeatures, ShaderFeatureFlags, get_shader_features};
+
+/// Which extension version to use for texture external support.
+#[derive(Clone, Copy, Debug, PartialEq)]
+enum TextureExternalVersion {
+    /// GL_OES_EGL_image_external_essl3 (Compatible with ESSL 3.0 and
+    /// later shaders, but not supported on all GLES 3 devices.)
+    ESSL3,
+    /// GL_OES_EGL_image_external (Compatible with ESSL 1.0 shaders)
+    ESSL1,
+}
+
+fn get_feature_string(kind: ImageBufferKind, texture_external_version: TextureExternalVersion) -> &'static str {
+    match (kind, texture_external_version) { // the extension version only matters for `TextureExternal`
+        (ImageBufferKind::Texture2D, _) => "TEXTURE_2D",
+        (ImageBufferKind::TextureRect, _) => "TEXTURE_RECT",
+        (ImageBufferKind::TextureExternal, TextureExternalVersion::ESSL3) => "TEXTURE_EXTERNAL",
+        (ImageBufferKind::TextureExternal, TextureExternalVersion::ESSL1) => "TEXTURE_EXTERNAL_ESSL1",
+    }
+}
+
+fn has_platform_support(kind: ImageBufferKind, gl_type: &GlType) -> bool {
+    match (kind, gl_type) { // Texture2D: always; TextureRect: `Gl` only; TextureExternal: `Gles` only
+        (ImageBufferKind::Texture2D, _) => true,
+        (ImageBufferKind::TextureRect, &GlType::Gles) => false,
+        (ImageBufferKind::TextureRect, &GlType::Gl) => true,
+        (ImageBufferKind::TextureExternal, &GlType::Gles) => true,
+        (ImageBufferKind::TextureExternal, &GlType::Gl) => false,
+    }
+}
+
+pub const IMAGE_BUFFER_KINDS: [ImageBufferKind; 3] = [ // the three kinds matched by `get_feature_string`
+    ImageBufferKind::Texture2D,
+    ImageBufferKind::TextureRect,
+    ImageBufferKind::TextureExternal,
+];
+
+const ADVANCED_BLEND_FEATURE: &str = "ADVANCED_BLEND"; // NOTE(review): these look like shader feature names — confirm
+const ALPHA_FEATURE: &str = "ALPHA_PASS";
+const DEBUG_OVERDRAW_FEATURE: &str = "DEBUG_OVERDRAW";
+const DITHERING_FEATURE: &str = "DITHERING";
+const DUAL_SOURCE_FEATURE: &str = "DUAL_SOURCE_BLENDING";
+const FAST_PATH_FEATURE: &str = "FAST_PATH";
+
+pub(crate) enum ShaderKind {
+    Primitive,
+    Cache(VertexArrayKind), // carries the vertex format used when the program is linked
+    ClipCache(VertexArrayKind), // as `Cache`, but created through `create_clip_shader`
+    Brush,
+    Text,
+    #[allow(dead_code)] // NOTE(review): no construction site visible in this chunk — confirm still needed
+    VectorStencil,
+    #[allow(dead_code)] // NOTE(review): no construction site visible in this chunk — confirm still needed
+    VectorCover,
+    #[allow(dead_code)] // NOTE(review): no construction site visible in this chunk — confirm still needed
+    Resolve,
+    Composite,
+    Clear,
+    Copy,
+}
+
+pub struct LazilyCompiledShader {
+    program: Option<Program>, // `None` until `get_internal` creates the program
+    name: &'static str,
+    kind: ShaderKind,
+    cached_projection: Transform3D<f32>, // projection last uploaded by `bind`
+    features: Vec<&'static str>, // sorted by `new`
+}
+
+impl LazilyCompiledShader {
+    /// Builds a lazily compiled shader for `name` with the given feature set.
+    /// Panics if that configuration is missing from `shader_list`; when a
+    /// precache flag is set, `get_internal` is run right away.
+    pub(crate) fn new(
+        kind: ShaderKind,
+        name: &'static str,
+        unsorted_features: &[&'static str],
+        device: &mut Device,
+        precache_flags: ShaderPrecacheFlags,
+        shader_list: &ShaderFeatures,
+        profile: &mut TransactionProfile,
+    ) -> Result<Self, ShaderError> {
+        let mut features = unsorted_features.to_vec();
+        features.sort();
+
+        // Ensure this shader config is in the available shader list so that we get
+        // alerted if the list gets out-of-date when shaders or features are added.
+        let config = features.join(",");
+        let is_listed = match shader_list.get(name) {
+            Some(configs) => configs.contains(&config),
+            None => false,
+        };
+        assert!(is_listed, "shader \"{}\" with features \"{}\" not in available shader list", name, config);
+
+        let mut shader = LazilyCompiledShader {
+            program: None,
+            name,
+            kind,
+            // Not really the default state, but there is no chance an actual
+            // projection passed to `bind` would accidentally match the identity.
+            cached_projection: Transform3D::identity(),
+            features,
+        };
+
+        // Compile eagerly only when the caller asked for some form of precaching.
+        let wants_precache = precache_flags
+            .intersects(ShaderPrecacheFlags::ASYNC_COMPILE | ShaderPrecacheFlags::FULL_COMPILE);
+        if wants_precache {
+            let t0 = precise_time_ns();
+            shader.get_internal(device, precache_flags, profile)?;
+            let elapsed_ms = (precise_time_ns() - t0) as f64 / 1000000.0;
+            debug!("[C: {:.1} ms ] Precache {} {:?}", elapsed_ms, name, unsorted_features);
+        }
+
+        Ok(shader)
+    }
+
+    /// Makes this shader current on `device`, compiling and linking it first if
+    /// needed. A shader error is recorded in `renderer_errors` and nothing is
+    /// bound. The projection is only re-uploaded when it differs from the cached one.
+    pub fn bind(
+        &mut self,
+        device: &mut Device,
+        projection: &Transform3D<f32>,
+        texture_size: Option<DeviceSize>,
+        renderer_errors: &mut Vec<RendererError>,
+        profile: &mut TransactionProfile,
+    ) {
+        let projection_changed = self.cached_projection != *projection;
+        let program = match self.get_internal(device, ShaderPrecacheFlags::FULL_COMPILE, profile) {
+            Ok(program) => program,
+            Err(e) => return renderer_errors.push(RendererError::from(e)),
+        };
+
+        device.bind_program(program);
+        if let Some(size) = texture_size {
+            device.set_shader_texture_size(program, size);
+        }
+        if projection_changed {
+            device.set_uniforms(program, projection);
+            self.cached_projection = *projection;
+        }
+    }
+
+    /// Returns the GL program behind this shader, creating it on first use.
+    ///
+    /// The program object is created the first time this is called. Linking,
+    /// together with the sampler binding that follows it, only happens when
+    /// `precache_flags` contains `FULL_COMPILE` and the program is not yet
+    /// initialized. The time spent in either step is added to
+    /// `SHADER_BUILD_TIME` in `profile`.
+    ///
+    /// Creation and link errors are returned to the caller.
+    fn get_internal(
+        &mut self,
+        device: &mut Device,
+        precache_flags: ShaderPrecacheFlags,
+        profile: &mut TransactionProfile,
+    ) -> Result<&mut Program, ShaderError> {
+        if self.program.is_none() {
+            let create_start = precise_time_ns();
+            // Clip-cache shaders are created by `create_clip_shader`; every
+            // other kind is created by `create_prim_shader`.
+            let created = match self.kind {
+                ShaderKind::ClipCache(..) => {
+                    create_clip_shader(
+                        self.name,
+                        device,
+                        &self.features,
+                    )
+                }
+                ShaderKind::Primitive |
+                ShaderKind::Brush |
+                ShaderKind::Text |
+                ShaderKind::Cache(..) |
+                ShaderKind::VectorStencil |
+                ShaderKind::VectorCover |
+                ShaderKind::Resolve |
+                ShaderKind::Composite |
+                ShaderKind::Clear |
+                ShaderKind::Copy => {
+                    create_prim_shader(
+                        self.name,
+                        device,
+                        &self.features,
+                    )
+                }
+            };
+            self.program = Some(created?);
+
+            let create_end = precise_time_ns();
+            profile.add(profiler::SHADER_BUILD_TIME, ns_to_ms(create_end - create_start));
+        }
+
+        let program = self.program.as_mut().unwrap();
+
+        // Nothing more to do unless a full compile was requested and the
+        // program still has to be linked.
+        if !precache_flags.contains(ShaderPrecacheFlags::FULL_COMPILE) || program.is_initialized() {
+            return Ok(program);
+        }
+
+        let link_start = precise_time_ns();
+
+        // The vertex layout follows from the shader kind; cache and clip-cache
+        // shaders carry their own format.
+        let vertex_format = match self.kind {
+            ShaderKind::Cache(format) | ShaderKind::ClipCache(format) => format,
+            ShaderKind::Primitive |
+            ShaderKind::Brush |
+            ShaderKind::Text => VertexArrayKind::Primitive,
+            ShaderKind::VectorStencil => VertexArrayKind::VectorStencil,
+            ShaderKind::VectorCover => VertexArrayKind::VectorCover,
+            ShaderKind::Resolve => VertexArrayKind::Resolve,
+            ShaderKind::Composite => VertexArrayKind::Composite,
+            ShaderKind::Clear => VertexArrayKind::Clear,
+            ShaderKind::Copy => VertexArrayKind::Copy,
+        };
+
+        let vertex_descriptor = match vertex_format {
+            VertexArrayKind::Primitive => &desc::PRIM_INSTANCES,
+            VertexArrayKind::LineDecoration => &desc::LINE,
+            VertexArrayKind::FastLinearGradient => &desc::FAST_LINEAR_GRADIENT,
+            VertexArrayKind::LinearGradient => &desc::LINEAR_GRADIENT,
+            VertexArrayKind::RadialGradient => &desc::RADIAL_GRADIENT,
+            VertexArrayKind::ConicGradient => &desc::CONIC_GRADIENT,
+            VertexArrayKind::Blur => &desc::BLUR,
+            VertexArrayKind::ClipImage => &desc::CLIP_IMAGE,
+            VertexArrayKind::ClipRect => &desc::CLIP_RECT,
+            VertexArrayKind::ClipBoxShadow => &desc::CLIP_BOX_SHADOW,
+            VertexArrayKind::VectorStencil => &desc::VECTOR_STENCIL,
+            VertexArrayKind::VectorCover => &desc::VECTOR_COVER,
+            VertexArrayKind::Border => &desc::BORDER,
+            VertexArrayKind::Scale => &desc::SCALE,
+            VertexArrayKind::Resolve => &desc::RESOLVE,
+            VertexArrayKind::SvgFilter => &desc::SVG_FILTER,
+            VertexArrayKind::Composite => &desc::COMPOSITE,
+            VertexArrayKind::Clear => &desc::CLEAR,
+            VertexArrayKind::Copy => &desc::COPY,
+        };
+
+        device.link_program(program, vertex_descriptor)?;
+        device.bind_program(program);
+
+        // Bind the sampler names to their `TextureSampler` slots right after linking.
+        // Clip-cache shaders get a reduced set (no sColor1/sColor2, sDither, sClipMask).
+        match self.kind {
+            ShaderKind::ClipCache(..) => {
+                device.bind_shader_samplers(
+                    &program,
+                    &[
+                        ("sColor0", TextureSampler::Color0),
+                        ("sTransformPalette", TextureSampler::TransformPalette),
+                        ("sRenderTasks", TextureSampler::RenderTasks),
+                        ("sGpuCache", TextureSampler::GpuCache),
+                        ("sPrimitiveHeadersF", TextureSampler::PrimitiveHeadersF),
+                        ("sPrimitiveHeadersI", TextureSampler::PrimitiveHeadersI),
+                        ("sGpuBuffer", TextureSampler::GpuBuffer),
+                    ],
+                );
+            }
+            _ => {
+                device.bind_shader_samplers(
+                    &program,
+                    &[
+                        ("sColor0", TextureSampler::Color0),
+                        ("sColor1", TextureSampler::Color1),
+                        ("sColor2", TextureSampler::Color2),
+                        ("sDither", TextureSampler::Dither),
+                        ("sTransformPalette", TextureSampler::TransformPalette),
+                        ("sRenderTasks", TextureSampler::RenderTasks),
+                        ("sGpuCache", TextureSampler::GpuCache),
+                        ("sPrimitiveHeadersF", TextureSampler::PrimitiveHeadersF),
+                        ("sPrimitiveHeadersI", TextureSampler::PrimitiveHeadersI),
+                        ("sClipMask", TextureSampler::ClipMask),
+                        ("sGpuBuffer", TextureSampler::GpuBuffer),
+                    ],
+                );
+            }
+        }
+
+        let link_end = precise_time_ns();
+        profile.add(profiler::SHADER_BUILD_TIME, ns_to_ms(link_end - link_start));
+
+        Ok(program)
+    }
+
+    /// Consumes the shader; if it holds a program, deletes it through `device`.
+    fn deinit(self, device: &mut Device) {
+        // `program` is an `Option`; when it is `None` there is nothing to delete.
+        if let Some(held) = self.program { device.delete_program(held); }
+    }
+}
+
+/// A brush shader bundles the variants of a single brush program.
+///
+/// * `opaque`: used for completely opaque primitives, or inside
+///   segments of partially opaque primitives. Assumes no need
+///   for clip masks, AA etc.
+/// * `alpha`: used for brush primitives in the alpha pass. Assumes
+///   that AA should be applied along the primitive edge, and also
+///   that a clip mask is present.
+///
+/// The two optional variants are only built when requested in `new`;
+/// `get` panics if one is needed but was not built.
+struct BrushShader {
+    opaque: LazilyCompiledShader,
+    alpha: LazilyCompiledShader,
+    advanced_blend: Option<LazilyCompiledShader>, // selected for `BlendMode::Advanced`
+    dual_source: Option<LazilyCompiledShader>, // selected for the `*DualSource` blend modes
+    debug_overdraw: LazilyCompiledShader, // selected while `DebugFlags::SHOW_OVERDRAW` is set
+}
+
+impl BrushShader {
+    /// Creates the shader variants of the brush program `name`.
+    ///
+    /// `features` is the base feature list shared by all variants. The advanced-blend
+    /// and dual-source variants are only created when `use_advanced_blend` /
+    /// `use_dual_source` are set. Any `LazilyCompiledShader::new` error is returned
+    /// immediately.
+    fn new(
+        name: &'static str,
+        device: &mut Device,
+        features: &[&'static str],
+        precache_flags: ShaderPrecacheFlags,
+        shader_list: &ShaderFeatures,
+        use_advanced_blend: bool,
+        use_dual_source: bool,
+        profile: &mut TransactionProfile,
+    ) -> Result<Self, ShaderError> {
+        // The opaque variant uses the caller's feature list unchanged.
+        let base_features = features.to_vec();
+        let opaque = LazilyCompiledShader::new(
+            ShaderKind::Brush,
+            name,
+            &base_features,
+            device,
+            precache_flags,
+            shader_list,
+            profile,
+        )?;
+
+        // The alpha variant is the base feature list plus `ALPHA_FEATURE`.
+        let mut with_alpha = base_features.clone();
+        with_alpha.push(ALPHA_FEATURE);
+        let alpha = LazilyCompiledShader::new(
+            ShaderKind::Brush,
+            name,
+            &with_alpha,
+            device,
+            precache_flags,
+            shader_list,
+            profile,
+        )?;
+
+        // Both optional variants extend the alpha feature list by one more feature.
+        let advanced_blend = if use_advanced_blend {
+            let mut with_advanced_blend = with_alpha.clone();
+            with_advanced_blend.push(ADVANCED_BLEND_FEATURE);
+            Some(LazilyCompiledShader::new(
+                ShaderKind::Brush,
+                name,
+                &with_advanced_blend,
+                device,
+                precache_flags,
+                shader_list,
+                profile,
+            )?)
+        } else {
+            None
+        };
+
+        let dual_source = if use_dual_source {
+            let mut with_dual_source = with_alpha.clone();
+            with_dual_source.push(DUAL_SOURCE_FEATURE);
+            Some(LazilyCompiledShader::new(
+                ShaderKind::Brush,
+                name,
+                &with_dual_source,
+                device,
+                precache_flags,
+                shader_list,
+                profile,
+            )?)
+        } else {
+            None
+        };
+
+        // The overdraw variant starts again from `features`, without the alpha feature.
+        let mut with_overdraw = features.to_vec();
+        with_overdraw.push(DEBUG_OVERDRAW_FEATURE);
+        let debug_overdraw = LazilyCompiledShader::new(
+            ShaderKind::Brush,
+            name,
+            &with_overdraw,
+            device,
+            precache_flags,
+            shader_list,
+            profile,
+        )?;
+
+        Ok(Self { opaque, alpha, advanced_blend, dual_source, debug_overdraw })
+    }
+
+    /// Picks the variant matching `blend_mode` and `features`.
+    ///
+    /// `DebugFlags::SHOW_OVERDRAW` overrides every other input. Panics when an
+    /// advanced-blend or dual-source variant is requested but was never created.
+    fn get(&mut self, blend_mode: BlendMode, features: BatchFeatures, debug_flags: DebugFlags)
+           -> &mut LazilyCompiledShader {
+        if debug_flags.contains(DebugFlags::SHOW_OVERDRAW) {
+            return &mut self.debug_overdraw;
+        }
+
+        match blend_mode {
+            BlendMode::None => &mut self.opaque,
+            BlendMode::Alpha |
+            BlendMode::PremultipliedAlpha |
+            BlendMode::PremultipliedDestOut |
+            BlendMode::SubpixelWithBgColor |
+            BlendMode::Screen |
+            BlendMode::PlusLighter |
+            BlendMode::Exclusion => {
+                // Without the alpha-pass batch feature these modes use the opaque variant.
+                if features.contains(BatchFeatures::ALPHA_PASS) {
+                    &mut self.alpha
+                } else {
+                    &mut self.opaque
+                }
+            }
+            BlendMode::Advanced(_) => self.advanced_blend
+                .as_mut()
+                .expect("bug: no advanced blend shader loaded"),
+            BlendMode::SubpixelDualSource |
+            BlendMode::MultiplyDualSource => self.dual_source
+                .as_mut()
+                .expect("bug: no dual source shader loaded"),
+        }
+    }
+
+    /// Consumes the brush shader and deinitializes every variant that exists.
+    fn deinit(self, device: &mut Device) {
+        let Self { opaque, alpha, advanced_blend, dual_source, debug_overdraw } = self;
+        opaque.deinit(device);
+        alpha.deinit(device);
+        // The optional variants are visited in order: advanced blend, then dual source.
+        for shader in advanced_blend.into_iter().chain(dual_source) {
+            shader.deinit(device);
+        }
+        debug_overdraw.deinit(device);
+    }
+}
+
+pub struct TextShader { // The variants of one text program; `get` picks among them.
+    simple: LazilyCompiledShader, // Alpha, Subpixel, Bitmap and ColorBitmap glyph formats
+    glyph_transform: LazilyCompiledShader, // TransformedAlpha and TransformedSubpixel glyph formats
+    debug_overdraw: LazilyCompiledShader, // any glyph format while `DebugFlags::SHOW_OVERDRAW` is set
+}
+
+impl TextShader {
+    /// Creates the three variants of the text program `name` on top of `features`.
+    fn new(
+        name: &'static str,
+        device: &mut Device,
+        features: &[&'static str],
+        precache_flags: ShaderPrecacheFlags,
+        shader_list: &ShaderFeatures,
+        profile: &mut TransactionProfile,
+    ) -> Result<Self, ShaderError> {
+        let mut plain_features = features.to_vec();
+        plain_features.extend_from_slice(&["ALPHA_PASS", "TEXTURE_2D"]);
+        let simple = LazilyCompiledShader::new(
+            ShaderKind::Text,
+            name,
+            &plain_features,
+            device,
+            precache_flags,
+            shader_list,
+            profile,
+        )?;
+
+        let mut transform_features = features.to_vec();
+        transform_features.extend_from_slice(&["GLYPH_TRANSFORM", "ALPHA_PASS", "TEXTURE_2D"]);
+        let glyph_transform = LazilyCompiledShader::new(
+            ShaderKind::Text,
+            name,
+            &transform_features,
+            device,
+            precache_flags,
+            shader_list,
+            profile,
+        )?;
+
+        // Unlike the other two, the overdraw variant does not get `ALPHA_PASS`.
+        let mut overdraw_features = features.to_vec();
+        overdraw_features.extend_from_slice(&["DEBUG_OVERDRAW", "TEXTURE_2D"]);
+        let debug_overdraw = LazilyCompiledShader::new(
+            ShaderKind::Text,
+            name,
+            &overdraw_features,
+            device,
+            precache_flags,
+            shader_list,
+            profile,
+        )?;
+
+        Ok(Self { simple, glyph_transform, debug_overdraw })
+    }
+
+    /// Picks the variant for `glyph_format`; `DebugFlags::SHOW_OVERDRAW` overrides it.
+    pub fn get(
+        &mut self,
+        glyph_format: GlyphFormat,
+        debug_flags: DebugFlags,
+    ) -> &mut LazilyCompiledShader {
+        if debug_flags.contains(DebugFlags::SHOW_OVERDRAW) {
+            return &mut self.debug_overdraw;
+        }
+        match glyph_format {
+            GlyphFormat::TransformedAlpha |
+            GlyphFormat::TransformedSubpixel => &mut self.glyph_transform,
+            GlyphFormat::Alpha |
+            GlyphFormat::Subpixel |
+            GlyphFormat::Bitmap |
+            GlyphFormat::ColorBitmap => &mut self.simple,
+        }
+    }
+
+    /// Consumes the text shader and deinitializes all three variants.
+    fn deinit(self, device: &mut Device) {
+        let Self { simple, glyph_transform, debug_overdraw } = self;
+        simple.deinit(device);
+        glyph_transform.deinit(device);
+        debug_overdraw.deinit(device);
+    }
+}
+
+fn create_prim_shader(
+    name: &'static str,
+    device: &mut Device,
+    features: &[&'static str],
+) -> Result<Program, ShaderError> {
+    debug!("PrimShader {}", name);
+    // Thin wrapper: after the debug log line, defer to `device.create_program`.
+    device.create_program(name, features)
+}
+
+fn create_clip_shader(
+    name: &'static str,
+    device: &mut Device,
+    features: &[&'static str],
+) -> Result<Program, ShaderError> {
+    debug!("ClipShader {}", name);
+    // Thin wrapper: after the debug log line, defer to `device.create_program`.
+    device.create_program(name, features)
+}
+
+// NB: If you add a new shader here, make sure to deinitialize it
+// in `Shaders::deinit()` below.
+pub struct Shaders {
+    // These are "cache shaders". These shaders are used to
+    // draw intermediate results to cache targets. The results
+    // of these shaders are then used by the primitive shaders.
+    pub cs_blur_a8: LazilyCompiledShader,
+    pub cs_blur_rgba8: LazilyCompiledShader,
+    pub cs_border_segment: LazilyCompiledShader,
+    pub cs_border_solid: LazilyCompiledShader,
+    pub cs_scale: Vec<Option<LazilyCompiledShader>>, // one slot per `IMAGE_BUFFER_KINDS` entry
+    pub cs_line_decoration: LazilyCompiledShader,
+    pub cs_fast_linear_gradient: LazilyCompiledShader,
+    pub cs_linear_gradient: LazilyCompiledShader,
+    pub cs_radial_gradient: LazilyCompiledShader,
+    pub cs_conic_gradient: LazilyCompiledShader,
+    pub cs_svg_filter: LazilyCompiledShader,
+
+    // Brush shaders
+    brush_solid: BrushShader,
+    brush_image: Vec<Option<BrushShader>>, // one slot per `IMAGE_BUFFER_KINDS` entry
+    brush_fast_image: Vec<Option<BrushShader>>, // one slot per `IMAGE_BUFFER_KINDS` entry
+    brush_blend: BrushShader,
+    brush_mix_blend: BrushShader,
+    brush_yuv_image: Vec<Option<BrushShader>>, // one slot per `IMAGE_BUFFER_KINDS` entry
+    brush_linear_gradient: BrushShader,
+    brush_opacity: BrushShader,
+    brush_opacity_aa: BrushShader,
+
+    /// These are "cache clip shaders". These shaders are used to
+    /// draw clip instances into the cached clip mask. The results
+    /// of these shaders are also used by the primitive shaders.
+    pub cs_clip_rectangle_slow: LazilyCompiledShader,
+    pub cs_clip_rectangle_fast: LazilyCompiledShader,
+    pub cs_clip_box_shadow: LazilyCompiledShader,
+    pub cs_clip_image: LazilyCompiledShader,
+
+    // These are "primitive shaders". These shaders draw and blend
+    // final results on screen. They are aware of tile boundaries.
+    // Most draw directly to the framebuffer, but some use inputs
+    // from the cache shaders to draw. Specifically, the box
+    // shadow primitive shader stretches the box shadow cache
+    // output, and the cache_image shader blits the results of
+    // a cache shader (e.g. blur) to the screen.
+    pub ps_text_run: TextShader,
+    pub ps_text_run_dual_source: Option<TextShader>,
+
+    ps_split_composite: LazilyCompiledShader,
+    pub ps_clear: LazilyCompiledShader,
+    pub ps_copy: LazilyCompiledShader,
+
+    // Composite shaders.  These are very simple shaders used to composite
+    // picture cache tiles into the framebuffer on platforms that do not have an
+    // OS Compositor (or we cannot use it).  Such an OS Compositor (such as
+    // DirectComposition or CoreAnimation) handles the composition of the picture
+    // cache tiles at a lower level (e.g. in DWM for Windows); in that case we
+    // directly hand the picture cache surfaces over to the OS Compositor, and
+    // our own Composite shaders below never run.
+    // To composite external (RGB) surfaces we need various permutations of
+    // shaders with WR_FEATURE flags on or off based on the type of image
+    // buffer we're sourcing from (see IMAGE_BUFFER_KINDS).
+    pub composite_rgba: Vec<Option<LazilyCompiledShader>>,
+    // A faster set of rgba composite shaders that do not support UV clamping
+    // or color modulation.
+    pub composite_rgba_fast_path: Vec<Option<LazilyCompiledShader>>,
+    // The same set of composite shaders but with WR_FEATURE_YUV added.
+    pub composite_yuv: Vec<Option<LazilyCompiledShader>>,
+}
+
+impl Shaders {
+    pub fn new(
+        device: &mut Device,
+        gl_type: GlType,
+        options: &WebRenderOptions,
+    ) -> Result<Self, ShaderError> {
+        // Builds every shader in `Shaders`; any creation error is returned immediately via `?`.
+        // Initialization is not recorded, so a dummy profile is passed around.
+        let profile = &mut TransactionProfile::new();
+
+        let use_dual_source_blending =
+            device.get_capabilities().supports_dual_source_blending &&
+            options.allow_dual_source_blending;
+        let use_advanced_blend_equation =
+            device.get_capabilities().supports_advanced_blend_equation &&
+            options.allow_advanced_blend_equation;
+        // Choose the external-texture shader version from the device capabilities.
+        let texture_external_version = if device.get_capabilities().supports_image_external_essl3 {
+            TextureExternalVersion::ESSL3
+        } else {
+            TextureExternalVersion::ESSL1
+        };
+        let mut shader_flags = match gl_type {
+            GlType::Gl => ShaderFeatureFlags::GL,
+            GlType::Gles => {
+                let texture_external_flag = match texture_external_version {
+                    TextureExternalVersion::ESSL3 => ShaderFeatureFlags::TEXTURE_EXTERNAL,
+                    TextureExternalVersion::ESSL1 => ShaderFeatureFlags::TEXTURE_EXTERNAL_ESSL1,
+                };
+                ShaderFeatureFlags::GLES | texture_external_flag
+            }
+        };
+        shader_flags.set(ShaderFeatureFlags::ADVANCED_BLEND_EQUATION, use_advanced_blend_equation);
+        shader_flags.set(ShaderFeatureFlags::DUAL_SOURCE_BLENDING, use_dual_source_blending);
+        shader_flags.set(ShaderFeatureFlags::DITHERING, options.enable_dithering);
+        let shader_list = get_shader_features(shader_flags);
+
+        let brush_solid = BrushShader::new(
+            "brush_solid",
+            device,
+            &[],
+            options.precache_flags,
+            &shader_list,
+            false /* advanced blend */,
+            false /* dual source */,
+            profile,
+        )?;
+
+        let brush_blend = BrushShader::new(
+            "brush_blend",
+            device,
+            &[],
+            options.precache_flags,
+            &shader_list,
+            false /* advanced blend */,
+            false /* dual source */,
+            profile,
+        )?;
+
+        let brush_mix_blend = BrushShader::new(
+            "brush_mix_blend",
+            device,
+            &[],
+            options.precache_flags,
+            &shader_list,
+            false /* advanced blend */,
+            false /* dual source */,
+            profile,
+        )?;
+
+        let brush_linear_gradient = BrushShader::new(
+            "brush_linear_gradient",
+            device,
+            if options.enable_dithering {
+               &[DITHERING_FEATURE]
+            } else {
+               &[]
+            },
+            options.precache_flags,
+            &shader_list,
+            false /* advanced blend */,
+            false /* dual source */,
+            profile,
+        )?;
+        // `brush_opacity` is built twice: with and without the "ANTIALIASING" feature.
+        let brush_opacity_aa = BrushShader::new(
+            "brush_opacity",
+            device,
+            &["ANTIALIASING"],
+            options.precache_flags,
+            &shader_list,
+            false /* advanced blend */,
+            false /* dual source */,
+            profile,
+        )?;
+
+        let brush_opacity = BrushShader::new(
+            "brush_opacity",
+            device,
+            &[],
+            options.precache_flags,
+            &shader_list,
+            false /* advanced blend */,
+            false /* dual source */,
+            profile,
+        )?;
+        // `cs_blur` is built twice: with "ALPHA_TARGET" and with "COLOR_TARGET".
+        let cs_blur_a8 = LazilyCompiledShader::new(
+            ShaderKind::Cache(VertexArrayKind::Blur),
+            "cs_blur",
+            &["ALPHA_TARGET"],
+            device,
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        let cs_blur_rgba8 = LazilyCompiledShader::new(
+            ShaderKind::Cache(VertexArrayKind::Blur),
+            "cs_blur",
+            &["COLOR_TARGET"],
+            device,
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        let cs_svg_filter = LazilyCompiledShader::new(
+            ShaderKind::Cache(VertexArrayKind::SvgFilter),
+            "cs_svg_filter",
+            &[],
+            device,
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+        // `cs_clip_rectangle` is built twice: without and with `FAST_PATH_FEATURE`.
+        let cs_clip_rectangle_slow = LazilyCompiledShader::new(
+            ShaderKind::ClipCache(VertexArrayKind::ClipRect),
+            "cs_clip_rectangle",
+            &[],
+            device,
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        let cs_clip_rectangle_fast = LazilyCompiledShader::new(
+            ShaderKind::ClipCache(VertexArrayKind::ClipRect),
+            "cs_clip_rectangle",
+            &[FAST_PATH_FEATURE],
+            device,
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        let cs_clip_box_shadow = LazilyCompiledShader::new(
+            ShaderKind::ClipCache(VertexArrayKind::ClipBoxShadow),
+            "cs_clip_box_shadow",
+            &["TEXTURE_2D"],
+            device,
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        let cs_clip_image = LazilyCompiledShader::new(
+            ShaderKind::ClipCache(VertexArrayKind::ClipImage),
+            "cs_clip_image",
+            &["TEXTURE_2D"],
+            device,
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+        // One `cs_scale` slot per `IMAGE_BUFFER_KINDS` entry; kinds without platform support stay `None`.
+        let mut cs_scale = Vec::new();
+        let scale_shader_num = IMAGE_BUFFER_KINDS.len();
+        // Fill with `None` via push(): `vec![None; n]` would need the shader type to be `Clone`.
+        for _ in 0 .. scale_shader_num {
+            cs_scale.push(None);
+        }
+        for image_buffer_kind in &IMAGE_BUFFER_KINDS {
+            if has_platform_support(*image_buffer_kind, &gl_type) {
+                let feature_string = get_feature_string(
+                    *image_buffer_kind,
+                    texture_external_version,
+                );
+
+                let mut features = Vec::new();
+                if feature_string != "" {
+                    features.push(feature_string);
+                }
+
+                let shader = LazilyCompiledShader::new(
+                    ShaderKind::Cache(VertexArrayKind::Scale),
+                    "cs_scale",
+                    &features,
+                    device,
+                    options.precache_flags,
+                    &shader_list,
+                    profile,
+                 )?;
+
+                 let index = Self::get_compositing_shader_index(
+                    *image_buffer_kind,
+                 );
+                 cs_scale[index] = Some(shader);
+            }
+        }
+
+        // TODO(gw): The split composite + text shader are special cases - the only
+        //           shaders used during normal scene rendering that aren't a brush
+        //           shader. Perhaps we can unify these in future?
+
+        let ps_text_run = TextShader::new("ps_text_run",
+            device,
+            &[],
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        let ps_text_run_dual_source = if use_dual_source_blending {
+            let dual_source_features = vec![DUAL_SOURCE_FEATURE];
+            Some(TextShader::new("ps_text_run",
+                device,
+                &dual_source_features,
+                options.precache_flags,
+                &shader_list,
+                profile,
+            )?)
+        } else {
+            None
+        };
+
+        let ps_split_composite = LazilyCompiledShader::new(
+            ShaderKind::Primitive,
+            "ps_split_composite",
+            &[],
+            device,
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        let ps_clear = LazilyCompiledShader::new(
+            ShaderKind::Clear,
+            "ps_clear",
+            &[],
+            device,
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        let ps_copy = LazilyCompiledShader::new(
+            ShaderKind::Copy,
+            "ps_copy",
+            &[],
+            device,
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        // All image configuration.
+        let mut image_features = Vec::new();
+        let mut brush_image = Vec::new();
+        let mut brush_fast_image = Vec::new();
+        // Fill with `None` via push(): `vec![None; n]` would need `BrushShader` to be `Clone`.
+        for _ in 0 .. IMAGE_BUFFER_KINDS.len() {
+            brush_image.push(None);
+            brush_fast_image.push(None);
+        }
+        for buffer_kind in 0 .. IMAGE_BUFFER_KINDS.len() {
+            if !has_platform_support(IMAGE_BUFFER_KINDS[buffer_kind], &gl_type)
+                // Brush shaders are not ESSL1 compatible
+                || (IMAGE_BUFFER_KINDS[buffer_kind] == ImageBufferKind::TextureExternal
+                    && texture_external_version == TextureExternalVersion::ESSL1)
+            {
+                continue;
+            }
+
+            let feature_string = get_feature_string(
+                IMAGE_BUFFER_KINDS[buffer_kind],
+                texture_external_version,
+            );
+            if feature_string != "" {
+                image_features.push(feature_string);
+            }
+
+            brush_fast_image[buffer_kind] = Some(BrushShader::new(
+                "brush_image",
+                device,
+                &image_features,
+                options.precache_flags,
+                &shader_list,
+                use_advanced_blend_equation,
+                use_dual_source_blending,
+                profile,
+            )?);
+            // The full `brush_image` variant adds the "REPETITION" and "ANTIALIASING" features.
+            image_features.push("REPETITION");
+            image_features.push("ANTIALIASING");
+
+            brush_image[buffer_kind] = Some(BrushShader::new(
+                "brush_image",
+                device,
+                &image_features,
+                options.precache_flags,
+                &shader_list,
+                use_advanced_blend_equation,
+                use_dual_source_blending,
+                profile,
+            )?);
+            // Start from an empty feature list again for the next buffer kind.
+            image_features.clear();
+        }
+
+        // All yuv_image and composite shader configuration.
+        let mut yuv_features = Vec::new();
+        let mut rgba_features = Vec::new();
+        let mut fast_path_features = Vec::new();
+        let yuv_shader_num = IMAGE_BUFFER_KINDS.len();
+        let mut brush_yuv_image = Vec::new();
+        let mut composite_yuv = Vec::new();
+        let mut composite_rgba = Vec::new();
+        let mut composite_rgba_fast_path = Vec::new();
+        // Fill with `None` via push(): `vec![None; n]` would need the shader types to be `Clone`.
+        for _ in 0 .. yuv_shader_num {
+            brush_yuv_image.push(None);
+            composite_yuv.push(None);
+            composite_rgba.push(None);
+            composite_rgba_fast_path.push(None);
+        }
+        for image_buffer_kind in &IMAGE_BUFFER_KINDS {
+            if has_platform_support(*image_buffer_kind, &gl_type) {
+                yuv_features.push("YUV");
+                fast_path_features.push("FAST_PATH");
+
+                let index = Self::get_compositing_shader_index(
+                    *image_buffer_kind,
+                );
+
+                let feature_string = get_feature_string(
+                    *image_buffer_kind,
+                    texture_external_version,
+                );
+                if feature_string != "" {
+                    yuv_features.push(feature_string);
+                    rgba_features.push(feature_string);
+                    fast_path_features.push(feature_string);
+                }
+
+                // YUV shaders are not compatible with ESSL1
+                if *image_buffer_kind != ImageBufferKind::TextureExternal ||
+                    texture_external_version == TextureExternalVersion::ESSL3 {
+                    let brush_shader = BrushShader::new(
+                        "brush_yuv_image",
+                        device,
+                        &yuv_features,
+                        options.precache_flags,
+                        &shader_list,
+                        false /* advanced blend */,
+                        false /* dual source */,
+                        profile,
+                    )?;
+                    brush_yuv_image[index] = Some(brush_shader);
+
+                    let composite_yuv_shader = LazilyCompiledShader::new(
+                        ShaderKind::Composite,
+                        "composite",
+                        &yuv_features,
+                        device,
+                        options.precache_flags,
+                        &shader_list,
+                        profile,
+                    )?;
+                    composite_yuv[index] = Some(composite_yuv_shader);
+                }
+
+                let composite_rgba_shader = LazilyCompiledShader::new(
+                    ShaderKind::Composite,
+                    "composite",
+                    &rgba_features,
+                    device,
+                    options.precache_flags,
+                    &shader_list,
+                    profile,
+                )?;
+
+                let composite_rgba_fast_path_shader = LazilyCompiledShader::new(
+                    ShaderKind::Composite,
+                    "composite",
+                    &fast_path_features,
+                    device,
+                    options.precache_flags,
+                    &shader_list,
+                    profile,
+                )?;
+                // NOTE(review): recomputes the same `index` as above; this second lookup looks redundant.
+                let index = Self::get_compositing_shader_index(
+                    *image_buffer_kind,
+                );
+                composite_rgba[index] = Some(composite_rgba_shader);
+                composite_rgba_fast_path[index] = Some(composite_rgba_fast_path_shader);
+                // Start from empty feature lists again for the next buffer kind.
+                yuv_features.clear();
+                rgba_features.clear();
+                fast_path_features.clear();
+            }
+        }
+
+        let cs_line_decoration = LazilyCompiledShader::new(
+            ShaderKind::Cache(VertexArrayKind::LineDecoration),
+            "cs_line_decoration",
+            &[],
+            device,
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        let cs_fast_linear_gradient = LazilyCompiledShader::new(
+            ShaderKind::Cache(VertexArrayKind::FastLinearGradient),
+            "cs_fast_linear_gradient",
+            &[],
+            device,
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        let cs_linear_gradient = LazilyCompiledShader::new(
+            ShaderKind::Cache(VertexArrayKind::LinearGradient),
+            "cs_linear_gradient",
+            &[],
+            device,
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        let cs_radial_gradient = LazilyCompiledShader::new(
+            ShaderKind::Cache(VertexArrayKind::RadialGradient),
+            "cs_radial_gradient",
+            &[],
+            device,
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        let cs_conic_gradient = LazilyCompiledShader::new(
+            ShaderKind::Cache(VertexArrayKind::ConicGradient),
+            "cs_conic_gradient",
+            &[],
+            device,
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        let cs_border_segment = LazilyCompiledShader::new(
+            ShaderKind::Cache(VertexArrayKind::Border),
+            "cs_border_segment",
+             &[],
+             device,
+             options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        let cs_border_solid = LazilyCompiledShader::new(
+            ShaderKind::Cache(VertexArrayKind::Border),
+            "cs_border_solid",
+            &[],
+            device,
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        Ok(Shaders {
+            cs_blur_a8,
+            cs_blur_rgba8,
+            cs_border_segment,
+            cs_line_decoration,
+            cs_fast_linear_gradient,
+            cs_linear_gradient,
+            cs_radial_gradient,
+            cs_conic_gradient,
+            cs_border_solid,
+            cs_scale,
+            cs_svg_filter,
+            brush_solid,
+            brush_image,
+            brush_fast_image,
+            brush_blend,
+            brush_mix_blend,
+            brush_yuv_image,
+            brush_linear_gradient,
+            brush_opacity,
+            brush_opacity_aa,
+            cs_clip_rectangle_slow,
+            cs_clip_rectangle_fast,
+            cs_clip_box_shadow,
+            cs_clip_image,
+            ps_text_run,
+            ps_text_run_dual_source,
+            ps_split_composite,
+            ps_clear,
+            ps_copy,
+            composite_rgba,
+            composite_rgba_fast_path,
+            composite_yuv,
+        })
+    }
+
+    fn get_compositing_shader_index(buffer_kind: ImageBufferKind) -> usize {
+        buffer_kind as usize // the enum's integer value is used directly as the table slot
+    }
+
+    /// Returns the composite shader for `format` and `buffer_kind`. RGBA surfaces use the
+    /// fast-path table only when `features` contains both `NO_UV_CLAMP` and
+    /// `NO_COLOR_MODULATION`. Panics if the selected slot is empty.
+    pub fn get_composite_shader(
+        &mut self,
+        format: CompositeSurfaceFormat,
+        buffer_kind: ImageBufferKind,
+        features: CompositeFeatures,
+    ) -> &mut LazilyCompiledShader {
+        let slot = Self::get_compositing_shader_index(buffer_kind);
+        let (entry, missing) = match format {
+            CompositeSurfaceFormat::Yuv => (
+                &mut self.composite_yuv[slot],
+                "bug: unsupported yuv shader requested",
+            ),
+            CompositeSurfaceFormat::Rgba => {
+                let fast_path = features.contains(CompositeFeatures::NO_UV_CLAMP)
+                    && features.contains(CompositeFeatures::NO_COLOR_MODULATION);
+                if fast_path {
+                    (
+                        &mut self.composite_rgba_fast_path[slot],
+                        "bug: unsupported rgba fast path shader requested",
+                    )
+                } else {
+                    (&mut self.composite_rgba[slot], "bug: unsupported rgba shader requested")
+                }
+            }
+        };
+        entry.as_mut().expect(missing)
+    }
+
+    /// Returns the `cs_scale` shader for `buffer_kind`; panics if that slot is empty.
+    pub fn get_scale_shader(
+        &mut self,
+        buffer_kind: ImageBufferKind,
+    ) -> &mut LazilyCompiledShader {
+        let slot = Self::get_compositing_shader_index(buffer_kind);
+        let entry = &mut self.cs_scale[slot];
+        entry.as_mut().expect("bug: unsupported scale shader requested")
+    }
+
+    /// Selects the shader used to draw the batch identified by `key`.
+    ///
+    /// Anti-aliasing and (for linear gradients) clip masks are ignored when choosing
+    /// a variant if the device capabilities report a native implementation for them.
+    pub fn get(
+        &mut self,
+        key: &BatchKey,
+        mut features: BatchFeatures,
+        debug_flags: DebugFlags,
+        device: &Device,
+    ) -> &mut LazilyCompiledShader {
+        match key.kind {
+            BatchKind::SplitComposite => {
+                &mut self.ps_split_composite
+            }
+            BatchKind::Brush(brush_kind) => {
+                // SWGL uses a native anti-aliasing implementation that bypasses the shader.
+                // Don't consider it in that case when deciding whether or not to use
+                // an alpha-pass shader.
+                if device.get_capabilities().uses_native_antialiasing {
+                    features.remove(BatchFeatures::ANTIALIASING);
+                }
+                let brush_shader = match brush_kind {
+                    BrushBatchKind::Solid => {
+                        &mut self.brush_solid
+                    }
+                    BrushBatchKind::Image(image_buffer_kind) => {
+                        if features.contains(BatchFeatures::ANTIALIASING) ||
+                            features.contains(BatchFeatures::REPETITION) {
+                            self.brush_image[image_buffer_kind as usize]
+                                .as_mut()
+                                .expect("Unsupported image shader kind")
+                        } else {
+                            self.brush_fast_image[image_buffer_kind as usize]
+                                .as_mut()
+                                .expect("Unsupported image shader kind")
+                        }
+                    }
+                    BrushBatchKind::Blend => {
+                        &mut self.brush_blend
+                    }
+                    BrushBatchKind::MixBlend { .. } => {
+                        &mut self.brush_mix_blend
+                    }
+                    BrushBatchKind::LinearGradient => {
+                        // SWGL uses a native clip mask implementation that bypasses the shader.
+                        // Don't consider it in that case when deciding whether or not to use
+                        // an alpha-pass shader.
+                        if device.get_capabilities().uses_native_clip_mask {
+                            features.remove(BatchFeatures::CLIP_MASK);
+                        }
+                        // Gradient brushes can optimistically use the opaque shader even
+                        // with a blend mode if they don't require any features.
+                        if !features.intersects(
+                            BatchFeatures::ANTIALIASING
+                                | BatchFeatures::REPETITION
+                                | BatchFeatures::CLIP_MASK,
+                        ) {
+                            features.remove(BatchFeatures::ALPHA_PASS);
+                        }
+                        &mut self.brush_linear_gradient
+                    }
+                    BrushBatchKind::YuvImage(image_buffer_kind, ..) => {
+                        let shader_index =
+                            Self::get_compositing_shader_index(image_buffer_kind);
+                        self.brush_yuv_image[shader_index]
+                            .as_mut()
+                            .expect("Unsupported YUV shader kind")
+                    }
+                    BrushBatchKind::Opacity => {
+                        if features.contains(BatchFeatures::ANTIALIASING) {
+                            &mut self.brush_opacity_aa
+                        } else {
+                            &mut self.brush_opacity
+                        }
+                    }
+                };
+                brush_shader.get(key.blend_mode, features, debug_flags)
+            }
+            BatchKind::TextRun(glyph_format) => {
+                let text_shader = match key.blend_mode {
+                    BlendMode::SubpixelDualSource => self.ps_text_run_dual_source.as_mut().unwrap(),
+                    _ => &mut self.ps_text_run,
+                };
+                text_shader.get(glyph_format, debug_flags)
+            }
+        }
+    }
+
+    /// Releases every shader owned by this collection through `device`.
+    ///
+    /// Takes `self` by value, so the collection cannot be used afterwards.
+    pub fn deinit(self, device: &mut Device) {
+        // The per-buffer-kind lists store `Option`s; empty slots are skipped.
+        for shader in self.cs_scale.into_iter().flatten() {
+            shader.deinit(device);
+        }
+
+        self.cs_blur_a8.deinit(device);
+        self.cs_blur_rgba8.deinit(device);
+        self.cs_svg_filter.deinit(device);
+        self.brush_solid.deinit(device);
+        self.brush_blend.deinit(device);
+        self.brush_mix_blend.deinit(device);
+        self.brush_linear_gradient.deinit(device);
+        self.brush_opacity.deinit(device);
+        self.brush_opacity_aa.deinit(device);
+        self.cs_clip_rectangle_slow.deinit(device);
+        self.cs_clip_rectangle_fast.deinit(device);
+        self.cs_clip_box_shadow.deinit(device);
+        self.cs_clip_image.deinit(device);
+        self.ps_text_run.deinit(device);
+        // The dual-source text shader is optional.
+        if let Some(shader) = self.ps_text_run_dual_source {
+            shader.deinit(device);
+        }
+
+        // Image brushes: one optional shader per buffer kind.
+        for shader in self.brush_image.into_iter().flatten() {
+            shader.deinit(device);
+        }
+
+        for shader in self.brush_fast_image.into_iter().flatten() {
+            shader.deinit(device);
+        }
+
+        for shader in self.brush_yuv_image.into_iter().flatten() {
+            shader.deinit(device);
+        }
+
+        self.cs_border_solid.deinit(device);
+        self.cs_fast_linear_gradient.deinit(device);
+        self.cs_linear_gradient.deinit(device);
+        self.cs_radial_gradient.deinit(device);
+        self.cs_conic_gradient.deinit(device);
+        self.cs_line_decoration.deinit(device);
+        self.cs_border_segment.deinit(device);
+        self.ps_split_composite.deinit(device);
+        self.ps_clear.deinit(device);
+        self.ps_copy.deinit(device);
+
+        // Compositing shaders: one optional shader per buffer kind.
+        for shader in self.composite_rgba.into_iter().flatten() {
+            shader.deinit(device);
+        }
+
+        for shader in self.composite_rgba_fast_path.into_iter().flatten() {
+            shader.deinit(device);
+        }
+
+        for shader in self.composite_yuv.into_iter().flatten() {
+            shader.deinit(device);
+        }
+    }
+}
+
+pub type SharedShaders = Rc<RefCell<Shaders>>; // reference-counted, interior-mutable handle
diff --git a/gfx/wr/webrender/src/renderer/upload.rs b/gfx/wr/webrender/src/renderer/upload.rs
new file mode 100644
index 0000000000..0ba053cd76
--- /dev/null
+++ b/gfx/wr/webrender/src/renderer/upload.rs
@@ -0,0 +1,847 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! This module contains the convoluted logic that goes into uploading content into
+//! the texture cache's textures.
+//!
+//! We need to support various combinations of code paths depending on the quirks of
+//! each hardware/driver configuration:
+//! - direct upload,
+//! - staged upload via a pixel buffer object,
+//! - staged upload via a direct upload to a staging texture where PBO's aren't supported,
+//! - copy from the staging to destination textures, either via blits or batched draw calls.
+//!
+//! Conceptually a lot of this logic should probably be in the device module, but some code
+//! here relies on submitting draw calls via the renderer.
+
+
+use std::mem;
+use std::collections::VecDeque;
+use std::sync::Arc;
+use std::time::Duration;
+use euclid::{Transform3D, point2};
+use time::precise_time_ns;
+use malloc_size_of::MallocSizeOfOps;
+use api::units::*;
+use api::{ExternalImageSource, ImageBufferKind, ImageFormat};
+use crate::renderer::{
+    Renderer, VertexArrayKind, RendererStats, TextureSampler, TEXTURE_CACHE_DBG_CLEAR_COLOR
+};
+use crate::internal_types::{
+    FastHashMap, TextureUpdateSource, Swizzle, TextureCacheUpdate,
+    CacheTextureId, RenderTargetInfo,
+};
+use crate::device::{
+    Device, UploadMethod, Texture, DrawTarget, UploadStagingBuffer, TextureFlags, TextureUploader,
+    TextureFilter,
+};
+use crate::gpu_types::CopyInstance;
+use crate::batch::BatchTextures;
+use crate::texture_pack::{GuillotineAllocator, FreeRectSlice};
+use crate::profiler;
+use crate::render_api::MemoryReport;
+
+pub const BATCH_UPLOAD_TEXTURE_SIZE: DeviceIntSize = DeviceIntSize::new(512, 512); // per staging buffer
+
+/// Upload a number of items to texture cache textures.
+///
+/// This is the main entry point of the texture cache upload code.
+/// See also the module documentation for more information.
+pub fn upload_to_texture_cache(
+    renderer: &mut Renderer,
+    update_list: FastHashMap<CacheTextureId, Vec<TextureCacheUpdate>>,
+) {
+    // Timings and counters for this call; they are reported to the profiler at the end.
+    let mut stats = UploadStats {
+        num_draw_calls: 0,
+        upload_time: 0,
+        cpu_buffer_alloc_time: 0,
+        texture_alloc_time: 0,
+        cpu_copy_time: 0,
+        gpu_copy_commands_time: 0,
+        bytes_uploaded: 0,
+        items_uploaded: 0,
+    };
+
+    let upload_total_start = precise_time_ns();
+    // Staging textures used by batched uploads; returned to the pool after the copies.
+    let mut batch_upload_textures = Vec::new();
+
+    // A list of copies that must be performed from the temporary textures to the texture cache.
+    let mut batch_upload_copies = Vec::new();
+
+    // For each texture format, this stores a list of staging buffers
+    // and a texture allocator for packing the buffers.
+    let mut batch_upload_buffers = FastHashMap::default();
+
+    // For best performance we use a single TextureUploader for all uploads.
+    // This allows us to fill PBOs more efficiently and therefore allocate fewer PBOs.
+    let mut uploader = renderer.device.upload_texture(
+        &mut renderer.texture_upload_pbo_pool,
+    );
+
+    let num_updates = update_list.len(); // counts textures in the list, not individual updates
+
+    for (texture_id, updates) in update_list {
+        let texture = &renderer.texture_resolver.texture_cache_map[&texture_id].texture;
+        for update in updates {
+            let TextureCacheUpdate { rect, stride, offset, format_override, source } = update;
+            let mut arc_data = None; // set for `Bytes` sources so the Arc can be passed on unchanged
+            let dummy_data;
+            let data = match source {
+                TextureUpdateSource::Bytes { ref data } => {
+                    arc_data = Some(data.clone());
+                    &data[offset as usize ..]
+                }
+                TextureUpdateSource::External { id, channel_index } => {
+                    let handler = renderer.external_image_handler
+                        .as_mut()
+                        .expect("Found external image, but no handler set!");
+                    // The filter is only relevant for NativeTexture external images.
+                    match handler.lock(id, channel_index).source {
+                        ExternalImageSource::RawData(data) => {
+                            &data[offset as usize ..]
+                        }
+                        ExternalImageSource::Invalid => {
+                            // Create a local buffer to fill the pbo.
+                            let bpp = texture.get_format().bytes_per_pixel();
+                            let width = stride.unwrap_or(rect.width() * bpp);
+                            let total_size = width * rect.height();
+                            // WR doesn't support the RGBAF32 format in the texture cache, so
+                            // we use the u8 type here.
+                            dummy_data = vec![0xFFu8; total_size as usize];
+                            &dummy_data
+                        }
+                        ExternalImageSource::NativeTexture(eid) => {
+                            panic!("Unexpected external texture {:?} for the texture cache update of {:?}", eid, id);
+                        }
+                    }
+                }
+                TextureUpdateSource::DebugClear => {
+                    let draw_target = DrawTarget::from_texture(
+                        texture,
+                        false,
+                    );
+                    renderer.device.bind_draw_target(draw_target);
+                    renderer.device.clear_target(
+                        Some(TEXTURE_CACHE_DBG_CLEAR_COLOR),
+                        None,
+                        Some(draw_target.to_framebuffer_rect(update.rect.to_i32()))
+                    );
+                    // The clear is drawn right away, so there is nothing to upload for this item.
+                    continue;
+                }
+            };
+
+            stats.items_uploaded += 1;
+
+            let use_batch_upload = renderer.device.use_batched_texture_uploads() &&
+                texture.flags().contains(TextureFlags::IS_SHARED_TEXTURE_CACHE) &&
+                rect.width() <= BATCH_UPLOAD_TEXTURE_SIZE.width &&
+                rect.height() <= BATCH_UPLOAD_TEXTURE_SIZE.height &&
+                rect.area() < renderer.device.batched_upload_threshold();
+            // Large `Bytes` items bypass the CPU staging copy when uploads are immediate.
+            if use_batch_upload
+                && arc_data.is_some()
+                && matches!(renderer.device.upload_method(), &UploadMethod::Immediate)
+                && rect.area() > BATCH_UPLOAD_TEXTURE_SIZE.area() / 2 {
+                skip_staging_buffer(
+                    &mut renderer.device,
+                    &mut renderer.staging_texture_pool,
+                    rect,
+                    stride,
+                    arc_data.unwrap(),
+                    texture_id,
+                    texture,
+                    &mut batch_upload_buffers,
+                    &mut batch_upload_textures,
+                    &mut batch_upload_copies,
+                    &mut stats,
+                );
+            } else if use_batch_upload {
+                copy_into_staging_buffer(
+                    &mut renderer.device,
+                    &mut uploader,
+                    &mut renderer.staging_texture_pool,
+                    rect,
+                    stride,
+                    data,
+                    texture_id,
+                    texture,
+                    &mut batch_upload_buffers,
+                    &mut batch_upload_textures,
+                    &mut batch_upload_copies,
+                    &mut stats,
+                );
+            } else {
+                let upload_start_time = precise_time_ns();
+
+                stats.bytes_uploaded += uploader.upload(
+                    &mut renderer.device,
+                    texture,
+                    rect,
+                    stride,
+                    format_override,
+                    data.as_ptr(),
+                    data.len()
+                );
+
+                stats.upload_time += precise_time_ns() - upload_start_time;
+            }
+            // Release the external image that was locked above to read its data.
+            if let TextureUpdateSource::External { id, channel_index } = source {
+                let handler = renderer.external_image_handler
+                    .as_mut()
+                    .expect("Found external image, but no handler set!");
+                handler.unlock(id, channel_index);
+            }
+        }
+    }
+
+    let upload_start_time = precise_time_ns();
+    // Upload batched texture updates to their temporary textures.
+    for batch_buffer in batch_upload_buffers.into_iter().map(|(_, (_, buffers))| buffers).flatten() {
+        let texture = &batch_upload_textures[batch_buffer.texture_index];
+        match batch_buffer.staging_buffer {
+            StagingBufferKind::Pbo(pbo) => {
+                stats.bytes_uploaded += uploader.upload_staged(
+                    &mut renderer.device,
+                    texture,
+                    DeviceIntRect::from_size(texture.get_dimensions()),
+                    None,
+                    pbo,
+                );
+            }
+            StagingBufferKind::CpuBuffer { bytes, .. } => {
+                let bpp = texture.get_format().bytes_per_pixel();
+                stats.bytes_uploaded += uploader.upload(
+                    &mut renderer.device,
+                    texture,
+                    batch_buffer.upload_rect,
+                    Some(BATCH_UPLOAD_TEXTURE_SIZE.width * bpp),
+                    None,
+                    bytes.as_ptr(),
+                    bytes.len()
+                );
+                renderer.staging_texture_pool.return_temporary_buffer(bytes);
+            }
+            StagingBufferKind::Image { bytes, stride } => {
+                stats.bytes_uploaded += uploader.upload(
+                    &mut renderer.device,
+                    texture,
+                    batch_buffer.upload_rect,
+                    stride,
+                    None,
+                    bytes.as_ptr(),
+                    bytes.len()
+                );
+            }
+        }
+    }
+    stats.upload_time += precise_time_ns() - upload_start_time;
+
+
+    // Flush all uploads, batched or otherwise.
+    let flush_start_time = precise_time_ns();
+    uploader.flush(&mut renderer.device);
+    stats.upload_time += precise_time_ns() - flush_start_time;
+
+    if !batch_upload_copies.is_empty() {
+        // Copy updates that were batch uploaded to their correct destination in the texture cache.
+        // Sort them by destination and source to minimize framebuffer binding changes.
+        batch_upload_copies.sort_unstable_by_key(|b| (b.dest_texture_id.0, b.src_texture_index));
+
+        let gpu_copy_start = precise_time_ns();
+
+        if renderer.device.use_draw_calls_for_texture_copy() {
+            // Some drivers have a very high CPU overhead when submitting hundreds of small blit
+            // commands (low end intel drivers on Windows for example can take 100+ ms submitting a
+            // few hundred blits). In this case we do the copy with batched draw calls.
+            copy_from_staging_to_cache_using_draw_calls(
+                renderer,
+                &mut stats,
+                &batch_upload_textures,
+                batch_upload_copies,
+            );
+        } else {
+            copy_from_staging_to_cache(
+                renderer,
+                &batch_upload_textures,
+                batch_upload_copies,
+            );
+        }
+
+        stats.gpu_copy_commands_time += precise_time_ns() - gpu_copy_start;
+    }
+
+    for texture in batch_upload_textures.drain(..) {
+        renderer.staging_texture_pool.return_texture(texture);
+    }
+
+    // Update the profile counters. We use add instead of set because
+    // this function can be called several times per frame.
+    // We don't update the counters when their value is zero, so that
+    // the profiler can treat them as events and we can get notified
+    // when they happen.
+
+    let upload_total = precise_time_ns() - upload_total_start;
+    renderer.profile.add(
+        profiler::TOTAL_UPLOAD_TIME,
+        profiler::ns_to_ms(upload_total)
+    );
+
+    if num_updates > 0 {
+        renderer.profile.add(profiler::TEXTURE_UPLOADS, num_updates);
+    }
+
+    if stats.bytes_uploaded > 0 {
+        renderer.profile.add(
+            profiler::TEXTURE_UPLOADS_MEM,
+            profiler::bytes_to_mb(stats.bytes_uploaded)
+        );
+    }
+
+    if stats.cpu_copy_time > 0 {
+        renderer.profile.add(
+            profiler::UPLOAD_CPU_COPY_TIME,
+            profiler::ns_to_ms(stats.cpu_copy_time)
+        );
+    }
+    if stats.upload_time > 0 {
+        renderer.profile.add(
+            profiler::UPLOAD_TIME,
+            profiler::ns_to_ms(stats.upload_time)
+        );
+    }
+    if stats.texture_alloc_time > 0 {
+        renderer.profile.add(
+            profiler::STAGING_TEXTURE_ALLOCATION_TIME,
+            profiler::ns_to_ms(stats.texture_alloc_time)
+        );
+    }
+    if stats.cpu_buffer_alloc_time > 0 {
+        renderer.profile.add(
+            profiler::CPU_TEXTURE_ALLOCATION_TIME,
+            profiler::ns_to_ms(stats.cpu_buffer_alloc_time)
+        );
+    }
+    if stats.num_draw_calls > 0{
+        renderer.profile.add(
+            profiler::UPLOAD_NUM_COPY_BATCHES,
+            stats.num_draw_calls
+        );
+    }
+
+    if stats.gpu_copy_commands_time > 0 {
+        renderer.profile.add(
+            profiler::UPLOAD_GPU_COPY_TIME,
+            profiler::ns_to_ms(stats.gpu_copy_commands_time)
+        );
+    }
+
+    let add_markers = profiler::thread_is_being_profiled();
+    if add_markers && stats.bytes_uploaded > 0 {
+    	let details = format!("{} bytes uploaded, {} items", stats.bytes_uploaded, stats.items_uploaded);
+    	profiler::add_text_marker(&"Texture uploads", &details, Duration::from_nanos(upload_total));
+    }
+}
+
+/// Copy an item into a batched upload staging buffer.
+///
+/// A region is reserved in a staging buffer (a new staging texture and buffer are allocated if
+/// none has room), `data` is copied into it row by row, and the copy to the cache is queued.
+fn copy_into_staging_buffer<'a>(
+    device: &mut Device,
+    uploader: &mut TextureUploader<'a>,
+    staging_texture_pool: &mut UploadTexturePool,
+    update_rect: DeviceIntRect,
+    update_stride: Option<i32>,
+    data: &[u8],
+    dest_texture_id: CacheTextureId,
+    texture: &Texture,
+    batch_upload_buffers: &mut FastHashMap<ImageFormat, (GuillotineAllocator, Vec<BatchUploadBuffer<'a>>)>,
+    batch_upload_textures: &mut Vec<Texture>,
+    batch_upload_copies: &mut Vec<BatchUploadCopy>,
+    stats: &mut UploadStats
+) {
+    let (allocator, buffers) = batch_upload_buffers.entry(texture.get_format())
+        .or_insert_with(|| (GuillotineAllocator::new(None), Vec::new()));
+
+    // Allocate a region within the staging buffer for this update. If there is
+    // no room in an existing buffer then allocate another texture and buffer.
+    let (slice, origin) = match allocator.allocate(&update_rect.size()) {
+        Some((slice, origin)) => (slice, origin),
+        None => {
+            let new_slice = FreeRectSlice(buffers.len() as u32);
+            allocator.extend(new_slice, BATCH_UPLOAD_TEXTURE_SIZE, update_rect.size());
+            // Accumulate: several staging textures may be allocated during one upload pass.
+            let texture_alloc_time_start = precise_time_ns();
+            let staging_texture = staging_texture_pool.get_texture(device, texture.get_format());
+            stats.texture_alloc_time += precise_time_ns() - texture_alloc_time_start;
+
+            let texture_index = batch_upload_textures.len();
+            batch_upload_textures.push(staging_texture);
+
+            let cpu_buffer_alloc_start_time = precise_time_ns();
+            let staging_buffer = match device.upload_method() {
+                UploadMethod::Immediate => StagingBufferKind::CpuBuffer {
+                    bytes: staging_texture_pool.get_temporary_buffer(),
+                },
+                UploadMethod::PixelBuffer(_) => {
+                    let pbo = uploader.stage(
+                        device,
+                        texture.get_format(),
+                        BATCH_UPLOAD_TEXTURE_SIZE,
+                    ).unwrap();
+                    StagingBufferKind::Pbo(pbo)
+                }
+            };
+            stats.cpu_buffer_alloc_time += precise_time_ns() - cpu_buffer_alloc_start_time;
+
+            buffers.push(BatchUploadBuffer {
+                staging_buffer,
+                texture_index,
+                upload_rect: DeviceIntRect::zero()
+            });
+
+            (new_slice, DeviceIntPoint::zero())
+        }
+    };
+    let buffer = &mut buffers[slice.0 as usize];
+    let allocated_rect = DeviceIntRect::from_origin_and_size(origin, update_rect.size());
+    buffer.upload_rect = buffer.upload_rect.union(&allocated_rect);
+
+    batch_upload_copies.push(BatchUploadCopy {
+        src_texture_index: buffer.texture_index,
+        src_offset: allocated_rect.min,
+        dest_texture_id,
+        dest_offset: update_rect.min,
+        size: update_rect.size(),
+    });
+
+    unsafe {
+        let memcpy_start_time = precise_time_ns();
+        let bpp = texture.get_format().bytes_per_pixel() as usize;
+        let width_bytes = update_rect.width() as usize * bpp;
+        let src_stride = update_stride.map_or(width_bytes, |stride| {
+            assert!(stride >= 0);
+            stride as usize
+        });
+        let src_size = (update_rect.height() as usize - 1) * src_stride + width_bytes;
+        assert!(src_size <= data.len());
+        // SAFETY: `src_size <= data.len()` was asserted above and `MaybeUninit<u8>` has the
+        // same layout as `u8`, so the initialized bytes can be viewed through this slice.
+        let src: &[mem::MaybeUninit<u8>] = std::slice::from_raw_parts(data.as_ptr() as *const _, src_size);
+        let (dst_stride, dst) = match &mut buffer.staging_buffer {
+            StagingBufferKind::Pbo(buffer) => (buffer.get_stride(), buffer.get_mapping()),
+            StagingBufferKind::CpuBuffer { bytes } => (
+                BATCH_UPLOAD_TEXTURE_SIZE.width as usize * bpp,
+                &mut bytes[..],
+            ),
+            StagingBufferKind::Image { .. } => unreachable!(),
+        };
+
+        // copy the data line-by-line in to the buffer so that we do not overwrite
+        // any other region of the buffer.
+        for y in 0..allocated_rect.height() as usize {
+            let src_start = y * src_stride;
+            let src_end = src_start + width_bytes;
+            let dst_start = (allocated_rect.min.y as usize + y) * dst_stride +
+                allocated_rect.min.x as usize * bpp;
+            let dst_end = dst_start + width_bytes;
+
+            dst[dst_start..dst_end].copy_from_slice(&src[src_start..src_end])
+        }
+
+        stats.cpu_copy_time += precise_time_ns() - memcpy_start_time;
+    }
+}
+
+/// Take this code path instead of copying into a staging CPU buffer when the image
+/// we would copy is large enough that it's unlikely anything else would fit in the
+/// buffer, therefore we might as well copy directly from the source image's pixels.
+fn skip_staging_buffer<'a>(
+    device: &mut Device,
+    staging_texture_pool: &mut UploadTexturePool,
+    update_rect: DeviceIntRect,
+    stride: Option<i32>,
+    data: Arc<Vec<u8>>,
+    dest_texture_id: CacheTextureId,
+    texture: &Texture,
+    batch_upload_buffers: &mut FastHashMap<ImageFormat, (GuillotineAllocator, Vec<BatchUploadBuffer<'a>>)>,
+    batch_upload_textures: &mut Vec<Texture>,
+    batch_upload_copies: &mut Vec<BatchUploadCopy>,
+    stats: &mut UploadStats
+) {
+    let (_, buffers) = batch_upload_buffers.entry(texture.get_format())
+        .or_insert_with(|| (GuillotineAllocator::new(None), Vec::new()));
+    // Accumulate the allocation time: this runs once per item taking this path.
+    let texture_alloc_time_start = precise_time_ns();
+    let staging_texture = staging_texture_pool.get_texture(device, texture.get_format());
+    stats.texture_alloc_time += precise_time_ns() - texture_alloc_time_start;
+
+    let texture_index = batch_upload_textures.len();
+    batch_upload_textures.push(staging_texture);
+    // The source bytes themselves act as the staging buffer; no CPU copy is made.
+    buffers.push(BatchUploadBuffer {
+        staging_buffer: StagingBufferKind::Image { bytes: data, stride },
+        texture_index,
+        upload_rect: DeviceIntRect::from_size(update_rect.size())
+    });
+
+    batch_upload_copies.push(BatchUploadCopy {
+        src_texture_index: texture_index,
+        src_offset: point2(0, 0),
+        dest_texture_id,
+        dest_offset: update_rect.min,
+        size: update_rect.size(),
+    });
+}
+
+
+/// Copy from the staging PBOs or textures to texture cache textures using blit commands.
+///
+/// Using blits instead of draw calls is supposedly more efficient but some drivers have
+/// a very high per-command overhead so in some configurations we end up using
+/// copy_from_staging_to_cache_using_draw_calls instead.
+fn copy_from_staging_to_cache(
+    renderer: &mut Renderer,
+    batch_upload_textures: &[Texture],
+    batch_upload_copies: Vec<BatchUploadCopy>,
+) {
+    for copy in batch_upload_copies {
+        // Look the destination up first, then the staging texture holding the pixels.
+        let dst = &renderer.texture_resolver.texture_cache_map[&copy.dest_texture_id].texture;
+        let src = &batch_upload_textures[copy.src_texture_index];
+        let (src_at, dst_at, extent) = (copy.src_offset, copy.dest_offset, copy.size);
+
+        // Copy the `extent`-sized region at `src_at` in the staging texture to
+        // `dst_at` in the cache texture.
+        renderer.device.copy_texture_sub_region(
+            src, src_at.x as _, src_at.y as _,
+            dst, dst_at.x as _, dst_at.y as _,
+            extent.width as _, extent.height as _,
+        );
+    }
+}
+
+/// Generate and submit composite shader batches to copy from
+/// the staging textures to the destination cache textures.
+///
+/// If this shows up in GPU time profiles we could replace it with
+/// a simpler shader (composite.glsl is already quite simple).
+fn copy_from_staging_to_cache_using_draw_calls(
+    renderer: &mut Renderer,
+    stats: &mut UploadStats,
+    batch_upload_textures: &[Texture],
+    batch_upload_copies: Vec<BatchUploadCopy>,
+) {
+    /// Draws `instances` with a single instanced draw call and counts it in `stats`.
+    ///
+    /// The caller is expected to have bound the draw target, shader and source texture.
+    fn submit(renderer: &mut Renderer, stats: &mut UploadStats, instances: &[CopyInstance]) {
+        renderer.draw_instanced_batch(
+            instances,
+            VertexArrayKind::Copy,
+            // We bind the staging texture manually because it isn't known
+            // to the texture resolver.
+            &BatchTextures::empty(),
+            &mut RendererStats::default(),
+        );
+        stats.num_draw_calls += 1;
+    }
+
+    // Instances waiting to be drawn with the bindings currently in effect.
+    let mut pending: Vec<CopyInstance> = Vec::new();
+    // Source and destination bound so far; `None` until the first item is processed.
+    let mut bound_src = None;
+    let mut bound_dst = None;
+    let mut dst_texture_size = DeviceSize::new(0.0, 0.0);
+
+    for copy in batch_upload_copies {
+        let src_changed = bound_src != Some(copy.src_texture_index);
+        let dst_changed = bound_dst != Some(copy.dest_texture_id);
+
+        // Flush the accumulated instances before either binding changes, since they
+        // were recorded for the previous source and destination.
+        if !pending.is_empty() && (src_changed || dst_changed) {
+            submit(renderer, stats, &pending);
+            pending.clear();
+        }
+
+        if dst_changed {
+            let dest_texture =
+                &renderer.texture_resolver.texture_cache_map[&copy.dest_texture_id].texture;
+            // Every instance carries the size of the texture it is drawn into.
+            dst_texture_size = dest_texture.get_dimensions().to_f32();
+
+            let draw_target = DrawTarget::from_texture(dest_texture, false);
+            renderer.device.bind_draw_target(draw_target);
+
+            renderer.shaders
+                .borrow_mut()
+                .ps_copy
+                .bind(
+                    &mut renderer.device,
+                    &Transform3D::identity(),
+                    None,
+                    &mut renderer.renderer_errors,
+                    &mut renderer.profile,
+                );
+
+            bound_dst = Some(copy.dest_texture_id);
+        }
+
+        if src_changed {
+            // Sample from the staging texture that holds this item.
+            let staging_texture = &batch_upload_textures[copy.src_texture_index];
+            renderer.device.bind_texture(
+                TextureSampler::Color0,
+                staging_texture,
+                Swizzle::default(),
+            );
+
+            bound_src = Some(copy.src_texture_index);
+        }
+
+        // Source and destination rectangles share the same size.
+        let extent = copy.size.to_f32();
+        pending.push(CopyInstance {
+            src_rect: DeviceRect::from_origin_and_size(copy.src_offset.to_f32(), extent),
+            dst_rect: DeviceRect::from_origin_and_size(copy.dest_offset.to_f32(), extent),
+            dst_texture_size,
+        });
+    }
+
+    // Draw whatever is still pending after the last item; the vector is dropped
+    // right afterwards, so it does not need clearing.
+    if !pending.is_empty() {
+        submit(renderer, stats, &pending);
+    }
+}
+
+/// A very basic pool to avoid reallocating staging textures as well as staging
+/// CPU side buffers.
+pub struct UploadTexturePool {
+    /// The textures in the pool associated with a last used frame index.
+    ///
+    /// The outer array corresponds to each of the three supported texture formats.
+    textures: [VecDeque<(Texture, u64)>; 3],
+    /// Frame at which to deallocate some textures if there are too many in the pool,
+    /// for each format.
+    delay_texture_deallocation: [u64; 3],
+    current_frame: u64, // incremented by `begin_frame`
+
+    /// Temporary buffers that are used when using staging uploads + glTexImage2D.
+    ///
+    /// Temporary buffers aren't used asynchronously so they can be reused every frame.
+    /// To keep things simple we always allocate enough memory for formats with four bytes
+    /// per pixel (more than we need for alpha-only textures but it works just as well).
+    temporary_buffers: Vec<Vec<mem::MaybeUninit<u8>>>,
+    min_temporary_buffers: usize, // reset to the number of pooled buffers by `begin_frame`
+    delay_buffer_deallocation: u64,
+}
+
+impl UploadTexturePool {
+    pub fn new() -> Self { // Starts with no textures, no buffers and the frame counter at 0.
+        UploadTexturePool {
+            textures: [VecDeque::new(), VecDeque::new(), VecDeque::new()],
+            delay_texture_deallocation: [0; 3],
+            current_frame: 0,
+            temporary_buffers: Vec::new(),
+            min_temporary_buffers: 0,
+            delay_buffer_deallocation: 0,
+        }
+    }
+
+    fn format_index(&self, format: ImageFormat) -> usize { // Slot of `format` in the per-format arrays.
+        match format {
+            ImageFormat::RGBA8 => 0,
+            ImageFormat::BGRA8 => 1,
+            ImageFormat::R8 => 2,
+            _ => { panic!("unexpected format"); } // Only the three formats above are pooled.
+        }
+    }
+
+    pub fn begin_frame(&mut self) { // Advances the frame counter and resets the idle-buffer minimum.
+        self.current_frame += 1;
+        self.min_temporary_buffers = self.temporary_buffers.len();
+    }
+
+    /// Create or reuse a staging texture.
+    ///
+    /// See also return_texture.
+    pub fn get_texture(&mut self, device: &mut Device, format: ImageFormat) -> Texture {
+
+        // First try to reuse a texture from the pool.
+        // "available" here means it was returned more than 2 frames ago, to avoid stalls.
+        // No need to scan the deque. Newer textures are always pushed at the back
+        // of the deque so we know the first element is the least recently used.
+        let format_idx = self.format_index(format);
+        let can_reuse = self.textures[format_idx].get(0)
+            .map(|tex| self.current_frame - tex.1 > 2)
+            .unwrap_or(false);
+
+        if can_reuse {
+            return self.textures[format_idx].pop_front().unwrap().0;
+        }
+
+        // If we couldn't find an available texture, create a new one.
+
+        device.create_texture(
+            ImageBufferKind::Texture2D,
+            format,
+            BATCH_UPLOAD_TEXTURE_SIZE.width,
+            BATCH_UPLOAD_TEXTURE_SIZE.height,
+            TextureFilter::Nearest,
+            // Currently we need render target support as we always use glBlitFramebuffer
+            // to copy the texture data. Instead, we should use glCopyImageSubData on some
+            // platforms, and avoid creating the FBOs in that case.
+            Some(RenderTargetInfo { has_depth: false }),
+        )
+    }
+
+    /// Hand the staging texture back to the pool after being done with uploads.
+    ///
+    /// The texture must have been obtained from this pool via get_texture.
+    pub fn return_texture(&mut self, texture: Texture) {
+        let format_idx = self.format_index(texture.get_format());
+        self.textures[format_idx].push_back((texture, self.current_frame)); // Stamp with the frame of return.
+    }
+
+    /// Create or reuse a temporary CPU buffer.
+    ///
+    /// These buffers are used in the batched upload path when PBOs are not supported.
+    /// Content is first written to the temporary buffer and uploaded via a single
+    /// glTexSubImage2D call.
+    pub fn get_temporary_buffer(&mut self) -> Vec<mem::MaybeUninit<u8>> {
+        let buffer = self.temporary_buffers.pop().unwrap_or_else(|| {
+            vec![mem::MaybeUninit::new(0); BATCH_UPLOAD_TEXTURE_SIZE.area() as usize * 4]
+        });
+        self.min_temporary_buffers = self.min_temporary_buffers.min(self.temporary_buffers.len());
+        buffer
+    }
+
+    /// Return memory that was obtained from this pool via get_temporary_buffer.
+    pub fn return_temporary_buffer(&mut self, buffer: Vec<mem::MaybeUninit<u8>>) {
+        assert_eq!(buffer.len(), BATCH_UPLOAD_TEXTURE_SIZE.area() as usize * 4); // Fixed pool buffer size.
+        self.temporary_buffers.push(buffer);
+    }
+
+    /// Deallocate this pool's CPU and GPU memory.
+    pub fn delete_textures(&mut self, device: &mut Device) {
+        for format in &mut self.textures {
+            while let Some(texture) = format.pop_back() {
+                device.delete_texture(texture.0)
+            }
+        }
+        self.temporary_buffers.clear();
+    }
+
+    /// Deallocate some textures if there are too many for a long time.
+    pub fn end_frame(&mut self, device: &mut Device) {
+        for format_idx in 0..self.textures.len() {
+            // Count the number of reusable staging textures.
+            // If it stays high for a large number of frames, truncate it back to 8-ish
+            // over multiple frames.
+
+            let mut num_reusable_textures = 0;
+            for texture in &self.textures[format_idx] {
+                if self.current_frame - texture.1 > 2 {
+                    num_reusable_textures += 1;
+                }
+            }
+
+            if num_reusable_textures < 8 {
+                // Don't deallocate textures for another 120 frames.
+                self.delay_texture_deallocation[format_idx] = self.current_frame + 120;
+            }
+
+            // Deallocate up to 4 reusable staging textures per format every frame.
+            let to_remove = if self.current_frame > self.delay_texture_deallocation[format_idx] {
+                num_reusable_textures.min(4)
+            } else {
+                0
+            };
+
+            for _ in 0..to_remove {
+                let texture = self.textures[format_idx].pop_front().unwrap().0; // Oldest entries are at the front.
+                device.delete_texture(texture);
+            }
+        }
+
+        // Similar logic for temporary CPU buffers. Our calls to get and return
+        // temporary buffers should have been balanced for this frame, but the call
+        // get_temporary_buffer will allocate a buffer if the vec is empty. Since we
+        // carry these buffers from frame to frame, we keep track of the smallest
+        // length of the temporary_buffers vec that we encountered this frame. Those
+        // buffers were not touched and we deallocate some if there are a lot of them.
+        let unused_buffers = self.min_temporary_buffers;
+        if unused_buffers < 8 {
+            self.delay_buffer_deallocation = self.current_frame + 120;
+        }
+        let to_remove = if self.current_frame > self.delay_buffer_deallocation  {
+            unused_buffers.min(4)
+        } else {
+            0
+        };
+        for _ in 0..to_remove {
+            // Unlike textures it doesn't matter whether we pop from the front or back
+            // of the vector.
+            self.temporary_buffers.pop();
+        }
+    }
+
+    pub fn report_memory_to(&self, report: &mut MemoryReport, size_op_funs: &MallocSizeOfOps) {
+        for buf in &self.temporary_buffers { // Pooled CPU buffers, sized via size_of_op.
+            report.upload_staging_memory += unsafe { (size_op_funs.size_of_op)(buf.as_ptr() as *const _) };
+        }
+
+        for format in &self.textures { // Pooled staging textures, all formats.
+            for texture in format {
+                report.upload_staging_textures += texture.0.size_in_bytes();
+            }
+        }
+    }
+}
+
+struct UploadStats { // NOTE(review): counters/timings for texture uploads; filled in outside this view.
+    num_draw_calls: u32,
+    upload_time: u64, // NOTE(review): units of the *_time fields are not visible here — confirm.
+    cpu_buffer_alloc_time: u64,
+    texture_alloc_time: u64,
+    cpu_copy_time: u64,
+    gpu_copy_commands_time: u64,
+    bytes_uploaded: usize,
+    items_uploaded: usize,
+}
+
+#[derive(Debug)]
+enum StagingBufferKind<'a> {
+    Pbo(UploadStagingBuffer<'a>),
+    CpuBuffer { bytes: Vec<mem::MaybeUninit<u8>> }, // Same buffer type UploadTexturePool hands out.
+    Image { bytes: Arc<Vec<u8>>, stride: Option<i32> }, // Shared, initialized bytes; stride is optional.
+}
+#[derive(Debug)]
+struct BatchUploadBuffer<'a> {
+    staging_buffer: StagingBufferKind<'a>,
+    texture_index: usize, // NOTE(review): presumably an index into batch_upload_textures — confirm.
+    // A rectangle containing all items going into this staging texture, so
+    // that we can avoid uploading the entire area if we are using glTexSubImage2D.
+    upload_rect: DeviceIntRect,
+}
+
+// On some devices performing many small texture uploads is slow, so instead we batch
+// updates into a small number of uploads to temporary textures, then copy from those
+// textures to the correct place in the texture cache.
+// Each BatchUploadCopy describes one such copy out of the list of temporary textures.
+#[derive(Debug)]
+struct BatchUploadCopy {
+    // Index within batch_upload_textures
+    src_texture_index: usize,
+    src_offset: DeviceIntPoint,
+    dest_texture_id: CacheTextureId,
+    dest_offset: DeviceIntPoint,
+    size: DeviceIntSize, // Shared by the source and the destination rect of the copy.
+}
diff --git a/gfx/wr/webrender/src/renderer/vertex.rs b/gfx/wr/webrender/src/renderer/vertex.rs
new file mode 100644
index 0000000000..2f871b4c26
--- /dev/null
+++ b/gfx/wr/webrender/src/renderer/vertex.rs
@@ -0,0 +1,1129 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! Rendering logic related to the vertex shaders and their states, including
+//!  - Vertex Array Objects
+//!  - vertex layout descriptors
+//!  - textures bound at vertex stage
+
+use std::{marker::PhantomData, mem, num::NonZeroUsize, ops};
+use api::units::*;
+use crate::{
+    device::{
+        Device, Texture, TextureFilter, TextureUploader, UploadPBOPool, VertexUsageHint, VAO,
+    },
+    frame_builder::Frame,
+    gpu_types::{PrimitiveHeaderI, PrimitiveHeaderF, TransformData},
+    internal_types::Swizzle,
+    render_task::RenderTaskData,
+};
+
+pub const VERTEX_TEXTURE_EXTRA_ROWS: i32 = 10; // NOTE(review): used outside this view; presumably spare rows — confirm.
+
+pub const MAX_VERTEX_TEXTURE_WIDTH: usize = webrender_build::MAX_VERTEX_TEXTURE_WIDTH; // Mirrors the webrender_build value.
+
+pub mod desc {
+    use crate::device::{VertexAttribute, VertexAttributeKind, VertexDescriptor};
+
+    pub const PRIM_INSTANCES: VertexDescriptor = VertexDescriptor {
+        vertex_attributes: &[VertexAttribute {
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[VertexAttribute {
+            name: "aData",
+            count: 4,
+            kind: VertexAttributeKind::I32,
+        }],
+    };
+
+    pub const BLUR: VertexDescriptor = VertexDescriptor {
+        vertex_attributes: &[VertexAttribute {
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[
+            VertexAttribute {
+                name: "aBlurRenderTaskAddress",
+                count: 1,
+                kind: VertexAttributeKind::U16,
+            },
+            VertexAttribute {
+                name: "aBlurSourceTaskAddress",
+                count: 1,
+                kind: VertexAttributeKind::U16,
+            },
+            VertexAttribute {
+                name: "aBlurDirection",
+                count: 1,
+                kind: VertexAttributeKind::I32,
+            },
+        ],
+    };
+
+    pub const LINE: VertexDescriptor = VertexDescriptor {
+        vertex_attributes: &[VertexAttribute {
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[
+            VertexAttribute {
+                name: "aTaskRect",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aLocalSize",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aWavyLineThickness",
+                count: 1,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aStyle",
+                count: 1,
+                kind: VertexAttributeKind::I32,
+            },
+            VertexAttribute {
+                name: "aAxisSelect",
+                count: 1,
+                kind: VertexAttributeKind::F32,
+            },
+        ],
+    };
+
+    pub const FAST_LINEAR_GRADIENT: VertexDescriptor = VertexDescriptor {
+        vertex_attributes: &[VertexAttribute {
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[
+            VertexAttribute {
+                name: "aTaskRect",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aColor0",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aColor1",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aAxisSelect",
+                count: 1,
+                kind: VertexAttributeKind::F32,
+            },
+        ],
+    };
+
+    pub const LINEAR_GRADIENT: VertexDescriptor = VertexDescriptor {
+        vertex_attributes: &[VertexAttribute {
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[
+            VertexAttribute {
+                name: "aTaskRect",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aStartPoint",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aEndPoint",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aScale",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aExtendMode",
+                count: 1,
+                kind: VertexAttributeKind::I32,
+            },
+            VertexAttribute {
+                name: "aGradientStopsAddress",
+                count: 1,
+                kind: VertexAttributeKind::I32,
+            },
+        ],
+    };
+
+    pub const RADIAL_GRADIENT: VertexDescriptor = VertexDescriptor {
+        vertex_attributes: &[VertexAttribute {
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[
+            VertexAttribute {
+                name: "aTaskRect",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aCenter",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aScale",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aStartRadius",
+                count: 1,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aEndRadius",
+                count: 1,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aXYRatio",
+                count: 1,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aExtendMode",
+                count: 1,
+                kind: VertexAttributeKind::I32,
+            },
+            VertexAttribute {
+                name: "aGradientStopsAddress",
+                count: 1,
+                kind: VertexAttributeKind::I32,
+            },
+        ],
+    };
+
+    pub const CONIC_GRADIENT: VertexDescriptor = VertexDescriptor {
+        vertex_attributes: &[VertexAttribute {
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[
+            VertexAttribute {
+                name: "aTaskRect",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aCenter",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aScale",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aStartOffset",
+                count: 1,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aEndOffset",
+                count: 1,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aAngle",
+                count: 1,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aExtendMode",
+                count: 1,
+                kind: VertexAttributeKind::I32,
+            },
+            VertexAttribute {
+                name: "aGradientStopsAddress",
+                count: 1,
+                kind: VertexAttributeKind::I32,
+            },
+        ],
+    };
+
+    pub const BORDER: VertexDescriptor = VertexDescriptor {
+        vertex_attributes: &[VertexAttribute {
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[
+            VertexAttribute {
+                name: "aTaskOrigin",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aRect",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aColor0",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aColor1",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aFlags",
+                count: 1,
+                kind: VertexAttributeKind::I32,
+            },
+            VertexAttribute {
+                name: "aWidths",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aRadii",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aClipParams1",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aClipParams2",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+        ],
+    };
+
+    pub const SCALE: VertexDescriptor = VertexDescriptor {
+        vertex_attributes: &[VertexAttribute {
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[
+            VertexAttribute {
+                name: "aScaleTargetRect",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aScaleSourceRect",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+        ],
+    };
+
+    pub const CLIP_RECT: VertexDescriptor = VertexDescriptor {
+        vertex_attributes: &[VertexAttribute {
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[
+            // common clip attributes
+            VertexAttribute {
+                name: "aClipDeviceArea",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aClipOrigins",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aDevicePixelScale",
+                count: 1,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aTransformIds",
+                count: 2,
+                kind: VertexAttributeKind::I32,
+            },
+            // specific clip attributes
+            VertexAttribute {
+                name: "aClipLocalPos",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aClipLocalRect",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aClipMode",
+                count: 1,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aClipRect_TL",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aClipRadii_TL",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aClipRect_TR",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aClipRadii_TR",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aClipRect_BL",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aClipRadii_BL",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aClipRect_BR",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aClipRadii_BR",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+        ],
+    };
+
+    pub const CLIP_BOX_SHADOW: VertexDescriptor = VertexDescriptor {
+        vertex_attributes: &[VertexAttribute {
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[
+            // common clip attributes
+            VertexAttribute {
+                name: "aClipDeviceArea",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aClipOrigins",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aDevicePixelScale",
+                count: 1,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aTransformIds",
+                count: 2,
+                kind: VertexAttributeKind::I32,
+            },
+            // specific clip attributes
+            VertexAttribute {
+                name: "aClipDataResourceAddress",
+                count: 2,
+                kind: VertexAttributeKind::U16,
+            },
+            VertexAttribute {
+                name: "aClipSrcRectSize",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aClipMode",
+                count: 1,
+                kind: VertexAttributeKind::I32,
+            },
+            VertexAttribute {
+                name: "aStretchMode",
+                count: 2,
+                kind: VertexAttributeKind::I32,
+            },
+            VertexAttribute {
+                name: "aClipDestRect",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+        ],
+    };
+
+    pub const CLIP_IMAGE: VertexDescriptor = VertexDescriptor {
+        vertex_attributes: &[VertexAttribute {
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[
+            // common clip attributes
+            VertexAttribute {
+                name: "aClipDeviceArea",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aClipOrigins",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aDevicePixelScale",
+                count: 1,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aTransformIds",
+                count: 2,
+                kind: VertexAttributeKind::I32,
+            },
+            // specific clip attributes
+            VertexAttribute {
+                name: "aClipTileRect",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aClipDataResourceAddress",
+                count: 2,
+                kind: VertexAttributeKind::U16,
+            },
+            VertexAttribute {
+                name: "aClipLocalRect",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+        ],
+    };
+
+    pub const GPU_CACHE_UPDATE: VertexDescriptor = VertexDescriptor {
+        vertex_attributes: &[
+            VertexAttribute {
+                name: "aPosition",
+                count: 2,
+                kind: VertexAttributeKind::U16Norm,
+            },
+            VertexAttribute {
+                name: "aValue",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+        ],
+        instance_attributes: &[],
+    };
+
+    pub const RESOLVE: VertexDescriptor = VertexDescriptor {
+        vertex_attributes: &[VertexAttribute {
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[VertexAttribute {
+            name: "aRect",
+            count: 4,
+            kind: VertexAttributeKind::F32,
+        }],
+    };
+
+    pub const SVG_FILTER: VertexDescriptor = VertexDescriptor {
+        vertex_attributes: &[VertexAttribute {
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[
+            VertexAttribute {
+                name: "aFilterRenderTaskAddress",
+                count: 1,
+                kind: VertexAttributeKind::U16,
+            },
+            VertexAttribute {
+                name: "aFilterInput1TaskAddress",
+                count: 1,
+                kind: VertexAttributeKind::U16,
+            },
+            VertexAttribute {
+                name: "aFilterInput2TaskAddress",
+                count: 1,
+                kind: VertexAttributeKind::U16,
+            },
+            VertexAttribute {
+                name: "aFilterKind",
+                count: 1,
+                kind: VertexAttributeKind::U16,
+            },
+            VertexAttribute {
+                name: "aFilterInputCount",
+                count: 1,
+                kind: VertexAttributeKind::U16,
+            },
+            VertexAttribute {
+                name: "aFilterGenericInt",
+                count: 1,
+                kind: VertexAttributeKind::U16,
+            },
+            VertexAttribute {
+                name: "aFilterExtraDataAddress",
+                count: 2,
+                kind: VertexAttributeKind::U16,
+            },
+        ],
+    };
+
+    pub const VECTOR_STENCIL: VertexDescriptor = VertexDescriptor {
+        vertex_attributes: &[VertexAttribute {
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[
+            VertexAttribute {
+                name: "aFromPosition",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aCtrlPosition",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aToPosition",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aFromNormal",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aCtrlNormal",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aToNormal",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aPathID",
+                count: 1,
+                kind: VertexAttributeKind::U16,
+            },
+            VertexAttribute {
+                name: "aPad",
+                count: 1,
+                kind: VertexAttributeKind::U16,
+            },
+        ],
+    };
+
+    pub const VECTOR_COVER: VertexDescriptor = VertexDescriptor {
+        vertex_attributes: &[VertexAttribute {
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[
+            VertexAttribute {
+                name: "aTargetRect",
+                count: 4,
+                kind: VertexAttributeKind::I32,
+            },
+            VertexAttribute {
+                name: "aStencilOrigin",
+                count: 2,
+                kind: VertexAttributeKind::I32,
+            },
+            VertexAttribute {
+                name: "aSubpixel",
+                count: 1,
+                kind: VertexAttributeKind::U16,
+            },
+            VertexAttribute {
+                name: "aPad",
+                count: 1,
+                kind: VertexAttributeKind::U16,
+            },
+        ],
+    };
+
+    /// Vertex layout with one per-vertex attribute (`aPosition`, two `U8Norm`
+    /// components) and eight per-instance attributes of four `F32` components
+    /// each: local rect, device clip rect, color, params, three UV rects and a
+    /// transform.
+    pub const COMPOSITE: VertexDescriptor = VertexDescriptor {
+        vertex_attributes: &[VertexAttribute {
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[
+            VertexAttribute {
+                name: "aLocalRect",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aDeviceClipRect",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aColor",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aParams",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aUvRect0",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aUvRect1",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aUvRect2",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aTransform",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+        ],
+    };
+
+    /// Vertex layout with one per-vertex attribute (`aPosition`, two `U8Norm`
+    /// components) and two per-instance attributes of four `F32` components
+    /// each: `aRect` and `aColor`.
+    pub const CLEAR: VertexDescriptor = VertexDescriptor {
+        vertex_attributes: &[VertexAttribute {
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[
+            VertexAttribute {
+                name: "aRect",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aColor",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+        ],
+    };
+
+    /// Vertex layout with one per-vertex attribute (`aPosition`, two `U8Norm`
+    /// components) and three per-instance `F32` attributes: a four-component
+    /// source rect, a four-component destination rect and a two-component
+    /// destination texture size.
+    pub const COPY: VertexDescriptor = VertexDescriptor {
+        vertex_attributes: &[VertexAttribute {
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[
+            VertexAttribute {
+                name: "a_src_rect",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "a_dst_rect",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "a_dst_texture_size",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+        ],
+    };
+}
+
+/// Identifies a kind of vertex array.
+///
+/// `RendererVAOs` can be indexed with this enum to obtain the matching VAO.
+/// `VectorStencil` and `VectorCover` have no VAO stored in `RendererVAOs`;
+/// indexing with either of them is treated as unreachable.
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub enum VertexArrayKind {
+    Primitive,
+    Blur,
+    ClipImage,
+    ClipRect,
+    ClipBoxShadow,
+    VectorStencil,
+    VectorCover,
+    Border,
+    Scale,
+    LineDecoration,
+    FastLinearGradient,
+    LinearGradient,
+    RadialGradient,
+    ConicGradient,
+    Resolve,
+    SvgFilter,
+    Composite,
+    Clear,
+    Copy,
+}
+
+/// A GPU texture used to upload a `Vec<T>` of per-frame data.
+///
+/// The texture is created lazily by `update`, so `texture` is `None` until the
+/// first upload.
+pub struct VertexDataTexture<T> {
+    // The backing texture, or `None` if `update` has not run yet.
+    texture: Option<Texture>,
+    // Pixel format used whenever the texture is (re)created.
+    format: api::ImageFormat,
+    // Ties this texture to the item type `T` without storing a `T`.
+    _marker: PhantomData<T>,
+}
+
+impl<T> VertexDataTexture<T> {
+    /// Creates an empty wrapper that remembers `format`; no GPU texture is
+    /// allocated until `update` is called.
+    pub fn new(format: api::ImageFormat) -> Self {
+        Self {
+            texture: None,
+            format,
+            _marker: PhantomData,
+        }
+    }
+
+    /// Returns a borrow of the GPU texture. Panics if it hasn't been initialized.
+    pub fn texture(&self) -> &Texture {
+        self.texture.as_ref().unwrap()
+    }
+
+    /// Returns an estimate of the GPU memory consumed by this VertexDataTexture.
+    pub fn size_in_bytes(&self) -> usize {
+        self.texture.as_ref().map_or(0, |t| t.size_in_bytes())
+    }
+
+    /// Queues an upload of `data` into the texture, (re)creating the texture
+    /// when the required number of rows grows, or shrinks by more than
+    /// `VERTEX_TEXTURE_EXTRA_ROWS`.
+    ///
+    /// `data` is taken mutably only so that its *capacity* can be padded up to
+    /// a whole number of rows; its length and contents are not modified here.
+    /// After this method returns, `self.texture` is always `Some`.
+    pub fn update<'a>(
+        &'a mut self,
+        device: &mut Device,
+        texture_uploader: &mut TextureUploader<'a>,
+        data: &mut Vec<T>,
+    ) {
+        // Items are measured in 16-byte texels, so `T` must be a whole number
+        // of them and at least one item has to fit in a row.
+        debug_assert!(mem::size_of::<T>() % 16 == 0);
+        let texels_per_item = mem::size_of::<T>() / 16;
+        let items_per_row = MAX_VERTEX_TEXTURE_WIDTH / texels_per_item;
+        debug_assert_ne!(items_per_row, 0);
+
+        // Ensure we always end up with a texture when leaving this method.
+        let mut len = data.len();
+        if len == 0 {
+            // Nothing to upload and a texture already exists: keep it as is.
+            if self.texture.is_some() {
+                return;
+            }
+            // No texture yet: pretend there is one full row so that one gets
+            // created below.
+            data.reserve(items_per_row);
+            len = items_per_row;
+        } else {
+            // Extend the data array to have enough capacity to upload at least
+            // a multiple of the row size.  This ensures memory safety when the
+            // array is passed to OpenGL to upload to the GPU.
+            let extra = len % items_per_row;
+            if extra != 0 {
+                let padding = items_per_row - extra;
+                data.reserve(padding);
+                len += padding;
+            }
+        }
+
+        // `len` is now a non-zero multiple of `items_per_row`.
+        let needed_height = (len / items_per_row) as i32;
+        let existing_height = self
+            .texture
+            .as_ref()
+            .map_or(0, |t| t.get_dimensions().height);
+
+        // Create a new texture if needed.
+        //
+        // These textures are generally very small, which is why we don't bother
+        // with incremental updates and just re-upload every frame. For most pages
+        // they're one row each, and on stress tests like css-francine they end up
+        // in the 6-14 range. So we size the texture tightly to what we need (usually
+        // 1), and shrink it if the waste would be more than `VERTEX_TEXTURE_EXTRA_ROWS`
+        // rows. This helps with memory overhead, especially because there are several
+        // instances of these textures per Renderer.
+        if needed_height > existing_height
+            || needed_height + VERTEX_TEXTURE_EXTRA_ROWS < existing_height
+        {
+            // Drop the existing texture, if any.
+            if let Some(t) = self.texture.take() {
+                device.delete_texture(t);
+            }
+
+            let texture = device.create_texture(
+                api::ImageBufferKind::Texture2D,
+                self.format,
+                MAX_VERTEX_TEXTURE_WIDTH as i32,
+                // Ensure height is at least two to work around
+                // https://bugs.chromium.org/p/angleproject/issues/detail?id=3039
+                needed_height.max(2),
+                TextureFilter::Nearest,
+                None,
+            );
+            self.texture = Some(texture);
+        }
+
+        // Note: the actual width can be larger than the logical one, with a few texels
+        // of each row unused at the tail. This is needed because there is still hardware
+        // (like Intel iGPUs) that prefers power-of-two sizes of textures ([1]).
+        //
+        // [1] https://software.intel.com/en-us/articles/opengl-performance-tips-power-of-two-textures-have-better-performance
+        //
+        // NOTE(review): the single-row case uses `data.len()` (the unpadded
+        // length), so it is 0 when an empty `data` forces texture creation.
+        let logical_width = if needed_height == 1 {
+            data.len() * texels_per_item
+        } else {
+            MAX_VERTEX_TEXTURE_WIDTH - (MAX_VERTEX_TEXTURE_WIDTH % texels_per_item)
+        };
+
+        let rect = DeviceIntRect::from_size(
+            DeviceIntSize::new(logical_width as i32, needed_height),
+        );
+
+        // The pointer is handed over together with the padded `len`, which may
+        // exceed `data.len()`; the `reserve` calls above keep it within the
+        // allocation.
+        debug_assert!(len <= data.capacity(), "CPU copy will read out of bounds");
+        texture_uploader.upload(
+            device,
+            self.texture(),
+            rect,
+            None,
+            None,
+            data.as_ptr(),
+            len,
+        );
+    }
+
+    /// Consumes the wrapper and deletes the GPU texture, if one was created.
+    pub fn deinit(mut self, device: &mut Device) {
+        if let Some(t) = self.texture.take() {
+            device.delete_texture(t);
+        }
+    }
+}
+
+/// The set of data textures that `update` fills from a `Frame` and binds to
+/// the `PrimitiveHeadersF`, `PrimitiveHeadersI`, `TransformPalette` and
+/// `RenderTasks` samplers.
+pub struct VertexDataTextures {
+    prim_header_f_texture: VertexDataTexture<PrimitiveHeaderF>,
+    prim_header_i_texture: VertexDataTexture<PrimitiveHeaderI>,
+    transforms_texture: VertexDataTexture<TransformData>,
+    render_task_texture: VertexDataTexture<RenderTaskData>,
+}
+
+impl VertexDataTextures {
+    /// Builds the four data-texture wrappers. The integer primitive headers
+    /// use `RGBAI32`; the other three use `RGBAF32`. No GPU texture exists
+    /// until the first `update`.
+    pub fn new() -> Self {
+        Self {
+            prim_header_f_texture: VertexDataTexture::new(api::ImageFormat::RGBAF32),
+            prim_header_i_texture: VertexDataTexture::new(api::ImageFormat::RGBAI32),
+            transforms_texture: VertexDataTexture::new(api::ImageFormat::RGBAF32),
+            render_task_texture: VertexDataTexture::new(api::ImageFormat::RGBAF32),
+        }
+    }
+
+    /// Uploads the frame's primitive headers, transform palette and render
+    /// task data through a single uploader, flushes it, and then binds each
+    /// texture to its sampler.
+    pub fn update(&mut self, device: &mut Device, pbo_pool: &mut UploadPBOPool, frame: &mut Frame) {
+        let mut uploader = device.upload_texture(pbo_pool);
+
+        self.prim_header_f_texture
+            .update(device, &mut uploader, &mut frame.prim_headers.headers_float);
+        self.prim_header_i_texture
+            .update(device, &mut uploader, &mut frame.prim_headers.headers_int);
+        self.transforms_texture
+            .update(device, &mut uploader, &mut frame.transform_palette);
+        self.render_task_texture
+            .update(device, &mut uploader, &mut frame.render_tasks.task_data);
+
+        // The uploader has to be flushed (and thereby dropped) before the
+        // textures can be borrowed again for binding.
+        uploader.flush(device);
+
+        device.bind_texture(
+            super::TextureSampler::PrimitiveHeadersF,
+            &self.prim_header_f_texture.texture(),
+            Swizzle::default(),
+        );
+        device.bind_texture(
+            super::TextureSampler::PrimitiveHeadersI,
+            &self.prim_header_i_texture.texture(),
+            Swizzle::default(),
+        );
+        device.bind_texture(
+            super::TextureSampler::TransformPalette,
+            &self.transforms_texture.texture(),
+            Swizzle::default(),
+        );
+        device.bind_texture(
+            super::TextureSampler::RenderTasks,
+            &self.render_task_texture.texture(),
+            Swizzle::default(),
+        );
+    }
+
+    /// Sums the estimated GPU memory of the four textures.
+    pub fn size_in_bytes(&self) -> usize {
+        let sizes = [
+            self.prim_header_f_texture.size_in_bytes(),
+            self.prim_header_i_texture.size_in_bytes(),
+            self.transforms_texture.size_in_bytes(),
+            self.render_task_texture.size_in_bytes(),
+        ];
+        sizes.iter().sum()
+    }
+
+    /// Consumes the set and deletes every GPU texture that was created.
+    pub fn deinit(self, device: &mut Device) {
+        let VertexDataTextures {
+            prim_header_f_texture,
+            prim_header_i_texture,
+            transforms_texture,
+            render_task_texture,
+        } = self;
+
+        transforms_texture.deinit(device);
+        prim_header_f_texture.deinit(device);
+        prim_header_i_texture.deinit(device);
+        render_task_texture.deinit(device);
+    }
+}
+
+/// The vertex array objects created by `RendererVAOs::new`, one per vertex
+/// layout. Individual VAOs are looked up by indexing with a `VertexArrayKind`.
+pub struct RendererVAOs {
+    // Created first; every other VAO below is created from it with
+    // `create_vao_with_new_instances`.
+    prim_vao: VAO,
+    blur_vao: VAO,
+    clip_rect_vao: VAO,
+    clip_box_shadow_vao: VAO,
+    clip_image_vao: VAO,
+    border_vao: VAO,
+    line_vao: VAO,
+    scale_vao: VAO,
+    fast_linear_gradient_vao: VAO,
+    linear_gradient_vao: VAO,
+    radial_gradient_vao: VAO,
+    conic_gradient_vao: VAO,
+    resolve_vao: VAO,
+    svg_filter_vao: VAO,
+    composite_vao: VAO,
+    clear_vao: VAO,
+    copy_vao: VAO,
+}
+
+impl RendererVAOs {
+    /// Creates every vertex array object used by the renderer.
+    ///
+    /// The primitive VAO is created first and filled with the quad index and
+    /// vertex data; every other VAO is then created from it through
+    /// `create_vao_with_new_instances` with its own descriptor.
+    ///
+    /// When `indexed_quads` is `Some(count)`, the instance divisor is 0 and
+    /// the buffers hold `count` copies of the unit quad. Otherwise the divisor
+    /// is 1 and a single quad is uploaded.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `count` quads cannot be addressed with 16-bit indices, i.e.
+    /// if `count` exceeds `(u16::MAX + 1) / 4`.
+    pub fn new(device: &mut Device, indexed_quads: Option<NonZeroUsize>) -> Self {
+        const QUAD_INDICES: [u16; 6] = [0, 1, 2, 2, 1, 3];
+        const QUAD_VERTICES: [[u8; 2]; 4] = [[0, 0], [0xFF, 0], [0, 0xFF], [0xFF, 0xFF]];
+        // Each quad consumes four vertices, so the largest index emitted for
+        // `count` quads is `count * 4 - 1`, which has to fit in a `u16`.
+        const MAX_INDEXED_QUADS: usize = (u16::MAX as usize + 1) / 4;
+
+        let instance_divisor = if indexed_quads.is_some() { 0 } else { 1 };
+        let prim_vao = device.create_vao(&desc::PRIM_INSTANCES, instance_divisor);
+
+        device.bind_vao(&prim_vao);
+        match indexed_quads {
+            Some(count) => {
+                // Checking only `count < u16::MAX` is not enough: the index
+                // computation below multiplies the quad number by four in
+                // `u16`, which would overflow for any count above 16384.
+                assert!(
+                    count.get() <= MAX_INDEXED_QUADS,
+                    "{} indexed quads cannot be addressed with u16 indices",
+                    count.get(),
+                );
+                let quad_indices = (0 .. count.get() as u16)
+                    .flat_map(|instance| QUAD_INDICES.iter().map(move |&index| instance * 4 + index))
+                    .collect::<Vec<_>>();
+                device.update_vao_indices(&prim_vao, &quad_indices, VertexUsageHint::Static);
+                let quad_vertices = (0 .. count.get() as u16)
+                    .flat_map(|_| QUAD_VERTICES.iter().cloned())
+                    .collect::<Vec<_>>();
+                device.update_vao_main_vertices(&prim_vao, &quad_vertices, VertexUsageHint::Static);
+            }
+            None => {
+                device.update_vao_indices(&prim_vao, &QUAD_INDICES, VertexUsageHint::Static);
+                device.update_vao_main_vertices(&prim_vao, &QUAD_VERTICES, VertexUsageHint::Static);
+            }
+        }
+
+        RendererVAOs {
+            blur_vao: device.create_vao_with_new_instances(&desc::BLUR, &prim_vao),
+            clip_rect_vao: device.create_vao_with_new_instances(&desc::CLIP_RECT, &prim_vao),
+            clip_box_shadow_vao: device
+                .create_vao_with_new_instances(&desc::CLIP_BOX_SHADOW, &prim_vao),
+            clip_image_vao: device.create_vao_with_new_instances(&desc::CLIP_IMAGE, &prim_vao),
+            border_vao: device.create_vao_with_new_instances(&desc::BORDER, &prim_vao),
+            scale_vao: device.create_vao_with_new_instances(&desc::SCALE, &prim_vao),
+            line_vao: device.create_vao_with_new_instances(&desc::LINE, &prim_vao),
+            fast_linear_gradient_vao: device.create_vao_with_new_instances(&desc::FAST_LINEAR_GRADIENT, &prim_vao),
+            linear_gradient_vao: device.create_vao_with_new_instances(&desc::LINEAR_GRADIENT, &prim_vao),
+            radial_gradient_vao: device.create_vao_with_new_instances(&desc::RADIAL_GRADIENT, &prim_vao),
+            conic_gradient_vao: device.create_vao_with_new_instances(&desc::CONIC_GRADIENT, &prim_vao),
+            resolve_vao: device.create_vao_with_new_instances(&desc::RESOLVE, &prim_vao),
+            svg_filter_vao: device.create_vao_with_new_instances(&desc::SVG_FILTER, &prim_vao),
+            composite_vao: device.create_vao_with_new_instances(&desc::COMPOSITE, &prim_vao),
+            clear_vao: device.create_vao_with_new_instances(&desc::CLEAR, &prim_vao),
+            copy_vao: device.create_vao_with_new_instances(&desc::COPY, &prim_vao),
+            // Moved in last: the initialisers above still borrow `prim_vao`.
+            prim_vao,
+        }
+    }
+
+    /// Consumes the set and deletes every VAO on `device`.
+    pub fn deinit(self, device: &mut Device) {
+        device.delete_vao(self.prim_vao);
+        device.delete_vao(self.resolve_vao);
+        device.delete_vao(self.clip_rect_vao);
+        device.delete_vao(self.clip_box_shadow_vao);
+        device.delete_vao(self.clip_image_vao);
+        device.delete_vao(self.fast_linear_gradient_vao);
+        device.delete_vao(self.linear_gradient_vao);
+        device.delete_vao(self.radial_gradient_vao);
+        device.delete_vao(self.conic_gradient_vao);
+        device.delete_vao(self.blur_vao);
+        device.delete_vao(self.line_vao);
+        device.delete_vao(self.border_vao);
+        device.delete_vao(self.scale_vao);
+        device.delete_vao(self.svg_filter_vao);
+        device.delete_vao(self.composite_vao);
+        device.delete_vao(self.clear_vao);
+        device.delete_vao(self.copy_vao);
+    }
+}
+
+impl ops::Index<VertexArrayKind> for RendererVAOs {
+    type Output = VAO;
+
+    /// Returns the VAO stored for `kind`.
+    ///
+    /// # Panics
+    ///
+    /// `VectorStencil` and `VectorCover` have no VAO in this struct; indexing
+    /// with either hits `unreachable!()`.
+    fn index(&self, kind: VertexArrayKind) -> &VAO {
+        // Arms follow the declaration order of `VertexArrayKind`.
+        match kind {
+            VertexArrayKind::Primitive => &self.prim_vao,
+            VertexArrayKind::Blur => &self.blur_vao,
+            VertexArrayKind::ClipImage => &self.clip_image_vao,
+            VertexArrayKind::ClipRect => &self.clip_rect_vao,
+            VertexArrayKind::ClipBoxShadow => &self.clip_box_shadow_vao,
+            VertexArrayKind::VectorStencil | VertexArrayKind::VectorCover => unreachable!(),
+            VertexArrayKind::Border => &self.border_vao,
+            VertexArrayKind::Scale => &self.scale_vao,
+            VertexArrayKind::LineDecoration => &self.line_vao,
+            VertexArrayKind::FastLinearGradient => &self.fast_linear_gradient_vao,
+            VertexArrayKind::LinearGradient => &self.linear_gradient_vao,
+            VertexArrayKind::RadialGradient => &self.radial_gradient_vao,
+            VertexArrayKind::ConicGradient => &self.conic_gradient_vao,
+            VertexArrayKind::Resolve => &self.resolve_vao,
+            VertexArrayKind::SvgFilter => &self.svg_filter_vao,
+            VertexArrayKind::Composite => &self.composite_vao,
+            VertexArrayKind::Clear => &self.clear_vao,
+            VertexArrayKind::Copy => &self.copy_vao,
+        }
+    }
+}
diff --git a/gfx/wr/webrender/src/resource_cache.rs b/gfx/wr/webrender/src/resource_cache.rs
new file mode 100644
index 0000000000..b55e19ca8b
--- /dev/null
+++ b/gfx/wr/webrender/src/resource_cache.rs
@@ -0,0 +1,2307 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{BlobImageRequest, RasterizedBlobImage, ImageFormat, ImageDescriptorFlags};
+use api::{DebugFlags, FontInstanceKey, FontKey, FontTemplate, GlyphIndex};
+use api::{ExternalImageData, ExternalImageType, ExternalImageId, BlobImageResult};
+use api::{DirtyRect, GlyphDimensions, IdNamespace, DEFAULT_TILE_SIZE};
+use api::{ColorF, ImageData, ImageDescriptor, ImageKey, ImageRendering, TileSize};
+use api::{BlobImageHandler, BlobImageKey, VoidPtrToSizeFn};
+use api::units::*;
+use euclid::size2;
+use crate::{render_api::{ClearCache, AddFont, ResourceUpdate, MemoryReport}, util::WeakTable};
+use crate::image_tiling::{compute_tile_size, compute_tile_range};
+#[cfg(feature = "capture")]
+use crate::capture::ExternalCaptureImage;
+#[cfg(feature = "replay")]
+use crate::capture::PlainExternalImage;
+#[cfg(any(feature = "replay", feature = "png", feature="capture"))]
+use crate::capture::CaptureConfig;
+use crate::composite::{NativeSurfaceId, NativeSurfaceOperation, NativeTileId, NativeSurfaceOperationDetails};
+use crate::device::TextureFilter;
+use crate::glyph_cache::{GlyphCache, CachedGlyphInfo};
+use crate::glyph_cache::GlyphCacheEntry;
+use glyph_rasterizer::{GLYPH_FLASHING, FontInstance, GlyphFormat, GlyphKey, GlyphRasterizer, GlyphRasterJob};
+use glyph_rasterizer::{SharedFontResources, BaseFontInstance};
+use crate::gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle};
+use crate::gpu_types::UvRectKind;
+use crate::internal_types::{
+    CacheTextureId, FastHashMap, FastHashSet, TextureSource, ResourceUpdateList,
+    FrameId, FrameStamp,
+};
+use crate::profiler::{self, TransactionProfile, bytes_to_mb};
+use crate::render_task_graph::{RenderTaskId, RenderTaskGraphBuilder};
+use crate::render_task_cache::{RenderTaskCache, RenderTaskCacheKey, RenderTaskParent};
+use crate::render_task_cache::{RenderTaskCacheEntry, RenderTaskCacheEntryHandle};
+use crate::renderer::GpuBufferBuilder;
+use crate::surface::SurfaceBuilder;
+use euclid::point2;
+use smallvec::SmallVec;
+use std::collections::hash_map::Entry::{self, Occupied, Vacant};
+use std::collections::hash_map::{Iter, IterMut};
+use std::collections::VecDeque;
+use std::{cmp, mem};
+use std::fmt::Debug;
+use std::hash::Hash;
+use std::os::raw::c_void;
+#[cfg(any(feature = "capture", feature = "replay"))]
+use std::path::PathBuf;
+use std::sync::Arc;
+use std::sync::atomic::{AtomicUsize, Ordering};
+use std::u32;
+use crate::texture_cache::{TextureCache, TextureCacheHandle, Eviction, TargetShader};
+use crate::picture_textures::PictureTextures;
+
+// Counter for generating unique native surface ids
+static NEXT_NATIVE_SURFACE_ID: AtomicUsize = AtomicUsize::new(0);
+
+/// Data describing one fetched glyph: its index in the text run, the GPU cache
+/// address of its UV rect, and its offset, size and scale.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct GlyphFetchResult {
+    pub index_in_text_run: i32,
+    pub uv_rect_address: GpuCacheAddress,
+    pub offset: DevicePoint,
+    pub size: DeviceIntSize,
+    pub scale: f32,
+}
+
+// These coordinates are always in texels.
+// They are converted to normalized ST
+// values in the vertex shader. The reason
+// for this is that the texture may change
+// dimensions (e.g. the pages in a texture
+// atlas can grow). When this happens, by
+// storing the coordinates as texel values
+// we don't need to go through and update
+// various CPU-side structures.
+#[derive(Debug, Clone)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct CacheItem {
+    // Source texture; `TextureSource::Invalid` marks an invalid item.
+    pub texture_id: TextureSource,
+    // GPU cache handle for the UV rect.
+    pub uv_rect_handle: GpuCacheHandle,
+    // The rect within the texture, in texels (see the note above).
+    pub uv_rect: DeviceIntRect,
+    // Four caller-defined floats; all zero for an invalid item.
+    pub user_data: [f32; 4],
+}
+
+impl CacheItem {
+    /// Builds a placeholder item: `TextureSource::Invalid` as the texture, a
+    /// fresh GPU cache handle, a zero rect and zeroed user data.
+    pub fn invalid() -> Self {
+        Self {
+            texture_id: TextureSource::Invalid,
+            uv_rect_handle: GpuCacheHandle::new(),
+            uv_rect: DeviceIntRect::zero(),
+            user_data: [0.0; 4],
+        }
+    }
+
+    /// Reports whether this item refers to a texture, i.e. whether its
+    /// `texture_id` is anything other than `TextureSource::Invalid`.
+    pub fn is_valid(&self) -> bool {
+        let texture = &self.texture_id;
+        *texture != TextureSource::Invalid
+    }
+}
+
+/// Represents the backing store of an image in the cache.
+/// This storage can take several forms.
+#[derive(Clone, Debug)]
+pub enum CachedImageData {
+    /// A simple series of bytes, provided by the embedding and owned by WebRender.
+    /// The format is stored out-of-band, currently in ImageDescriptor.
+    Raw(Arc<Vec<u8>>),
+    /// A series of commands that can be rasterized into an image via an
+    /// embedding-provided callback.
+    ///
+    /// The commands are stored elsewhere and this variant is used as a placeholder.
+    Blob,
+    /// An image owned by the embedding, and referenced by WebRender. This may
+    /// take the form of a texture or a heap-allocated buffer.
+    External(ExternalImageData),
+}
+
+impl From<ImageData> for CachedImageData {
+    /// Maps each `ImageData` variant onto the `CachedImageData` variant of the
+    /// same name, moving the payload across unchanged.
+    fn from(source: ImageData) -> Self {
+        match source {
+            ImageData::External(external) => Self::External(external),
+            ImageData::Raw(bytes) => Self::Raw(bytes),
+        }
+    }
+}
+
+impl CachedImageData {
+    /// Returns true if this represents a blob.
+    #[inline]
+    pub fn is_blob(&self) -> bool {
+        matches!(*self, CachedImageData::Blob)
+    }
+
+    /// Returns true if this variant of CachedImageData should go through the texture
+    /// cache.
+    ///
+    /// Only external images backed by a texture handle bypass it; raw data,
+    /// blobs and external buffers all use the texture cache.
+    #[inline]
+    pub fn uses_texture_cache(&self) -> bool {
+        let external = match *self {
+            CachedImageData::External(ref external) => external,
+            CachedImageData::Blob | CachedImageData::Raw(_) => return true,
+        };
+
+        match external.image_type {
+            ExternalImageType::Buffer => true,
+            ExternalImageType::TextureHandle(_) => false,
+        }
+    }
+}
+
+/// A summary of an image: its descriptor, the external image data (if any),
+/// the tile size (if any) and the visible rectangle.
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ImageProperties {
+    pub descriptor: ImageDescriptor,
+    pub external_image: Option<ExternalImageData>,
+    pub tiling: Option<TileSize>,
+    // Potentially a subset of the image's total rectangle. This rectangle is what
+    // we map to the (layout space) display item bounds.
+    pub visible_rect: DeviceIntRect,
+}
+
+/// State stored in `ResourceCache::state`; a new cache starts in `Idle`.
+///
+/// NOTE(review): presumably tracks which phase the cache is in (adding
+/// resources vs. querying them) - confirm against the methods that read it.
+#[derive(Debug, Copy, Clone, PartialEq)]
+enum State {
+    Idle,
+    AddResources,
+    QueryResources,
+}
+
+/// Post scene building state.
+type RasterizedBlob = FastHashMap<TileOffset, RasterizedBlobImage>;
+
+/// A generation number for an image, wrapped in a newtype over `u32`.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub struct ImageGeneration(pub u32);
+
+impl ImageGeneration {
+    /// Sentinel generation, equal to `u32::MAX`.
+    pub const INVALID: ImageGeneration = ImageGeneration(u32::MAX);
+}
+
+/// The template stored per `ImageKey` in `ImageTemplates`: the image's backing
+/// data together with its descriptor, optional tiling, visible rect and
+/// generation.
+struct ImageResource {
+    data: CachedImageData,
+    descriptor: ImageDescriptor,
+    tiling: Option<TileSize>,
+    /// This is used to express images that are virtually very large
+    /// but with only a visible sub-set that is valid at a given time.
+    visible_rect: DeviceIntRect,
+    generation: ImageGeneration,
+}
+
+/// Pairs the full size of an image with the tile size it is split into.
+#[derive(Clone, Debug)]
+pub struct ImageTiling {
+    pub image_size: DeviceIntSize,
+    pub tile_size: TileSize,
+}
+
+/// Map from `ImageKey` to the `ImageResource` registered for it.
+#[derive(Default)]
+struct ImageTemplates {
+    images: FastHashMap<ImageKey, ImageResource>,
+}
+
+impl ImageTemplates {
+    /// Registers `template` under `image_key`, replacing any previous entry.
+    fn insert(&mut self, image_key: ImageKey, template: ImageResource) {
+        self.images.insert(image_key, template);
+    }
+
+    /// Unregisters `image_key`, handing back its template if there was one.
+    fn remove(&mut self, image_key: ImageKey) -> Option<ImageResource> {
+        self.images.remove(&image_key)
+    }
+
+    /// Shared access to the template registered for `image_key`, if any.
+    fn get(&self, image_key: ImageKey) -> Option<&ImageResource> {
+        self.images.get(&image_key)
+    }
+
+    /// Mutable access to the template registered for `image_key`, if any.
+    fn get_mut(&mut self, image_key: ImageKey) -> Option<&mut ImageResource> {
+        self.images.get_mut(&image_key)
+    }
+}
+
+/// Per-image cache entry: the texture cache handle plus a dirty rect and a
+/// manual-eviction flag.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct CachedImageInfo {
+    texture_cache_handle: TextureCacheHandle,
+    dirty_rect: ImageDirtyRect,
+    // Must be false by the time this value is dropped (checked by a debug
+    // assertion in `Drop`); `mark_unused` clears it.
+    manual_eviction: bool,
+}
+
+impl CachedImageInfo {
+    /// Evicts this entry's handle from `texture_cache` and clears the
+    /// manual-eviction flag, which makes the entry safe to drop.
+    fn mark_unused(&mut self, texture_cache: &mut TextureCache) {
+        texture_cache.evict_handle(&self.texture_cache_handle);
+        self.manual_eviction = false;
+    }
+}
+
+// Debug-only check: an entry must not be dropped while `manual_eviction` is
+// still set. `mark_unused` clears the flag.
+#[cfg(debug_assertions)]
+impl Drop for CachedImageInfo {
+    fn drop(&mut self) {
+        debug_assert!(!self.manual_eviction, "Manual eviction requires cleanup");
+    }
+}
+
+/// A hash map from `K` to `V` with an extra, default-constructed `user_data`
+/// value of type `U` stored alongside it.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ResourceClassCache<K: Hash + Eq, V, U: Default> {
+    resources: FastHashMap<K, V>,
+    pub user_data: U,
+}
+
+impl<K, V, U> ResourceClassCache<K, V, U>
+where
+    K: Clone + Hash + Eq + Debug,
+    U: Default,
+{
+    /// Creates an empty cache whose `user_data` is `U::default()`.
+    pub fn new() -> Self {
+        ResourceClassCache {
+            resources: FastHashMap::default(),
+            user_data: Default::default(),
+        }
+    }
+
+    /// Returns the resource stored for `key`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if there is no entry for `key`; use `try_get` to handle that.
+    pub fn get(&self, key: &K) -> &V {
+        self.resources.get(key)
+            .expect("Didn't find a cached resource with that ID!")
+    }
+
+    /// Returns the resource stored for `key`, or `None` if there is none.
+    pub fn try_get(&self, key: &K) -> Option<&V> {
+        self.resources.get(key)
+    }
+
+    /// Stores `value` under `key`, replacing any previous entry.
+    pub fn insert(&mut self, key: K, value: V) {
+        self.resources.insert(key, value);
+    }
+
+    /// Removes and returns the resource stored for `key`, if any.
+    pub fn remove(&mut self, key: &K) -> Option<V> {
+        self.resources.remove(key)
+    }
+
+    /// Returns the resource stored for `key` mutably.
+    ///
+    /// # Panics
+    ///
+    /// Panics if there is no entry for `key`; use `try_get_mut` to handle that.
+    pub fn get_mut(&mut self, key: &K) -> &mut V {
+        self.resources.get_mut(key)
+            .expect("Didn't find a cached resource with that ID!")
+    }
+
+    /// Returns the resource stored for `key` mutably, or `None` if there is none.
+    pub fn try_get_mut(&mut self, key: &K) -> Option<&mut V> {
+        self.resources.get_mut(key)
+    }
+
+    /// Returns the hash-map entry for `key`, for in-place insert or update.
+    pub fn entry(&mut self, key: K) -> Entry<'_, K, V> {
+        self.resources.entry(key)
+    }
+
+    /// Iterates over all `(key, resource)` pairs.
+    pub fn iter(&self) -> Iter<'_, K, V> {
+        self.resources.iter()
+    }
+
+    /// Iterates over all `(key, resource)` pairs with mutable resources.
+    pub fn iter_mut(&mut self) -> IterMut<'_, K, V> {
+        self.resources.iter_mut()
+    }
+
+    /// Returns true if the cache holds no resources.
+    ///
+    /// Takes `&self`: this is a read-only query, so it no longer demands
+    /// exclusive access to the cache.
+    pub fn is_empty(&self) -> bool {
+        self.resources.is_empty()
+    }
+
+    /// Removes every resource. `user_data` is left untouched.
+    pub fn clear(&mut self) {
+        self.resources.clear();
+    }
+
+    /// Keeps only the resources for which `f` returns true.
+    pub fn retain<F>(&mut self, f: F)
+    where
+        F: FnMut(&K, &mut V) -> bool,
+    {
+        self.resources.retain(f);
+    }
+}
+
+/// Key for one cached variant of an image: the rendering mode plus an optional
+/// tile offset. It is an `ImageRequest` without the `ImageKey`.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct CachedImageKey {
+    pub rendering: ImageRendering,
+    pub tile: Option<TileOffset>,
+}
+
+/// Identifies a requested image: which image (`key`), which rendering mode,
+/// and which tile of it (`None` when the request is not for a tile).
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ImageRequest {
+    pub key: ImageKey,
+    pub rendering: ImageRendering,
+    pub tile: Option<TileOffset>,
+}
+
+impl ImageRequest {
+    /// Returns a copy of this request that targets the tile at `offset`; the
+    /// key and rendering mode are carried over unchanged.
+    pub fn with_tile(&self, offset: TileOffset) -> Self {
+        Self {
+            tile: Some(offset),
+            ..*self
+        }
+    }
+
+    /// True when the request has no tile and uses `ImageRendering::Auto`.
+    pub fn is_untiled_auto(&self) -> bool {
+        match self.tile {
+            None => self.rendering == ImageRendering::Auto,
+            Some(_) => false,
+        }
+    }
+}
+
+// Implemented as `From` rather than `Into`: the standard blanket impl still
+// provides `ImageRequest: Into<BlobImageRequest>`, so existing `.into()` calls
+// keep working, and `BlobImageRequest::from(request)` becomes available too.
+impl From<ImageRequest> for BlobImageRequest {
+    /// Converts a tiled image request into the matching blob request.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `request.tile` is `None`.
+    fn from(request: ImageRequest) -> Self {
+        BlobImageRequest {
+            key: BlobImageKey(request.key),
+            tile: request
+                .tile
+                .expect("ImageRequest needs a tile to become a BlobImageRequest"),
+        }
+    }
+}
+
+// Implemented as `From` rather than `Into`: the standard blanket impl still
+// provides `ImageRequest: Into<CachedImageKey>`, so existing `.into()` calls
+// keep working.
+impl From<ImageRequest> for CachedImageKey {
+    /// Drops the image key from the request, keeping the rendering mode and
+    /// the optional tile.
+    fn from(request: ImageRequest) -> Self {
+        CachedImageKey {
+            rendering: request.rendering,
+            tile: request.tile,
+        }
+    }
+}
+
+/// Error stored in `ImageResult::Err` for an image that could not be cached.
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Clone, Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum ImageCacheError {
+    // NOTE(review): presumably the image exceeds a size limit - confirm where
+    // this variant is produced.
+    OverLimitSize,
+}
+
+/// The cached state stored per `ImageKey` in `ImageCache`.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+enum ImageResult {
+    // A single cache entry.
+    UntiledAuto(CachedImageInfo),
+    // Several cache entries, keyed by rendering mode and optional tile.
+    Multi(ResourceClassCache<CachedImageKey, CachedImageInfo, ()>),
+    // The image could not be cached.
+    Err(ImageCacheError),
+}
+
+impl ImageResult {
+    /// Releases any texture cache entries held alive by this ImageResult.
+    ///
+    /// Every contained `CachedImageInfo` is marked unused; an `Err` result
+    /// holds no entries, so nothing happens for it.
+    fn drop_from_cache(&mut self, texture_cache: &mut TextureCache) {
+        match self {
+            ImageResult::Err(_) => {}
+            ImageResult::UntiledAuto(info) => info.mark_unused(texture_cache),
+            ImageResult::Multi(cache) => {
+                cache
+                    .resources
+                    .values_mut()
+                    .for_each(|info| info.mark_unused(texture_cache));
+            }
+        }
+    }
+}
+
+type ImageCache = ResourceClassCache<ImageKey, ImageResult, ()>;
+
+/// The font and image templates owned by the `ResourceCache`.
+struct Resources {
+    fonts: SharedFontResources,
+    image_templates: ImageTemplates,
+    // We keep a set of Weak references to the fonts so that we're able to include them in memory
+    // reports even if only the OS is holding on to the Vec<u8>.
+    // NOTE(review): this comment used to say that `PtrWeakHashSet` periodically drops dead
+    // references, but the field is a `WeakTable`; confirm that `WeakTable` prunes the same way.
+    weak_fonts: WeakTable
+}
+
+// We only use this to report glyph dimensions to the user of the API, so using
+// the font instance key should be enough. If we start using it to cache dimensions
+// for internal font instances we should change the hash key accordingly.
+pub type GlyphDimensionsCache = FastHashMap<(FontInstanceKey, GlyphIndex), Option<GlyphDimensions>>;
+
+/// Epoch counter for the blob image rasterizer, wrapped in a newtype over `usize`.
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub struct BlobImageRasterizerEpoch(usize);
+
+/// Internal information about allocated render targets in the pool
+struct RenderTarget {
+    size: DeviceIntSize,
+    format: ImageFormat,
+    texture_id: CacheTextureId,
+    /// If true, this is currently lent out, and not available to other passes
+    is_active: bool,
+    /// The frame in which this target was last used; see `used_recently`.
+    last_frame_used: FrameId,
+}
+
+impl RenderTarget {
+    /// Size of the target in bytes: width times height times the format's
+    /// bytes per pixel.
+    fn size_in_bytes(&self) -> usize {
+        let pixel_count = (self.size.width * self.size.height) as usize;
+        let bytes_per_pixel = self.format.bytes_per_pixel() as usize;
+        pixel_count * bytes_per_pixel
+    }
+
+    /// Returns true if this texture was used within `threshold` frames of
+    /// the current frame.
+    pub fn used_recently(&self, current_frame_id: FrameId, threshold: usize) -> bool {
+        let last_recent_frame = self.last_frame_used + threshold;
+        last_recent_frame >= current_frame_id
+    }
+}
+
+/// High-level container for resources managed by the `RenderBackend`.
+///
+/// This includes a variety of things, including images, fonts, and glyphs,
+/// which may be stored as memory buffers, GPU textures, or handles to resources
+/// managed by the OS or other parts of WebRender.
+pub struct ResourceCache {
+    cached_glyphs: GlyphCache,
+    /// Cached state per `ImageKey`.
+    cached_images: ImageCache,
+    cached_render_tasks: RenderTaskCache,
+
+    /// Font and image templates.
+    resources: Resources,
+    /// Starts out as `State::Idle`.
+    state: State,
+    /// Starts out as `FrameId::INVALID`.
+    current_frame_id: FrameId,
+
+    #[cfg(feature = "capture")]
+    /// Used for capture sequences. If the resource cache is updated, then we
+    /// mark it as dirty. When the next frame is captured in the sequence, we
+    /// dump the state of the resource cache.
+    capture_dirty: bool,
+
+    pub texture_cache: TextureCache,
+    pub picture_textures: PictureTextures,
+
+    /// TODO(gw): We should expire (parts of) this cache semi-regularly!
+    cached_glyph_dimensions: GlyphDimensionsCache,
+    glyph_rasterizer: GlyphRasterizer,
+
+    /// The set of images that aren't present or valid in the texture cache,
+    /// and need to be rasterized and/or uploaded this frame. This includes
+    /// both blobs and regular images.
+    pending_image_requests: FastHashSet<ImageRequest>,
+
+    /// Rasterized tiles per blob image key.
+    rasterized_blob_images: FastHashMap<BlobImageKey, RasterizedBlob>,
+
+    /// A log of the last three frames worth of deleted image keys kept
+    /// for debugging purposes.
+    deleted_blob_keys: VecDeque<Vec<BlobImageKey>>,
+
+    /// We keep one around to be able to call clear_namespace
+    /// after the api object is deleted. For most purposes the
+    /// api object's blob handler should be used instead.
+    blob_image_handler: Option<Box<dyn BlobImageHandler>>,
+
+    /// A list of queued compositor surface updates to apply next frame.
+    pending_native_surface_updates: Vec<NativeSurfaceOperation>,
+
+    /// Both counters start at 0.
+    /// NOTE(review): presumably running byte totals for memory reports - confirm.
+    image_templates_memory: usize,
+    font_templates_memory: usize,
+
+    /// A pool of render targets for use by the render task graph
+    render_target_pool: Vec<RenderTarget>,
+}
+
+impl ResourceCache {
+    /// Creates a resource cache around the supplied texture cache, picture
+    /// textures, glyph rasterizer, glyph cache, shared fonts and optional blob
+    /// image handler.
+    ///
+    /// Everything else starts empty: no cached images or render tasks, no
+    /// pending requests, zeroed memory counters, `State::Idle` and
+    /// `FrameId::INVALID` as the current frame.
+    pub fn new(
+        texture_cache: TextureCache,
+        picture_textures: PictureTextures,
+        glyph_rasterizer: GlyphRasterizer,
+        cached_glyphs: GlyphCache,
+        fonts: SharedFontResources,
+        blob_image_handler: Option<Box<dyn BlobImageHandler>>,
+    ) -> Self {
+        ResourceCache {
+            cached_glyphs,
+            cached_images: ResourceClassCache::new(),
+            cached_render_tasks: RenderTaskCache::new(),
+            resources: Resources {
+                fonts,
+                image_templates: ImageTemplates::default(),
+                weak_fonts: WeakTable::new(),
+            },
+            cached_glyph_dimensions: FastHashMap::default(),
+            texture_cache,
+            picture_textures,
+            state: State::Idle,
+            current_frame_id: FrameId::INVALID,
+            pending_image_requests: FastHashSet::default(),
+            glyph_rasterizer,
+            rasterized_blob_images: FastHashMap::default(),
+            // We want to keep three frames worth of delete blob keys
+            deleted_blob_keys: vec![Vec::new(), Vec::new(), Vec::new()].into(),
+            blob_image_handler,
+            pending_native_surface_updates: Vec::new(),
+            // A fresh cache is marked dirty for capture.
+            #[cfg(feature = "capture")]
+            capture_dirty: true,
+            image_templates_memory: 0,
+            font_templates_memory: 0,
+            render_target_pool: Vec::new(),
+        }
+    }
+
+    /// Builds a self-contained resource cache for unit tests, backed by a
+    /// test texture cache, a default rayon thread pool and no blob image
+    /// handler.
+    #[cfg(test)]
+    pub fn new_for_testing() -> Self {
+        use rayon::ThreadPoolBuilder;
+
+        let thread_pool = ThreadPoolBuilder::new().build().unwrap();
+        let rasterizer = GlyphRasterizer::new(Arc::new(thread_pool), true);
+
+        Self::new(
+            TextureCache::new_for_testing(4096, ImageFormat::RGBA8),
+            PictureTextures::new(
+                crate::picture::TILE_SIZE_DEFAULT,
+                TextureFilter::Nearest,
+            ),
+            rasterizer,
+            GlyphCache::new(),
+            SharedFontResources::new(IdNamespace(0)),
+            None,
+        )
+    }
+
+    /// Returns the maximum texture size reported by the texture cache.
+    pub fn max_texture_size(&self) -> i32 {
+        TextureCache::max_texture_size(&self.texture_cache)
+    }
+
+    /// Maximum texture size before we consider it preferable to break the
+    /// texture into tiles, as reported by the texture cache.
+    pub fn tiling_threshold(&self) -> i32 {
+        TextureCache::tiling_threshold(&self.texture_cache)
+    }
+
+    /// Forwards the multithreading toggle to the glyph rasterizer.
+    pub fn enable_multithreading(&mut self, enable: bool) {
+        GlyphRasterizer::enable_multithreading(&mut self.glyph_rasterizer, enable);
+    }
+
+    /// Decides whether an image must be split into tiles.
+    ///
+    /// Raw, blob and external-buffer images are tiled when either dimension
+    /// exceeds `limit`. Other external images are never tiled.
+    fn should_tile(limit: i32, descriptor: &ImageDescriptor, data: &CachedImageData) -> bool {
+        if let CachedImageData::External(info) = *data {
+            // External handles already represent existing textures so it does
+            // not make sense to tile them into smaller ones.
+            if info.image_type != ExternalImageType::Buffer {
+                return false;
+            }
+        }
+
+        descriptor.size.width > limit || descriptor.size.height > limit
+    }
+
+    /// Request the texture cache item for a cacheable render task.
+    ///
+    /// If the item is already cached, the texture cache handle will be
+    /// returned. Otherwise, the user supplied closure `f` will be invoked to
+    /// generate the render task chain that is required to draw this task.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the render task cache reports an error for the request.
+    pub fn request_render_task<F>(
+        &mut self,
+        key: RenderTaskCacheKey,
+        gpu_cache: &mut GpuCache,
+        gpu_buffer_builder: &mut GpuBufferBuilder,
+        rg_builder: &mut RenderTaskGraphBuilder,
+        user_data: Option<[f32; 4]>,
+        is_opaque: bool,
+        parent: RenderTaskParent,
+        surface_builder: &mut SurfaceBuilder,
+        f: F,
+    ) -> RenderTaskId
+    where
+        F: FnOnce(&mut RenderTaskGraphBuilder, &mut GpuBufferBuilder) -> RenderTaskId,
+    {
+        let texture_cache = &mut self.texture_cache;
+        let requested = self.cached_render_tasks.request_render_task(
+            key,
+            texture_cache,
+            gpu_cache,
+            gpu_buffer_builder,
+            rg_builder,
+            user_data,
+            is_opaque,
+            parent,
+            surface_builder,
+            // The caller's closure cannot fail, so always report success.
+            |graph, buffer_builder| Ok(f(graph, buffer_builder)),
+        );
+
+        requested.expect("Failed to request a render task from the resource cache!")
+    }
+
+    /// Applies a batch of resource updates, one by one and in order.
+    ///
+    /// Image and font templates are added, updated or deleted as requested,
+    /// blob image visible areas are adjusted, and the template counters in
+    /// `profile` are refreshed for the operations that change them. With the
+    /// `capture` feature enabled, any non-empty batch marks the cache dirty.
+    pub fn post_scene_building_update(
+        &mut self,
+        updates: Vec<ResourceUpdate>,
+        profile: &mut TransactionProfile,
+    ) {
+        // TODO, there is potential for optimization here, by processing updates in
+        // bulk rather than one by one (for example by sorting allocations by size or
+        // in a way that reduces fragmentation in the atlas).
+        #[cfg(feature = "capture")]
+        match updates.is_empty() {
+            false => self.capture_dirty = true,
+            _ => {},
+        }
+
+        for update in updates {
+            match update {
+                ResourceUpdate::AddImage(img) => {
+                    // Only raw image data is counted towards the template memory total.
+                    if let ImageData::Raw(ref bytes) = img.data {
+                        self.image_templates_memory += bytes.len();
+                        profile.set(profiler::IMAGE_TEMPLATES_MEM, bytes_to_mb(self.image_templates_memory));
+                    }
+                    // A regular image is visible in full: its visible rect is its whole size.
+                    self.add_image_template(
+                        img.key,
+                        img.descriptor,
+                        img.data.into(),
+                        &img.descriptor.size.into(),
+                        img.tiling,
+                    );
+                    profile.set(profiler::IMAGE_TEMPLATES, self.resources.image_templates.images.len());
+                }
+                ResourceUpdate::UpdateImage(img) => {
+                    // NOTE(review): the template memory total is not adjusted here even
+                    // though the template data is replaced; confirm whether intended.
+                    self.update_image_template(img.key, img.descriptor, img.data.into(), &img.dirty_rect);
+                }
+                ResourceUpdate::AddBlobImage(img) => {
+                    // Blob images are always registered with an explicit tile size.
+                    self.add_image_template(
+                        img.key.as_image(),
+                        img.descriptor,
+                        CachedImageData::Blob,
+                        &img.visible_rect,
+                        Some(img.tile_size),
+                    );
+                }
+                ResourceUpdate::UpdateBlobImage(img) => {
+                    self.update_image_template(
+                        img.key.as_image(),
+                        img.descriptor,
+                        CachedImageData::Blob,
+                        &to_image_dirty_rect(
+                            &img.dirty_rect
+                        ),
+                    );
+                    self.discard_tiles_outside_visible_area(img.key, &img.visible_rect); // TODO: remove?
+                    self.set_image_visible_rect(img.key.as_image(), &img.visible_rect);
+                }
+                ResourceUpdate::DeleteImage(img) => {
+                    self.delete_image_template(img);
+                    profile.set(profiler::IMAGE_TEMPLATES, self.resources.image_templates.images.len());
+                    profile.set(profiler::IMAGE_TEMPLATES_MEM, bytes_to_mb(self.image_templates_memory));
+                }
+                ResourceUpdate::DeleteBlobImage(img) => {
+                    self.delete_image_template(img.as_image());
+                }
+                ResourceUpdate::DeleteFont(font) => {
+                    // The template is only torn down when `delete_key` yields a shared key.
+                    if let Some(shared_key) = self.resources.fonts.font_keys.delete_key(&font) {
+                        self.delete_font_template(shared_key);
+                        if let Some(ref mut handler) = &mut self.blob_image_handler {
+                            handler.delete_font(shared_key);
+                        }
+                        profile.set(profiler::FONT_TEMPLATES, self.resources.fonts.templates.len());
+                        profile.set(profiler::FONT_TEMPLATES_MEM, bytes_to_mb(self.font_templates_memory));
+                    }
+                }
+                ResourceUpdate::DeleteFontInstance(font) => {
+                    if let Some(shared_key) = self.resources.fonts.instance_keys.delete_key(&font) {
+                        self.delete_font_instance(shared_key);
+                    }
+                    // Unlike fonts, the blob handler is always notified, and with the
+                    // original (unmapped) instance key.
+                    if let Some(ref mut handler) = &mut self.blob_image_handler {
+                        handler.delete_font_instance(font);
+                    }
+                }
+                ResourceUpdate::SetBlobImageVisibleArea(key, area) => {
+                    self.discard_tiles_outside_visible_area(key, &area);
+                    self.set_image_visible_rect(key.as_image(), &area);
+                }
+                ResourceUpdate::AddFont(font) => {
+                    // The shared key was already added in ApiResources, but the first time it is
+                    // seen on the backend we still need to do some extra initialization here.
+                    let (key, template) = match font {
+                        AddFont::Raw(key, bytes, index) => {
+                            (key, FontTemplate::Raw(bytes, index))
+                        }
+                        AddFont::Native(key, native_font_handle) => {
+                            (key, FontTemplate::Native(native_font_handle))
+                        }
+                    };
+                    let shared_key = self.resources.fonts.font_keys.map_key(&key);
+                    // Skip fonts the glyph rasterizer already knows about.
+                    if !self.glyph_rasterizer.has_font(shared_key) {
+                        self.add_font_template(shared_key, template);
+                        profile.set(profiler::FONT_TEMPLATES, self.resources.fonts.templates.len());
+                        profile.set(profiler::FONT_TEMPLATES_MEM, bytes_to_mb(self.font_templates_memory));
+                    }
+                }
+                ResourceUpdate::AddFontInstance(..) => {
+                    // Already added in ApiResources.
+                }
+            }
+        }
+    }
+
+    /// Stores freshly rasterized blob tiles and invalidates the matching
+    /// cached tile entries.
+    ///
+    /// Failed rasterizations are logged and skipped. For each successful one
+    /// the rasterized area is added to the profile counter, the tile is
+    /// recorded under its blob key, and an existing cache entry for that tile
+    /// (if any) is marked entirely dirty.
+    pub fn add_rasterized_blob_images(
+        &mut self,
+        images: Vec<(BlobImageRequest, BlobImageResult)>,
+        profile: &mut TransactionProfile,
+    ) {
+        for (request, result) in images {
+            let rasterized = if let Ok(rasterized) = result {
+                rasterized
+            } else {
+                warn!("Failed to rasterize a blob image");
+                continue;
+            };
+
+            profile.add(profiler::RASTERIZED_BLOBS_PX, rasterized.rasterized_rect.area());
+
+            // Create the tile map for this key on first use, then record the tile.
+            self.rasterized_blob_images
+                .entry(request.key)
+                .or_insert_with(RasterizedBlob::default)
+                .insert(request.tile, rasterized);
+
+            if let Some(&mut ImageResult::Multi(ref mut entries)) =
+                self.cached_images.try_get_mut(&request.key.as_image())
+            {
+                let tile_key = CachedImageKey {
+                    rendering: ImageRendering::Auto, // TODO(nical)
+                    tile: Some(request.tile),
+                };
+                if let Some(cached_tile) = entries.try_get_mut(&tile_key) {
+                    cached_tile.dirty_rect = DirtyRect::All;
+                }
+            }
+        }
+    }
+
+    /// Registers a font template with both the glyph rasterizer and the
+    /// local template store (the latter is used for glyph metric requests).
+    ///
+    /// For raw fonts, a weak reference to the data is tracked and its size
+    /// is added to the font template memory total.
+    pub fn add_font_template(&mut self, font_key: FontKey, template: FontTemplate) {
+        if let FontTemplate::Raw(ref bytes, _) = template {
+            let weak_bytes = Arc::downgrade(bytes);
+            self.resources.weak_fonts.insert(weak_bytes);
+            self.font_templates_memory += bytes.len();
+        }
+
+        let rasterizer_template = template.clone();
+        self.glyph_rasterizer.add_font(font_key, rasterizer_template);
+        self.resources.fonts.templates.add_font(font_key, template);
+    }
+
+    /// Removes a font from the glyph rasterizer, the template store and the
+    /// glyph cache, subtracting raw font data from the memory total.
+    pub fn delete_font_template(&mut self, font_key: FontKey) {
+        self.glyph_rasterizer.delete_font(font_key);
+
+        match self.resources.fonts.templates.delete_font(&font_key) {
+            Some(FontTemplate::Raw(bytes, _)) => {
+                self.font_templates_memory -= bytes.len();
+            }
+            _ => {}
+        }
+
+        self.cached_glyphs.delete_fonts(&[font_key]);
+    }
+
+    /// Removes a font instance from the shared font instance map.
+    pub fn delete_font_instance(&mut self, instance_key: FontInstanceKey) {
+        self.resources
+            .fonts
+            .instances
+            .delete_font_instance(instance_key);
+    }
+
+    /// Looks up a font instance in the shared font instance map.
+    pub fn get_font_instance(&self, instance_key: FontInstanceKey) -> Option<Arc<BaseFontInstance>> {
+        self.resources
+            .fonts
+            .instances
+            .get_font_instance(instance_key)
+    }
+
+    /// Returns a clone of the shared font resources.
+    pub fn get_fonts(&self) -> SharedFontResources {
+        SharedFontResources::clone(&self.resources.fonts)
+    }
+
+    /// Registers a new image template under `image_key` at generation 0.
+    ///
+    /// A requested tile size is clamped to the range 16..=2048. If no tiling
+    /// was requested but `should_tile` reports the image as too large, the
+    /// default tile size is used.
+    pub fn add_image_template(
+        &mut self,
+        image_key: ImageKey,
+        descriptor: ImageDescriptor,
+        data: CachedImageData,
+        visible_rect: &DeviceIntRect,
+        mut tiling: Option<TileSize>,
+    ) {
+        // Sanitize the value since it can be set by a pref.
+        tiling = tiling.map(|tile_size| tile_size.max(16).min(2048));
+
+        // We aren't going to be able to upload a texture this big, so tile it, even
+        // if tiling was not requested.
+        let force_tiling =
+            tiling.is_none() && Self::should_tile(self.tiling_threshold(), &descriptor, &data);
+        if force_tiling {
+            tiling = Some(DEFAULT_TILE_SIZE);
+        }
+
+        self.resources.image_templates.insert(
+            image_key,
+            ImageResource {
+                descriptor,
+                data,
+                tiling,
+                visible_rect: *visible_rect,
+                generation: ImageGeneration(0),
+            },
+        );
+    }
+
+    /// Replaces the template stored under `image_key` and accumulates
+    /// `dirty_rect` into every cache entry of that image.
+    ///
+    /// The existing tiling is kept; if the image was untiled and the new
+    /// descriptor is too large according to `should_tile`, the default tile
+    /// size is used. The new template's visible rect is reset to the full
+    /// descriptor size and its generation is incremented by one.
+    ///
+    /// # Panics
+    ///
+    /// Panics if no template exists for `image_key`.
+    pub fn update_image_template(
+        &mut self,
+        image_key: ImageKey,
+        descriptor: ImageDescriptor,
+        data: CachedImageData,
+        dirty_rect: &ImageDirtyRect,
+    ) {
+        // Read the threshold up front, before `image` mutably borrows part of `self`.
+        let tiling_threshold = self.tiling_threshold();
+        let image = match self.resources.image_templates.get_mut(image_key) {
+            Some(res) => res,
+            None => panic!("Attempt to update non-existent image"),
+        };
+
+        let mut tiling = image.tiling;
+        if tiling.is_none() && Self::should_tile(tiling_threshold, &descriptor, &data) {
+            tiling = Some(DEFAULT_TILE_SIZE);
+        }
+
+        // Each cache entry stores its own copy of the image's dirty rect. This allows them to be
+        // updated independently.
+        match self.cached_images.try_get_mut(&image_key) {
+            Some(&mut ImageResult::UntiledAuto(ref mut entry)) => {
+                entry.dirty_rect = entry.dirty_rect.union(dirty_rect);
+            }
+            Some(&mut ImageResult::Multi(ref mut entries)) => {
+                for (key, entry) in entries.iter_mut() {
+                    // We want the dirty rect relative to the tile and not the whole image.
+                    let local_dirty_rect = match (tiling, key.tile) {
+                        (Some(tile_size), Some(tile)) => {
+                            dirty_rect.map(|mut rect|{
+                                // Shift the rect into the tile's own coordinate space.
+                                let tile_offset = DeviceIntPoint::new(
+                                    tile.x as i32,
+                                    tile.y as i32,
+                                ) * tile_size as i32;
+                                rect = rect.translate(-tile_offset.to_vector());
+
+                                let tile_rect = compute_tile_size(
+                                    &descriptor.size.into(),
+                                    tile_size,
+                                    tile,
+                                ).into();
+
+                                // Clip to the tile; no overlap yields a zero rect.
+                                rect.intersection(&tile_rect).unwrap_or_else(DeviceIntRect::zero)
+                            })
+                        }
+                        // A tiled cache entry for an image that is no longer tiled:
+                        // invalidate it completely.
+                        (None, Some(..)) => DirtyRect::All,
+                        _ => *dirty_rect,
+                    };
+                    entry.dirty_rect = entry.dirty_rect.union(&local_dirty_rect);
+                }
+            }
+            _ => {}
+        }
+
+        if image.descriptor.format != descriptor.format {
+            // could be a stronger warning/error?
+            trace!("Format change {:?} -> {:?}", image.descriptor.format, descriptor.format);
+        }
+        *image = ImageResource {
+            descriptor,
+            data,
+            tiling,
+            visible_rect: descriptor.size.into(),
+            generation: ImageGeneration(image.generation.0 + 1),
+        };
+    }
+
+    /// Removes the image template for `image_key` together with its cached
+    /// texture entries.
+    ///
+    /// For raw images the size of the template data is subtracted from the
+    /// image template memory total. For blob images the key is appended to
+    /// the current frame's deleted-keys log and its rasterized tiles are
+    /// dropped. Deleting an unknown key only logs a warning.
+    pub fn delete_image_template(&mut self, image_key: ImageKey) {
+        // Remove the template.
+        let value = self.resources.image_templates.remove(image_key);
+
+        // Release the corresponding texture cache entry, if any.
+        if let Some(mut cached) = self.cached_images.remove(&image_key) {
+            cached.drop_from_cache(&mut self.texture_cache);
+        }
+
+        let image = match value {
+            Some(image) => image,
+            None => {
+                warn!("Delete the non-exist key");
+                debug!("key={:?}", image_key);
+                return;
+            }
+        };
+
+        // Raw data accounting must not be nested under the blob check: a
+        // blob template never holds raw data, so the counter would never
+        // decrease. Saturate because updating a template can replace its
+        // data without adjusting the counter.
+        if let CachedImageData::Raw(ref bytes) = image.data {
+            self.image_templates_memory =
+                self.image_templates_memory.saturating_sub(bytes.len());
+        }
+
+        if image.data.is_blob() {
+            let blob_key = BlobImageKey(image_key);
+            self.deleted_blob_keys.back_mut().unwrap().push(blob_key);
+            self.rasterized_blob_images.remove(&blob_key);
+        }
+    }
+
+    /// Returns the generation of the template stored under `key`, or
+    /// `ImageGeneration::INVALID` when there is no such template.
+    pub fn get_image_generation(&self, key: ImageKey) -> ImageGeneration {
+        match self.resources.image_templates.get(key) {
+            Some(template) => template.generation,
+            None => ImageGeneration::INVALID,
+        }
+    }
+
+    /// Requests an image to ensure that it will be in the texture cache this frame.
+    ///
+    /// Returns the size in device pixels of the image or tile. A zero size is
+    /// returned when `request.key` has no template, or when the image (or
+    /// its tile size) exceeds the maximum texture size; in the latter case an
+    /// `OverLimitSize` error is recorded for the key.
+    ///
+    /// Must be called while in `State::AddResources` (debug-asserted).
+    ///
+    /// # Panics
+    ///
+    /// Panics if a tile is requested for a template without tiling, if the
+    /// key already holds a cached error, or if a blob tile is requested
+    /// before its rasterized data has been added.
+    pub fn request_image(
+        &mut self,
+        request: ImageRequest,
+        gpu_cache: &mut GpuCache,
+    ) -> DeviceIntSize {
+        debug_assert_eq!(self.state, State::AddResources);
+
+        let template = match self.resources.image_templates.get(request.key) {
+            Some(template) => template,
+            None => {
+                warn!("ERROR: Trying to render deleted / non-existent key");
+                debug!("key={:?}", request.key);
+                return DeviceIntSize::zero();
+            }
+        };
+
+        // Tile sizes are computed against the visible rect; untiled requests
+        // use the full descriptor size.
+        let size = match request.tile {
+            Some(tile) => compute_tile_size(&template.visible_rect, template.tiling.unwrap(), tile),
+            None => template.descriptor.size,
+        };
+
+        // Images that don't use the texture cache can early out.
+        if !template.data.uses_texture_cache() {
+            return size;
+        }
+
+        // The largest side that has to fit in a texture: the tile size when
+        // tiled, otherwise the larger image dimension.
+        let side_size =
+            template.tiling.map_or(cmp::max(template.descriptor.size.width, template.descriptor.size.height),
+                                   |tile_size| tile_size as i32);
+        if side_size > self.texture_cache.max_texture_size() {
+            // The image or tiling size is too big for hardware texture size.
+            warn!("Dropping image, image:(w:{},h:{}, tile:{}) is too big for hardware!",
+                  template.descriptor.size.width, template.descriptor.size.height, template.tiling.unwrap_or(0));
+            self.cached_images.insert(request.key, ImageResult::Err(ImageCacheError::OverLimitSize));
+            return DeviceIntSize::zero();
+        }
+
+        let storage = match self.cached_images.entry(request.key) {
+            Occupied(e) => {
+                // We might have an existing untiled entry, and need to insert
+                // a second entry. In such cases we need to move the old entry
+                // out first, replacing it with a dummy entry, and then creating
+                // the tiled/multi-entry variant.
+                let entry = e.into_mut();
+                if !request.is_untiled_auto() {
+                    let untiled_entry = match entry {
+                        &mut ImageResult::UntiledAuto(ref mut entry) => {
+                            Some(mem::replace(entry, CachedImageInfo {
+                                texture_cache_handle: TextureCacheHandle::invalid(),
+                                dirty_rect: DirtyRect::All,
+                                manual_eviction: false,
+                            }))
+                        }
+                        _ => None
+                    };
+
+                    if let Some(untiled_entry) = untiled_entry {
+                        // Re-home the old untiled entry inside the new multi-entry map.
+                        let mut entries = ResourceClassCache::new();
+                        let untiled_key = CachedImageKey {
+                            rendering: ImageRendering::Auto,
+                            tile: None,
+                        };
+                        entries.insert(untiled_key, untiled_entry);
+                        *entry = ImageResult::Multi(entries);
+                    }
+                }
+                entry
+            }
+            Vacant(entry) => {
+                entry.insert(if request.is_untiled_auto() {
+                    ImageResult::UntiledAuto(CachedImageInfo {
+                        texture_cache_handle: TextureCacheHandle::invalid(),
+                        dirty_rect: DirtyRect::All,
+                        manual_eviction: false,
+                    })
+                } else {
+                    ImageResult::Multi(ResourceClassCache::new())
+                })
+            }
+        };
+
+        // If this image exists in the texture cache, *and* the dirty rect
+        // in the cache is empty, then it is valid to use as-is.
+        let entry = match *storage {
+            ImageResult::UntiledAuto(ref mut entry) => entry,
+            ImageResult::Multi(ref mut entries) => {
+                entries.entry(request.into())
+                    .or_insert(CachedImageInfo {
+                        texture_cache_handle: TextureCacheHandle::invalid(),
+                        dirty_rect: DirtyRect::All,
+                        manual_eviction: false,
+                    })
+            },
+            ImageResult::Err(_) => panic!("Errors should already have been handled"),
+        };
+
+        let needs_upload = self.texture_cache.request(&entry.texture_cache_handle, gpu_cache);
+
+        if !needs_upload && entry.dirty_rect.is_empty() {
+            return size;
+        }
+
+        // `insert` returns false when this request is already pending.
+        if !self.pending_image_requests.insert(request) {
+            return size;
+        }
+
+        if template.data.is_blob() {
+            // A blob tile must already have been rasterized by the time it is requested.
+            let request: BlobImageRequest = request.into();
+            let missing = match self.rasterized_blob_images.get(&request.key) {
+                Some(tiles) => !tiles.contains_key(&request.tile),
+                _ => true,
+            };
+
+            assert!(!missing);
+        }
+
+        size
+    }
+
+    /// Drops rasterized tiles and cached tile entries of a blob image that
+    /// fall outside the tile range covered by `area`.
+    ///
+    /// Does nothing if the image has no template or no rasterized tiles.
+    /// Untiled cache entries are always kept; discarded entries are marked
+    /// unused in the texture cache.
+    fn discard_tiles_outside_visible_area(
+        &mut self,
+        key: BlobImageKey,
+        area: &DeviceIntRect
+    ) {
+        let tile_size = if let Some(template) = self.resources.image_templates.get(key.as_image()) {
+            template.tiling.unwrap()
+        } else {
+            //debug!("Missing image template (key={:?})!", key);
+            return;
+        };
+
+        let rasterized_tiles = if let Some(tiles) = self.rasterized_blob_images.get_mut(&key) {
+            tiles
+        } else {
+            return;
+        };
+
+        let visible_tiles = compute_tile_range(area, tile_size);
+
+        rasterized_tiles.retain(|tile, _| visible_tiles.contains(*tile));
+
+        let texture_cache = &mut self.texture_cache;
+        if let Some(&mut ImageResult::Multi(ref mut entries)) =
+            self.cached_images.try_get_mut(&key.as_image())
+        {
+            entries.retain(|cached_key, entry| {
+                let keep = match cached_key.tile {
+                    None => true,
+                    Some(tile) => visible_tiles.contains(tile),
+                };
+                if !keep {
+                    entry.mark_unused(texture_cache);
+                }
+                keep
+            });
+        }
+    }
+
+    /// Sets the visible rect of an image template and resizes its descriptor
+    /// to match. Unknown keys are ignored.
+    fn set_image_visible_rect(&mut self, key: ImageKey, rect: &DeviceIntRect) {
+        let template = match self.resources.image_templates.get_mut(key) {
+            Some(template) => template,
+            None => return,
+        };
+
+        template.visible_rect = *rect;
+        template.descriptor.size = rect.size();
+    }
+
+    /// Asks the glyph rasterizer for the given glyphs, skipping those that
+    /// are already usable.
+    ///
+    /// A glyph is requested (and marked pending in the glyph cache) when it
+    /// has no cache entry yet, or when its cached texture needs to be
+    /// uploaded again. Blank and pending glyphs are skipped. Must be called
+    /// while in `State::AddResources` (debug-asserted).
+    pub fn request_glyphs(
+        &mut self,
+        mut font: FontInstance,
+        glyph_keys: &[GlyphKey],
+        gpu_cache: &mut GpuCache,
+    ) {
+        debug_assert_eq!(self.state, State::AddResources);
+
+        self.glyph_rasterizer.prepare_font(&mut font);
+        let glyph_key_cache = self.cached_glyphs.insert_glyph_key_cache_for_font(&font);
+        let texture_cache = &mut self.texture_cache;
+        self.glyph_rasterizer.request_glyphs(
+            font,
+            glyph_keys,
+            |key| {
+                match glyph_key_cache.try_get(key) {
+                    Some(GlyphCacheEntry::Cached(glyph)) => {
+                        // Skip the glyph if it already has a valid texture cache handle.
+                        if !texture_cache.request(&glyph.texture_cache_handle, gpu_cache) {
+                            return false;
+                        }
+                        // This case gets hit when we already rasterized the glyph, but the
+                        // glyph has been evicted from the texture cache. Just force it to
+                        // pending so it gets rematerialized.
+                    }
+                    // Skip the entry if it is blank or pending.
+                    Some(GlyphCacheEntry::Blank) | Some(GlyphCacheEntry::Pending) => return false,
+                    // Never seen before: fall through and request it.
+                    None => {}
+                }
+
+                glyph_key_cache.add_glyph(*key, GlyphCacheEntry::Pending);
+
+                true
+            }
+        );
+    }
+
+    /// Collects the texture cache's pending updates together with the queued
+    /// native surface operations, leaving the native surface queue empty.
+    pub fn pending_updates(&mut self) -> ResourceUpdateList {
+        let texture_updates = self.texture_cache.pending_updates();
+        let native_surface_updates = mem::take(&mut self.pending_native_surface_updates);
+
+        ResourceUpdateList {
+            texture_updates,
+            native_surface_updates,
+        }
+    }
+
+    /// Looks up the texture cache items for `glyph_keys` and hands them to
+    /// `f` in batches that share the same texture source and glyph format.
+    ///
+    /// Glyphs whose cache entry is blank or pending are skipped. Each result
+    /// records the glyph's index within `glyph_keys`. `fetch_buffer` is
+    /// scratch storage: it must be empty on entry (debug-asserted) and is
+    /// empty again on return. Must be called while in
+    /// `State::QueryResources` (debug-asserted).
+    pub fn fetch_glyphs<F>(
+        &self,
+        mut font: FontInstance,
+        glyph_keys: &[GlyphKey],
+        fetch_buffer: &mut Vec<GlyphFetchResult>,
+        gpu_cache: &mut GpuCache,
+        mut f: F,
+    ) where
+        F: FnMut(TextureSource, GlyphFormat, &[GlyphFetchResult]),
+    {
+        debug_assert_eq!(self.state, State::QueryResources);
+
+        self.glyph_rasterizer.prepare_font(&mut font);
+        let glyph_key_cache = self.cached_glyphs.get_glyph_key_cache_for_font(&font);
+
+        // Texture source and format of the batch currently being accumulated.
+        let mut current_texture_id = TextureSource::Invalid;
+        let mut current_glyph_format = GlyphFormat::Subpixel;
+        debug_assert!(fetch_buffer.is_empty());
+
+        for (loop_index, key) in glyph_keys.iter().enumerate() {
+            // NOTE(review): `get` presumably requires the key to have been
+            // requested earlier in the frame; confirm against GlyphKeyCache.
+            let (cache_item, glyph_format) = match *glyph_key_cache.get(key) {
+                GlyphCacheEntry::Cached(ref glyph) => {
+                    (self.texture_cache.get(&glyph.texture_cache_handle), glyph.format)
+                }
+                GlyphCacheEntry::Blank | GlyphCacheEntry::Pending => continue,
+            };
+            // A change of texture or format ends the current batch: flush it
+            // before starting the next one.
+            if current_texture_id != cache_item.texture_id ||
+                current_glyph_format != glyph_format {
+                if !fetch_buffer.is_empty() {
+                    f(current_texture_id, current_glyph_format, fetch_buffer);
+                    fetch_buffer.clear();
+                }
+                current_texture_id = cache_item.texture_id;
+                current_glyph_format = glyph_format;
+            }
+            fetch_buffer.push(GlyphFetchResult {
+                index_in_text_run: loop_index as i32,
+                uv_rect_address: gpu_cache.get_address(&cache_item.uv_rect_handle),
+                offset: DevicePoint::new(cache_item.user_data[0], cache_item.user_data[1]),
+                size: cache_item.uv_rect.size(),
+                scale: cache_item.user_data[2],
+            });
+        }
+
+        // Flush the final batch, if any.
+        if !fetch_buffer.is_empty() {
+            f(current_texture_id, current_glyph_format, fetch_buffer);
+            fetch_buffer.clear();
+        }
+    }
+
+    /// Maps a font key through the shared font key table.
+    pub fn map_font_key(&self, key: FontKey) -> FontKey {
+        let font_keys = &self.resources.fonts.font_keys;
+        font_keys.map_key(&key)
+    }
+
+    /// Maps a font instance key through the shared instance key table.
+    pub fn map_font_instance_key(&self, key: FontInstanceKey) -> FontInstanceKey {
+        let instance_keys = &self.resources.fonts.instance_keys;
+        instance_keys.map_key(&key)
+    }
+
+    /// Returns the dimensions of a glyph, asking the glyph rasterizer only
+    /// the first time a (font instance, glyph index) pair is seen and
+    /// caching the answer, including a `None` answer, afterwards.
+    pub fn get_glyph_dimensions(
+        &mut self,
+        font: &FontInstance,
+        glyph_index: GlyphIndex,
+    ) -> Option<GlyphDimensions> {
+        let rasterizer = &mut self.glyph_rasterizer;
+        *self
+            .cached_glyph_dimensions
+            .entry((font.instance_key, glyph_index))
+            .or_insert_with(|| rasterizer.get_glyph_dimensions(font, glyph_index))
+    }
+
+    /// Asks the glyph rasterizer for the glyph index of `ch` in the font
+    /// identified by `font_key`.
+    pub fn get_glyph_index(&mut self, font_key: FontKey, ch: char) -> Option<u32> {
+        let rasterizer = &mut self.glyph_rasterizer;
+        rasterizer.get_glyph_index(font_key, ch)
+    }
+
+    /// Returns the texture cache item for a previously requested image, or
+    /// `Err(())` if an error was recorded for its key. Must be called while
+    /// in `State::QueryResources` (debug-asserted).
+    #[inline]
+    pub fn get_cached_image(&self, request: ImageRequest) -> Result<CacheItem, ()> {
+        debug_assert_eq!(self.state, State::QueryResources);
+        self.get_image_info(request)
+            .map(|info| self.get_texture_cache_item(&info.texture_cache_handle))
+    }
+
+    /// Returns the render task cache entry referenced by `handle`.
+    pub fn get_cached_render_task(
+        &self,
+        handle: &RenderTaskCacheEntryHandle,
+    ) -> &RenderTaskCacheEntry {
+        let render_tasks = &self.cached_render_tasks;
+        render_tasks.get_cache_entry(handle)
+    }
+
+    /// Resolves a request to its cached image info: the single untiled
+    /// entry, or the matching entry of a multi-entry image. Returns
+    /// `Err(())` when an error was recorded for the key.
+    #[inline]
+    fn get_image_info(&self, request: ImageRequest) -> Result<&CachedImageInfo, ()> {
+        // TODO(Jerry): add a debug option to visualize the corresponding area for
+        // the Err() case of CacheItem.
+        let cached = self.cached_images.get(&request.key);
+        match cached {
+            ImageResult::UntiledAuto(info) => Ok(info),
+            ImageResult::Multi(entries) => Ok(entries.get(&request.into())),
+            ImageResult::Err(_) => Err(()),
+        }
+    }
+
+    /// Fetches the texture cache item behind `handle`.
+    #[inline]
+    pub fn get_texture_cache_item(&self, handle: &TextureCacheHandle) -> CacheItem {
+        let texture_cache = &self.texture_cache;
+        texture_cache.get(handle)
+    }
+
+    /// Describes the template stored under `image_key`, or returns `None`
+    /// if there is no such template.
+    ///
+    /// `external_image` is only populated for external images backed by a
+    /// texture handle.
+    pub fn get_image_properties(&self, image_key: ImageKey) -> Option<ImageProperties> {
+        let template = self.resources.image_templates.get(image_key)?;
+
+        let external_image = match template.data {
+            CachedImageData::External(ext_image) => match ext_image.image_type {
+                ExternalImageType::TextureHandle(_) => Some(ext_image),
+                // external buffer uses resource_cache.
+                ExternalImageType::Buffer => None,
+            },
+            // raw and blob image are all using resource_cache.
+            CachedImageData::Raw(..) | CachedImageData::Blob => None,
+        };
+
+        Some(ImageProperties {
+            descriptor: template.descriptor,
+            external_image,
+            tiling: template.tiling,
+            visible_rect: template.visible_rect,
+        })
+    }
+
+    /// Starts a new frame: moves the cache from `State::Idle` (debug-asserted)
+    /// to `State::AddResources` and forwards the frame stamp to the texture
+    /// cache, picture textures, glyph cache and render task cache.
+    ///
+    /// Also records the frame id, rotates the log of deleted blob keys and
+    /// runs texture cache compaction.
+    pub fn begin_frame(&mut self, stamp: FrameStamp, gpu_cache: &mut GpuCache, profile: &mut TransactionProfile) {
+        profile_scope!("begin_frame");
+        debug_assert_eq!(self.state, State::Idle);
+        self.state = State::AddResources;
+        self.texture_cache.begin_frame(stamp, profile);
+        // Picture textures are handed the texture cache's pending update list.
+        self.picture_textures.begin_frame(stamp, &mut self.texture_cache.pending_updates);
+
+        self.cached_glyphs.begin_frame(
+            stamp,
+            &mut self.texture_cache,
+            &mut self.glyph_rasterizer,
+        );
+        self.cached_render_tasks.begin_frame(&mut self.texture_cache);
+        self.current_frame_id = stamp.frame_id();
+
+        // pop the old frame and push a new one
+        self.deleted_blob_keys.pop_front();
+        self.deleted_blob_keys.push_back(Vec::new());
+
+        self.texture_cache.run_compaction(gpu_cache);
+    }
+
+    /// Resolves all pending glyph rasterization jobs, stores their results in
+    /// the glyph cache, then uploads pending image updates to the texture cache.
+    ///
+    /// Moves the cache from `State::AddResources` to `State::QueryResources`;
+    /// debug builds assert the starting state.
+    pub fn block_until_all_resources_added(
+        &mut self,
+        gpu_cache: &mut GpuCache,
+        profile: &mut TransactionProfile,
+    ) {
+        profile_scope!("block_until_all_resources_added");
+
+        debug_assert_eq!(self.state, State::AddResources);
+        self.state = State::QueryResources;
+
+        // Borrow these fields individually so the closure below captures them
+        // rather than `self`, which is already borrowed through `glyph_rasterizer`.
+        let cached_glyphs = &mut self.cached_glyphs;
+        let texture_cache = &mut self.texture_cache;
+
+        self.glyph_rasterizer.resolve_glyphs(
+            |job, can_use_r8_format| {
+                let GlyphRasterJob { font, key, result } = job;
+                let glyph_key_cache = cached_glyphs.get_glyph_key_cache_for_font_mut(&*font);
+                // Failed rasterizations and zero-sized glyphs are both recorded
+                // as `Blank` entries; only real bitmaps go to the texture cache.
+                let glyph_info = match result {
+                    Err(_) => GlyphCacheEntry::Blank,
+                    Ok(ref glyph) if glyph.width == 0 || glyph.height == 0 => {
+                        GlyphCacheEntry::Blank
+                    }
+                    Ok(glyph) => {
+                        let mut texture_cache_handle = TextureCacheHandle::invalid();
+                        texture_cache.request(&texture_cache_handle, gpu_cache);
+                        texture_cache.update(
+                            &mut texture_cache_handle,
+                            ImageDescriptor {
+                                size: size2(glyph.width, glyph.height),
+                                stride: None,
+                                format: glyph.format.image_format(can_use_r8_format),
+                                flags: ImageDescriptorFlags::empty(),
+                                offset: 0,
+                            },
+                            TextureFilter::Linear,
+                            Some(CachedImageData::Raw(Arc::new(glyph.bytes))),
+                            [glyph.left, -glyph.top, glyph.scale, 0.0],
+                            DirtyRect::All,
+                            gpu_cache,
+                            Some(glyph_key_cache.eviction_notice()),
+                            UvRectKind::Rect,
+                            Eviction::Auto,
+                            TargetShader::Text,
+                        );
+                        GlyphCacheEntry::Cached(CachedGlyphInfo {
+                            texture_cache_handle,
+                            format: glyph.format,
+                        })
+                    }
+                };
+                glyph_key_cache.insert(key, glyph_info);
+            },
+            profile,
+        );
+
+        // Apply any updates of new / updated images (incl. blobs) to the texture cache.
+        self.update_texture_cache(gpu_cache);
+    }
+
+    /// Drains `pending_image_requests` and uploads the data for each request
+    /// into the texture cache.
+    ///
+    /// Panics if a request has no image template, if a blob request has no
+    /// rasterized data or tile, or if the cached entry for the request's key
+    /// is an `ImageResult::Err`.
+    fn update_texture_cache(&mut self, gpu_cache: &mut GpuCache) {
+        profile_scope!("update_texture_cache");
+        for request in self.pending_image_requests.drain() {
+            let image_template = self.resources.image_templates.get_mut(request.key).unwrap();
+            debug_assert!(image_template.data.uses_texture_cache());
+
+            // Every branch below pushes exactly one (data, rasterized rect) pair.
+            let mut updates: SmallVec<[(CachedImageData, Option<DeviceIntRect>); 1]> = SmallVec::new();
+
+            match image_template.data {
+                CachedImageData::Raw(..) | CachedImageData::External(..) => {
+                    // Safe to clone here since the Raw image data is an
+                    // Arc, and the external image data is small.
+                    updates.push((image_template.data.clone(), None));
+                }
+                CachedImageData::Blob => {
+                    let blob_image = self.rasterized_blob_images.get_mut(&BlobImageKey(request.key)).unwrap();
+                    let img = &blob_image[&request.tile.unwrap()];
+                    updates.push((
+                        CachedImageData::Raw(Arc::clone(&img.data)),
+                        Some(img.rasterized_rect)
+                    ));
+                }
+            };
+
+            for (image_data, blob_rasterized_rect) in updates {
+                let entry = match *self.cached_images.get_mut(&request.key) {
+                    ImageResult::UntiledAuto(ref mut entry) => entry,
+                    ImageResult::Multi(ref mut entries) => entries.get_mut(&request.into()),
+                    ImageResult::Err(_) => panic!("Update requested for invalid entry")
+                };
+
+                let mut descriptor = image_template.descriptor.clone();
+                // Take the accumulated dirty rect, leaving an empty one on the entry.
+                let mut dirty_rect = entry.dirty_rect.replace_with_empty();
+
+                if let Some(tile) = request.tile {
+                    let tile_size = image_template.tiling.unwrap();
+                    let clipped_tile_size = compute_tile_size(&image_template.visible_rect, tile_size, tile);
+                    // The tiled image could be stored on the CPU as one large image or be
+                    // already broken up into tiles. This affects the way we compute the stride
+                    // and offset.
+                    let tiled_on_cpu = image_template.data.is_blob();
+                    if !tiled_on_cpu {
+                        // we don't expect to have partial tiles at the top and left of non-blob
+                        // images.
+                        debug_assert_eq!(image_template.visible_rect.min, point2(0, 0));
+                        let bpp = descriptor.format.bytes_per_pixel();
+                        let stride = descriptor.compute_stride();
+                        descriptor.stride = Some(stride);
+                        // Advance the byte offset to the tile's top-left pixel
+                        // inside the full image.
+                        descriptor.offset +=
+                            tile.y as i32 * tile_size as i32 * stride +
+                            tile.x as i32 * tile_size as i32 * bpp;
+                    }
+
+                    descriptor.size = clipped_tile_size;
+                }
+
+                // If we are uploading the dirty region of a blob image we might have several
+                // rects to upload so we use each of these rasterized rects rather than the
+                // overall dirty rect of the image.
+                if let Some(rect) = blob_rasterized_rect {
+                    dirty_rect = DirtyRect::Partial(rect);
+                }
+
+                let filter = match request.rendering {
+                    ImageRendering::Pixelated => {
+                        TextureFilter::Nearest
+                    }
+                    ImageRendering::Auto | ImageRendering::CrispEdges => {
+                        // If the texture uses linear filtering, enable mipmaps and
+                        // trilinear filtering, for better image quality. We only
+                        // support this for now on textures that are not placed
+                        // into the shared cache. This accounts for any image
+                        // that is > 512 in either dimension, so it should cover
+                        // the most important use cases. We may want to support
+                        // mip-maps on shared cache items in the future.
+                        //
+                        // NOTE(review): the condition below requires BOTH width
+                        // and height to exceed 512, not either one as stated
+                        // above - confirm which is intended.
+                        if descriptor.allow_mipmaps() &&
+                           descriptor.size.width > 512 &&
+                           descriptor.size.height > 512 &&
+                           !self.texture_cache.is_allowed_in_shared_cache(
+                            TextureFilter::Linear,
+                            &descriptor,
+                        ) {
+                            TextureFilter::Trilinear
+                        } else {
+                            TextureFilter::Linear
+                        }
+                    }
+                };
+
+                // Blob images are evicted manually; the entry records that too.
+                let eviction = if image_template.data.is_blob() {
+                    entry.manual_eviction = true;
+                    Eviction::Manual
+                } else {
+                    Eviction::Auto
+                };
+
+                //Note: at this point, the dirty rectangle is local to the descriptor space
+                self.texture_cache.update(
+                    &mut entry.texture_cache_handle,
+                    descriptor,
+                    filter,
+                    Some(image_data),
+                    [0.0; 4],
+                    dirty_rect,
+                    gpu_cache,
+                    None,
+                    UvRectKind::Rect,
+                    eviction,
+                    TargetShader::Default,
+                );
+            }
+        }
+    }
+
+    /// Queues creation of a native compositor backdrop surface filled with
+    /// `color`, and returns the freshly allocated surface id.
+    pub fn create_compositor_backdrop_surface(
+        &mut self,
+        color: ColorF
+    ) -> NativeSurfaceId {
+        let raw_id = NEXT_NATIVE_SURFACE_ID.fetch_add(1, Ordering::Relaxed);
+        let id = NativeSurfaceId(raw_id as u64);
+
+        let details = NativeSurfaceOperationDetails::CreateBackdropSurface { id, color };
+        self.pending_native_surface_updates.push(NativeSurfaceOperation { details });
+
+        id
+    }
+
+    /// Queues allocation of a new OS native compositor surface with the
+    /// specified tile size, and returns the freshly allocated surface id.
+    pub fn create_compositor_surface(
+        &mut self,
+        virtual_offset: DeviceIntPoint,
+        tile_size: DeviceIntSize,
+        is_opaque: bool,
+    ) -> NativeSurfaceId {
+        let raw_id = NEXT_NATIVE_SURFACE_ID.fetch_add(1, Ordering::Relaxed);
+        let id = NativeSurfaceId(raw_id as u64);
+
+        let details = NativeSurfaceOperationDetails::CreateSurface {
+            id,
+            virtual_offset,
+            tile_size,
+            is_opaque,
+        };
+        self.pending_native_surface_updates.push(NativeSurfaceOperation { details });
+
+        id
+    }
+
+    /// Queues creation of a native compositor surface for external content,
+    /// and returns the freshly allocated surface id.
+    pub fn create_compositor_external_surface(
+        &mut self,
+        is_opaque: bool,
+    ) -> NativeSurfaceId {
+        let raw_id = NEXT_NATIVE_SURFACE_ID.fetch_add(1, Ordering::Relaxed);
+        let id = NativeSurfaceId(raw_id as u64);
+
+        let details = NativeSurfaceOperationDetails::CreateExternalSurface { id, is_opaque };
+        self.pending_native_surface_updates.push(NativeSurfaceOperation { details });
+
+        id
+    }
+
+    /// Queues destruction of an existing native OS surface. This is used when
+    /// a picture cache surface is dropped or resized.
+    pub fn destroy_compositor_surface(
+        &mut self,
+        id: NativeSurfaceId,
+    ) {
+        let details = NativeSurfaceOperationDetails::DestroySurface { id };
+        self.pending_native_surface_updates.push(NativeSurfaceOperation { details });
+    }
+
+    /// Queues construction of the native compositor tile identified by `id`.
+    pub fn create_compositor_tile(
+        &mut self,
+        id: NativeTileId,
+    ) {
+        let details = NativeSurfaceOperationDetails::CreateTile { id };
+        self.pending_native_surface_updates.push(NativeSurfaceOperation { details });
+    }
+
+    /// Queues destruction of the native compositor tile identified by `id`.
+    pub fn destroy_compositor_tile(
+        &mut self,
+        id: NativeTileId,
+    ) {
+        let details = NativeSurfaceOperationDetails::DestroyTile { id };
+        self.pending_native_surface_updates.push(NativeSurfaceOperation { details });
+    }
+
+    /// Queues attaching `external_image` to the native surface `id`.
+    pub fn attach_compositor_external_image(
+        &mut self,
+        id: NativeSurfaceId,
+        external_image: ExternalImageId,
+    ) {
+        let details = NativeSurfaceOperationDetails::AttachExternalImage {
+            id,
+            external_image,
+        };
+        self.pending_native_surface_updates.push(NativeSurfaceOperation { details });
+    }
+
+
+    /// Finishes the current frame: returns the cache to `State::Idle`, garbage
+    /// collects the render target pool, and ends the frame on the texture
+    /// caches.
+    ///
+    /// Debug builds assert that the cache was in `State::QueryResources`.
+    pub fn end_frame(&mut self, profile: &mut TransactionProfile) {
+        debug_assert_eq!(self.state, State::QueryResources);
+        profile_scope!("end_frame");
+        self.state = State::Idle;
+
+        // GC the render target pool, if it's currently > 64 MB in size.
+        //
+        // We use a simple scheme whereby we drop any texture that hasn't been used
+        // in the last 60 frames, until we are below the size threshold. This should
+        // generally prevent any sustained build-up of unused textures, unless we don't
+        // generate frames for a long period. This can happen when the window is
+        // minimized, and we probably want to flush all the WebRender caches in that case [1].
+        // There is also a second "red line" memory threshold which prevents
+        // memory exhaustion if many render targets are allocated within a small
+        // number of frames. For now this is set at 320 MB (5x the normal 64 MB threshold).
+        //
+        // [1] https://bugzilla.mozilla.org/show_bug.cgi?id=1494099
+        self.gc_render_targets(
+            64 * 1024 * 1024,
+            32 * 1024 * 1024 * 10,
+            60,
+        );
+
+        self.texture_cache.end_frame(profile);
+        self.picture_textures.gc(
+            &mut self.texture_cache.pending_updates,
+        );
+
+        self.picture_textures.update_profile(profile);
+    }
+
+    /// Updates the global glyph-flashing toggle from `flags` and forwards the
+    /// flags to `texture_cache` and `picture_textures`.
+    pub fn set_debug_flags(&mut self, flags: DebugFlags) {
+        let glyph_flashing = flags.contains(DebugFlags::GLYPH_FLASHING);
+        GLYPH_FLASHING.store(glyph_flashing, std::sync::atomic::Ordering::Relaxed);
+
+        self.texture_cache.set_debug_flags(flags);
+        self.picture_textures.set_debug_flags(flags);
+    }
+
+    /// Clears every cache category selected in `what`.
+    ///
+    /// Categories are handled in a fixed order: images, glyphs, glyph
+    /// dimensions, render tasks, texture cache, render targets.
+    pub fn clear(&mut self, what: ClearCache) {
+        let requested = |flag: ClearCache| what.contains(flag);
+
+        if requested(ClearCache::IMAGES) {
+            // Each cached image has to release its texture cache entries.
+            for (_, mut image) in self.cached_images.resources.drain() {
+                image.drop_from_cache(&mut self.texture_cache);
+            }
+        }
+
+        if requested(ClearCache::GLYPHS) {
+            self.cached_glyphs.clear();
+        }
+
+        if requested(ClearCache::GLYPH_DIMENSIONS) {
+            self.cached_glyph_dimensions.clear();
+        }
+
+        if requested(ClearCache::RENDER_TASKS) {
+            self.cached_render_tasks.clear();
+        }
+
+        if requested(ClearCache::TEXTURE_CACHE) {
+            self.texture_cache.clear_all();
+            self.picture_textures.clear(&mut self.texture_cache.pending_updates);
+        }
+
+        if requested(ClearCache::RENDER_TARGETS) {
+            self.clear_render_target_pool();
+        }
+    }
+
+    /// Removes every resource that belongs to `namespace`: images, font
+    /// instances, font templates, cached glyphs, and any shared font or
+    /// font-instance keys that were remapped from the namespace.
+    ///
+    /// The blob image handler, when present, is told about the namespace and
+    /// about each deleted shared font key.
+    pub fn clear_namespace(&mut self, namespace: IdNamespace) {
+        // Images whose key's first component is this namespace.
+        self.clear_images(|k| k.0 == namespace);
+
+        // First clear out any non-shared resources associated with the namespace.
+        self.resources.fonts.instances.clear_namespace(namespace);
+        let deleted_keys = self.resources.fonts.templates.clear_namespace(namespace);
+        self.glyph_rasterizer.delete_fonts(&deleted_keys);
+        self.cached_glyphs.clear_namespace(namespace);
+        if let Some(handler) = &mut self.blob_image_handler {
+            handler.clear_namespace(namespace);
+        }
+
+        // Check for any shared instance keys that were remapped from the namespace.
+        let shared_instance_keys = self.resources.fonts.instance_keys.clear_namespace(namespace);
+        if !shared_instance_keys.is_empty() {
+            self.resources.fonts.instances.delete_font_instances(&shared_instance_keys);
+            self.cached_glyphs.delete_font_instances(&shared_instance_keys, &mut self.glyph_rasterizer);
+            // Blob font instances are not shared across namespaces, so there is no
+            // need to call the handler for them individually.
+        }
+
+        // Finally check for any shared font keys that were remapped from the namespace.
+        let shared_keys = self.resources.fonts.font_keys.clear_namespace(namespace);
+        if !shared_keys.is_empty() {
+            self.glyph_rasterizer.delete_fonts(&shared_keys);
+            self.resources.fonts.templates.delete_fonts(&shared_keys);
+            self.cached_glyphs.delete_fonts(&shared_keys);
+            if let Some(handler) = &mut self.blob_image_handler {
+                for &key in &shared_keys {
+                    handler.delete_font(key);
+                }
+            }
+        }
+    }
+
+    /// Reports the CPU heap usage of this ResourceCache.
+    ///
+    /// `op` is invoked on the data pointer of each raw font template, each
+    /// weak font whose buffer was not already counted as a template, and each
+    /// raw image; the sizes it returns are accumulated into the report.
+    ///
+    /// NB: It would be much better to use the derive(MallocSizeOf) machinery
+    /// here, but the Arcs complicate things. The two ways to handle that would
+    /// be to either (a) Implement MallocSizeOf manually for the things that own
+    /// them and manually avoid double-counting, or (b) Use the "seen this pointer
+    /// yet" machinery from the proper malloc_size_of crate. We can do this if/when
+    /// more accurate memory reporting on these resources becomes a priority.
+    pub fn report_memory(&self, op: VoidPtrToSizeFn) -> MemoryReport {
+        let mut report = MemoryReport::default();
+
+        // Data pointers already attributed to `report.fonts`, so that weak
+        // fonts pointing at the same buffer are not counted a second time.
+        let mut measured_font_ptrs = std::collections::HashSet::new();
+
+        // Measure fonts. We only need the templates here, because the instances
+        // don't have big buffers.
+        for (_, template) in self.resources.fonts.templates.lock().iter() {
+            if let FontTemplate::Raw(ref bytes, _) = template {
+                let ptr = bytes.as_ptr();
+                report.fonts += unsafe { op(ptr as *const c_void) };
+                measured_font_ptrs.insert(ptr);
+            }
+        }
+
+        for weak_font in self.resources.weak_fonts.iter() {
+            let ptr = weak_font.as_ptr();
+            if measured_font_ptrs.contains(&ptr) {
+                continue;
+            }
+            report.weak_fonts += unsafe { op(ptr as *const c_void) };
+        }
+
+        // Measure images. Only raw images own a buffer that is counted here.
+        for (_, image) in self.resources.image_templates.images.iter() {
+            if let CachedImageData::Raw(ref pixels) = image.data {
+                report.images += unsafe { op(pixels.as_ptr() as *const c_void) };
+            }
+        }
+
+        // Measure rasterized blobs.
+        // TODO(gw): Temporarily disabled while we roll back a crash. We can re-enable
+        //           these when that crash is fixed.
+        /*
+        for (_, image) in self.rasterized_blob_images.iter() {
+            let mut accumulate = |b: &RasterizedBlobImage| {
+                report.rasterized_blobs += unsafe { op(b.data.as_ptr() as *const c_void) };
+            };
+            match image {
+                RasterizedBlob::Tiled(map) => map.values().for_each(&mut accumulate),
+                RasterizedBlob::NonTiled(vec) => vec.iter().for_each(&mut accumulate),
+            };
+        }
+        */
+
+        report
+    }
+
+    /// Properly deletes all images matching the predicate.
+    ///
+    /// Every image template whose key satisfies `f` is removed through
+    /// `delete_image_template`. When the crate is built with the
+    /// `leak_checks` feature, this additionally asserts that no matching key
+    /// is left in the image templates, the cached images, or the rasterized
+    /// blob images.
+    fn clear_images<F: Fn(&ImageKey) -> bool>(&mut self, f: F) {
+        // Collect the keys first: deleting while iterating the map would
+        // conflict with the borrow held by `keys()`.
+        let keys = self.resources.image_templates.images.keys().filter(|k| f(*k))
+            .cloned().collect::<SmallVec<[ImageKey; 16]>>();
+
+        for key in keys {
+            self.delete_image_template(key);
+        }
+
+        // The cfg key for Cargo features is `feature` (singular). The previous
+        // `features = "leak_checks"` spelling is never set, which left these
+        // assertions permanently disabled.
+        if cfg!(feature = "leak_checks") {
+            let blob_f = |key: &BlobImageKey| { f(&key.as_image()) };
+            assert!(!self.resources.image_templates.images.keys().any(&f));
+            assert!(!self.cached_images.resources.keys().any(&f));
+            assert!(!self.rasterized_blob_images.keys().any(&blob_f));
+        }
+    }
+
+    /// Hands out a render target of the requested `size` and `format`.
+    ///
+    /// An inactive pooled target with matching parameters is reused when one
+    /// exists; otherwise a new target is allocated and added to the pool. In
+    /// both cases the target is marked active and stamped with the current
+    /// frame id.
+    pub fn get_or_create_render_target_from_pool(
+        &mut self,
+        size: DeviceIntSize,
+        format: ImageFormat,
+    ) -> CacheTextureId {
+        let reusable = self.render_target_pool
+            .iter_mut()
+            .find(|target| {
+                target.size == size && target.format == format && !target.is_active
+            });
+
+        if let Some(target) = reusable {
+            // Found a target that's not currently in use which matches. Update
+            // the last_frame_used for GC purposes.
+            target.is_active = true;
+            target.last_frame_used = self.current_frame_id;
+            return target.texture_id;
+        }
+
+        // Nothing suitable in the pool: allocate a new target and track it.
+        let texture_id = self.texture_cache.alloc_render_target(size, format);
+        let last_frame_used = self.current_frame_id;
+
+        self.render_target_pool.push(RenderTarget {
+            size,
+            format,
+            texture_id,
+            is_active: true,
+            last_frame_used,
+        });
+
+        texture_id
+    }
+
+    /// Marks the pooled render target `id` as no longer in use.
+    ///
+    /// Panics if `id` is not in the pool, or if the target is not currently
+    /// active.
+    pub fn return_render_target_to_pool(
+        &mut self,
+        id: CacheTextureId,
+    ) {
+        match self.render_target_pool.iter_mut().find(|target| target.texture_id == id) {
+            Some(target) => {
+                assert!(target.is_active);
+                target.is_active = false;
+            }
+            None => panic!("bug: invalid render target id"),
+        }
+    }
+
+    /// Empties the render target pool (e.g. on memory pressure), freeing each
+    /// target's texture. Debug builds assert that no target is still active.
+    fn clear_render_target_pool(
+        &mut self,
+    ) {
+        let texture_cache = &mut self.texture_cache;
+        self.render_target_pool.drain(..).for_each(|target| {
+            debug_assert!(!target.is_active);
+            texture_cache.free_render_target(target.texture_id);
+        });
+    }
+
+    /// Garbage collects render targets that haven't been used for some time.
+    ///
+    /// Nothing happens while the pool is at or below `total_bytes_threshold`.
+    /// Above it, targets are visited oldest first and freed until the pool
+    /// drops back under the threshold, sparing those used within
+    /// `frames_threshold` frames. While the pool exceeds
+    /// `total_bytes_red_line_threshold`, recently used targets are freed too.
+    /// A target used in the current frame is never freed.
+    ///
+    /// Panics if any pooled target is still active when the pool is scanned.
+    fn gc_render_targets(
+        &mut self,
+        total_bytes_threshold: usize,
+        total_bytes_red_line_threshold: usize,
+        frames_threshold: usize,
+    ) {
+        // Total GPU memory currently held by the pool.
+        let mut pool_bytes = self.render_target_pool
+            .iter()
+            .fold(0usize, |total, target| total + target.size_in_bytes());
+
+        // Small enough already: skip the GC pass entirely.
+        if pool_bytes <= total_bytes_threshold {
+            return;
+        }
+
+        // Oldest targets first, so they are the first candidates for removal.
+        self.render_target_pool.sort_by_key(|target| target.last_frame_used);
+
+        let current_frame = self.current_frame_id;
+
+        // We can't just use retain() because `RenderTarget` requires manual cleanup.
+        let mut survivors = SmallVec::<[RenderTarget; 8]>::new();
+
+        for target in self.render_target_pool.drain(..) {
+            assert!(!target.is_active);
+
+            let over_red_line = pool_bytes > total_bytes_red_line_threshold;
+            let over_threshold = pool_bytes > total_bytes_threshold;
+            let recently_used = target.used_recently(current_frame, frames_threshold);
+            let used_this_frame = target.last_frame_used == current_frame;
+
+            // Keep anything touched this frame. Otherwise keep the target only
+            // while we are under the red line and either under the normal
+            // threshold or the target was used recently, which avoids thrashing
+            // allocations on pages that need a large, regularly changing pool.
+            let keep = used_this_frame
+                || (!over_red_line && (!over_threshold || recently_used));
+
+            if keep {
+                survivors.push(target);
+            } else {
+                pool_bytes -= target.size_in_bytes();
+                self.texture_cache.free_render_target(target.texture_id);
+            }
+        }
+
+        self.render_target_pool.extend(survivors);
+    }
+
+    /// Test helper: asserts that the render target pool holds exactly the
+    /// `(width, height, format)` surfaces listed in `expected_surfaces`, in
+    /// the same order.
+    #[cfg(test)]
+    pub fn validate_surfaces(
+        &self,
+        expected_surfaces: &[(i32, i32, ImageFormat)],
+    ) {
+        let pool = &self.render_target_pool;
+        assert_eq!(expected_surfaces.len(), pool.len());
+
+        for (&(width, height, format), surface) in expected_surfaces.iter().zip(pool) {
+            assert_eq!(DeviceIntSize::new(width, height), surface.size);
+            assert_eq!(format, surface.format);
+        }
+    }
+}
+
+/// Deletes every image template when the cache is dropped.
+impl Drop for ResourceCache {
+    fn drop(&mut self) {
+        // The always-true predicate selects all images.
+        self.clear_images(|_| true);
+    }
+}
+
+/// Serializable stand-in for a font template, compiled only with the
+/// `capture` and/or `replay` features.
+#[cfg(any(feature = "capture", feature = "replay"))]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct PlainFontTemplate {
+    // For raw fonts `save_capture` stores the capture-relative path of the
+    // dumped font file; for native fonts it stores the font's path, or its
+    // name on macOS.
+    data: String,
+    // Font index; `save_capture` writes 0 for native fonts on macOS.
+    index: u32,
+}
+
+/// Serializable stand-in for an image template, compiled only with the
+/// `capture` and/or `replay` features.
+#[cfg(any(feature = "capture", feature = "replay"))]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct PlainImageTemplate {
+    // NOTE(review): presumably a capture-relative path to the dumped image
+    // data, like the paths `save_capture` builds - confirm where this is set.
+    data: String,
+    descriptor: ImageDescriptor,
+    tiling: Option<TileSize>,
+    generation: ImageGeneration,
+}
+
+/// Serializable snapshot of the font and image resources, compiled only with
+/// the `capture` and/or `replay` features. `save_capture` returns one of
+/// these.
+#[cfg(any(feature = "capture", feature = "replay"))]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct PlainResources {
+    // Font templates keyed by font key.
+    font_templates: FastHashMap<FontKey, PlainFontTemplate>,
+    font_instances: Vec<BaseFontInstance>,
+    // Image templates keyed by image key.
+    image_templates: FastHashMap<ImageKey, PlainImageTemplate>,
+}
+
+/// Borrowed, serialize-only view of the caches, compiled only with the
+/// `capture` feature. It has the same fields as `PlainCacheOwn`, but holds
+/// references instead of owned values.
+#[cfg(feature = "capture")]
+#[derive(Serialize)]
+pub struct PlainCacheRef<'a> {
+    current_frame_id: FrameId,
+    glyphs: &'a GlyphCache,
+    glyph_dimensions: &'a GlyphDimensionsCache,
+    images: &'a ImageCache,
+    render_tasks: &'a RenderTaskCache,
+    textures: &'a TextureCache,
+    picture_textures: &'a PictureTextures,
+}
+
+/// Owned, deserialize-only counterpart of `PlainCacheRef`, compiled only with
+/// the `replay` feature.
+#[cfg(feature = "replay")]
+#[derive(Deserialize)]
+pub struct PlainCacheOwn {
+    current_frame_id: FrameId,
+    glyphs: GlyphCache,
+    glyph_dimensions: GlyphDimensionsCache,
+    images: ImageCache,
+    render_tasks: RenderTaskCache,
+    textures: TextureCache,
+    picture_textures: PictureTextures,
+}
+
+/// Bytes of `../res/Proggy.ttf`, embedded at compile time. Only built with
+/// the `replay` feature.
+// NOTE(review): presumably a stand-in for native fonts that cannot be loaded
+// during replay - confirm at the use site.
+#[cfg(feature = "replay")]
+const NATIVE_FONT: &'static [u8] = include_bytes!("../res/Proggy.ttf");
+
+// Converts a blob dirty rect into an image dirty rect.
+// This currently only casts the unit but will soon apply an offset
+fn to_image_dirty_rect(blob_dirty_rect: &BlobDirtyRect) -> ImageDirtyRect {
+    if let DirtyRect::Partial(rect) = blob_dirty_rect {
+        DirtyRect::Partial(rect.cast_unit())
+    } else {
+        DirtyRect::All
+    }
+}
+
+impl ResourceCache {
+    #[cfg(feature = "capture")]
+    pub fn save_capture(
+        &mut self, root: &PathBuf
+    ) -> (PlainResources, Vec<ExternalCaptureImage>) {
+        use std::fs;
+        use std::io::Write;
+
+        info!("saving resource cache");
+        let res = &self.resources;
+        let path_fonts = root.join("fonts");
+        if !path_fonts.is_dir() {
+            fs::create_dir(&path_fonts).unwrap();
+        }
+        let path_images = root.join("images");
+        if !path_images.is_dir() {
+            fs::create_dir(&path_images).unwrap();
+        }
+        let path_blobs = root.join("blobs");
+        if !path_blobs.is_dir() {
+            fs::create_dir(&path_blobs).unwrap();
+        }
+        let path_externals = root.join("externals");
+        if !path_externals.is_dir() {
+            fs::create_dir(&path_externals).unwrap();
+        }
+
+        info!("\tfont templates");
+        let mut font_paths = FastHashMap::default();
+        for template in res.fonts.templates.lock().values() {
+            let data: &[u8] = match *template {
+                FontTemplate::Raw(ref arc, _) => arc,
+                FontTemplate::Native(_) => continue,
+            };
+            let font_id = res.fonts.templates.len() + 1;
+            let entry = match font_paths.entry(data.as_ptr()) {
+                Entry::Occupied(_) => continue,
+                Entry::Vacant(e) => e,
+            };
+            let file_name = format!("{}.raw", font_id);
+            let short_path = format!("fonts/{}", file_name);
+            fs::File::create(path_fonts.join(file_name))
+                .expect(&format!("Unable to create {}", short_path))
+                .write_all(data)
+                .unwrap();
+            entry.insert(short_path);
+        }
+
+        info!("\timage templates");
+        let mut image_paths = FastHashMap::default();
+        let mut other_paths = FastHashMap::default();
+        let mut num_blobs = 0;
+        let mut external_images = Vec::new();
+        for (&key, template) in res.image_templates.images.iter() {
+            let desc = &template.descriptor;
+            match template.data {
+                CachedImageData::Raw(ref arc) => {
+                    let image_id = image_paths.len() + 1;
+                    let entry = match image_paths.entry(arc.as_ptr()) {
+                        Entry::Occupied(_) => continue,
+                        Entry::Vacant(e) => e,
+                    };
+
+                    #[cfg(feature = "png")]
+                    CaptureConfig::save_png(
+                        root.join(format!("images/{}.png", image_id)),
+                        desc.size,
+                        desc.format,
+                        desc.stride,
+                        &arc,
+                    );
+                    let file_name = format!("{}.raw", image_id);
+                    let short_path = format!("images/{}", file_name);
+                    fs::File::create(path_images.join(file_name))
+                        .expect(&format!("Unable to create {}", short_path))
+                        .write_all(&*arc)
+                        .unwrap();
+                    entry.insert(short_path);
+                }
+                CachedImageData::Blob => {
+                    warn!("Tiled blob images aren't supported yet");
+                    let result = RasterizedBlobImage {
+                        rasterized_rect: desc.size.into(),
+                        data: Arc::new(vec![0; desc.compute_total_size() as usize])
+                    };
+
+                    assert_eq!(result.rasterized_rect.size(), desc.size);
+                    assert_eq!(result.data.len(), desc.compute_total_size() as usize);
+
+                    num_blobs += 1;
+                    #[cfg(feature = "png")]
+                    CaptureConfig::save_png(
+                        root.join(format!("blobs/{}.png", num_blobs)),
+                        desc.size,
+                        desc.format,
+                        desc.stride,
+                        &result.data,
+                    );
+                    let file_name = format!("{}.raw", num_blobs);
+                    let short_path = format!("blobs/{}", file_name);
+                    let full_path = path_blobs.clone().join(&file_name);
+                    fs::File::create(full_path)
+                        .expect(&format!("Unable to create {}", short_path))
+                        .write_all(&result.data)
+                        .unwrap();
+                    other_paths.insert(key, short_path);
+                }
+                CachedImageData::External(ref ext) => {
+                    let short_path = format!("externals/{}", external_images.len() + 1);
+                    other_paths.insert(key, short_path.clone());
+                    external_images.push(ExternalCaptureImage {
+                        short_path,
+                        descriptor: desc.clone(),
+                        external: ext.clone(),
+                    });
+                }
+            }
+        }
+
+        let mut font_templates = FastHashMap::default();
+        let mut font_remap = FastHashMap::default();
+        // Generate a map from duplicate font keys to their template.
+        for key in res.fonts.font_keys.keys() {
+            let shared_key = res.fonts.font_keys.map_key(&key);
+            let template = match res.fonts.templates.get_font(&shared_key) {
+                Some(template) => template,
+                None => {
+                    debug!("Failed serializing font template {:?}", key);
+                    continue;
+                }
+            };
+            let plain_font = match template {
+                FontTemplate::Raw(arc, index) => {
+                    PlainFontTemplate {
+                        data: font_paths[&arc.as_ptr()].clone(),
+                        index,
+                    }
+                }
+                #[cfg(not(target_os = "macos"))]
+                FontTemplate::Native(native) => {
+                    PlainFontTemplate {
+                        data: native.path.to_string_lossy().to_string(),
+                        index: native.index,
+                    }
+                }
+                #[cfg(target_os = "macos")]
+                FontTemplate::Native(native) => {
+                    PlainFontTemplate {
+                        data: native.name,
+                        index: 0,
+                    }
+                }
+            };
+            font_templates.insert(key, plain_font);
+            // Generate a reverse map from a shared key to a representative key.
+            font_remap.insert(shared_key, key);
+        }
+        let mut font_instances = Vec::new();
+        // Build a list of duplicate instance keys.
+        for instance_key in res.fonts.instance_keys.keys() {
+            let shared_key = res.fonts.instance_keys.map_key(&instance_key);
+            let instance = match res.fonts.instances.get_font_instance(shared_key) {
+                Some(instance) => instance,
+                None => {
+                    debug!("Failed serializing font instance {:?}", instance_key);
+                    continue;
+                }
+            };
+            // Target the instance towards a representative duplicate font key. The font key will be
+            // de-duplicated on load to an appropriate shared key.
+            font_instances.push(BaseFontInstance {
+                font_key: font_remap.get(&instance.font_key).cloned().unwrap_or(instance.font_key),
+                instance_key,
+                ..(*instance).clone()
+            });
+        }
+        let resources = PlainResources {
+            font_templates,
+            font_instances,
+            image_templates: res.image_templates.images
+                .iter()
+                .map(|(key, template)| {
+                    (*key, PlainImageTemplate {
+                        data: match template.data {
+                            CachedImageData::Raw(ref arc) => image_paths[&arc.as_ptr()].clone(),
+                            _ => other_paths[key].clone(),
+                        },
+                        descriptor: template.descriptor.clone(),
+                        tiling: template.tiling,
+                        generation: template.generation,
+                    })
+                })
+                .collect(),
+        };
+
+        (resources, external_images)
+    }
+
+    #[cfg(feature = "capture")]
+    pub fn save_caches(&self, _root: &PathBuf) -> PlainCacheRef {
+        PlainCacheRef {
+            current_frame_id: self.current_frame_id,
+            glyphs: &self.cached_glyphs,
+            glyph_dimensions: &self.cached_glyph_dimensions,
+            images: &self.cached_images,
+            render_tasks: &self.cached_render_tasks,
+            textures: &self.texture_cache,
+            picture_textures: &self.picture_textures,
+        }
+    }
+
+    /// Replaces the cached resources with the contents of a capture.
+    ///
+    /// Resource files are resolved against `config.resource_root()` (absolute font paths are
+    /// used as-is). An unreadable font falls back to `NATIVE_FONT`; an unreadable image panics.
+    /// Returns the `PlainExternalImage` entries found among the image templates.
+    #[cfg(feature = "replay")]
+    pub fn load_capture(
+        &mut self,
+        resources: PlainResources,
+        caches: Option<PlainCacheOwn>,
+        config: &CaptureConfig,
+    ) -> Vec<PlainExternalImage> {
+        use std::{fs, path::Path};
+        use crate::texture_cache::TextureCacheConfig;
+
+        info!("loading resource cache");
+        //TODO: instead of filling the local path to Arc<data> map as we process
+        // each of the resource types, we could go through all of the local paths
+        // and fill out the map as the first step.
+        let mut raw_map = FastHashMap::<String, Arc<Vec<u8>>>::default();
+
+        self.clear(ClearCache::all());
+        self.clear_images(|_| true);
+
+        match caches {
+            Some(cached) => {
+                self.current_frame_id = cached.current_frame_id;
+                self.cached_glyphs = cached.glyphs;
+                self.cached_glyph_dimensions = cached.glyph_dimensions;
+                self.cached_images = cached.images;
+                self.cached_render_tasks = cached.render_tasks;
+                self.texture_cache = cached.textures;
+                self.picture_textures = cached.picture_textures;
+            }
+            None => {
+                self.current_frame_id = FrameId::INVALID;
+                self.texture_cache = TextureCache::new(
+                    self.texture_cache.max_texture_size(),
+                    self.texture_cache.tiling_threshold(),
+                    self.texture_cache.color_formats(),
+                    self.texture_cache.swizzle_settings(),
+                    &TextureCacheConfig::DEFAULT,
+                );
+                self.picture_textures = PictureTextures::new(
+                    self.picture_textures.default_tile_size(),
+                    self.picture_textures.filter(),
+                );
+            }
+        }
+
+        self.glyph_rasterizer.reset();
+        let res = &mut self.resources;
+        res.fonts.templates.clear();
+        res.fonts.instances.clear();
+        res.image_templates.images.clear();
+
+        info!("\tfont templates...");
+        let root = config.resource_root();
+        let native_font_replacement = Arc::new(NATIVE_FONT.to_vec());
+        for (key, plain_template) in resources.font_templates {
+            let arc = match raw_map.entry(plain_template.data) {
+                Entry::Occupied(e) => Arc::clone(e.get()),
+                Entry::Vacant(e) => {
+                    let file_path = if Path::new(e.key()).is_absolute() {
+                        PathBuf::from(e.key())
+                    } else {
+                        root.join(e.key())
+                    };
+                    let arc = match fs::read(file_path) {
+                        Ok(buffer) => Arc::new(buffer),
+                        Err(err) => {
+                            error!("Unable to open font template {:?}: {:?}", e.key(), err);
+                            Arc::clone(&native_font_replacement)
+                        }
+                    };
+                    e.insert(arc).clone()
+                }
+            };
+
+            let template = FontTemplate::Raw(arc, plain_template.index);
+            // Only add the template if this is the first time it has been seen.
+            if let Some(shared_key) = res.fonts.font_keys.add_key(&key, &template) {
+                self.glyph_rasterizer.add_font(shared_key, template.clone());
+                res.fonts.templates.add_font(shared_key, template);
+            }
+        }
+
+        info!("\tfont instances...");
+        for instance in resources.font_instances {
+            // Target the instance to a shared font key.
+            let base = BaseFontInstance {
+                font_key: res.fonts.font_keys.map_key(&instance.font_key),
+                ..instance
+            };
+            if let Some(shared_instance) = res.fonts.instance_keys.add_key(base) {
+                res.fonts.instances.add_font_instance(shared_instance);
+            }
+        }
+
+        info!("\timage templates...");
+        let mut external_images = Vec::new();
+        for (key, template) in resources.image_templates {
+            let data = match config.deserialize_for_resource::<PlainExternalImage, _>(&template.data) {
+                Some(plain) => {
+                    let ext_data = plain.external;
+                    external_images.push(plain);
+                    CachedImageData::External(ext_data)
+                }
+                None => {
+                    let arc = match raw_map.entry(template.data) {
+                        Entry::Occupied(e) => Arc::clone(e.get()),
+                        Entry::Vacant(e) => {
+                            let buffer = fs::read(root.join(e.key()))
+                                .unwrap_or_else(|err| panic!("Unable to open {}: {:?}", e.key(), err));
+                            e.insert(Arc::new(buffer)).clone()
+                        }
+                    };
+                    CachedImageData::Raw(arc)
+                }
+            };
+
+            res.image_templates.images.insert(key, ImageResource {
+                data,
+                descriptor: template.descriptor,
+                tiling: template.tiling,
+                visible_rect: template.descriptor.size.into(),
+                generation: template.generation,
+            });
+        }
+
+        external_images
+    }
+
+    #[cfg(feature = "capture")]
+    pub fn save_capture_sequence(&mut self, config: &mut CaptureConfig) -> Vec<ExternalCaptureImage> {
+        // `capture_dirty` is clear: skip serialization and report no deferred images.
+        if !self.capture_dirty {
+            return Vec::new();
+        }
+        self.capture_dirty = false;
+        config.prepare_resource();
+        let (resources, deferred) = self.save_capture(&config.resource_root());
+        config.serialize_for_resource(&resources, "plain-resources.ron");
+        deferred
+    }
+}
diff --git a/gfx/wr/webrender/src/scene.rs b/gfx/wr/webrender/src/scene.rs
new file mode 100644
index 0000000000..454a2001a9
--- /dev/null
+++ b/gfx/wr/webrender/src/scene.rs
@@ -0,0 +1,373 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{BuiltDisplayList, DisplayListWithCache, ColorF, DynamicProperties, Epoch, FontRenderMode};
+use api::{PipelineId, PropertyBinding, PropertyBindingId, PropertyValue, MixBlendMode, StackingContext};
+use api::units::*;
+use malloc_size_of::{MallocSizeOf, MallocSizeOfOps};
+use crate::render_api::MemoryReport;
+use crate::composite::CompositorKind;
+use crate::clip::{ClipStore, ClipTree};
+use crate::spatial_tree::SpatialTree;
+use crate::frame_builder::{FrameBuilderConfig};
+use crate::hit_test::{HitTester, HitTestingScene, HitTestingSceneStats};
+use crate::internal_types::FastHashMap;
+use crate::picture::SurfaceInfo;
+use crate::picture_graph::PictureGraph;
+use crate::prim_store::{PrimitiveStore, PrimitiveStoreStats, PictureIndex, PrimitiveInstance};
+use crate::tile_cache::TileCacheConfig;
+use std::sync::Arc;
+
+/// Stores a map of the animated property bindings for the current display list. These
+/// can be used to animate the transform and/or opacity of a display list without
+/// re-submitting the display list itself.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct SceneProperties {
+    transform_properties: FastHashMap<PropertyBindingId, LayoutTransform>,
+    float_properties: FastHashMap<PropertyBindingId, f32>,
+    color_properties: FastHashMap<PropertyBindingId, ColorF>,
+    current_properties: DynamicProperties,
+    pending_properties: Option<DynamicProperties>,
+}
+
+impl SceneProperties {
+    /// Creates a property store with empty lookup maps, default current
+    /// properties and no pending update.
+    pub fn new() -> Self {
+        Self {
+            transform_properties: FastHashMap::default(),
+            float_properties: FastHashMap::default(),
+            color_properties: FastHashMap::default(),
+            current_properties: DynamicProperties::default(),
+            pending_properties: None,
+        }
+    }
+
+    /// Drops the pending properties without flushing them. The values
+    /// applied by the last flush stay in effect.
+    pub fn reset_properties(&mut self) {
+        drop(self.pending_properties.take());
+    }
+
+    /// Merges `properties` into the pending set, starting from a default
+    /// set when nothing is pending yet.
+    pub fn add_properties(&mut self, properties: DynamicProperties) {
+        self.pending_properties
+            .get_or_insert_with(DynamicProperties::default)
+            .extend(properties);
+    }
+
+    /// Appends `transforms` to the pending transform list, starting from
+    /// a default pending set when nothing is pending yet.
+    pub fn add_transforms(&mut self, transforms: Vec<PropertyValue<LayoutTransform>>) {
+        self.pending_properties
+            .get_or_insert_with(DynamicProperties::default)
+            .transforms
+            .extend(transforms);
+    }
+
+    /// Applies the pending properties, if there are any, and returns true
+    /// when they differ from the properties applied by the previous flush.
+    ///
+    /// Only the final pending state is compared against the current one,
+    /// so a transaction may mix reset_properties, add_properties and
+    /// add_transforms calls freely and still get `false` when the end
+    /// result is unchanged. This can have significant power saving
+    /// implications, since a frame build can be skipped in many cases
+    /// where the properties haven't changed.
+    pub fn flush_pending_updates(&mut self) -> bool {
+        // Bail out unless there is a pending set that actually differs.
+        let pending = match self.pending_properties {
+            Some(ref pending) if *pending != self.current_properties => pending,
+            _ => return false,
+        };
+
+        // Rebuild every lookup map from the pending lists.
+        self.transform_properties.clear();
+        self.transform_properties.extend(
+            pending.transforms.iter().map(|property| (property.key.id, property.value)),
+        );
+
+        self.float_properties.clear();
+        self.float_properties.extend(
+            pending.floats.iter().map(|property| (property.key.id, property.value)),
+        );
+
+        self.color_properties.clear();
+        self.color_properties.extend(
+            pending.colors.iter().map(|property| (property.key.id, property.value)),
+        );
+
+        // Remember what was applied so the next flush can compare against it.
+        self.current_properties = pending.clone();
+        true
+    }
+
+    /// Get the current value for a transform property. A plain value is
+    /// returned as-is; a binding is looked up by id and falls back to the
+    /// value carried by the binding when the id is unknown.
+    pub fn resolve_layout_transform(
+        &self,
+        property: &PropertyBinding<LayoutTransform>,
+    ) -> LayoutTransform {
+        match *property {
+            PropertyBinding::Value(value) => value,
+            PropertyBinding::Binding(ref key, fallback) => self
+                .transform_properties
+                .get(&key.id)
+                .map_or(fallback, |current| *current),
+        }
+    }
+
+    /// Get the current value for a float property. A plain value is
+    /// returned as-is; a binding is looked up by id and falls back to the
+    /// value carried by the binding when the id is unknown.
+    pub fn resolve_float(
+        &self,
+        property: &PropertyBinding<f32>
+    ) -> f32 {
+        match *property {
+            PropertyBinding::Value(value) => value,
+            PropertyBinding::Binding(ref key, fallback) => self
+                .float_properties
+                .get(&key.id)
+                .map_or(fallback, |current| *current),
+        }
+    }
+
+    /// Returns the float lookup map, keyed by binding id. It is rebuilt by
+    /// `flush_pending_updates` whenever the pending properties differ from
+    /// the current ones.
+    pub fn float_properties(&self) -> &FastHashMap<PropertyBindingId, f32> {
+        &self.float_properties
+    }
+
+    /// Get the current value for a color property. A plain value is
+    /// returned as-is; a binding is looked up by id and falls back to the
+    /// value carried by the binding when the id is unknown.
+    pub fn resolve_color(
+        &self,
+        property: &PropertyBinding<ColorF>
+    ) -> ColorF {
+        match *property {
+            PropertyBinding::Value(value) => value,
+            PropertyBinding::Binding(ref key, fallback) => self
+                .color_properties
+                .get(&key.id)
+                .map_or(fallback, |current| *current),
+        }
+    }
+
+    /// Returns the color lookup map, keyed by binding id. It is rebuilt by
+    /// `flush_pending_updates` whenever the pending properties differ from
+    /// the current ones.
+    pub fn color_properties(&self) -> &FastHashMap<PropertyBindingId, ColorF> {
+        &self.color_properties
+    }
+}
+
+/// A representation of the layout within the display port for a given document or iframe.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Clone)]
+pub struct ScenePipeline {
+    pub pipeline_id: PipelineId,
+    pub viewport_size: LayoutSize,
+    pub background_color: Option<ColorF>,
+    pub display_list: DisplayListWithCache,
+}
+
+/// A complete representation of the layout bundling visible pipelines together.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Clone)]
+pub struct Scene {
+    pub root_pipeline_id: Option<PipelineId>,
+    pub pipelines: FastHashMap<PipelineId, ScenePipeline>,
+    pub pipeline_epochs: FastHashMap<PipelineId, Epoch>,
+}
+
+impl Scene {
+    /// Creates a scene with no root pipeline and no pipelines.
+    pub fn new() -> Self {
+        Self {
+            root_pipeline_id: None,
+            pipelines: FastHashMap::default(),
+            pipeline_epochs: FastHashMap::default(),
+        }
+    }
+
+    /// Marks `pipeline_id` as the root pipeline of this scene.
+    pub fn set_root_pipeline_id(&mut self, pipeline_id: PipelineId) {
+        self.root_pipeline_id = Some(pipeline_id);
+    }
+
+    /// Installs `display_list` for `pipeline_id` and records `epoch`. An existing
+    /// pipeline's `DisplayListWithCache` is updated and reused; otherwise one is created.
+    pub fn set_display_list(
+        &mut self,
+        pipeline_id: PipelineId,
+        epoch: Epoch,
+        display_list: BuiltDisplayList,
+        background_color: Option<ColorF>,
+        viewport_size: LayoutSize,
+    ) {
+        let display_list = if let Some(mut previous) = self.pipelines.remove(&pipeline_id) {
+            previous.display_list.update(display_list);
+            previous.display_list
+        } else {
+            DisplayListWithCache::new_from_list(display_list)
+        };
+
+        self.pipelines.insert(
+            pipeline_id,
+            ScenePipeline { pipeline_id, viewport_size, background_color, display_list },
+        );
+        self.pipeline_epochs.insert(pipeline_id, epoch);
+    }
+
+    /// Removes the pipeline and its epoch. If it was the root pipeline, the
+    /// scene is left without a root.
+    pub fn remove_pipeline(&mut self, pipeline_id: PipelineId) {
+        self.pipelines.remove(&pipeline_id);
+        self.pipeline_epochs.remove(&pipeline_id);
+        if self.root_pipeline_id == Some(pipeline_id) {
+            self.root_pipeline_id = None;
+        }
+    }
+
+    /// Records `epoch` for `pipeline_id`, replacing any previous epoch.
+    pub fn update_epoch(&mut self, pipeline_id: PipelineId, epoch: Epoch) {
+        self.pipeline_epochs.insert(pipeline_id, epoch);
+    }
+
+    /// Returns true when a root pipeline id is set and that pipeline exists.
+    pub fn has_root_pipeline(&self) -> bool {
+        self.root_pipeline_id
+            .as_ref()
+            .map_or(false, |root_id| self.pipelines.contains_key(root_id))
+    }
+
+    /// Adds the `size_of` of every pipeline's display list to `report.display_list`.
+    pub fn report_memory(
+        &self,
+        ops: &mut MallocSizeOfOps,
+        report: &mut MemoryReport
+    ) {
+        for pipeline in self.pipelines.values() {
+            report.display_list += pipeline.display_list.size_of(ops);
+        }
+    }
+}
+
+pub trait StackingContextHelpers {
+    fn mix_blend_mode_for_compositing(&self) -> Option<MixBlendMode>;
+}
+
+impl StackingContextHelpers for StackingContext {
+    /// `Normal` yields `None`; every other blend mode is returned unchanged.
+    fn mix_blend_mode_for_compositing(&self) -> Option<MixBlendMode> {
+        match self.mix_blend_mode {
+            MixBlendMode::Normal => None,
+            blend_mode => Some(blend_mode),
+        }
+    }
+}
+
+/// WebRender's internal representation of the scene.
+pub struct BuiltScene {
+    pub has_root_pipeline: bool,
+    pub pipeline_epochs: FastHashMap<PipelineId, Epoch>,
+    pub output_rect: DeviceIntRect,
+    pub background_color: Option<ColorF>,
+    pub prim_store: PrimitiveStore,
+    pub clip_store: ClipStore,
+    pub config: FrameBuilderConfig,
+    pub hit_testing_scene: Arc<HitTestingScene>,
+    pub tile_cache_config: TileCacheConfig,
+    pub tile_cache_pictures: Vec<PictureIndex>,
+    pub picture_graph: PictureGraph,
+    pub num_plane_splitters: usize,
+    pub prim_instances: Vec<PrimitiveInstance>,
+    pub surfaces: Vec<SurfaceInfo>,
+    pub clip_tree: ClipTree,
+}
+
+impl BuiltScene {
+    pub fn empty() -> Self {
+        BuiltScene {
+            has_root_pipeline: false,
+            pipeline_epochs: FastHashMap::default(),
+            output_rect: DeviceIntRect::zero(),
+            background_color: None,
+            prim_store: PrimitiveStore::new(&PrimitiveStoreStats::empty()),
+            clip_store: ClipStore::new(),
+            hit_testing_scene: Arc::new(HitTestingScene::new(&HitTestingSceneStats::empty())),
+            tile_cache_config: TileCacheConfig::new(0),
+            tile_cache_pictures: Vec::new(),
+            picture_graph: PictureGraph::new(),
+            num_plane_splitters: 0,
+            prim_instances: Vec::new(),
+            surfaces: Vec::new(),
+            clip_tree: ClipTree::new(),
+            config: FrameBuilderConfig {
+                default_font_render_mode: FontRenderMode::Mono,
+                dual_source_blending_is_supported: false,
+                testing: false,
+                gpu_supports_fast_clears: false,
+                gpu_supports_advanced_blend: false,
+                advanced_blend_is_coherent: false,
+                gpu_supports_render_target_partial_update: true,
+                external_images_require_copy: false,
+                batch_lookback_count: 0,
+                background_color: None,
+                compositor_kind: CompositorKind::default(),
+                tile_size_override: None,
+                max_surface_override: None,
+                max_depth_ids: 0,
+                max_target_size: 0,
+                force_invalidation: false,
+                is_software: false,
+                low_quality_pinch_zoom: false,
+            },
+        }
+    }
+
+    /// Get the memory usage statistics to pre-allocate for the next scene.
+    pub fn get_stats(&self) -> SceneStats {
+        SceneStats {
+            prim_store_stats: self.prim_store.get_stats(),
+            hit_test_stats: self.hit_testing_scene.get_stats(),
+        }
+    }
+
+    pub fn create_hit_tester(
+        &mut self,
+        spatial_tree: &SpatialTree,
+    ) -> HitTester {
+        HitTester::new(
+            Arc::clone(&self.hit_testing_scene),
+            spatial_tree,
+        )
+    }
+}
+
+/// Stores the allocation sizes of various arrays in the built
+/// scene. This is retrieved from the current frame builder
+/// and used to reserve an approximately correct capacity of
+/// the arrays for the next scene that is getting built.
+pub struct SceneStats {
+    pub prim_store_stats: PrimitiveStoreStats,
+    pub hit_test_stats: HitTestingSceneStats,
+}
+
+impl SceneStats {
+    pub fn empty() -> Self {
+        SceneStats {
+            prim_store_stats: PrimitiveStoreStats::empty(),
+            hit_test_stats: HitTestingSceneStats::empty(),
+        }
+    }
+}
diff --git a/gfx/wr/webrender/src/scene_builder_thread.rs b/gfx/wr/webrender/src/scene_builder_thread.rs
new file mode 100644
index 0000000000..c6386f3e66
--- /dev/null
+++ b/gfx/wr/webrender/src/scene_builder_thread.rs
@@ -0,0 +1,798 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{AsyncBlobImageRasterizer, BlobImageResult, Parameter};
+use api::{DocumentId, PipelineId, ExternalEvent, BlobImageRequest};
+use api::{NotificationRequest, Checkpoint, IdNamespace, QualitySettings};
+use api::{PrimitiveKeyKind, GlyphDimensionRequest, GlyphIndexRequest};
+use api::channel::{unbounded_channel, single_msg_channel, Receiver, Sender};
+use api::units::*;
+use crate::render_api::{ApiMsg, FrameMsg, SceneMsg, ResourceUpdate, TransactionMsg, MemoryReport};
+#[cfg(feature = "capture")]
+use crate::capture::CaptureConfig;
+use crate::frame_builder::FrameBuilderConfig;
+use crate::scene_building::SceneBuilder;
+use crate::clip::{ClipIntern, PolygonIntern};
+use crate::filterdata::FilterDataIntern;
+use glyph_rasterizer::SharedFontResources;
+use crate::intern::{Internable, Interner, UpdateList};
+use crate::internal_types::{FastHashMap, FastHashSet};
+use malloc_size_of::{MallocSizeOf, MallocSizeOfOps};
+use crate::prim_store::backdrop::{BackdropCapture, BackdropRender};
+use crate::prim_store::borders::{ImageBorder, NormalBorderPrim};
+use crate::prim_store::gradient::{LinearGradient, RadialGradient, ConicGradient};
+use crate::prim_store::image::{Image, YuvImage};
+use crate::prim_store::line_dec::LineDecoration;
+use crate::prim_store::picture::Picture;
+use crate::prim_store::text_run::TextRun;
+use crate::profiler::{self, TransactionProfile};
+use crate::render_backend::SceneView;
+use crate::renderer::{FullFrameStats, PipelineInfo};
+use crate::scene::{Scene, BuiltScene, SceneStats};
+use crate::spatial_tree::{SceneSpatialTree, SpatialTreeUpdates};
+use crate::telemetry::Telemetry;
+use crate::SceneBuilderHooks;
+use std::iter;
+use time::precise_time_ns;
+use crate::util::drain_filter;
+use std::thread;
+use std::time::Duration;
+
+/// Rasterizes `txn.blob_requests` with the transaction's blob rasterizer, if it has one.
+fn rasterize_blobs(txn: &mut TransactionMsg, is_low_priority: bool) {
+    profile_scope!("rasterize_blobs");
+    let mut fresh_blobs = match txn.blob_rasterizer {
+        Some(ref mut rasterizer) => rasterizer.rasterize(&txn.blob_requests, is_low_priority),
+        None => return,
+    };
+    if txn.rasterized_blobs.is_empty() {
+        txn.rasterized_blobs = fresh_blobs; // list was empty: adopt the new Vec as-is
+    } else {
+        txn.rasterized_blobs.append(&mut fresh_blobs);
+    }
+}
+
+/// Represent the remaining work associated to a transaction after the scene building
+/// phase as well as the result of scene building itself if applicable.
+pub struct BuiltTransaction {
+    pub document_id: DocumentId,
+    pub built_scene: Option<BuiltScene>,
+    pub view: SceneView,
+    pub resource_updates: Vec<ResourceUpdate>,
+    pub rasterized_blobs: Vec<(BlobImageRequest, BlobImageResult)>,
+    pub blob_rasterizer: Option<Box<dyn AsyncBlobImageRasterizer>>,
+    pub frame_ops: Vec<FrameMsg>,
+    pub removed_pipelines: Vec<(PipelineId, DocumentId)>,
+    pub notifications: Vec<NotificationRequest>,
+    pub interner_updates: Option<InternerUpdates>,
+    pub spatial_tree_updates: Option<SpatialTreeUpdates>,
+    pub render_frame: bool,
+    pub invalidate_rendered_frame: bool,
+    pub profile: TransactionProfile,
+    pub frame_stats: FullFrameStats,
+}
+
+#[cfg(feature = "replay")]
+pub struct LoadScene {
+    pub document_id: DocumentId,
+    pub scene: Scene,
+    pub fonts: SharedFontResources,
+    pub view: SceneView,
+    pub config: FrameBuilderConfig,
+    pub build_frame: bool,
+    pub interners: Interners,
+    pub spatial_tree: SceneSpatialTree,
+}
+
+/// Message to the scene builder thread.
+pub enum SceneBuilderRequest {
+    Transactions(Vec<Box<TransactionMsg>>),
+    AddDocument(DocumentId, DeviceIntSize),
+    DeleteDocument(DocumentId),
+    GetGlyphDimensions(GlyphDimensionRequest),
+    GetGlyphIndices(GlyphIndexRequest),
+    ClearNamespace(IdNamespace),
+    SimulateLongSceneBuild(u32),
+    ExternalEvent(ExternalEvent),
+    WakeUp,
+    StopRenderBackend,
+    ShutDown(Option<Sender<()>>),
+    Flush(Sender<()>),
+    SetFrameBuilderConfig(FrameBuilderConfig),
+    SetParameter(Parameter),
+    ReportMemory(Box<MemoryReport>, Sender<Box<MemoryReport>>),
+    #[cfg(feature = "capture")]
+    SaveScene(CaptureConfig),
+    #[cfg(feature = "replay")]
+    LoadScenes(Vec<LoadScene>),
+    #[cfg(feature = "capture")]
+    StartCaptureSequence(CaptureConfig),
+    #[cfg(feature = "capture")]
+    StopCaptureSequence,
+}
+
+/// Message from the scene builder thread to the render backend.
+pub enum SceneBuilderResult {
+    Transactions(Vec<Box<BuiltTransaction>>, Option<Sender<SceneSwapResult>>),
+    ExternalEvent(ExternalEvent),
+    FlushComplete(Sender<()>),
+    DeleteDocument(DocumentId),
+    ClearNamespace(IdNamespace),
+    GetGlyphDimensions(GlyphDimensionRequest),
+    GetGlyphIndices(GlyphIndexRequest),
+    SetParameter(Parameter),
+    StopRenderBackend,
+    ShutDown(Option<Sender<()>>),
+
+    #[cfg(feature = "capture")]
+    /// The same as `Transactions`, but also supplies a `CaptureConfig` that the
+    /// render backend should use for sequence capture, until the next
+    /// `CapturedTransactions` or `StopCaptureSequence` result.
+    CapturedTransactions(Vec<Box<BuiltTransaction>>, CaptureConfig, Option<Sender<SceneSwapResult>>),
+
+    #[cfg(feature = "capture")]
+    /// The scene builder has stopped sequence capture, so the render backend
+    /// should do the same.
+    StopCaptureSequence,
+}
+
+/// Message from render backend to scene builder to indicate the
+/// scene swap was completed. We need a separate channel for this
+/// so that they don't get mixed with SceneBuilderRequest messages.
+pub enum SceneSwapResult {
+    Complete(Sender<()>),
+    Aborted,
+}
+
+macro_rules! declare_interners {
+    ( $( $name:ident : $ty:ident, )+ ) => {
+        /// This struct contains all items that can be shared between
+        /// display lists. We want to intern and share the same clips,
+        /// primitives and other things between display lists so that:
+        /// - GPU cache handles remain valid, reducing GPU cache updates.
+        /// - Comparison of primitives and pictures between two
+        ///   display lists is (a) fast (b) done during scene building.
+        #[cfg_attr(feature = "capture", derive(Serialize))]
+        #[cfg_attr(feature = "replay", derive(Deserialize))]
+        #[derive(Default)]
+        pub struct Interners {
+            $(
+                pub $name: Interner<$ty>,
+            )+
+        }
+
+        $(
+            impl AsMut<Interner<$ty>> for Interners {
+                fn as_mut(&mut self) -> &mut Interner<$ty> {
+                    &mut self.$name
+                }
+            }
+        )+
+
+        pub struct InternerUpdates {
+            $(
+                pub $name: UpdateList<<$ty as Internable>::Key>,
+            )+
+        }
+
+        impl Interners {
+            /// Reports CPU heap memory used by the interners.
+            fn report_memory(
+                &self,
+                ops: &mut MallocSizeOfOps,
+                r: &mut MemoryReport,
+            ) {
+                $(
+                    r.interning.interners.$name += self.$name.size_of(ops);
+                )+
+            }
+
+            fn end_frame_and_get_pending_updates(&mut self) -> InternerUpdates {
+                InternerUpdates {
+                    $(
+                        $name: self.$name.end_frame_and_get_pending_updates(),
+                    )+
+                }
+            }
+        }
+    }
+}
+
+crate::enumerate_interners!(declare_interners);
+
+/// A document in the scene builder contains the current scene,
+/// as well as the persistent interners. This allows interned items
+/// such as clips to be de-duplicated, and persisted in the GPU
+/// cache between display lists.
+struct Document {
+    scene: Scene,
+    interners: Interners,
+    stats: SceneStats,
+    view: SceneView,
+    spatial_tree: SceneSpatialTree,
+}
+
+impl Document {
+    /// Creates a document whose view covers `device_rect`.
+    ///
+    /// The scene, interners, stats and spatial tree are freshly constructed,
+    /// and the view uses the default quality settings.
+    fn new(device_rect: DeviceIntRect) -> Self {
+        let scene = Scene::new();
+        let interners = Interners::default();
+        let stats = SceneStats::empty();
+        let spatial_tree = SceneSpatialTree::new();
+        let view = SceneView {
+            device_rect,
+            quality_settings: QualitySettings::default(),
+        };
+
+        Self { scene, interners, stats, view, spatial_tree }
+    }
+}
+
+/// State owned by the scene builder thread.
+///
+/// The thread receives `SceneBuilderRequest`s on `rx`, processes them in
+/// `run`, and reports results as `ApiMsg::SceneBuilderResult` on `tx`.
+pub struct SceneBuilderThread {
+    /// Per-document scene building state, keyed by document id.
+    documents: FastHashMap<DocumentId, Document>,
+    /// Incoming requests.
+    rx: Receiver<SceneBuilderRequest>,
+    /// Outgoing results (see `send`).
+    tx: Sender<ApiMsg>,
+    /// Configuration passed to every scene build; replaced by
+    /// `SetFrameBuilderConfig` requests and when loading captured scenes.
+    config: FrameBuilderConfig,
+    /// Shared font resources; a clone is handed to each scene build.
+    fonts: SharedFontResources,
+    /// Used for memory reporting. `report_memory` unwraps this, so it must be
+    /// `Some` if `ReportMemory` requests are ever sent.
+    size_of_ops: Option<MallocSizeOfOps>,
+    /// Optional embedder callbacks invoked at various points of the event loop.
+    hooks: Option<Box<dyn SceneBuilderHooks + Send>>,
+    /// When non-zero, each processed transaction sleeps for this many
+    /// milliseconds (set by `SimulateLongSceneBuild`).
+    simulate_slow_ms: u32,
+    /// Pipelines removed so far by the transaction currently being processed;
+    /// cleared at the end of every transaction.
+    removed_pipelines: FastHashSet<PipelineId>,
+    /// Active capture sequence configuration, if any.
+    #[cfg(feature = "capture")]
+    capture_config: Option<CaptureConfig>,
+}
+
+/// The pair of channel endpoints a `SceneBuilderThread` is constructed with.
+pub struct SceneBuilderThreadChannels {
+    /// Receiving end of the request channel created in `new`.
+    rx: Receiver<SceneBuilderRequest>,
+    /// Sender for outgoing `ApiMsg`s, supplied by the caller of `new`.
+    tx: Sender<ApiMsg>,
+}
+
+impl SceneBuilderThreadChannels {
+    /// Creates the channel endpoints for a scene builder thread.
+    ///
+    /// `tx` is stored as the sender for outgoing `ApiMsg`s. A new unbounded
+    /// channel is created for incoming requests: its receiving end is kept in
+    /// the returned `SceneBuilderThreadChannels`, and its sending end is
+    /// returned as the second tuple element.
+    pub fn new(
+        tx: Sender<ApiMsg>
+    ) -> (Self, Sender<SceneBuilderRequest>) {
+        let (request_tx, request_rx) = unbounded_channel();
+        let channels = SceneBuilderThreadChannels { rx: request_rx, tx };
+
+        (channels, request_tx)
+    }
+}
+
+impl SceneBuilderThread {
+    /// Creates a scene builder thread state with no documents.
+    ///
+    /// The channel endpoints are taken from `channels`; slow-build simulation
+    /// starts disabled and (with the `capture` feature) no capture sequence is
+    /// active.
+    pub fn new(
+        config: FrameBuilderConfig,
+        fonts: SharedFontResources,
+        size_of_ops: Option<MallocSizeOfOps>,
+        hooks: Option<Box<dyn SceneBuilderHooks + Send>>,
+        channels: SceneBuilderThreadChannels,
+    ) -> Self {
+        SceneBuilderThread {
+            documents: FastHashMap::default(),
+            rx: channels.rx,
+            tx: channels.tx,
+            config,
+            fonts,
+            size_of_ops,
+            hooks,
+            simulate_slow_ms: 0,
+            removed_pipelines: FastHashSet::default(),
+            #[cfg(feature = "capture")]
+            capture_config: None,
+        }
+    }
+
+    /// Send a result over the outgoing `tx` channel.
+    ///
+    /// The result is wrapped in `ApiMsg::SceneBuilderResult` and sent as a
+    /// single message. (Presumably the receiving end is the api queue that the
+    /// render backend is blocking on - confirm where the channel is created.)
+    ///
+    /// # Panics
+    ///
+    /// Panics if sending on the channel fails.
+    pub fn send(&self, msg: SceneBuilderResult) {
+        let wrapped = ApiMsg::SceneBuilderResult(msg);
+        self.tx.send(wrapped).unwrap();
+    }
+
+    /// The scene builder thread's event loop.
+    ///
+    /// Registers the hooks (if any), then blocks on the request channel and
+    /// handles one request per iteration until a `ShutDown` request arrives or
+    /// receiving fails, and finally deregisters the hooks. After every handled
+    /// request that does not end the loop, `hooks.poke()` is called.
+    ///
+    /// Many requests carry no scene builder work and are simply echoed back as
+    /// the corresponding `SceneBuilderResult`.
+    pub fn run(&mut self) {
+        if let Some(ref hooks) = self.hooks {
+            hooks.register();
+        }
+
+        loop {
+            tracy_begin_frame!("scene_builder_thread");
+
+            match self.rx.recv() {
+                // Nothing to do; the loop still pokes the hooks below.
+                Ok(SceneBuilderRequest::WakeUp) => {}
+                Ok(SceneBuilderRequest::Flush(tx)) => {
+                    self.send(SceneBuilderResult::FlushComplete(tx));
+                }
+                Ok(SceneBuilderRequest::Transactions(txns)) => {
+                    let built_txns : Vec<Box<BuiltTransaction>> = txns.into_iter()
+                        .map(|txn| self.process_transaction(*txn))
+                        .collect();
+                    // While a capture sequence is active, save a new capture
+                    // whenever at least one transaction produced a built scene.
+                    // NOTE(review): the `match` on a bool is presumably deliberate,
+                    // since the `#[cfg]` attribute applies to the whole expression
+                    // and attributes are not accepted on `if` expressions.
+                    #[cfg(feature = "capture")]
+                    match built_txns.iter().any(|txn| txn.built_scene.is_some()) {
+                        true => self.save_capture_sequence(),
+                        _ => {},
+                    }
+                    self.forward_built_transactions(built_txns);
+                }
+                Ok(SceneBuilderRequest::AddDocument(document_id, initial_size)) => {
+                    let old = self.documents.insert(document_id, Document::new(
+                        initial_size.into(),
+                    ));
+                    // Adding the same document twice is only caught in debug builds.
+                    debug_assert!(old.is_none());
+                }
+                Ok(SceneBuilderRequest::DeleteDocument(document_id)) => {
+                    self.documents.remove(&document_id);
+                    self.send(SceneBuilderResult::DeleteDocument(document_id));
+                }
+                Ok(SceneBuilderRequest::ClearNamespace(id)) => {
+                    // Drop every document that belongs to the namespace.
+                    self.documents.retain(|doc_id, _doc| doc_id.namespace_id != id);
+                    self.send(SceneBuilderResult::ClearNamespace(id));
+                }
+                Ok(SceneBuilderRequest::ExternalEvent(evt)) => {
+                    self.send(SceneBuilderResult::ExternalEvent(evt));
+                }
+                Ok(SceneBuilderRequest::GetGlyphDimensions(request)) => {
+                    self.send(SceneBuilderResult::GetGlyphDimensions(request));
+                }
+                Ok(SceneBuilderRequest::GetGlyphIndices(request)) => {
+                    self.send(SceneBuilderResult::GetGlyphIndices(request));
+                }
+                Ok(SceneBuilderRequest::StopRenderBackend) => {
+                    self.send(SceneBuilderResult::StopRenderBackend);
+                }
+                Ok(SceneBuilderRequest::ShutDown(sync)) => {
+                    // Forward the shutdown and leave the loop; the poke and
+                    // tracy end-of-frame below are skipped for this iteration.
+                    self.send(SceneBuilderResult::ShutDown(sync));
+                    break;
+                }
+                Ok(SceneBuilderRequest::SimulateLongSceneBuild(time_ms)) => {
+                    self.simulate_slow_ms = time_ms
+                }
+                Ok(SceneBuilderRequest::ReportMemory(mut report, tx)) => {
+                    // Add this thread's numbers to the report and hand it back
+                    // on the channel supplied with the request.
+                    (*report) += self.report_memory();
+                    tx.send(report).unwrap();
+                }
+                Ok(SceneBuilderRequest::SetFrameBuilderConfig(cfg)) => {
+                    self.config = cfg;
+                }
+                Ok(SceneBuilderRequest::SetParameter(prop)) => {
+                    self.send(SceneBuilderResult::SetParameter(prop));
+                }
+                #[cfg(feature = "replay")]
+                Ok(SceneBuilderRequest::LoadScenes(msg)) => {
+                    self.load_scenes(msg);
+                }
+                #[cfg(feature = "capture")]
+                Ok(SceneBuilderRequest::SaveScene(config)) => {
+                    self.save_scene(config);
+                }
+                #[cfg(feature = "capture")]
+                Ok(SceneBuilderRequest::StartCaptureSequence(config)) => {
+                    self.start_capture_sequence(config);
+                }
+                #[cfg(feature = "capture")]
+                Ok(SceneBuilderRequest::StopCaptureSequence) => {
+                    // FIXME(aosmond): clear config for frames and resource cache without scene
+                    // rebuild?
+                    self.capture_config = None;
+                    self.send(SceneBuilderResult::StopCaptureSequence);
+                }
+                // Receiving failed: stop the event loop.
+                Err(_) => {
+                    break;
+                }
+            }
+
+            if let Some(ref hooks) = self.hooks {
+                hooks.poke();
+            }
+
+            tracy_end_frame!("scene_builder_thread");
+        }
+
+        if let Some(ref hooks) = self.hooks {
+            hooks.deregister();
+        }
+    }
+
+    /// Serializes the state of every document through `config`.
+    ///
+    /// For each document the interners and the scene spatial tree are always
+    /// written; the scene itself is written only when the capture bits contain
+    /// `CaptureBits::SCENE`. File names embed the document's namespace and id.
+    #[cfg(feature = "capture")]
+    fn save_scene(&mut self, config: CaptureConfig) {
+        use crate::render_api::CaptureBits;
+
+        for (doc_id, document) in self.documents.iter() {
+            config.serialize_for_scene(
+                &document.interners,
+                format!("interners-{}-{}", doc_id.namespace_id.0, doc_id.id),
+            );
+
+            config.serialize_for_scene(
+                &document.spatial_tree,
+                format!("scene-spatial-tree-{}-{}", doc_id.namespace_id.0, doc_id.id),
+            );
+
+            if !config.bits.contains(CaptureBits::SCENE) {
+                continue;
+            }
+            config.serialize_for_scene(
+                &document.scene,
+                format!("scene-{}-{}", doc_id.namespace_id.0, doc_id.id),
+            );
+        }
+    }
+
+    /// Installs previously captured scenes as documents and forwards them.
+    ///
+    /// For each loaded scene this replaces the thread's frame builder config
+    /// with the captured one, builds the scene if it has a root pipeline,
+    /// stores the resulting `Document` under the captured document id and
+    /// forwards a single-element batch containing the `BuiltTransaction`.
+    #[cfg(feature = "replay")]
+    fn load_scenes(&mut self, scenes: Vec<LoadScene>) {
+        for mut loaded in scenes {
+            self.config = loaded.config;
+
+            // Without a root pipeline there is nothing to build, and therefore
+            // no interner or spatial tree updates to report either.
+            let (built_scene, interner_updates, spatial_tree_updates) =
+                if loaded.scene.has_root_pipeline() {
+                    let built = SceneBuilder::build(
+                        &loaded.scene,
+                        loaded.fonts,
+                        &loaded.view,
+                        &self.config,
+                        &mut loaded.interners,
+                        &mut loaded.spatial_tree,
+                        &SceneStats::empty(),
+                    );
+                    let interner_changes =
+                        loaded.interners.end_frame_and_get_pending_updates();
+                    let spatial_changes =
+                        loaded.spatial_tree.end_frame_and_get_pending_updates();
+
+                    (Some(built), Some(interner_changes), Some(spatial_changes))
+                } else {
+                    (None, None, None)
+                };
+
+            let document = Document {
+                scene: loaded.scene,
+                interners: loaded.interners,
+                stats: SceneStats::empty(),
+                view: loaded.view.clone(),
+                spatial_tree: loaded.spatial_tree,
+            };
+            self.documents.insert(loaded.document_id, document);
+
+            let built_txn = BuiltTransaction {
+                document_id: loaded.document_id,
+                render_frame: loaded.build_frame,
+                invalidate_rendered_frame: false,
+                built_scene,
+                view: loaded.view,
+                resource_updates: Vec::new(),
+                rasterized_blobs: Vec::new(),
+                blob_rasterizer: None,
+                frame_ops: Vec::new(),
+                removed_pipelines: Vec::new(),
+                notifications: Vec::new(),
+                interner_updates,
+                spatial_tree_updates,
+                profile: TransactionProfile::new(),
+                frame_stats: FullFrameStats::default(),
+            };
+
+            self.forward_built_transactions(vec![Box::new(built_txn)]);
+        }
+    }
+
+    /// Saves one step of the active capture sequence, if there is one.
+    ///
+    /// Does nothing when no capture config is set. Otherwise prepares the
+    /// config for a new scene and serializes every document's interners, plus
+    /// the scene itself when the capture bits contain `CaptureBits::SCENE`.
+    ///
+    /// NOTE(review): unlike `save_scene`, the scene spatial tree is not
+    /// serialized here - confirm whether that is intentional.
+    #[cfg(feature = "capture")]
+    fn save_capture_sequence(
+        &mut self,
+    ) {
+        use crate::render_api::CaptureBits;
+
+        let config = match self.capture_config.as_mut() {
+            Some(active) => active,
+            None => return,
+        };
+
+        config.prepare_scene();
+        for (doc_id, document) in self.documents.iter() {
+            config.serialize_for_scene(
+                &document.interners,
+                format!("interners-{}-{}", doc_id.namespace_id.0, doc_id.id),
+            );
+
+            if !config.bits.contains(CaptureBits::SCENE) {
+                continue;
+            }
+            config.serialize_for_scene(
+                &document.scene,
+                format!("scene-{}-{}", doc_id.namespace_id.0, doc_id.id),
+            );
+        }
+    }
+
+    /// Starts a capture sequence using `config`.
+    ///
+    /// The config is stored as the active capture config (replacing any
+    /// previous one) and an initial capture is saved immediately.
+    #[cfg(feature = "capture")]
+    fn start_capture_sequence(
+        &mut self,
+        config: CaptureConfig,
+    ) {
+        // The config must be stored first: save_capture_sequence reads it from self.
+        self.capture_config = Some(config);
+        self.save_capture_sequence();
+    }
+
+    /// Do the bulk of the work of the scene builder thread.
+    ///
+    /// Applies the scene operations of `txn` to the target document, rebuilds
+    /// the scene when a display list was set or the root pipeline changed (and
+    /// the scene has a root pipeline), rasterizes any requested blobs, handles
+    /// notifications waiting on `Checkpoint::SceneBuilt`, and packages the
+    /// results into a `BuiltTransaction`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `txn.document_id` does not refer to a known document.
+    fn process_transaction(&mut self, mut txn: TransactionMsg) -> Box<BuiltTransaction> {
+        profile_scope!("process_transaction");
+
+        if let Some(ref hooks) = self.hooks {
+            hooks.pre_scene_build();
+        }
+
+        let doc = self.documents.get_mut(&txn.document_id).unwrap();
+        let scene = &mut doc.scene;
+
+        // The profile is moved out of the transaction and handed on in the
+        // BuiltTransaction at the end.
+        let mut profile = txn.profile.take();
+
+        // Start of the interval reported as SCENE_BUILD_TIME; it covers both
+        // applying the scene ops and the scene build itself.
+        let scene_build_start = precise_time_ns();
+        let mut removed_pipelines = Vec::new();
+        let mut rebuild_scene = false;
+        let mut frame_stats = FullFrameStats::default();
+
+        for message in txn.scene_ops.drain(..) {
+            match message {
+                SceneMsg::UpdateEpoch(pipeline_id, epoch) => {
+                    scene.update_epoch(pipeline_id, epoch);
+                }
+                SceneMsg::SetQualitySettings { settings } => {
+                    doc.view.quality_settings = settings;
+                }
+                SceneMsg::SetDocumentView { device_rect } => {
+                    doc.view.device_rect = device_rect;
+                }
+                SceneMsg::SetDisplayList {
+                    epoch,
+                    pipeline_id,
+                    background,
+                    viewport_size,
+                    display_list,
+                } => {
+                    // Timing and size stats are recorded even if the display
+                    // list ends up being skipped below.
+                    let (builder_start_time_ns, builder_end_time_ns, send_time_ns) =
+                      display_list.times();
+                    let content_send_time = profiler::ns_to_ms(precise_time_ns() - send_time_ns);
+                    let dl_build_time = profiler::ns_to_ms(builder_end_time_ns - builder_start_time_ns);
+                    profile.set(profiler::CONTENT_SEND_TIME, content_send_time);
+                    profile.set(profiler::DISPLAY_LIST_BUILD_TIME, dl_build_time);
+                    profile.set(profiler::DISPLAY_LIST_MEM, profiler::bytes_to_mb(display_list.size_in_bytes()));
+
+                    let (gecko_display_list_time, full_display_list) = display_list.gecko_display_list_stats();
+                    frame_stats.full_display_list = full_display_list;
+                    frame_stats.gecko_display_list_time = gecko_display_list_time;
+                    frame_stats.wr_display_list_time += dl_build_time;
+
+                    // Ignore display lists for pipelines that were removed
+                    // earlier in this same transaction.
+                    if self.removed_pipelines.contains(&pipeline_id) {
+                        continue;
+                    }
+
+                    // Note: We could further reduce the amount of unnecessary scene
+                    // building by keeping track of which pipelines are used by the
+                    // scene (bug 1490751).
+                    rebuild_scene = true;
+
+                    scene.set_display_list(
+                        pipeline_id,
+                        epoch,
+                        display_list,
+                        background,
+                        viewport_size,
+                    );
+                }
+                SceneMsg::SetRootPipeline(pipeline_id) => {
+                    // Only a change of root pipeline triggers a rebuild.
+                    if scene.root_pipeline_id != Some(pipeline_id) {
+                        rebuild_scene = true;
+                        scene.set_root_pipeline_id(pipeline_id);
+                    }
+                }
+                SceneMsg::RemovePipeline(pipeline_id) => {
+                    scene.remove_pipeline(pipeline_id);
+                    // Tracked twice: the set filters later display lists in this
+                    // transaction, the vec is reported in the BuiltTransaction.
+                    self.removed_pipelines.insert(pipeline_id);
+                    removed_pipelines.push((pipeline_id, txn.document_id));
+                }
+            }
+        }
+
+        // The set is only meaningful within a single transaction.
+        self.removed_pipelines.clear();
+
+        let mut built_scene = None;
+        let mut interner_updates = None;
+        let mut spatial_tree_updates = None;
+
+        if scene.has_root_pipeline() && rebuild_scene {
+
+            let built = SceneBuilder::build(
+                &scene,
+                self.fonts.clone(),
+                &doc.view,
+                &self.config,
+                &mut doc.interners,
+                &mut doc.spatial_tree,
+                &doc.stats,
+            );
+
+            // Update the allocation stats for next scene
+            doc.stats = built.get_stats();
+
+            // Retrieve the pending updates from the document's interners.
+            interner_updates = Some(
+                doc.interners.end_frame_and_get_pending_updates()
+            );
+
+            // Likewise for the document's spatial tree.
+            spatial_tree_updates = Some(
+                doc.spatial_tree.end_frame_and_get_pending_updates()
+            );
+
+            built_scene = Some(built);
+        }
+
+        let scene_build_time_ms =
+            profiler::ns_to_ms(precise_time_ns() - scene_build_start);
+        profile.set(profiler::SCENE_BUILD_TIME, scene_build_time_ms);
+
+        frame_stats.scene_build_time += scene_build_time_ms;
+
+        if !txn.blob_requests.is_empty() {
+            profile.start_time(profiler::BLOB_RASTERIZATION_TIME);
+
+            let is_low_priority = false;
+            rasterize_blobs(&mut txn, is_low_priority);
+
+            profile.end_time(profiler::BLOB_RASTERIZATION_TIME);
+            // The recorded value is scaled by 1000 before being passed to
+            // `Duration::from_micros` (so it is presumably in milliseconds).
+            Telemetry::record_rasterize_blobs_time(Duration::from_micros((profile.get(profiler::BLOB_RASTERIZATION_TIME).unwrap() * 1000.00) as u64));
+        }
+
+        // Handle notifications that were waiting for the scene to be built.
+        // NOTE(review): presumably `drain_filter` removes the matching entries
+        // from the list before notifying them - confirm against the helper.
+        drain_filter(
+            &mut txn.notifications,
+            |n| { n.when() == Checkpoint::SceneBuilt },
+            |n| { n.notify(); },
+        );
+
+        // Artificial delay requested through SimulateLongSceneBuild.
+        if self.simulate_slow_ms > 0 {
+            thread::sleep(Duration::from_millis(self.simulate_slow_ms as u64));
+        }
+
+        Box::new(BuiltTransaction {
+            document_id: txn.document_id,
+            render_frame: txn.generate_frame.as_bool(),
+            invalidate_rendered_frame: txn.invalidate_rendered_frame,
+            built_scene,
+            view: doc.view,
+            rasterized_blobs: txn.rasterized_blobs,
+            resource_updates: txn.resource_updates,
+            blob_rasterizer: txn.blob_rasterizer,
+            frame_ops: txn.frame_ops,
+            removed_pipelines,
+            notifications: txn.notifications,
+            interner_updates,
+            spatial_tree_updates,
+            profile,
+            frame_stats,
+        })
+    }
+
+    /// Send the results of process_transaction back to the render backend.
+    ///
+    /// When hooks are installed and at least one transaction carries a built
+    /// scene, this also performs the scene swap handshake: it calls
+    /// `pre_scene_swap`, sends the transactions together with a reply channel,
+    /// blocks until a reply arrives on that channel, calls `post_scene_swap`,
+    /// and finally signals the other side that it may resume.
+    ///
+    /// Otherwise the transactions are sent without a reply channel and, if
+    /// hooks are installed, either `post_resource_update` or
+    /// `post_empty_scene_build` is invoked.
+    fn forward_built_transactions(&mut self, txns: Vec<Box<BuiltTransaction>>) {
+        // `pipeline_info` is Some exactly when the swap handshake is needed.
+        let (pipeline_info, result_tx, result_rx) = match self.hooks {
+            Some(ref hooks) => {
+                if txns.iter().any(|txn| txn.built_scene.is_some()) {
+                    let info = PipelineInfo {
+                        // Epoch of every pipeline of every built scene, keyed by
+                        // (pipeline id, document id).
+                        epochs: txns.iter()
+                            .filter(|txn| txn.built_scene.is_some())
+                            .map(|txn| {
+                                txn.built_scene.as_ref().unwrap()
+                                    .pipeline_epochs.iter()
+                                    .zip(iter::repeat(txn.document_id))
+                                    .map(|((&pipeline_id, &epoch), document_id)| ((pipeline_id, document_id), epoch))
+                            }).flatten().collect(),
+                        // Removed pipelines of all transactions, built or not.
+                        removed_pipelines: txns.iter()
+                            .map(|txn| txn.removed_pipelines.clone())
+                            .flatten().collect(),
+                    };
+
+                    let (tx, rx) = single_msg_channel();
+                    // Only the first transaction with a built scene contributes
+                    // to the scene build time telemetry.
+                    let txn = txns.iter().find(|txn| txn.built_scene.is_some()).unwrap();
+                    Telemetry::record_scenebuild_time(Duration::from_millis(txn.profile.get(profiler::SCENE_BUILD_TIME).unwrap() as u64));
+                    hooks.pre_scene_swap();
+
+                    (Some(info), Some(tx), Some(rx))
+                } else {
+                    (None, None, None)
+                }
+            }
+            _ => (None, None, None)
+        };
+
+        let timer_id = Telemetry::start_sceneswap_time();
+        // Collected up front because `txns` is moved into the message below.
+        let document_ids = txns.iter().map(|txn| txn.document_id).collect();
+        // Only needed on the no-swap path: documents whose transaction carries
+        // resource updates or invalidates the rendered frame.
+        let have_resources_updates : Vec<DocumentId> = if pipeline_info.is_none() {
+            txns.iter()
+                .filter(|txn| !txn.resource_updates.is_empty() || txn.invalidate_rendered_frame)
+                .map(|txn| txn.document_id)
+                .collect()
+        } else {
+            Vec::new()
+        };
+
+        // While a capture sequence is active the capture config is sent along.
+        #[cfg(feature = "capture")]
+        match self.capture_config {
+            Some(ref config) => self.send(SceneBuilderResult::CapturedTransactions(txns, config.clone(), result_tx)),
+            None => self.send(SceneBuilderResult::Transactions(txns, result_tx)),
+        };
+
+        #[cfg(not(feature = "capture"))]
+        self.send(SceneBuilderResult::Transactions(txns, result_tx));
+
+        if let Some(pipeline_info) = pipeline_info {
+            // Block until the swap is done, then invoke the hook.
+            let swap_result = result_rx.unwrap().recv();
+            Telemetry::stop_and_accumulate_sceneswap_time(timer_id);
+            // `pipeline_info` is only Some when hooks are installed, so the
+            // unwrap cannot fail here.
+            self.hooks.as_ref().unwrap().post_scene_swap(&document_ids,
+                                                         pipeline_info);
+            // Once the hook is done, allow the RB thread to resume
+            if let Ok(SceneSwapResult::Complete(resume_tx)) = swap_result {
+                // A failure to send the resume signal is deliberately ignored.
+                resume_tx.send(()).ok();
+            }
+        } else {
+            // No swap happened, so the swap timer is discarded.
+            Telemetry::cancel_sceneswap_time(timer_id);
+            if !have_resources_updates.is_empty() {
+                if let Some(ref hooks) = self.hooks {
+                    hooks.post_resource_update(&have_resources_updates);
+                }
+            } else if let Some(ref hooks) = self.hooks {
+                hooks.post_empty_scene_build();
+            }
+        }
+    }
+
+    /// Reports CPU heap memory used by the SceneBuilder.
+    ///
+    /// Accumulates, into a fresh `MemoryReport`, the memory reported by the
+    /// interners and the scene of every document.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `size_of_ops` is `None`.
+    fn report_memory(&mut self) -> MemoryReport {
+        let size_ops = self.size_of_ops.as_mut().unwrap();
+        let mut totals = MemoryReport::default();
+
+        for Document { interners, scene, .. } in self.documents.values() {
+            interners.report_memory(size_ops, &mut totals);
+            scene.report_memory(size_ops, &mut totals);
+        }
+
+        totals
+    }
+}
+
+/// A scene builder thread which executes expensive operations such as blob rasterization
+/// with a lower priority than the normal scene builder thread.
+///
+/// After rasterizing blobs, the scene building request is forwarded to the normal scene
+/// builder where the FrameBuilder is generated.
+pub struct LowPrioritySceneBuilderThread {
+    /// Incoming requests for this thread.
+    pub rx: Receiver<SceneBuilderRequest>,
+    /// Channel on which requests are forwarded once handled here.
+    pub tx: Sender<SceneBuilderRequest>,
+}
+
+impl LowPrioritySceneBuilderThread {
+    /// The low priority thread's event loop.
+    ///
+    /// Transactions have their blobs rasterized before being forwarded on
+    /// `tx`; every other request is forwarded untouched. The loop ends after
+    /// forwarding a `ShutDown` request or when receiving fails.
+    ///
+    /// # Panics
+    ///
+    /// Panics if forwarding a request on `tx` fails.
+    pub fn run(&mut self) {
+        while let Ok(request) = self.rx.recv() {
+            match request {
+                SceneBuilderRequest::Transactions(pending) => {
+                    let processed : Vec<Box<TransactionMsg>> = pending
+                        .into_iter()
+                        .map(|txn| self.process_transaction(txn))
+                        .collect();
+                    self.tx.send(SceneBuilderRequest::Transactions(processed)).unwrap();
+                }
+                SceneBuilderRequest::ShutDown(sync) => {
+                    self.tx.send(SceneBuilderRequest::ShutDown(sync)).unwrap();
+                    break;
+                }
+                other => {
+                    self.tx.send(other).unwrap();
+                }
+            }
+        }
+    }
+
+    /// Rasterizes the blobs requested by `txn` at low priority.
+    ///
+    /// The time spent is recorded in the transaction's profile and reported to
+    /// telemetry. The transaction is returned with `blob_requests` replaced by
+    /// an empty list.
+    fn process_transaction(&mut self, mut txn: Box<TransactionMsg>) -> Box<TransactionMsg> {
+        txn.profile.start_time(profiler::BLOB_RASTERIZATION_TIME);
+        rasterize_blobs(&mut txn, /* is_low_priority */ true);
+        txn.profile.end_time(profiler::BLOB_RASTERIZATION_TIME);
+
+        // The recorded value is scaled by 1000 to obtain microseconds.
+        let recorded = txn.profile.get(profiler::BLOB_RASTERIZATION_TIME).unwrap();
+        let micros = (recorded * 1000.00) as u64;
+        Telemetry::record_rasterize_blobs_time(Duration::from_micros(micros));
+
+        txn.blob_requests = Vec::new();
+        txn
+    }
+}
diff --git a/gfx/wr/webrender/src/scene_building.rs b/gfx/wr/webrender/src/scene_building.rs
new file mode 100644
index 0000000000..8ef2cee560
--- /dev/null
+++ b/gfx/wr/webrender/src/scene_building.rs
@@ -0,0 +1,4128 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! # Scene building
+//!
+//! Scene building is the phase during which display lists, a representation built for
+//! serialization, are turned into a scene, webrender's internal representation that is
+//! suited for rendering frames.
+//!
+//! This phase is happening asynchronously on the scene builder thread.
+//!
+//! # General algorithm
+//!
+//! The important aspects of scene building are:
+//! - Building up primitive lists (much of the cost of scene building goes here).
+//! - Creating pictures for content that needs to be rendered into a surface, be it so that
+//!   filters can be applied or for caching purposes.
+//! - Maintaining a temporary stack of stacking contexts to keep track of some of the
+//!   drawing states.
+//! - Stitching multiple display lists which reference each other (without cycles) into
+//!   a single scene (see build_reference_frame).
+//! - Interning, which detects when some of the retained state stays the same between display
+//!   lists.
+//!
+//! The scene builder linearly traverses the serialized display list which is naturally
+//! ordered back-to-front, accumulating primitives in the top-most stacking context's
+//! primitive list.
+//! At the end of each stacking context (see pop_stacking_context), its primitive list is
+//! either handed over to a picture if one is created, or it is concatenated into the parent
+//! stacking context's primitive list.
+//!
+//! The flow of the algorithm is mostly linear except when handling:
+//!  - shadow stacks (see push_shadow and pop_all_shadows),
+//!  - backdrop filters (see add_backdrop_filter)
+//!
+
+use api::{AlphaType, BorderDetails, BorderDisplayItem, BuiltDisplayListIter, BuiltDisplayList, PrimitiveFlags};
+use api::{ClipId, ColorF, CommonItemProperties, ComplexClipRegion, ComponentTransferFuncType, RasterSpace};
+use api::{DisplayItem, DisplayItemRef, ExtendMode, ExternalScrollId, FilterData};
+use api::{FilterOp, FilterPrimitive, FontInstanceKey, FontSize, GlyphInstance, GlyphOptions, GradientStop};
+use api::{IframeDisplayItem, ImageKey, ImageRendering, ItemRange, ColorDepth, QualitySettings};
+use api::{LineOrientation, LineStyle, NinePatchBorderSource, PipelineId, MixBlendMode, StackingContextFlags};
+use api::{PropertyBinding, ReferenceFrameKind, ScrollFrameDescriptor, ReferenceFrameMapper};
+use api::{APZScrollGeneration, HasScrollLinkedEffect, Shadow, SpatialId, StickyFrameDescriptor, ImageMask, ItemTag};
+use api::{ClipMode, PrimitiveKeyKind, TransformStyle, YuvColorSpace, ColorRange, YuvData, TempFilterData};
+use api::{ReferenceTransformBinding, Rotation, FillRule, SpatialTreeItem, ReferenceFrameDescriptor};
+use api::units::*;
+use crate::image_tiling::simplify_repeated_primitive;
+use crate::clip::{ClipItemKey, ClipStore, ClipItemKeyKind};
+use crate::clip::{ClipInternData, ClipNodeId, ClipLeafId};
+use crate::clip::{PolygonDataHandle, ClipTreeBuilder};
+use crate::segment::EdgeAaSegmentMask;
+use crate::spatial_tree::{SceneSpatialTree, SpatialNodeContainer, SpatialNodeIndex, get_external_scroll_offset};
+use crate::frame_builder::{FrameBuilderConfig};
+use glyph_rasterizer::{FontInstance, SharedFontResources};
+use crate::hit_test::HitTestingScene;
+use crate::intern::Interner;
+use crate::internal_types::{FastHashMap, LayoutPrimitiveInfo, Filter, PlaneSplitterIndex, PipelineInstanceId};
+use crate::picture::{Picture3DContext, PictureCompositeMode, PicturePrimitive};
+use crate::picture::{BlitReason, OrderedPictureChild, PrimitiveList, SurfaceInfo, PictureFlags};
+use crate::picture_graph::PictureGraph;
+use crate::prim_store::{PrimitiveInstance};
+use crate::prim_store::{PrimitiveInstanceKind, NinePatchDescriptor, PrimitiveStore};
+use crate::prim_store::{InternablePrimitive, SegmentInstanceIndex, PictureIndex};
+use crate::prim_store::{PolygonKey};
+use crate::prim_store::backdrop::{BackdropCapture, BackdropRender};
+use crate::prim_store::borders::{ImageBorder, NormalBorderPrim};
+use crate::prim_store::gradient::{
+    GradientStopKey, LinearGradient, RadialGradient, RadialGradientParams, ConicGradient,
+    ConicGradientParams, optimize_radial_gradient, apply_gradient_local_clip,
+    optimize_linear_gradient, self,
+};
+use crate::prim_store::image::{Image, YuvImage};
+use crate::prim_store::line_dec::{LineDecoration, LineDecorationCacheKey, get_line_decoration_size};
+use crate::prim_store::picture::{Picture, PictureCompositeKey, PictureKey};
+use crate::prim_store::text_run::TextRun;
+use crate::render_backend::SceneView;
+use crate::resource_cache::ImageRequest;
+use crate::scene::{Scene, ScenePipeline, BuiltScene, SceneStats, StackingContextHelpers};
+use crate::scene_builder_thread::Interners;
+use crate::space::SpaceSnapper;
+use crate::spatial_node::{
+    ReferenceFrameInfo, StickyFrameInfo, ScrollFrameKind, SpatialNodeUid, SpatialNodeType
+};
+use crate::tile_cache::TileCacheBuilder;
+use euclid::approxeq::ApproxEq;
+use std::{f32, mem, usize};
+use std::collections::vec_deque::VecDeque;
+use std::sync::Arc;
+use crate::util::{VecHelper};
+use crate::filterdata::{SFilterDataComponent, SFilterData, SFilterDataKey};
+
+/// Offsets primitives (and clips) by the external scroll offset
+/// supplied to scroll nodes.
+///
+/// Holds a one-entry cache: the offset computed for the most recently
+/// queried spatial node.
+pub struct ScrollOffsetMapper {
+    /// Spatial node that `current_offset` was computed for
+    /// (`SpatialNodeIndex::INVALID` until the first query).
+    pub current_spatial_node: SpatialNodeIndex,
+    /// Cached external scroll offset of `current_spatial_node`.
+    pub current_offset: LayoutVector2D,
+}
+
+impl ScrollOffsetMapper {
+    /// Creates a mapper with nothing cached: the cached node is
+    /// `SpatialNodeIndex::INVALID` and the cached offset is zero.
+    fn new() -> Self {
+        Self {
+            current_spatial_node: SpatialNodeIndex::INVALID,
+            current_offset: LayoutVector2D::zero(),
+        }
+    }
+
+    /// Return the accumulated external scroll offset for a spatial node.
+    ///
+    /// Asking for the same node as the previous call (the common case) is
+    /// answered from the cached value; any other node is looked up through
+    /// the spatial tree and becomes the new cached entry.
+    fn external_scroll_offset(
+        &mut self,
+        spatial_node_index: SpatialNodeIndex,
+        spatial_tree: &SceneSpatialTree,
+    ) -> LayoutVector2D {
+        if spatial_node_index == self.current_spatial_node {
+            return self.current_offset;
+        }
+
+        self.current_spatial_node = spatial_node_index;
+        self.current_offset = get_external_scroll_offset(spatial_tree, spatial_node_index);
+        self.current_offset
+    }
+}
+
+/// A data structure that keeps track of mapping between API Ids for spatials and the indices
+/// used internally in the SpatialTree to avoid having to do HashMap lookups for primitives
+/// and clips during frame building.
+#[derive(Default)]
+pub struct NodeIdToIndexMapper {
+    /// API spatial id -> internal spatial node index. Each id is registered at most once.
+    spatial_node_map: FastHashMap<SpatialId, SpatialNodeIndex>,
+}
+
+impl NodeIdToIndexMapper {
+    /// Registers the spatial node `index` under the API id `id`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `id` has already been registered.
+    fn add_spatial_node(&mut self, id: SpatialId, index: SpatialNodeIndex) {
+        let _old_value = self.spatial_node_map.insert(id, index);
+        assert!(_old_value.is_none());
+    }
+
+    /// Returns the spatial node index registered for `id`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `id` has not been registered.
+    fn get_spatial_node_index(&self, id: SpatialId) -> SpatialNodeIndex {
+        self.spatial_node_map[&id]
+    }
+}
+
+/// The set of compositing operations (filters and blend mode) carried
+/// together during scene building.
+#[derive(Debug, Clone, Default)]
+pub struct CompositeOps {
+    // Requires only a single texture as input (e.g. most filters)
+    pub filters: Vec<Filter>,
+    // Data for the `Filter::ComponentTransfer` entries of `filters`; one
+    // entry is consumed, in order, per component transfer filter.
+    pub filter_datas: Vec<FilterData>,
+    pub filter_primitives: Vec<FilterPrimitive>,
+
+    // Requires two source textures (e.g. mix-blend-mode)
+    pub mix_blend_mode: Option<MixBlendMode>,
+}
+
+impl CompositeOps {
+    /// Bundles the given filters, filter data, filter primitives and optional
+    /// mix-blend-mode into a `CompositeOps`.
+    pub fn new(
+        filters: Vec<Filter>,
+        filter_datas: Vec<FilterData>,
+        filter_primitives: Vec<FilterPrimitive>,
+        mix_blend_mode: Option<MixBlendMode>
+    ) -> Self {
+        Self { filters, filter_datas, filter_primitives, mix_blend_mode }
+    }
+
+    /// Returns true when there are no filters, no filter primitives and no
+    /// mix-blend-mode. `filter_datas` is not consulted.
+    pub fn is_empty(&self) -> bool {
+        let has_any_op = !self.filters.is_empty()
+            || !self.filter_primitives.is_empty()
+            || self.mix_blend_mode.is_some();
+
+        !has_any_op
+    }
+
+    /// Returns true if this CompositeOps contains any filters that affect
+    /// the content (false if no filters, or filters are all no-ops).
+    ///
+    /// Any filter primitive counts as affecting the content.
+    fn has_valid_filters(&self) -> bool {
+        // Component transfer filters consume the entries of `filter_datas` in
+        // order; this is the index of the next entry to consume.
+        let mut next_filter_data = 0;
+
+        for filter in &self.filters {
+            let affects_content = match filter {
+                Filter::ComponentTransfer => {
+                    let sanitized = self.filter_datas[next_filter_data].sanitize();
+                    next_filter_data += 1;
+                    !sanitized.is_identity()
+                }
+                _ => !filter.is_noop(),
+            };
+
+            if affects_content {
+                return true;
+            }
+        }
+
+        !self.filter_primitives.is_empty()
+    }
+}
+
+/// Represents the current input for a picture chain builder (either a
+/// prim list from the stacking context, or a wrapped picture instance).
+enum PictureSource {
+    /// A list of primitives that has not yet been wrapped in a picture.
+    PrimitiveList {
+        prim_list: PrimitiveList,
+    },
+    /// A primitive instance wrapping a picture; `add_picture` places it in a
+    /// fresh prim list before wrapping it in the next picture of the chain.
+    WrappedPicture {
+        instance: PrimitiveInstance,
+    },
+}
+
+/// Helper struct to build picture chains during scene building from
+/// a flattened stacking context struct.
+///
+/// Each call to `add_picture` consumes the builder and returns a new one
+/// whose input is the picture that was just created; `finalize` yields the
+/// primitive instance for the outermost picture.
+struct PictureChainBuilder {
+    /// The current input source for the next picture
+    current: PictureSource,
+
+    /// Positioning node for this picture chain
+    spatial_node_index: SpatialNodeIndex,
+    /// Prim flags for any pictures in this chain
+    flags: PrimitiveFlags,
+    /// Requested raster space for enclosing stacking context
+    raster_space: RasterSpace,
+    /// If true, set first picture as a resolve target
+    /// (`PictureFlags::IS_RESOLVE_TARGET`). Cleared once a picture was added.
+    set_resolve_target: bool,
+    /// If true, mark the last picture as a sub-graph
+    /// (`PictureFlags::IS_SUB_GRAPH`, applied in `finalize`).
+    establishes_sub_graph: bool,
+}
+
+impl PictureChainBuilder {
+    /// Start a picture chain whose initial content is a list of primitives.
+    ///
+    /// When `is_sub_graph` is set, the first picture of the chain is flagged
+    /// as a resolve target and the outermost picture as a sub-graph.
+    fn from_prim_list(
+        prim_list: PrimitiveList,
+        flags: PrimitiveFlags,
+        spatial_node_index: SpatialNodeIndex,
+        raster_space: RasterSpace,
+        is_sub_graph: bool,
+    ) -> Self {
+        let current = PictureSource::PrimitiveList { prim_list };
+
+        Self {
+            current,
+            spatial_node_index,
+            flags,
+            raster_space,
+            set_resolve_target: is_sub_graph,
+            establishes_sub_graph: is_sub_graph,
+        }
+    }
+
+    /// Start a picture chain whose initial content is an existing picture
+    /// wrapper instance. Such a chain never sets the resolve-target or
+    /// sub-graph flags.
+    fn from_instance(
+        instance: PrimitiveInstance,
+        flags: PrimitiveFlags,
+        spatial_node_index: SpatialNodeIndex,
+        raster_space: RasterSpace,
+    ) -> Self {
+        let current = PictureSource::WrappedPicture { instance };
+
+        Self {
+            current,
+            spatial_node_index,
+            flags,
+            raster_space,
+            set_resolve_target: false,
+            establishes_sub_graph: false,
+        }
+    }
+
+    /// Wrap the current content in a new picture using `composite_mode`, and
+    /// return a builder whose content is that new picture.
+    #[must_use]
+    fn add_picture(
+        self,
+        composite_mode: PictureCompositeMode,
+        clip_node_id: ClipNodeId,
+        context_3d: Picture3DContext<OrderedPictureChild>,
+        interners: &mut Interners,
+        prim_store: &mut PrimitiveStore,
+        prim_instances: &mut Vec<PrimitiveInstance>,
+        clip_tree_builder: &mut ClipTreeBuilder,
+    ) -> PictureChainBuilder {
+        let PictureChainBuilder {
+            current,
+            spatial_node_index,
+            flags: prim_flags,
+            raster_space,
+            set_resolve_target,
+            establishes_sub_graph,
+        } = self;
+
+        // A previously built picture must first be placed into a prim list of
+        // its own so that it can become the content of the new picture.
+        let content = match current {
+            PictureSource::PrimitiveList { prim_list } => prim_list,
+            PictureSource::WrappedPicture { instance } => {
+                let mut wrapper_list = PrimitiveList::empty();
+                wrapper_list.add_prim(
+                    instance,
+                    LayoutRect::zero(),
+                    spatial_node_index,
+                    prim_flags,
+                    prim_instances,
+                    clip_tree_builder,
+                );
+                wrapper_list
+            }
+        };
+
+        let mut pic_flags = PictureFlags::empty();
+        if set_resolve_target {
+            pic_flags |= PictureFlags::IS_RESOLVE_TARGET;
+        }
+
+        let pic_index = PictureIndex(prim_store.pictures
+            .alloc()
+            .init(PicturePrimitive::new_image(
+                Some(composite_mode.clone()),
+                context_3d,
+                prim_flags,
+                content,
+                spatial_node_index,
+                raster_space,
+                pic_flags,
+            ))
+        );
+
+        let wrapped = create_prim_instance(
+            pic_index,
+            Some(composite_mode).into(),
+            raster_space,
+            clip_node_id,
+            interners,
+            clip_tree_builder,
+        );
+
+        PictureChainBuilder {
+            current: PictureSource::WrappedPicture { instance: wrapped },
+            spatial_node_index,
+            flags: prim_flags,
+            raster_space,
+            // Only the first picture of a chain can be the resolve target.
+            set_resolve_target: false,
+            establishes_sub_graph,
+        }
+    }
+
+    /// Finish the chain and return the primitive instance of its outermost
+    /// picture. If no picture has been added yet, a pass-through picture
+    /// (no composite mode) is created around the prim list, using
+    /// `clip_node_id` for its instance.
+    fn finalize(
+        self,
+        clip_node_id: ClipNodeId,
+        interners: &mut Interners,
+        prim_store: &mut PrimitiveStore,
+        clip_tree_builder: &mut ClipTreeBuilder,
+    ) -> PrimitiveInstance {
+        let sub_graph_flags = if self.establishes_sub_graph {
+            PictureFlags::IS_SUB_GRAPH
+        } else {
+            PictureFlags::empty()
+        };
+
+        let prim_list = match self.current {
+            PictureSource::WrappedPicture { instance } => {
+                // A picture already exists: just tag it and hand it back.
+                let pic_index = instance.kind.as_pic();
+                prim_store.pictures[pic_index.0].flags |= sub_graph_flags;
+                return instance;
+            }
+            PictureSource::PrimitiveList { prim_list } => prim_list,
+        };
+
+        let mut pic_flags = sub_graph_flags;
+        if self.set_resolve_target {
+            pic_flags |= PictureFlags::IS_RESOLVE_TARGET;
+        }
+
+        // If no picture was created for this stacking context, create a
+        // pass-through wrapper now. This is only needed in 1-2 edge cases
+        // now, and will be removed as a follow up.
+        let pic_index = PictureIndex(prim_store.pictures
+            .alloc()
+            .init(PicturePrimitive::new_image(
+                None,
+                Picture3DContext::Out,
+                self.flags,
+                prim_list,
+                self.spatial_node_index,
+                self.raster_space,
+                pic_flags,
+            ))
+        );
+
+        create_prim_instance(
+            pic_index,
+            None.into(),
+            self.raster_space,
+            clip_node_id,
+            interners,
+            clip_tree_builder,
+        )
+    }
+
+    /// Returns true if the current content of this builder is a wrapped
+    /// picture rather than a bare prim list.
+    #[allow(dead_code)]
+    fn has_picture(&self) -> bool {
+        matches!(self.current, PictureSource::WrappedPicture { .. })
+    }
+}
+
+bitflags! {
+    /// Slice flags
+    ///
+    /// Each flag occupies its own bit of the `u8`, so flags can be combined.
+    /// NOTE(review): presumably these describe picture cache slices (they are
+    /// passed to `add_tile_cache_barrier_if_needed`) - confirm at the callee.
+    pub struct SliceFlags : u8 {
+        /// Slice created by a prim that has PrimitiveFlags::IS_SCROLLBAR_CONTAINER
+        const IS_SCROLLBAR = 1;
+        /// Represents an atomic container (can't split out compositor surfaces in this slice)
+        const IS_ATOMIC = 2;
+    }
+}
+
+/// A structure that converts a serialized display list into a form that WebRender
+/// can use to later build a frame. This structure produces a BuiltScene. Public
+/// members are typically those that are destructured into the BuiltScene.
+pub struct SceneBuilder<'a> {
+    /// The scene that we are currently building.
+    scene: &'a Scene,
+
+    /// The map of all font instances.
+    fonts: SharedFontResources,
+
+    /// A stack of mappers that convert a SpatialId into the index type that
+    /// the SpatialTree uses. A mapper is pushed for the root pipeline before
+    /// traversal starts, and one is popped each time an iframe finishes.
+    id_to_index_mapper_stack: Vec<NodeIdToIndexMapper>,
+
+    /// A stack of stacking context properties.
+    sc_stack: Vec<FlattenedStackingContext>,
+
+    /// Stack of spatial node indices forming containing block for 3d contexts
+    containing_block_stack: Vec<SpatialNodeIndex>,
+
+    /// Stack of requested raster spaces for stacking contexts
+    raster_space_stack: Vec<RasterSpace>,
+
+    /// Maintains state for any currently active shadows
+    pending_shadow_items: VecDeque<ShadowItem>,
+
+    /// The SpatialTree that is being populated during scene building.
+    pub spatial_tree: &'a mut SceneSpatialTree,
+
+    /// The store of primitives.
+    pub prim_store: PrimitiveStore,
+
+    /// Information about all primitives involved in hit testing.
+    pub hit_testing_scene: HitTestingScene,
+
+    /// The store which holds all complex clipping information.
+    pub clip_store: ClipStore,
+
+    /// The configuration to use for the FrameBuilder. We consult this in
+    /// order to determine the default font.
+    pub config: FrameBuilderConfig,
+
+    /// Reference to the set of data that is interned across display lists.
+    interners: &'a mut Interners,
+
+    /// Helper struct to map stacking context coords <-> reference frame coords.
+    rf_mapper: ReferenceFrameMapper,
+
+    /// Helper struct to map spatial nodes to external scroll offsets.
+    external_scroll_mapper: ScrollOffsetMapper,
+
+    /// Stack of sizes of the iframes currently being built; an entry is popped
+    /// when an iframe finishes. Its depth is the current recursion depth of
+    /// iframes encountered, used to restrict picture caching slices to only
+    /// the top-level content frame. The top entry is also used as the content
+    /// size when computing a `ReferenceTransformBinding::Computed` transform.
+    iframe_size: Vec<LayoutSize>,
+
+    /// Clip-chain for root iframes applied to any tile caches created within this iframe
+    root_iframe_clip: Option<ClipId>,
+
+    /// The current quality / performance settings for this scene.
+    quality_settings: QualitySettings,
+
+    /// Maintains state about the list of tile caches being built for this scene.
+    tile_cache_builder: TileCacheBuilder,
+
+    /// A helper struct to snap local rects in device space. During frame
+    /// building we may establish new raster roots, however typically that is in
+    /// cases where we won't be applying snapping (e.g. has perspective), or in
+    /// edge cases (e.g. SVG filter) where we can accept slightly incorrect
+    /// behaviour in favour of getting the common case right.
+    snap_to_device: SpaceSnapper,
+
+    /// A DAG that represents dependencies between picture primitives. This builds
+    /// a set of passes to run various picture processing passes in during frame
+    /// building, in a way that pictures are processed before (or after) their
+    /// dependencies, without relying on recursion for those passes.
+    picture_graph: PictureGraph,
+
+    /// Keep track of allocated plane splitters for this scene. A plane
+    /// splitter is allocated whenever we encounter a new 3d rendering context.
+    /// They are stored outside the picture since it makes it easier for them
+    /// to be referenced by both the owning 3d rendering context and the child
+    /// pictures that contribute to the splitter.
+    /// During scene building "allocating" a splitter is just incrementing an index.
+    /// Splitter objects themselves are allocated and recycled in the frame builder.
+    next_plane_splitter_index: usize,
+
+    /// A list of all primitive instances in the scene. We store them as a single
+    /// array so that multiple different systems (e.g. tile-cache, visibility, property
+    /// animation bindings) can store index buffers to prim instances.
+    prim_instances: Vec<PrimitiveInstance>,
+
+    /// A map of pipeline ids encountered during scene build - used to create unique
+    /// pipeline instance ids as they are encountered.
+    pipeline_instance_ids: FastHashMap<PipelineId, u32>,
+
+    /// A list of surfaces (backing textures) that are relevant for this scene.
+    /// Every picture is assigned to a surface (either a new surface if the picture
+    /// has a composite mode, or the parent surface if it's a pass-through).
+    surfaces: Vec<SurfaceInfo>,
+
+    /// Used to build a ClipTree from the clip-chains, clips and state during scene building.
+    clip_tree_builder: ClipTreeBuilder,
+}
+
+impl<'a> SceneBuilder<'a> {
+    /// Build a `BuiltScene` from the root pipeline of `scene`.
+    ///
+    /// The display list is traversed, then the tile cache pictures are
+    /// constructed and finalized, and finally the clip tree is produced.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the scene has no root pipeline id, or if that pipeline is
+    /// missing from `scene.pipelines`.
+    pub fn build(
+        scene: &Scene,
+        fonts: SharedFontResources,
+        view: &SceneView,
+        frame_builder_config: &FrameBuilderConfig,
+        interners: &mut Interners,
+        spatial_tree: &mut SceneSpatialTree,
+        stats: &SceneStats,
+    ) -> BuiltScene {
+        profile_scope!("build_scene");
+
+        // The render backend has already verified that the root pipeline exists.
+        let root_pipeline_id = scene.root_pipeline_id.unwrap();
+        let root_pipeline = scene.pipelines.get(&root_pipeline_id).unwrap();
+
+        // A fully transparent background is treated as no background at all.
+        let background_color = root_pipeline
+            .background_color
+            .filter(|color| color.a > 0.0);
+
+        let root_reference_frame_index = spatial_tree.root_reference_frame_index();
+
+        // Scene building assumes a 1:1 picture -> raster pixel scale.
+        let snap_to_device = SpaceSnapper::new(
+            root_reference_frame_index,
+            RasterPixelScale::new(1.0),
+        );
+
+        let tile_cache_builder = TileCacheBuilder::new(
+            root_reference_frame_index,
+            frame_builder_config.background_color,
+        );
+
+        let mut builder = SceneBuilder {
+            scene,
+            spatial_tree,
+            fonts,
+            config: *frame_builder_config,
+            id_to_index_mapper_stack: Vec::new(),
+            hit_testing_scene: HitTestingScene::new(&stats.hit_test_stats),
+            pending_shadow_items: VecDeque::new(),
+            sc_stack: Vec::new(),
+            containing_block_stack: Vec::new(),
+            raster_space_stack: vec![RasterSpace::Screen],
+            prim_store: PrimitiveStore::new(&stats.prim_store_stats),
+            clip_store: ClipStore::new(),
+            interners,
+            rf_mapper: ReferenceFrameMapper::new(),
+            external_scroll_mapper: ScrollOffsetMapper::new(),
+            iframe_size: Vec::new(),
+            root_iframe_clip: None,
+            quality_settings: view.quality_settings,
+            tile_cache_builder,
+            snap_to_device,
+            picture_graph: PictureGraph::new(),
+            next_plane_splitter_index: 0,
+            prim_instances: Vec::new(),
+            pipeline_instance_ids: FastHashMap::default(),
+            surfaces: Vec::new(),
+            clip_tree_builder: ClipTreeBuilder::new(),
+        };
+
+        builder.build_all(root_pipeline);
+
+        // Turn the accumulated tile cache state into picture cache primitives.
+        let (tile_cache_config, tile_cache_pictures) = builder.tile_cache_builder.build(
+            &builder.config,
+            &mut builder.prim_store,
+            &builder.spatial_tree,
+            &builder.prim_instances,
+            &mut builder.clip_tree_builder,
+        );
+
+        // Every tile cache picture is a root of the picture graph. Roots have
+        // no parent, hence no parent spatial node is supplied.
+        for &pic_index in &tile_cache_pictures {
+            builder.picture_graph.add_root(pic_index);
+            SceneBuilder::finalize_picture(
+                pic_index,
+                &mut builder.prim_store.pictures,
+                None,
+            );
+        }
+
+        let clip_tree = builder.clip_tree_builder.finalize();
+
+        BuiltScene {
+            has_root_pipeline: scene.has_root_pipeline(),
+            pipeline_epochs: scene.pipeline_epochs.clone(),
+            output_rect: view.device_rect.size().into(),
+            background_color,
+            hit_testing_scene: Arc::new(builder.hit_testing_scene),
+            prim_store: builder.prim_store,
+            clip_store: builder.clip_store,
+            config: builder.config,
+            tile_cache_config,
+            tile_cache_pictures,
+            picture_graph: builder.picture_graph,
+            num_plane_splitters: builder.next_plane_splitter_index,
+            prim_instances: builder.prim_instances,
+            surfaces: builder.surfaces,
+            clip_tree,
+        }
+    }
+
+    /// Walk the prim list of the picture at `pic_index` (recursing into child
+    /// pictures) and assign a spatial node to every cluster or child picture
+    /// whose spatial node is still `SpatialNodeIndex::UNKNOWN`.
+    ///
+    /// A picture with a composite mode supplies its own spatial node;
+    /// otherwise `parent_spatial_node_index` is used. `DISABLE_SNAPPING` is
+    /// set on resolve targets and propagated from children to their parents.
+    ///
+    /// Panics if the picture's own spatial node is unknown, or if it has no
+    /// composite mode and no parent spatial node was supplied.
+    // TODO(gw): This is somewhat hacky - it's unfortunate we need to do this, but it's
+    //           because we can't determine the scroll root until we have checked all the
+    //           primitives in the slice. Perhaps we could simplify this by doing some
+    //           work earlier in the DL builder, so we know what scroll root will be picked?
+    fn finalize_picture(
+        pic_index: PictureIndex,
+        pictures: &mut [PicturePrimitive],
+        parent_spatial_node_index: Option<SpatialNodeIndex>,
+    ) {
+        let pic = &mut pictures[pic_index.0];
+        assert_ne!(pic.spatial_node_index, SpatialNodeIndex::UNKNOWN);
+
+        if pic.flags.contains(PictureFlags::IS_RESOLVE_TARGET) {
+            pic.flags |= PictureFlags::DISABLE_SNAPPING;
+        }
+
+        // Surfaces position their content themselves; pass-through pictures
+        // inherit the spatial node of their parent.
+        let resolved_node = if pic.composite_mode.is_some() {
+            pic.spatial_node_index
+        } else {
+            parent_spatial_node_index.expect("bug: no parent")
+        };
+
+        // Temporarily take the prim list out of the picture so that the
+        // `pictures` slice can be borrowed again while iterating over it.
+        let mut prim_list = mem::replace(&mut pic.prim_list, PrimitiveList::empty());
+
+        for cluster in &mut prim_list.clusters {
+            if cluster.spatial_node_index == SpatialNodeIndex::UNKNOWN {
+                cluster.spatial_node_index = resolved_node;
+            }
+        }
+
+        for &child_index in &prim_list.child_pictures {
+            let child = &mut pictures[child_index.0];
+            if child.spatial_node_index == SpatialNodeIndex::UNKNOWN {
+                child.spatial_node_index = resolved_node;
+            }
+
+            // Children may themselves contain unknown spatial nodes.
+            Self::finalize_picture(
+                child_index,
+                pictures,
+                Some(resolved_node),
+            );
+
+            let child_disables_snapping = pictures[child_index.0]
+                .flags
+                .contains(PictureFlags::DISABLE_SNAPPING);
+            if child_disables_snapping {
+                pictures[pic_index.0].flags |= PictureFlags::DISABLE_SNAPPING;
+            }
+        }
+
+        // Put the (updated) prim list back where it came from.
+        pictures[pic_index.0].prim_list = prim_list;
+    }
+
+    /// Compute the offset that converts a stacking context relative
+    /// coordinate into one relative to the owning reference frame,
+    /// including any external scroll offset that applies to
+    /// `spatial_node_index`.
+    fn current_offset(
+        &mut self,
+        spatial_node_index: SpatialNodeIndex,
+    ) -> LayoutVector2D {
+        // Stacking context <-> reference frame part of the offset.
+        let reference_frame_part = self.rf_mapper.current_offset();
+
+        // External scroll part of the offset, if applicable.
+        let external_scroll_part = self.external_scroll_mapper.external_scroll_offset(
+            spatial_node_index,
+            self.spatial_tree,
+        );
+
+        reference_frame_part + external_scroll_part
+    }
+
+    /// Create the spatial nodes (scroll, reference and sticky frames)
+    /// described by the spatial tree items of `dl`.
+    ///
+    /// The parent of each item is resolved through `get_space` before the
+    /// item itself is built. Encountering `SpatialTreeItem::Invalid` hits
+    /// `unreachable!()`.
+    fn build_spatial_tree_for_display_list(
+        &mut self,
+        dl: &BuiltDisplayList,
+        pipeline_id: PipelineId,
+        instance_id: PipelineInstanceId,
+    ) {
+        dl.iter_spatial_tree(|node| {
+            match node {
+                SpatialTreeItem::ScrollFrame(scroll_frame) => {
+                    let parent = self.get_space(scroll_frame.parent_space);
+                    self.build_scroll_frame(
+                        scroll_frame,
+                        parent,
+                        pipeline_id,
+                        instance_id,
+                    );
+                }
+                SpatialTreeItem::ReferenceFrame(reference_frame) => {
+                    let parent = self.get_space(reference_frame.parent_spatial_id);
+                    self.build_reference_frame(
+                        reference_frame,
+                        parent,
+                        pipeline_id,
+                        instance_id,
+                    );
+                }
+                SpatialTreeItem::StickyFrame(sticky_frame) => {
+                    let parent = self.get_space(sticky_frame.parent_spatial_id);
+                    // Sticky frames take their pipeline id from their own id.
+                    self.build_sticky_frame(
+                        sticky_frame,
+                        parent,
+                        instance_id,
+                    );
+                }
+                SpatialTreeItem::Invalid => {
+                    unreachable!();
+                }
+            }
+        });
+    }
+
+    /// Build the whole scene starting at `root_pipeline`.
+    ///
+    /// The display list (and the display lists of any iframes) is walked
+    /// iteratively: nested scopes (stacking contexts, reference frames and
+    /// iframes) are tracked on an explicit stack of `BuildContext`s instead
+    /// of using recursion. When the items of a scope are exhausted, the
+    /// clean-up matching its `ContextKind` is run.
+    fn build_all(&mut self, root_pipeline: &ScenePipeline) {
+        /// The kind of scope a `BuildContext` represents; it selects the
+        /// clean-up performed once the scope has been fully traversed.
+        enum ContextKind<'a> {
+            Root,
+            StackingContext {
+                sc_info: StackingContextInfo,
+            },
+            ReferenceFrame,
+            Iframe {
+                /// Traversal of the enclosing display list, restored once
+                /// the iframe has been fully processed.
+                parent_traversal: BuiltDisplayListIter<'a>,
+            }
+        }
+        /// One entry of the explicit traversal stack.
+        struct BuildContext<'a> {
+            pipeline_id: PipelineId,
+            kind: ContextKind<'a>,
+        }
+
+        // Mapper for the root pipeline; popped again at the end of this function.
+        self.id_to_index_mapper_stack.push(NodeIdToIndexMapper::default());
+
+        let instance_id = self.get_next_instance_id_for_pipeline(root_pipeline.pipeline_id);
+
+        self.push_root(
+            root_pipeline.pipeline_id,
+            &root_pipeline.viewport_size,
+            instance_id,
+        );
+        self.build_spatial_tree_for_display_list(
+            &root_pipeline.display_list.display_list,
+            root_pipeline.pipeline_id,
+            instance_id,
+        );
+
+        let mut stack = vec![BuildContext {
+            pipeline_id: root_pipeline.pipeline_id,
+            kind: ContextKind::Root,
+        }];
+        let mut traversal = root_pipeline.display_list.iter();
+
+        // Each iteration resumes the context on top of the stack; `traversal`
+        // is positioned at the next item belonging to that context.
+        'outer: while let Some(bc) = stack.pop() {
+            loop {
+                let item = match traversal.next() {
+                    Some(item) => item,
+                    // No more items: fall through to the clean-up below.
+                    None => break,
+                };
+
+                match item.item() {
+                    DisplayItem::PushStackingContext(ref info) => {
+                        profile_scope!("build_stacking_context");
+                        let spatial_node_index = self.get_space(info.spatial_id);
+                        let mut subtraversal = item.sub_iter();
+                        // Avoid doing unnecessary work for empty stacking contexts.
+                        if subtraversal.current_stacking_context_empty() {
+                            subtraversal.skip_current_stacking_context();
+                            traversal = subtraversal;
+                            // Stay in the current context; nothing was pushed.
+                            continue;
+                        }
+
+                        let composition_operations = CompositeOps::new(
+                            filter_ops_for_compositing(item.filters()),
+                            filter_datas_for_compositing(item.filter_datas()),
+                            filter_primitives_for_compositing(item.filter_primitives()),
+                            info.stacking_context.mix_blend_mode_for_compositing(),
+                        );
+
+                        let sc_info = self.push_stacking_context(
+                            composition_operations,
+                            info.stacking_context.transform_style,
+                            info.prim_flags,
+                            spatial_node_index,
+                            info.stacking_context.clip_chain_id,
+                            info.stacking_context.raster_space,
+                            info.stacking_context.flags,
+                        );
+
+                        self.rf_mapper.push_offset(info.origin.to_vector());
+                        let new_context = BuildContext {
+                            pipeline_id: bc.pipeline_id,
+                            kind: ContextKind::StackingContext {
+                                sc_info,
+                            },
+                        };
+                        // Re-push the current context first so that it is
+                        // resumed after the nested context has completed.
+                        stack.push(bc);
+                        stack.push(new_context);
+
+                        subtraversal.merge_debug_stats_from(&mut traversal);
+                        traversal = subtraversal;
+                        continue 'outer;
+                    }
+                    DisplayItem::PushReferenceFrame(..) => {
+                        profile_scope!("build_reference_frame");
+                        let mut subtraversal = item.sub_iter();
+
+                        self.rf_mapper.push_scope();
+                        let new_context = BuildContext {
+                            pipeline_id: bc.pipeline_id,
+                            kind: ContextKind::ReferenceFrame,
+                        };
+                        // Same re-push scheme as for stacking contexts.
+                        stack.push(bc);
+                        stack.push(new_context);
+
+                        subtraversal.merge_debug_stats_from(&mut traversal);
+                        traversal = subtraversal;
+                        continue 'outer;
+                    }
+                    // A pop item ends the current scope: run its clean-up below.
+                    DisplayItem::PopReferenceFrame |
+                    DisplayItem::PopStackingContext => break,
+                    DisplayItem::Iframe(ref info) => {
+                        profile_scope!("iframe");
+
+                        let space = self.get_space(info.space_and_clip.spatial_id);
+                        let subtraversal = match self.push_iframe(info, space) {
+                            Some(pair) => pair,
+                            // No traversal was produced for this iframe: skip the item.
+                            None => continue,
+                        };
+
+                        let new_context = BuildContext {
+                            pipeline_id: info.pipeline_id,
+                            kind: ContextKind::Iframe {
+                                // Switch to the iframe's display list and keep the
+                                // current traversal so it can be restored later.
+                                parent_traversal: mem::replace(&mut traversal, subtraversal),
+                            },
+                        };
+                        stack.push(bc);
+                        stack.push(new_context);
+                        continue 'outer;
+                    }
+                    _ => {
+                        self.build_item(item);
+                    }
+                };
+            }
+
+            // The scope of `bc` is complete: undo whatever was set up when
+            // the scope was entered.
+            match bc.kind {
+                ContextKind::Root => {}
+                ContextKind::StackingContext { sc_info } => {
+                    self.rf_mapper.pop_offset();
+                    self.pop_stacking_context(sc_info);
+                }
+                ContextKind::ReferenceFrame => {
+                    self.rf_mapper.pop_scope();
+                }
+                ContextKind::Iframe { parent_traversal } => {
+                    self.iframe_size.pop();
+                    self.rf_mapper.pop_scope();
+                    // NOTE(review): presumably mirrors two clips pushed when the
+                    // iframe was entered (not visible in this function) - confirm.
+                    self.clip_tree_builder.pop_clip();
+                    self.clip_tree_builder.pop_clip();
+
+                    // Leaving the outermost iframe.
+                    if self.iframe_size.is_empty() {
+                        assert!(self.root_iframe_clip.is_some());
+                        self.root_iframe_clip = None;
+                        self.add_tile_cache_barrier_if_needed(SliceFlags::empty());
+                    }
+
+                    self.id_to_index_mapper_stack.pop().unwrap();
+
+                    // Resume the display list that contained the iframe.
+                    traversal = parent_traversal;
+                }
+            }
+
+            // TODO: factor this out to be part of capture
+            // NOTE: this sits inside the `while` loop, so when the feature is
+            // enabled the stats are logged every time a context completes.
+            if cfg!(feature = "display_list_stats") {
+                let stats = traversal.debug_stats();
+                let total_bytes: usize = stats.iter().map(|(_, stats)| stats.num_bytes).sum();
+                debug!("item, total count, total bytes, % of DL bytes, bytes per item");
+                for (label, stats) in stats {
+                    // `.max(1)` guards the divisions against zero totals.
+                    debug!("{}, {}, {}kb, {}%, {}",
+                        label,
+                        stats.total_count,
+                        stats.num_bytes / 1000,
+                        ((stats.num_bytes as f32 / total_bytes.max(1) as f32) * 100.0) as usize,
+                        stats.num_bytes / stats.total_count.max(1));
+                }
+                debug!("");
+            }
+        }
+
+        debug_assert!(self.sc_stack.is_empty());
+
+        // Pop the root pipeline's mapper; every iframe mapper must be gone by now.
+        self.id_to_index_mapper_stack.pop().unwrap();
+        assert!(self.id_to_index_mapper_stack.is_empty());
+    }
+
+    /// Add the sticky frame described by `info` to the spatial tree as a
+    /// child of `parent_node_index`, and register the resulting index for
+    /// `info.id` in the innermost id -> index mapper.
+    fn build_sticky_frame(
+        &mut self,
+        info: &StickyFrameDescriptor,
+        parent_node_index: SpatialNodeIndex,
+        instance_id: PipelineInstanceId,
+    ) {
+        let frame_info = StickyFrameInfo::new(
+            info.bounds,
+            info.margins,
+            info.vertical_offset_bounds,
+            info.horizontal_offset_bounds,
+            info.previously_applied_offset,
+        );
+
+        let node_index = self.spatial_tree.add_sticky_frame(
+            parent_node_index,
+            frame_info,
+            info.id.pipeline_id(),
+            info.key,
+            instance_id,
+        );
+
+        let mapper = self.id_to_index_mapper_stack.last_mut().unwrap();
+        mapper.add_spatial_node(info.id, node_index);
+    }
+
+    /// Push the reference frame described by `info` as a child of
+    /// `parent_space`.
+    ///
+    /// A static transform binding is used as-is. A computed binding is
+    /// turned into a transform made of an optional scale (from `scale_from`
+    /// to the size of the current iframe), an optional vertical flip, and
+    /// finally the rotation.
+    ///
+    /// Panics if a computed binding is encountered while `iframe_size` is
+    /// empty.
+    fn build_reference_frame(
+        &mut self,
+        info: &ReferenceFrameDescriptor,
+        parent_space: SpatialNodeIndex,
+        pipeline_id: PipelineId,
+        instance_id: PipelineInstanceId,
+    ) {
+        let transform = match info.reference_frame.transform {
+            ReferenceTransformBinding::Static { binding } => binding,
+            ReferenceTransformBinding::Computed { scale_from, vertical_flip, rotation } => {
+                let content_size = *self.iframe_size.last().unwrap();
+
+                // If we have a 90/270 degree rotation, then scale_from
+                // and content_size are in different coordinate spaces and
+                // we need to swap width/height for them to be correct.
+                let (extent_x, extent_y) = match rotation {
+                    Rotation::Degree0 |
+                    Rotation::Degree180 => (content_size.width, content_size.height),
+                    Rotation::Degree90 |
+                    Rotation::Degree270 => (content_size.height, content_size.width),
+                };
+
+                let mut computed = match scale_from {
+                    Some(scale_from) => LayoutTransform::scale(
+                        extent_x / scale_from.width,
+                        extent_y / scale_from.height,
+                        1.0
+                    ),
+                    None => LayoutTransform::identity(),
+                };
+
+                if vertical_flip {
+                    // Flip around the horizontal axis, then shift the content
+                    // back down by its (rotation adjusted) height.
+                    computed = computed
+                        .then_translate(LayoutVector3D::new(0.0, extent_y, 0.0))
+                        .pre_scale(1.0, -1.0, 1.0);
+                }
+
+                let rotate = rotation.to_matrix(content_size);
+
+                PropertyBinding::Value(computed.then(&rotate))
+            },
+        };
+
+        let uid = SpatialNodeUid::external(info.reference_frame.key, pipeline_id, instance_id);
+
+        self.push_reference_frame(
+            info.reference_frame.id,
+            parent_space,
+            pipeline_id,
+            info.reference_frame.transform_style,
+            transform,
+            info.reference_frame.kind,
+            info.origin.to_vector(),
+            uid,
+        );
+    }
+
+    /// Registers an explicit scroll frame described by the display list.
+    ///
+    /// The size of `content_rect` is passed on as the content size; it is
+    /// useful when calculating scroll extents for the SpatialNode::scroll(..)
+    /// API as well as for properly setting sticky positioning offsets.
+    fn build_scroll_frame(
+        &mut self,
+        info: &ScrollFrameDescriptor,
+        parent_node_index: SpatialNodeIndex,
+        pipeline_id: PipelineId,
+        instance_id: PipelineInstanceId,
+    ) {
+        self.add_scroll_frame(
+            info.scroll_frame_id,
+            parent_node_index,
+            info.external_id,
+            pipeline_id,
+            &info.frame_rect,
+            &info.content_rect.size(),
+            ScrollFrameKind::Explicit,
+            info.external_scroll_offset,
+            info.scroll_offset_generation,
+            info.has_scroll_linked_effect,
+            SpatialNodeUid::external(info.key, pipeline_id, instance_id),
+        );
+    }
+
+    /// Hands out the next instance id for `pipeline_id`.
+    ///
+    /// Ids start at zero for a pipeline not seen before; the stored counter is
+    /// advanced so that the following call for the same pipeline differs.
+    fn get_next_instance_id_for_pipeline(
+        &mut self,
+        pipeline_id: PipelineId,
+    ) -> PipelineInstanceId {
+        let counter = self.pipeline_instance_ids.entry(pipeline_id).or_insert(0);
+        let current = *counter;
+        *counter += 1;
+
+        PipelineInstanceId::new(current)
+    }
+    /// Sets up the clip and spatial roots of an iframe's pipeline and returns its display list iterator, or `None` if the pipeline is not in the scene.
+    fn push_iframe(
+        &mut self,
+        info: &IframeDisplayItem,
+        spatial_node_index: SpatialNodeIndex,
+    ) -> Option<BuiltDisplayListIter<'a>> {
+        let iframe_pipeline_id = info.pipeline_id;
+        let pipeline = match self.scene.pipelines.get(&iframe_pipeline_id) {
+            Some(pipeline) => pipeline,
+            None => {
+                debug_assert!(info.ignore_missing_pipeline);
+                return None
+            },
+        };
+        // Push the clip chain referenced by the iframe item onto the clip tree builder.
+        self.clip_tree_builder.push_clip_chain(Some(info.space_and_clip.clip_chain_id), false);
+
+        // TODO(gw): This is the only remaining call site that relies on ClipId parenting, remove me!
+        self.add_rect_clip_node(
+            ClipId::root(iframe_pipeline_id),
+            info.space_and_clip.spatial_id,
+            &info.clip_rect,
+        );
+
+        self.clip_tree_builder.push_clip_id(ClipId::root(iframe_pipeline_id));
+        // The instance id is part of the spatial node uids created below for this pipeline.
+        let instance_id = self.get_next_instance_id_for_pipeline(iframe_pipeline_id);
+        // Fresh id -> index mapper; `get_space` resolves against the top of this stack.
+        self.id_to_index_mapper_stack.push(NodeIdToIndexMapper::default());
+        // Snap the iframe bounds relative to the spatial node the iframe is attached to.
+        let bounds = self.snap_rect(
+            &info.bounds,
+            spatial_node_index,
+        );
+        // Root reference frame of the pipeline: identity transform, offset by the snapped bounds origin.
+        let spatial_node_index = self.push_reference_frame(
+            SpatialId::root_reference_frame(iframe_pipeline_id),
+            spatial_node_index,
+            iframe_pipeline_id,
+            TransformStyle::Flat,
+            PropertyBinding::Value(LayoutTransform::identity()),
+            ReferenceFrameKind::Transform {
+                is_2d_scale_translation: true,
+                should_snap: true,
+                paired_with_perspective: false,
+            },
+            bounds.min.to_vector(),
+            SpatialNodeUid::root_reference_frame(iframe_pipeline_id, instance_id),
+        );
+
+        let iframe_rect = LayoutRect::from_size(bounds.size());
+        let is_root_pipeline = self.iframe_size.is_empty();
+        // Root scroll node of the pipeline; frame rect and content size both use the snapped bounds size.
+        self.add_scroll_frame(
+            SpatialId::root_scroll_node(iframe_pipeline_id),
+            spatial_node_index,
+            ExternalScrollId(0, iframe_pipeline_id),
+            iframe_pipeline_id,
+            &iframe_rect,
+            &bounds.size(),
+            ScrollFrameKind::PipelineRoot {
+                is_root_pipeline,
+            },
+            LayoutVector2D::zero(),
+            APZScrollGeneration::default(),
+            HasScrollLinkedEffect::No,
+            SpatialNodeUid::root_scroll_frame(iframe_pipeline_id, instance_id),
+        );
+
+        // If this is a root iframe, force a new tile cache both before and after
+        // adding primitives for this iframe.
+        if self.iframe_size.is_empty() {
+            assert!(self.root_iframe_clip.is_none());
+            self.root_iframe_clip = Some(ClipId::root(iframe_pipeline_id));
+            self.add_tile_cache_barrier_if_needed(SliceFlags::empty());
+        }
+        self.iframe_size.push(info.bounds.size());
+        self.rf_mapper.push_scope();
+        // Build the spatial tree for the nested display list; the caller receives the item iterator below.
+        self.build_spatial_tree_for_display_list(
+            &pipeline.display_list.display_list,
+            iframe_pipeline_id,
+            instance_id,
+        );
+
+        Some(pipeline.display_list.iter())
+    }
+
+    /// Resolves `spatial_id` through the innermost id-to-index mapper.
+    /// Panics if the mapper stack is empty.
+    fn get_space(&self, spatial_id: SpatialId) -> SpatialNodeIndex {
+        let mapper = self.id_to_index_mapper_stack.last().unwrap();
+        mapper.get_spatial_node_index(spatial_id)
+    }
+
+    /// Maps an API clip chain id to a clip node id.
+    ///
+    /// Thin wrapper: the work is delegated to `build_clip_set` on the clip
+    /// tree builder, and its result is returned unchanged.
+    ///
+    fn get_clip_node(&mut self, clip_chain_id: api::ClipChainId) -> ClipNodeId {
+        self.clip_tree_builder.build_clip_set(clip_chain_id)
+    }
+
+    /// Resolves the properties shared by most display items.
+    ///
+    /// Looks up the spatial node, applies the current offset to the clip rect
+    /// and the optional bounds, and snaps both unless the item is flagged
+    /// `ANTIALISED`. When `bounds` is `None`, the clip rect stands in for it.
+    ///
+    /// Returns the layout info, the unsnapped primitive rect, the spatial node
+    /// index and the clip node id, in that order.
+    fn process_common_properties(
+        &mut self,
+        common: &CommonItemProperties,
+        bounds: Option<&LayoutRect>,
+    ) -> (LayoutPrimitiveInfo, LayoutRect, SpatialNodeIndex, ClipNodeId) {
+        let spatial_node_index = self.get_space(common.spatial_id);
+        let offset = self.current_offset(spatial_node_index);
+
+        let raw_clip_rect = common.clip_rect.translate(offset);
+        let raw_bounds = bounds.map(|b| b.translate(offset));
+        // If no bounds rect is given, default to clip rect.
+        let raw_rect = raw_bounds.unwrap_or(raw_clip_rect);
+
+        let (rect, clip_rect) = if common.flags.contains(PrimitiveFlags::ANTIALISED) {
+            // Antialiased items keep their unsnapped geometry.
+            (raw_rect, raw_clip_rect)
+        } else {
+            // Snap the clip rect first; it is also the fallback primitive rect.
+            let clip_rect = self.snap_rect(&raw_clip_rect, spatial_node_index);
+            let rect = match raw_bounds {
+                Some(ref b) => self.snap_rect(b, spatial_node_index),
+                None => clip_rect,
+            };
+            (rect, clip_rect)
+        };
+
+        let clip_node_id = self.get_clip_node(common.clip_chain_id);
+
+        let layout = LayoutPrimitiveInfo {
+            rect,
+            clip_rect,
+            flags: common.flags,
+        };
+
+        (layout, raw_rect, spatial_node_index, clip_node_id)
+    }
+
+    /// Convenience wrapper around `process_common_properties` for display
+    /// items that always carry an explicit bounds rect. The returned tuple is
+    /// exactly what `process_common_properties` produces for `Some(bounds)`.
+    fn process_common_properties_with_bounds(
+        &mut self,
+        common: &CommonItemProperties,
+        bounds: &LayoutRect,
+    ) -> (LayoutPrimitiveInfo, LayoutRect, SpatialNodeIndex, ClipNodeId) {
+        self.process_common_properties(common, Some(bounds))
+    }
+
+    /// Snaps `rect` using the builder's `snap_to_device` helper, after
+    /// pointing that helper at `target_spatial_node` in the spatial tree.
+    /// Returns the rect produced by the helper.
+    pub fn snap_rect(
+        &mut self,
+        rect: &LayoutRect,
+        target_spatial_node: SpatialNodeIndex,
+    ) -> LayoutRect {
+        self.snap_to_device.set_target_spatial_node(target_spatial_node, self.spatial_tree);
+        self.snap_to_device.snap_rect(rect)
+    }
+    /// Builds the primitive(s) or clip node(s) for one display item; structural items (stacking contexts, reference frames, iframes) are unreachable here.
+    fn build_item<'b>(
+        &'b mut self,
+        item: DisplayItemRef,
+    ) {
+        match *item.item() {
+            DisplayItem::Image(ref info) => {
+                profile_scope!("image");
+
+                let (layout, _, spatial_node_index, clip_node_id) = self.process_common_properties_with_bounds(
+                    &info.common,
+                    &info.bounds,
+                );
+                // Non-repeating image: the size of the whole rect and a zero size are passed where RepeatingImage passes stretch size and tile spacing.
+                self.add_image(
+                    spatial_node_index,
+                    clip_node_id,
+                    &layout,
+                    layout.rect.size(),
+                    LayoutSize::zero(),
+                    info.image_key,
+                    info.image_rendering,
+                    info.alpha_type,
+                    info.color,
+                );
+            }
+            DisplayItem::RepeatingImage(ref info) => {
+                profile_scope!("repeating_image");
+
+                let (layout, unsnapped_rect, spatial_node_index, clip_node_id) = self.process_common_properties_with_bounds(
+                    &info.common,
+                    &info.bounds,
+                );
+
+                let stretch_size = process_repeat_size(
+                    &layout.rect,
+                    &unsnapped_rect,
+                    info.stretch_size,
+                );
+
+                self.add_image(
+                    spatial_node_index,
+                    clip_node_id,
+                    &layout,
+                    stretch_size,
+                    info.tile_spacing,
+                    info.image_key,
+                    info.image_rendering,
+                    info.alpha_type,
+                    info.color,
+                );
+            }
+            DisplayItem::YuvImage(ref info) => {
+                profile_scope!("yuv_image");
+
+                let (layout, _, spatial_node_index, clip_node_id) = self.process_common_properties_with_bounds(
+                    &info.common,
+                    &info.bounds,
+                );
+
+                self.add_yuv_image(
+                    spatial_node_index,
+                    clip_node_id,
+                    &layout,
+                    info.yuv_data,
+                    info.color_depth,
+                    info.color_space,
+                    info.color_range,
+                    info.image_rendering,
+                );
+            }
+            DisplayItem::Text(ref info) => {
+                profile_scope!("text");
+
+                // TODO(aosmond): Snapping text primitives does not make much sense, given the
+                // primitive bounds and clip are supposed to be conservative, not definitive.
+                // E.g. they should be able to grow and not impact the output. However there
+                // are subtle interactions between the primitive origin and the glyph offset
+                // which appear to be significant (presumably due to some sort of accumulated
+                // error throughout the layers). We should fix this at some point.
+                let (layout, _, spatial_node_index, clip_node_id) = self.process_common_properties_with_bounds(
+                    &info.common,
+                    &info.bounds,
+                );
+
+                self.add_text(
+                    spatial_node_index,
+                    clip_node_id,
+                    &layout,
+                    &info.font_key,
+                    &info.color,
+                    item.glyphs(),
+                    info.glyph_options,
+                );
+            }
+            DisplayItem::Rectangle(ref info) => {
+                profile_scope!("rect");
+
+                let (layout, _, spatial_node_index, clip_node_id) = self.process_common_properties_with_bounds(
+                    &info.common,
+                    &info.bounds,
+                );
+
+                self.add_primitive(
+                    spatial_node_index,
+                    clip_node_id,
+                    &layout,
+                    Vec::new(),
+                    PrimitiveKeyKind::Rectangle {
+                        color: info.color.into(),
+                    },
+                );
+                // Checkerboard background rects are followed by a tile cache barrier (if needed).
+                if info.common.flags.contains(PrimitiveFlags::CHECKERBOARD_BACKGROUND) {
+                    self.add_tile_cache_barrier_if_needed(SliceFlags::empty());
+                }
+            }
+            DisplayItem::HitTest(ref info) => {
+                profile_scope!("hit_test");
+                // Hit test rects are translated and snapped here directly, without process_common_properties.
+                let spatial_node_index = self.get_space(info.spatial_id);
+                let current_offset = self.current_offset(spatial_node_index);
+                let unsnapped_rect = info.rect.translate(current_offset);
+
+                let rect = self.snap_rect(
+                    &unsnapped_rect,
+                    spatial_node_index,
+                );
+
+                let layout = LayoutPrimitiveInfo {
+                    rect,
+                    clip_rect: rect,
+                    flags: info.flags,
+                };
+                // anim_id is the binding key when the node is a reference frame whose source transform is a property binding, and 0 otherwise.
+                let spatial_node = self.spatial_tree.get_node_info(spatial_node_index);
+                let anim_id: u64 =  match spatial_node.node_type {
+                    SpatialNodeType::ReferenceFrame(ReferenceFrameInfo {
+                        source_transform: PropertyBinding::Binding(key, _),
+                        ..
+                    }) => key.clone().into(),
+                    _ => 0,
+                };
+
+                let clip_node_id = self.get_clip_node(info.clip_chain_id);
+
+                self.add_primitive_to_hit_testing_list(
+                    &layout,
+                    spatial_node_index,
+                    clip_node_id,
+                    info.tag,
+                    anim_id,
+                );
+            }
+            DisplayItem::ClearRectangle(ref info) => {
+                profile_scope!("clear");
+
+                let (layout, _, spatial_node_index, clip_node_id) = self.process_common_properties_with_bounds(
+                    &info.common,
+                    &info.bounds,
+                );
+
+                self.add_clear_rectangle(
+                    spatial_node_index,
+                    clip_node_id,
+                    &layout,
+                );
+            }
+            DisplayItem::Line(ref info) => {
+                profile_scope!("line");
+
+                let (layout, _, spatial_node_index, clip_node_id) = self.process_common_properties_with_bounds(
+                    &info.common,
+                    &info.area,
+                );
+
+                self.add_line(
+                    spatial_node_index,
+                    clip_node_id,
+                    &layout,
+                    info.wavy_line_thickness,
+                    info.orientation,
+                    info.color,
+                    info.style,
+                );
+            }
+            DisplayItem::Gradient(ref info) => {
+                profile_scope!("gradient");
+
+                if !info.gradient.is_valid() {
+                    return;
+                }
+
+                let (mut layout, unsnapped_rect, spatial_node_index, clip_node_id) = self.process_common_properties_with_bounds(
+                    &info.common,
+                    &info.bounds,
+                );
+
+                let mut tile_size = process_repeat_size(
+                    &layout.rect,
+                    &unsnapped_rect,
+                    info.tile_size,
+                );
+
+                let mut stops = read_gradient_stops(item.gradient_stops());
+                let mut start = info.gradient.start_point;
+                let mut end = info.gradient.end_point;
+                let flags = layout.flags;
+                // The callback turns each rect handed back by optimize_linear_gradient into a clamped gradient primitive sized to that rect.
+                let optimized = optimize_linear_gradient(
+                    &mut layout.rect,
+                    &mut tile_size,
+                    info.tile_spacing,
+                    &layout.clip_rect,
+                    &mut start,
+                    &mut end,
+                    info.gradient.extend_mode,
+                    &mut stops,
+                    &mut |rect, start, end, stops, edge_aa_mask| {
+                        let layout = LayoutPrimitiveInfo { rect: *rect, clip_rect: *rect, flags };
+                        if let Some(prim_key_kind) = self.create_linear_gradient_prim(
+                            &layout,
+                            start,
+                            end,
+                            stops.to_vec(),
+                            ExtendMode::Clamp,
+                            rect.size(),
+                            LayoutSize::zero(),
+                            None,
+                            edge_aa_mask,
+                        ) {
+                            self.add_nonshadowable_primitive(
+                                spatial_node_index,
+                                clip_node_id,
+                                &layout,
+                                Vec::new(),
+                                prim_key_kind,
+                            );
+                        }
+                    }
+                );
+                // If optimize_linear_gradient returned false, emit a single gradient primitive (skipped for empty tiles).
+                if !optimized && !tile_size.ceil().is_empty() {
+                    if let Some(prim_key_kind) = self.create_linear_gradient_prim(
+                        &layout,
+                        start,
+                        end,
+                        stops,
+                        info.gradient.extend_mode,
+                        tile_size,
+                        info.tile_spacing,
+                        None,
+                        EdgeAaSegmentMask::all(),
+                    ) {
+                        self.add_nonshadowable_primitive(
+                            spatial_node_index,
+                            clip_node_id,
+                            &layout,
+                            Vec::new(),
+                            prim_key_kind,
+                        );
+                    }
+                }
+            }
+            DisplayItem::RadialGradient(ref info) => {
+                profile_scope!("radial");
+
+                if !info.gradient.is_valid() {
+                    return;
+                }
+
+                let (mut layout, unsnapped_rect, spatial_node_index, clip_node_id) = self.process_common_properties_with_bounds(
+                    &info.common,
+                    &info.bounds,
+                );
+
+                let mut center = info.gradient.center;
+
+                let stops = read_gradient_stops(item.gradient_stops());
+
+                let mut tile_size = process_repeat_size(
+                    &layout.rect,
+                    &unsnapped_rect,
+                    info.tile_size,
+                );
+                // The optimization may rewrite rect, tile size, center and spacing; its callback adds solid-color rectangles.
+                let mut prim_rect = layout.rect;
+                let mut tile_spacing = info.tile_spacing;
+                optimize_radial_gradient(
+                    &mut prim_rect,
+                    &mut tile_size,
+                    &mut center,
+                    &mut tile_spacing,
+                    &layout.clip_rect,
+                    info.gradient.radius,
+                    info.gradient.end_offset,
+                    info.gradient.extend_mode,
+                    &stops,
+                    &mut |solid_rect, color| {
+                        self.add_nonshadowable_primitive(
+                            spatial_node_index,
+                            clip_node_id,
+                            &LayoutPrimitiveInfo {
+                                rect: *solid_rect,
+                                .. layout
+                            },
+                            Vec::new(),
+                            PrimitiveKeyKind::Rectangle { color: PropertyBinding::Value(color) },
+                        );
+                    }
+                );
+
+                // TODO: create_radial_gradient_prim already calls
+                // this, but it leaves the info variable that is
+                // passed to add_nonshadowable_primitive unmodified
+                // which can cause issues.
+                simplify_repeated_primitive(&tile_size, &mut tile_spacing, &mut prim_rect);
+                // Below, the offsets are scaled by the horizontal radius, and the ratio argument is radius width over height.
+                if !tile_size.ceil().is_empty() {
+                    layout.rect = prim_rect;
+                    let prim_key_kind = self.create_radial_gradient_prim(
+                        &layout,
+                        center,
+                        info.gradient.start_offset * info.gradient.radius.width,
+                        info.gradient.end_offset * info.gradient.radius.width,
+                        info.gradient.radius.width / info.gradient.radius.height,
+                        stops,
+                        info.gradient.extend_mode,
+                        tile_size,
+                        tile_spacing,
+                        None,
+                    );
+
+                    self.add_nonshadowable_primitive(
+                        spatial_node_index,
+                        clip_node_id,
+                        &layout,
+                        Vec::new(),
+                        prim_key_kind,
+                    );
+                }
+            }
+            DisplayItem::ConicGradient(ref info) => {
+                profile_scope!("conic");
+
+                if !info.gradient.is_valid() {
+                    return;
+                }
+
+                let (mut layout, unsnapped_rect, spatial_node_index, clip_node_id) = self.process_common_properties_with_bounds(
+                    &info.common,
+                    &info.bounds,
+                );
+
+                let tile_size = process_repeat_size(
+                    &layout.rect,
+                    &unsnapped_rect,
+                    info.tile_size,
+                );
+                // apply_gradient_local_clip may modify the rect; the offset it returns is added to the gradient center.
+                let offset = apply_gradient_local_clip(
+                    &mut layout.rect,
+                    &tile_size,
+                    &info.tile_spacing,
+                    &layout.clip_rect,
+                );
+                let center = info.gradient.center + offset;
+
+                if !tile_size.ceil().is_empty() {
+                    let prim_key_kind = self.create_conic_gradient_prim(
+                        &layout,
+                        center,
+                        info.gradient.angle,
+                        info.gradient.start_offset,
+                        info.gradient.end_offset,
+                        item.gradient_stops(),
+                        info.gradient.extend_mode,
+                        tile_size,
+                        info.tile_spacing,
+                        None,
+                    );
+
+                    self.add_nonshadowable_primitive(
+                        spatial_node_index,
+                        clip_node_id,
+                        &layout,
+                        Vec::new(),
+                        prim_key_kind,
+                    );
+                }
+            }
+            DisplayItem::BoxShadow(ref info) => {
+                profile_scope!("box_shadow");
+
+                let (layout, _, spatial_node_index, clip_node_id) = self.process_common_properties_with_bounds(
+                    &info.common,
+                    &info.box_bounds,
+                );
+
+                self.add_box_shadow(
+                    spatial_node_index,
+                    clip_node_id,
+                    &layout,
+                    &info.offset,
+                    info.color,
+                    info.blur_radius,
+                    info.spread_radius,
+                    info.border_radius,
+                    info.clip_mode,
+                );
+            }
+            DisplayItem::Border(ref info) => {
+                profile_scope!("border");
+
+                let (layout, _, spatial_node_index, clip_node_id) = self.process_common_properties_with_bounds(
+                    &info.common,
+                    &info.bounds,
+                );
+
+                self.add_border(
+                    spatial_node_index,
+                    clip_node_id,
+                    &layout,
+                    info,
+                    item.gradient_stops(),
+                );
+            }
+            DisplayItem::ImageMaskClip(ref info) => {
+                profile_scope!("image_clip");
+
+                self.add_image_mask_clip_node(
+                    info.id,
+                    info.spatial_id,
+                    &info.image_mask,
+                    info.fill_rule,
+                    item.points(),
+                );
+            }
+            DisplayItem::RoundedRectClip(ref info) => {
+                profile_scope!("rounded_clip");
+
+                self.add_rounded_rect_clip_node(
+                    info.id,
+                    info.spatial_id,
+                    &info.clip,
+                );
+            }
+            DisplayItem::RectClip(ref info) => {
+                profile_scope!("rect_clip");
+
+                self.add_rect_clip_node(
+                    info.id,
+                    info.spatial_id,
+                    &info.clip_rect,
+                );
+            }
+            DisplayItem::ClipChain(ref info) => {
+                profile_scope!("clip_chain");
+
+                self.clip_tree_builder.define_clip_chain(
+                    info.id,
+                    info.parent,
+                    item.clip_chain_items().into_iter(),
+                );
+            },
+            DisplayItem::BackdropFilter(ref info) => {
+                profile_scope!("backdrop");
+                // No explicit bounds are passed, so the clip rect is used as the primitive rect.
+                let (layout, _, spatial_node_index, clip_node_id) = self.process_common_properties(
+                    &info.common,
+                    None,
+                );
+
+                let filters = filter_ops_for_compositing(item.filters());
+                let filter_datas = filter_datas_for_compositing(item.filter_datas());
+                let filter_primitives = filter_primitives_for_compositing(item.filter_primitives());
+
+                self.add_backdrop_filter(
+                    spatial_node_index,
+                    clip_node_id,
+                    &layout,
+                    filters,
+                    filter_datas,
+                    filter_primitives,
+                );
+            }
+
+            // Do nothing; these are dummy items for the display list parser
+            DisplayItem::SetGradientStops |
+            DisplayItem::SetFilterOps |
+            DisplayItem::SetFilterData |
+            DisplayItem::SetFilterPrimitives |
+            DisplayItem::SetPoints => {}
+
+            // Special items that are handled in the parent method
+            DisplayItem::PushStackingContext(..) |
+            DisplayItem::PushReferenceFrame(..) |
+            DisplayItem::PopReferenceFrame |
+            DisplayItem::PopStackingContext |
+            DisplayItem::Iframe(_) => {
+                unreachable!("Handled in `build_all`")
+            }
+
+            DisplayItem::ReuseItems(key) |
+            DisplayItem::RetainedItems(key) => {
+                unreachable!("Iterator logic error: {:?}", key);
+            }
+
+            DisplayItem::PushShadow(info) => {
+                profile_scope!("push_shadow");
+
+                let spatial_node_index = self.get_space(info.space_and_clip.spatial_id);
+
+                self.push_shadow(
+                    info.shadow,
+                    spatial_node_index,
+                    info.space_and_clip.clip_chain_id,
+                    info.should_inflate,
+                );
+            }
+            DisplayItem::PopAllShadows => {
+                profile_scope!("pop_all_shadows");
+
+                self.pop_all_shadows();
+            }
+        }
+    }
+
+    /// Interns `prim` and wraps the result in a new `PrimitiveInstance`.
+    ///
+    /// The instance is only created here, not added to the draw list, so this
+    /// can be used for creating sub-primitives.
+    ///
+    /// TODO(djg): Can this inline into `add_interned_prim_to_draw_list`
+    fn create_primitive<P>(
+        &mut self,
+        info: &LayoutPrimitiveInfo,
+        spatial_node_index: SpatialNodeIndex,
+        clip_leaf_id: ClipLeafId,
+        prim: P,
+    ) -> PrimitiveInstance
+    where
+        P: InternablePrimitive,
+        Interners: AsMut<Interner<P>>,
+    {
+        let key = prim.into_key(info);
+        let offset = self.current_offset(spatial_node_index);
+
+        // Intern the key to obtain the handle used by the instance kind.
+        let handle = {
+            let interner = self.interners.as_mut();
+            interner.intern(&key, || ())
+        };
+
+        let kind = P::make_instance_kind(
+            key,
+            handle,
+            &mut self.prim_store,
+            offset,
+        );
+
+        // The instance pairs the interned primitive with its clip leaf.
+        PrimitiveInstance::new(kind, clip_leaf_id)
+    }
+
+    /// Adds an item to the hit testing scene.
+    ///
+    /// All arguments are forwarded to `hit_testing_scene.add_item`, together
+    /// with the clip tree builder and the interners.
+    fn add_primitive_to_hit_testing_list(
+        &mut self,
+        info: &LayoutPrimitiveInfo,
+        spatial_node_index: SpatialNodeIndex,
+        clip_node_id: ClipNodeId,
+        tag: ItemTag,
+        anim_id: u64,
+    ) {
+        self.hit_testing_scene.add_item(
+            tag, anim_id, info,
+            spatial_node_index, clip_node_id,
+            &self.clip_tree_builder, self.interners,
+        );
+    }
+
+    /// Add an already created primitive to the draw lists.
+    ///
+    /// The primitive is added to the top-most stacking context on the stack.
+    /// If we have a valid stacking context, the primitive gets added to the
+    /// prim list of that context. Otherwise, it gets added to a top-level
+    /// picture cache slice through the tile cache builder.
+    pub fn add_primitive_to_draw_list(
+        &mut self,
+        prim_instance: PrimitiveInstance,
+        prim_rect: LayoutRect,
+        spatial_node_index: SpatialNodeIndex,
+        flags: PrimitiveFlags,
+    ) {
+        if let Some(stacking_context) = self.sc_stack.last_mut() {
+            // A stacking context is open: it takes the primitive.
+            stacking_context.prim_list.add_prim(
+                prim_instance,
+                prim_rect,
+                spatial_node_index,
+                flags,
+                &mut self.prim_instances,
+                &self.clip_tree_builder,
+            );
+            return;
+        }
+
+        // No stacking context on the stack: use the tile cache builder.
+        self.tile_cache_builder.add_prim(
+            prim_instance,
+            prim_rect,
+            spatial_node_index,
+            flags,
+            self.spatial_tree,
+            self.interners,
+            &self.quality_settings,
+            &mut self.prim_instances,
+            &self.clip_tree_builder,
+        );
+    }
+
+    /// Convenience interface that creates a primitive entry and adds it to
+    /// the draw list.
+    ///
+    /// Primitives that report themselves as not visible are dropped before
+    /// a clip leaf is built for them.
+    fn add_nonshadowable_primitive<P>(
+        &mut self,
+        spatial_node_index: SpatialNodeIndex,
+        clip_node_id: ClipNodeId,
+        info: &LayoutPrimitiveInfo,
+        clip_items: Vec<ClipItemKey>,
+        prim: P,
+    )
+    where
+        P: InternablePrimitive + IsVisible,
+        Interners: AsMut<Interner<P>>,
+    {
+        if !prim.is_visible() {
+            return;
+        }
+
+        let clip_leaf_id = self.clip_tree_builder.build_for_prim(
+            clip_node_id,
+            info,
+            &clip_items,
+            &mut self.interners,
+        );
+
+        self.add_prim_to_draw_list(info, spatial_node_index, clip_leaf_id, prim);
+    }
+
+    /// Adds a primitive, deferring it when a shadow context is active.
+    ///
+    /// With no pending shadow items the primitive goes directly to the parent
+    /// picture; otherwise it is queued for processing during pop_all_shadows.
+    pub fn add_primitive<P>(
+        &mut self,
+        spatial_node_index: SpatialNodeIndex,
+        clip_node_id: ClipNodeId,
+        info: &LayoutPrimitiveInfo,
+        clip_items: Vec<ClipItemKey>,
+        prim: P,
+    )
+    where
+        P: InternablePrimitive + IsVisible,
+        Interners: AsMut<Interner<P>>,
+        ShadowItem: From<PendingPrimitive<P>>
+    {
+        if self.pending_shadow_items.is_empty() {
+            // A shadow context is not active: add directly to the parent picture.
+            self.add_nonshadowable_primitive(
+                spatial_node_index,
+                clip_node_id,
+                info,
+                clip_items,
+                prim,
+            );
+            return;
+        }
+
+        debug_assert!(clip_items.is_empty(), "No per-prim clips expected for shadowed primitives");
+
+        // There is an active shadow context. Store as a pending primitive
+        // for processing during pop_all_shadows.
+        let pending = PendingPrimitive { spatial_node_index, clip_node_id, info: *info, prim };
+        self.pending_shadow_items.push_back(pending.into());
+    }
+
+    /// Creates a primitive instance for `prim` and adds it to the draw list,
+    /// using the rect and flags carried by `info`.
+    fn add_prim_to_draw_list<P>(
+        &mut self,
+        info: &LayoutPrimitiveInfo,
+        spatial_node_index: SpatialNodeIndex,
+        clip_leaf_id: ClipLeafId,
+        prim: P,
+    )
+    where
+        P: InternablePrimitive,
+        Interners: AsMut<Interner<P>>,
+    {
+        // Build the instance first; it is then added with the same layout
+        // rect and primitive flags that were supplied in `info`.
+        let instance = self.create_primitive(info, spatial_node_index, clip_leaf_id, prim);
+
+        self.add_primitive_to_draw_list(
+            instance,
+            info.rect,
+            spatial_node_index,
+            info.flags,
+        );
+    }
+
+    /// Marks the current tile cache slice as atomic, unless some stacking
+    /// context on the stack lacks `WRAPS_BACKDROP_FILTER` (then does nothing).
+    fn make_current_slice_atomic_if_required(&mut self) {
+        // True if any stacking context on the stack is not a backdrop-filter wrapper.
+        let has_non_wrapping_sc = self.sc_stack
+            .iter()
+            .any(|sc| !sc.flags.contains(StackingContextFlags::WRAPS_BACKDROP_FILTER));
+
+        if has_non_wrapping_sc {
+            return;
+        }
+
+        // Shadows can only exist within a stacking context
+        assert!(self.pending_shadow_items.is_empty());
+        self.tile_cache_builder.make_current_slice_atomic();
+    }
+
+    /// Sets a tile cache barrier (forcing creation of a slice before the next
+    /// prim) when no stacking contexts are present; otherwise does nothing.
+    fn add_tile_cache_barrier_if_needed(
+        &mut self,
+        slice_flags: SliceFlags,
+    ) {
+        // Prims inside a stacking context are added to that context instead.
+        if !self.sc_stack.is_empty() {
+            return;
+        }
+
+        // Shadows can only exist within a stacking context
+        assert!(self.pending_shadow_items.is_empty());
+
+        self.tile_cache_builder.add_tile_cache_barrier(slice_flags, self.root_iframe_clip);
+    }
+
+    /// Push a new stacking context. Returns the StackingContextInfo that must be passed to pop_stacking_context().
+    fn push_stacking_context(
+        &mut self,
+        composite_ops: CompositeOps,
+        transform_style: TransformStyle,
+        prim_flags: PrimitiveFlags,
+        spatial_node_index: SpatialNodeIndex,
+        clip_chain_id: Option<api::ClipChainId>,
+        requested_raster_space: RasterSpace,
+        flags: StackingContextFlags,
+    ) -> StackingContextInfo {
+        profile_scope!("push_stacking_context");
+
+        let clip_node_id = match clip_chain_id {
+            Some(id) => {
+                self.clip_tree_builder.build_clip_set(id)
+            }
+            None => {
+                self.clip_tree_builder.build_clip_set(api::ClipChainId::INVALID)
+            }
+        };
+
+        self.clip_tree_builder.push_clip_chain(
+            clip_chain_id,
+            !composite_ops.is_empty(),
+        );
+
+        let new_space = match (self.raster_space_stack.last(), requested_raster_space) {
+            // If no parent space, just use the requested space
+            (None, _) => requested_raster_space,
+            // If screen is requested, inherit the parent's space
+            (Some(parent_space), RasterSpace::Screen) => *parent_space,
+            // If the parent is screen, select the requested space
+            (Some(RasterSpace::Screen), space) => space,
+            // If both local, take the maximum scale
+            (Some(RasterSpace::Local(parent_scale)), RasterSpace::Local(scale)) => RasterSpace::Local(parent_scale.max(scale)),
+        };
+        self.raster_space_stack.push(new_space);
+
+        // Get the transform-style of the parent stacking context,
+        // which determines if we *might* need to draw this on
+        // an intermediate surface for plane splitting purposes.
+        let (parent_is_3d, extra_3d_instance, plane_splitter_index) = match self.sc_stack.last_mut() {
+            Some(ref mut sc) if sc.is_3d() => {
+                let (flat_items_context_3d, plane_splitter_index) = match sc.context_3d {
+                    Picture3DContext::In { ancestor_index, plane_splitter_index, .. } => {
+                        (
+                            Picture3DContext::In {
+                                root_data: None,
+                                ancestor_index,
+                                plane_splitter_index,
+                            },
+                            plane_splitter_index,
+                        )
+                    }
+                    Picture3DContext::Out => panic!("Unexpected out of 3D context"),
+                };
+                // Cut the sequence of flat children before starting a child stacking context,
+                // so that the relative order between them and our current SC is preserved.
+                let extra_instance = sc.cut_item_sequence(
+                    &mut self.prim_store,
+                    &mut self.interners,
+                    Some(PictureCompositeMode::Blit(BlitReason::PRESERVE3D)),
+                    flat_items_context_3d,
+                    &mut self.clip_tree_builder,
+                );
+                let extra_instance = extra_instance.map(|(_, instance)| {
+                    ExtendedPrimitiveInstance {
+                        instance,
+                        spatial_node_index: sc.spatial_node_index,
+                        flags: sc.prim_flags,
+                    }
+                });
+                (true, extra_instance, Some(plane_splitter_index))
+            },
+            _ => (false, None, None),
+        };
+
+        if let Some(instance) = extra_3d_instance {
+            self.add_primitive_instance_to_3d_root(instance);
+        }
+
+        // If this is preserve-3d *or* the parent is, and there are no
+        // composite ops, then this stacking context is participating in
+        // the 3d rendering context. In that case, hoist the picture up to
+        // the 3d rendering context container, so that it's rendered as a
+        // sibling with other elements in this context.
+        let participating_in_3d_context =
+            composite_ops.is_empty() &&
+            (parent_is_3d || transform_style == TransformStyle::Preserve3D);
+
+        let context_3d = if participating_in_3d_context {
+            // Get the spatial node index of the containing block, which
+            // defines the context of backface-visibility.
+            let ancestor_index = self.containing_block_stack
+                .last()
+                .cloned()
+                .unwrap_or(self.spatial_tree.root_reference_frame_index());
+
+            let plane_splitter_index = plane_splitter_index.unwrap_or_else(|| {
+                let index = self.next_plane_splitter_index;
+                self.next_plane_splitter_index += 1;
+                PlaneSplitterIndex(index)
+            });
+
+            Picture3DContext::In {
+                root_data: if parent_is_3d {
+                    None
+                } else {
+                    Some(Vec::new())
+                },
+                plane_splitter_index,
+                ancestor_index,
+            }
+        } else {
+            Picture3DContext::Out
+        };
+
+        // Force an intermediate surface if the stacking context has a
+        // complex clip node. In the future, we may decide during
+        // prepare step to skip the intermediate surface if the
+        // clip node doesn't affect the stacking context rect.
+        let mut blit_reason = BlitReason::empty();
+
+        // If this stacking context has any complex clips, we need to draw it
+        // to an off-screen surface.
+        if let Some(clip_chain_id) = clip_chain_id {
+            if self.clip_tree_builder.clip_chain_has_complex_clips(clip_chain_id, &self.interners) {
+                blit_reason |= BlitReason::CLIP;
+            }
+        }
+
+        // Check if we know this stacking context is redundant (doesn't need a surface)
+        // The check for blend-container redundancy is more involved so it's handled below.
+        let mut is_redundant = FlattenedStackingContext::is_redundant(
+            &context_3d,
+            &composite_ops,
+            blit_reason,
+            self.sc_stack.last(),
+            prim_flags,
+        );
+
+        // If the stacking context is a blend container, and if we're at the top level
+        // of the stacking context tree, we may be able to make this blend container into a tile
+        // cache. This means that we get caching and correct scrolling invalidation for
+        // root level blend containers. For these cases, the readbacks of the backdrop
+        // are handled by doing partial reads of the picture cache tiles during rendering.
+        if flags.contains(StackingContextFlags::IS_BLEND_CONTAINER) {
+            // Check if we're inside a stacking context hierarchy with an existing surface
+            match self.sc_stack.last() {
+                Some(_) => {
+                    // If we are already inside a stacking context hierarchy with a surface, then we
+                    // need to do the normal isolate of this blend container as a regular surface
+                    blit_reason |= BlitReason::ISOLATE;
+                    is_redundant = false;
+                }
+                None => {
+                    // If the current slice is empty, then we can just mark the slice as
+                    // atomic (so that compositor surfaces don't get promoted within it)
+                    // and use that slice as the backing surface for the blend container
+                    if self.tile_cache_builder.is_current_slice_empty() &&
+                       self.spatial_tree.is_root_coord_system(spatial_node_index) &&
+                       !self.clip_tree_builder.clip_node_has_complex_clips(clip_node_id, &self.interners)
+                    {
+                        self.add_tile_cache_barrier_if_needed(SliceFlags::IS_ATOMIC);
+                        self.tile_cache_builder.make_current_slice_atomic();
+                    } else {
+                        // If the slice wasn't empty, we need to isolate a separate surface
+                        // to ensure that the content already in the slice is not used as
+                        // an input to the mix-blend composite
+                        blit_reason |= BlitReason::ISOLATE;
+                        is_redundant = false;
+                    }
+                }
+            }
+        }
+
+        // If stacking context is a scrollbar, force a new slice for the primitives
+        // within. The stacking context will be redundant and removed by above check.
+        let set_tile_cache_barrier = prim_flags.contains(PrimitiveFlags::IS_SCROLLBAR_CONTAINER);
+
+        if set_tile_cache_barrier {
+            self.add_tile_cache_barrier_if_needed(SliceFlags::IS_SCROLLBAR);
+        }
+
+        let mut sc_info = StackingContextInfo {
+            pop_stacking_context: false,
+            pop_containing_block: false,
+            set_tile_cache_barrier,
+        };
+
+        // If this is not participating in a 3d context, it becomes the containing block (ancestor root) for child 3d contexts.
+        if !participating_in_3d_context {
+            sc_info.pop_containing_block = true;
+            self.containing_block_stack.push(spatial_node_index);
+        }
+
+        // If not redundant, create a stacking context to hold primitive clusters
+        if !is_redundant {
+            sc_info.pop_stacking_context = true;
+
+            // Push the SC onto the stack, so we know how to handle things in
+            // pop_stacking_context.
+            self.sc_stack.push(FlattenedStackingContext {
+                prim_list: PrimitiveList::empty(),
+                prim_flags,
+                spatial_node_index,
+                clip_node_id,
+                composite_ops,
+                blit_reason,
+                transform_style,
+                context_3d,
+                flags,
+                raster_space: new_space,
+            });
+        }
+
+        sc_info
+    }
+
+    fn pop_stacking_context(
+        &mut self,
+        info: StackingContextInfo,
+    ) {
+        profile_scope!("pop_stacking_context");
+
+        self.clip_tree_builder.pop_clip();
+
+        // Pop off current raster space (pushed unconditionally in push_stacking_context)
+        self.raster_space_stack.pop().unwrap();
+
+        // If the stacking context formed a containing block, pop off the stack
+        if info.pop_containing_block {
+            self.containing_block_stack.pop().unwrap();
+        }
+
+        if info.set_tile_cache_barrier {
+            self.add_tile_cache_barrier_if_needed(SliceFlags::empty());
+        }
+
+        // If the stacking context was otherwise redundant, early exit
+        if !info.pop_stacking_context {
+            return;
+        }
+
+        let stacking_context = self.sc_stack.pop().unwrap();
+
+        let mut source = match stacking_context.context_3d {
+            // TODO(gw): For now, as soon as this picture is in
+            //           a 3D context, we draw it to an intermediate
+            //           surface and apply plane splitting. However,
+            //           there is a large optimization opportunity here.
+            //           During culling, we can check if there is actually
+            //           perspective present, and skip the plane splitting
+            //           completely when that is not the case.
+            Picture3DContext::In { ancestor_index, plane_splitter_index, .. } => {
+                let composite_mode = Some(
+                    PictureCompositeMode::Blit(BlitReason::PRESERVE3D | stacking_context.blit_reason)
+                );
+
+                // Add picture for this actual stacking context contents to render into.
+                let pic_index = PictureIndex(self.prim_store.pictures
+                    .alloc()
+                    .init(PicturePrimitive::new_image(
+                        composite_mode.clone(),
+                        Picture3DContext::In { root_data: None, ancestor_index, plane_splitter_index },
+                        stacking_context.prim_flags,
+                        stacking_context.prim_list,
+                        stacking_context.spatial_node_index,
+                        stacking_context.raster_space,
+                        PictureFlags::empty(),
+                    ))
+                );
+
+                let instance = create_prim_instance(
+                    pic_index,
+                    composite_mode.into(),
+                    stacking_context.raster_space,
+                    stacking_context.clip_node_id,
+                    &mut self.interners,
+                    &mut self.clip_tree_builder,
+                );
+
+                PictureChainBuilder::from_instance(
+                    instance,
+                    stacking_context.prim_flags,
+                    stacking_context.spatial_node_index,
+                    stacking_context.raster_space,
+                )
+            }
+            Picture3DContext::Out => {
+                if stacking_context.blit_reason.is_empty() {
+                    PictureChainBuilder::from_prim_list(
+                        stacking_context.prim_list,
+                        stacking_context.prim_flags,
+                        stacking_context.spatial_node_index,
+                        stacking_context.raster_space,
+                        false,
+                    )
+                } else {
+                    let composite_mode = Some(
+                        PictureCompositeMode::Blit(stacking_context.blit_reason)
+                    );
+
+                    // Add picture for this actual stacking context contents to render into.
+                    let pic_index = PictureIndex(self.prim_store.pictures
+                        .alloc()
+                        .init(PicturePrimitive::new_image(
+                            composite_mode.clone(),
+                            Picture3DContext::Out,
+                            stacking_context.prim_flags,
+                            stacking_context.prim_list,
+                            stacking_context.spatial_node_index,
+                            stacking_context.raster_space,
+                            PictureFlags::empty(),
+                        ))
+                    );
+
+                    let instance = create_prim_instance(
+                        pic_index,
+                        composite_mode.into(),
+                        stacking_context.raster_space,
+                        stacking_context.clip_node_id,
+                        &mut self.interners,
+                        &mut self.clip_tree_builder,
+                    );
+
+                    PictureChainBuilder::from_instance(
+                        instance,
+                        stacking_context.prim_flags,
+                        stacking_context.spatial_node_index,
+                        stacking_context.raster_space,
+                    )
+                }
+            }
+        };
+
+        // If establishing a 3d context, `source` represents
+        // a picture with all the *trailing* immediate children elements.
+        // We append this to the preserve-3D picture set and make a container picture of them.
+        if let Picture3DContext::In { root_data: Some(mut prims), ancestor_index, plane_splitter_index } = stacking_context.context_3d {
+            let instance = source.finalize(
+                ClipNodeId::NONE,
+                &mut self.interners,
+                &mut self.prim_store,
+                &mut self.clip_tree_builder,
+            );
+
+            prims.push(ExtendedPrimitiveInstance {
+                instance,
+                spatial_node_index: stacking_context.spatial_node_index,
+                flags: stacking_context.prim_flags,
+            });
+
+            let mut prim_list = PrimitiveList::empty();
+
+            // Web content often specifies `preserve-3d` on pages that don't actually need
+            // a 3d rendering context (as a hint / hack to convince other browsers to
+            // layerize these elements to an off-screen surface). Detect cases where the
+            // preserve-3d has no effect on correctness and convert them to pass-through
+            // pictures instead. This has two benefits for WR:
+            //
+            // (1) We get correct subpixel-snapping behavior between preserve-3d elements
+            //     that don't have complex transforms without additional complexity of
+            //     handling subpixel-snapping across different surfaces.
+            // (2) We can draw this content directly in to the parent surface / tile cache,
+            //     which is a performance win by avoiding allocating, drawing,
+            //     plane-splitting and blitting an off-screen surface.
+            let mut needs_3d_context = false;
+
+            for ext_prim in prims.drain(..) {
+                // If all the preserve-3d elements are in the root coordinate system, we
+                // know that there is no need for a true 3d rendering context / plane-split.
+                // TODO(gw): We can expand this in future to handle this in more cases
+                //           (e.g. a non-root coord system that is 2d within the 3d context).
+                if !self.spatial_tree.is_root_coord_system(ext_prim.spatial_node_index) {
+                    needs_3d_context = true;
+                }
+
+                prim_list.add_prim(
+                    ext_prim.instance,
+                    LayoutRect::zero(),
+                    ext_prim.spatial_node_index,
+                    ext_prim.flags,
+                    &mut self.prim_instances,
+                    &self.clip_tree_builder,
+                );
+            }
+
+            let context_3d = if needs_3d_context {
+                Picture3DContext::In {
+                    root_data: Some(Vec::new()),
+                    ancestor_index,
+                    plane_splitter_index,
+                }
+            } else {
+                // If we didn't need a 3d rendering context, walk the child pictures
+                // that make up this context and disable the off-screen surface and
+                // 3d render context.
+                for child_pic_index in &prim_list.child_pictures {
+                    let child_pic = &mut self.prim_store.pictures[child_pic_index.0];
+                    child_pic.composite_mode = None;
+                    child_pic.context_3d = Picture3DContext::Out;
+                }
+
+                Picture3DContext::Out
+            };
+
+            // This is the actual picture representing our 3D hierarchy root.
+            let pic_index = PictureIndex(self.prim_store.pictures
+                .alloc()
+                .init(PicturePrimitive::new_image(
+                    None,
+                    context_3d,
+                    stacking_context.prim_flags,
+                    prim_list,
+                    stacking_context.spatial_node_index,
+                    stacking_context.raster_space,
+                    PictureFlags::empty(),
+                ))
+            );
+
+            let instance = create_prim_instance(
+                pic_index,
+                PictureCompositeKey::Identity,
+                stacking_context.raster_space,
+                stacking_context.clip_node_id,
+                &mut self.interners,
+                &mut self.clip_tree_builder,
+            );
+
+            source = PictureChainBuilder::from_instance(
+                instance,
+                stacking_context.prim_flags,
+                stacking_context.spatial_node_index,
+                stacking_context.raster_space,
+            );
+        }
+
+        let has_filters = stacking_context.composite_ops.has_valid_filters();
+
+        source = self.wrap_prim_with_filters(
+            source,
+            stacking_context.clip_node_id,
+            stacking_context.composite_ops.filters,
+            stacking_context.composite_ops.filter_primitives,
+            stacking_context.composite_ops.filter_datas,
+            None,
+        );
+
+        // Same for mix-blend-mode. The blend could be skipped if this primitive is the first in the parent
+        // stacking context (see below). NOTE(review): no such skip is done in this function - confirm.
+        // From https://drafts.fxtf.org/compositing-1/#generalformula, the formula for blending is:
+        // Cs = (1 - ab) x Cs + ab x Blend(Cb, Cs)
+        // where
+        // Cs = Source color
+        // ab = Backdrop alpha
+        // Cb = Backdrop color
+        //
+        // If we're the first primitive within a stacking context, then we can guarantee that the
+        // backdrop alpha will be 0, and then the blend equation collapses to just
+        // Cs = Cs, and the blend mode isn't taken into account at all.
+        if let Some(mix_blend_mode) = stacking_context.composite_ops.mix_blend_mode {
+            let composite_mode = PictureCompositeMode::MixBlend(mix_blend_mode);
+
+            source = source.add_picture(
+                composite_mode,
+                stacking_context.clip_node_id,
+                Picture3DContext::Out,
+                &mut self.interners,
+                &mut self.prim_store,
+                &mut self.prim_instances,
+                &mut self.clip_tree_builder,
+            );
+        }
+
+        // Set the stacking context clip on the outermost picture in the chain,
+        // unless we already set it on the leaf picture.
+        let cur_instance = source.finalize(
+            stacking_context.clip_node_id,
+            &mut self.interners,
+            &mut self.prim_store,
+            &mut self.clip_tree_builder,
+        );
+
+        // The primitive instance for the remainder of flat children of this SC
+        // if it's a part of 3D hierarchy but not the root of it.
+        let trailing_children_instance = match self.sc_stack.last_mut() {
+            // Preserve3D path (only relevant if there are no filters/mix-blend modes)
+            Some(ref parent_sc) if !has_filters && parent_sc.is_3d() => {
+                Some(cur_instance)
+            }
+            // Regular parenting path
+            Some(ref mut parent_sc) => {
+                parent_sc.prim_list.add_prim(
+                    cur_instance,
+                    LayoutRect::zero(),
+                    stacking_context.spatial_node_index,
+                    stacking_context.prim_flags,
+                    &mut self.prim_instances,
+                    &self.clip_tree_builder,
+                );
+                None
+            }
+            // This must be the root stacking context
+            None => {
+                self.add_primitive_to_draw_list(
+                    cur_instance,
+                    LayoutRect::zero(),
+                    stacking_context.spatial_node_index,
+                    stacking_context.prim_flags,
+                );
+
+                None
+            }
+        };
+
+        // Finally, if there are any outstanding 3D primitive instances,
+        // find the 3D hierarchy root and add them there.
+        if let Some(instance) = trailing_children_instance {
+            self.add_primitive_instance_to_3d_root(ExtendedPrimitiveInstance {
+                instance,
+                spatial_node_index: stacking_context.spatial_node_index,
+                flags: stacking_context.prim_flags,
+            });
+        }
+
+        assert!(
+            self.pending_shadow_items.is_empty(),
+            "Found unpopped shadows when popping stacking context!"
+        );
+    }
+
+    /// Adds a reference frame to the spatial tree, maps `reference_frame_id` to
+    /// it and returns its index. Panics if the id-to-index mapper stack is empty.
+    pub fn push_reference_frame(
+        &mut self,
+        reference_frame_id: SpatialId,
+        parent_index: SpatialNodeIndex,
+        pipeline_id: PipelineId,
+        transform_style: TransformStyle,
+        source_transform: PropertyBinding<LayoutTransform>,
+        kind: ReferenceFrameKind,
+        origin_in_parent_reference_frame: LayoutVector2D,
+        uid: SpatialNodeUid,
+    ) -> SpatialNodeIndex {
+        let node_index = self.spatial_tree.add_reference_frame(
+            parent_index, transform_style, source_transform, kind,
+            origin_in_parent_reference_frame, pipeline_id, uid,
+        );
+
+        // Record the id -> index mapping on the top-most mapper.
+        let mapper = self.id_to_index_mapper_stack.last_mut().unwrap();
+        mapper.add_spatial_node(reference_frame_id, node_index);
+
+        node_index
+    }
+
+    /// Sets up the root spatial nodes of a pipeline: a flat, identity root reference
+    /// frame and, parented to it, a root scroll frame covering the snapped viewport.
+    fn push_root(
+        &mut self,
+        pipeline_id: PipelineId,
+        viewport_size: &LayoutSize,
+        instance: PipelineInstanceId,
+    ) {
+        let tree_root_index = self.spatial_tree.root_reference_frame_index();
+        let root_frame_kind = ReferenceFrameKind::Transform {
+            is_2d_scale_translation: true,
+            should_snap: true,
+            paired_with_perspective: false,
+        };
+        let spatial_node_index = self.push_reference_frame(
+            SpatialId::root_reference_frame(pipeline_id),
+            tree_root_index,
+            pipeline_id,
+            TransformStyle::Flat,
+            PropertyBinding::Value(LayoutTransform::identity()),
+            root_frame_kind,
+            LayoutVector2D::zero(),
+            SpatialNodeUid::root_reference_frame(pipeline_id, instance),
+        );
+
+        // Snap the viewport rect in the space of the new root reference frame.
+        let viewport_rect = self.snap_rect(&LayoutRect::from_size(*viewport_size), spatial_node_index);
+
+        self.add_scroll_frame(
+            SpatialId::root_scroll_node(pipeline_id),
+            spatial_node_index,
+            ExternalScrollId(0, pipeline_id),
+            pipeline_id,
+            &viewport_rect,
+            &viewport_rect.size(),
+            ScrollFrameKind::PipelineRoot { is_root_pipeline: true },
+            LayoutVector2D::zero(),
+            APZScrollGeneration::default(),
+            HasScrollLinkedEffect::No,
+            SpatialNodeUid::root_scroll_frame(pipeline_id, instance),
+        );
+    }
+
+    /// Defines an image mask clip node for `new_node_id`. The mask rect is
+    /// snapped in the space of `spatial_id`; if `points_range` is non-empty, a
+    /// polygon built from the points and `fill_rule` is interned with the mask.
+    fn add_image_mask_clip_node(
+        &mut self,
+        new_node_id: ClipId,
+        spatial_id: SpatialId,
+        image_mask: &ImageMask,
+        fill_rule: FillRule,
+        points_range: ItemRange<LayoutPoint>,
+    ) {
+        let spatial_node_index = self.get_space(spatial_id);
+
+        let snapped_mask_rect = self.snap_rect(
+            &image_mask.rect,
+            spatial_node_index,
+        );
+        let points: Vec<LayoutPoint> = points_range.iter().collect();
+
+        // If any points are provided, then intern a polygon with the points and fill rule.
+        let polygon_handle: Option<PolygonDataHandle> = if points.is_empty() {
+            None
+        } else {
+            let item = PolygonKey::new(&points, fill_rule);
+            Some(self.interners.polygon.intern(&item, || item))
+        };
+
+        let item = ClipItemKey {
+            kind: ClipItemKeyKind::image_mask(image_mask, snapped_mask_rect, polygon_handle),
+            spatial_node_index,
+        };
+
+        // Intern the clip key and hand the resulting handle to the clip tree builder.
+        let handle = self
+            .interners
+            .clip
+            .intern(&item, || {
+                ClipInternData {
+                    key: item,
+                }
+            });
+
+        self.clip_tree_builder.define_image_mask_clip(
+            new_node_id,
+            handle,
+        );
+    }
+
+    /// Add a new rectangle clip for `new_node_id`, positioned by the spatial
+    /// node that `spatial_id` resolves to. The rect is snapped before the
+    /// clip key is interned.
+    fn add_rect_clip_node(
+        &mut self,
+        new_node_id: ClipId,
+        spatial_id: SpatialId,
+        clip_rect: &LayoutRect,
+    ) {
+        let spatial_node_index = self.get_space(spatial_id);
+
+        let snapped_clip_rect = self.snap_rect(clip_rect, spatial_node_index);
+
+        // Key for a rectangle clip using `ClipMode::Clip`.
+        let clip_key = ClipItemKey {
+            kind: ClipItemKeyKind::rectangle(snapped_clip_rect, ClipMode::Clip),
+            spatial_node_index,
+        };
+
+        // Intern the key and hand the resulting handle to the clip tree builder.
+        let clip_handle = self.interners.clip.intern(
+            &clip_key,
+            || ClipInternData {
+                key: clip_key,
+            },
+        );
+
+        self.clip_tree_builder.define_rect_clip(
+            new_node_id,
+            clip_handle,
+        );
+    }
+
+    /// Add a new rounded-rect clip from `clip`, positioned by the spatial node
+    /// that `spatial_id` resolves to. Only the rect is snapped; the radii and
+    /// mode are taken from `clip` unchanged.
+    fn add_rounded_rect_clip_node(
+        &mut self,
+        new_node_id: ClipId,
+        spatial_id: SpatialId,
+        clip: &ComplexClipRegion,
+    ) {
+        let spatial_node_index = self.get_space(spatial_id);
+        let snapped_region_rect = self.snap_rect(&clip.rect, spatial_node_index);
+
+        let kind = ClipItemKeyKind::rounded_rect(
+            snapped_region_rect,
+            clip.radii,
+            clip.mode,
+        );
+        let clip_key = ClipItemKey {
+            kind,
+            spatial_node_index,
+        };
+
+        // Intern the key and hand the resulting handle to the clip tree builder.
+        let clip_handle = self.interners.clip.intern(
+            &clip_key,
+            || ClipInternData {
+                key: clip_key,
+            },
+        );
+
+        self.clip_tree_builder.define_rounded_rect_clip(
+            new_node_id,
+            clip_handle,
+        );
+    }
+
+    /// Add a scroll frame to the spatial tree and register `new_node_id`
+    /// against the resulting node index in the innermost id-to-index mapper.
+    ///
+    /// Returns the index of the newly created spatial node. Panics if the
+    /// mapper stack is empty.
+    pub fn add_scroll_frame(
+        &mut self,
+        new_node_id: SpatialId,
+        parent_node_index: SpatialNodeIndex,
+        external_id: ExternalScrollId,
+        pipeline_id: PipelineId,
+        frame_rect: &LayoutRect,
+        content_size: &LayoutSize,
+        frame_kind: ScrollFrameKind,
+        external_scroll_offset: LayoutVector2D,
+        scroll_offset_generation: APZScrollGeneration,
+        has_scroll_linked_effect: HasScrollLinkedEffect,
+        uid: SpatialNodeUid,
+    ) -> SpatialNodeIndex {
+        let scroll_node_index = self.spatial_tree.add_scroll_frame(
+            parent_node_index,
+            external_id,
+            pipeline_id,
+            frame_rect,
+            content_size,
+            frame_kind,
+            external_scroll_offset,
+            scroll_offset_generation,
+            has_scroll_linked_effect,
+            uid,
+        );
+
+        // Record the id -> index association in the mapper at the top of the stack.
+        let mapper = self.id_to_index_mapper_stack.last_mut().unwrap();
+        mapper.add_spatial_node(new_node_id, scroll_node_index);
+
+        scroll_node_index
+    }
+
+    /// Begin a shadow context: push `clip_chain_id` on the clip tree builder
+    /// and queue the shadow on `pending_shadow_items`.
+    pub fn push_shadow(
+        &mut self,
+        shadow: Shadow,
+        spatial_node_index: SpatialNodeIndex,
+        clip_chain_id: api::ClipChainId,
+        should_inflate: bool,
+    ) {
+        self.clip_tree_builder.push_clip_chain(Some(clip_chain_id), false);
+
+        // The shadow is only recorded here; the pending list is processed
+        // later, during pop_all_shadows.
+        let pending = PendingShadow {
+            shadow,
+            spatial_node_index,
+            should_inflate,
+        };
+        self.pending_shadow_items.push_back(ShadowItem::Shadow(pending));
+    }
+
+    /// Drain `pending_shadow_items`, turning every queued shadow and primitive
+    /// into primitives on the draw list.
+    ///
+    /// Each queued shadow produces shadow versions of all the primitives that
+    /// are still in the queue behind it; each queued primitive is then added
+    /// as a normal primitive when it is visible.
+    ///
+    /// Panics if there are no pending shadow items.
+    pub fn pop_all_shadows(
+        &mut self,
+    ) {
+        assert!(!self.pending_shadow_items.is_empty(), "popped shadows, but none were present");
+
+        // Take ownership of the queue so that `self` can be borrowed mutably
+        // while the items are being processed.
+        let mut items = mem::replace(&mut self.pending_shadow_items, VecDeque::new());
+
+        //
+        // The pending_shadow_items queue contains a list of shadows and primitives
+        // that were pushed during the active shadow context. To process these, we:
+        //
+        // Iterate the list, popping an item from the front each iteration.
+        //
+        // If the item is a shadow:
+        //      - Create a shadow picture primitive.
+        //      - Add *any* primitives that remain in the item list to this shadow.
+        // If the item is a primitive:
+        //      - Add that primitive as a normal item (if alpha > 0)
+        //
+
+        while let Some(item) = items.pop_front() {
+            match item {
+                ShadowItem::Shadow(pending_shadow) => {
+                    // Quote from https://drafts.csswg.org/css-backgrounds-3/#shadow-blur
+                    // "the image that would be generated by applying to the shadow a
+                    // Gaussian blur with a standard deviation equal to half the blur radius."
+                    let std_deviation = pending_shadow.shadow.blur_radius * 0.5;
+
+                    // Add any primitives that come after this shadow in the item
+                    // list to this shadow.
+                    let mut prim_list = PrimitiveList::empty();
+                    let blur_filter = Filter::Blur {
+                        width: std_deviation,
+                        height: std_deviation,
+                        should_inflate: pending_shadow.should_inflate,
+                    };
+                    let blur_is_noop = blur_filter.is_noop();
+
+                    // The remaining items are only borrowed here: they stay in the
+                    // queue so that later iterations of the outer loop still see them.
+                    for item in &items {
+                        let (instance, info, spatial_node_index) = match item {
+                            ShadowItem::Image(ref pending_image) => {
+                                self.create_shadow_prim(
+                                    &pending_shadow,
+                                    pending_image,
+                                    blur_is_noop,
+                                )
+                            }
+                            ShadowItem::LineDecoration(ref pending_line_dec) => {
+                                self.create_shadow_prim(
+                                    &pending_shadow,
+                                    pending_line_dec,
+                                    blur_is_noop,
+                                )
+                            }
+                            ShadowItem::NormalBorder(ref pending_border) => {
+                                self.create_shadow_prim(
+                                    &pending_shadow,
+                                    pending_border,
+                                    blur_is_noop,
+                                )
+                            }
+                            ShadowItem::Primitive(ref pending_primitive) => {
+                                self.create_shadow_prim(
+                                    &pending_shadow,
+                                    pending_primitive,
+                                    blur_is_noop,
+                                )
+                            }
+                            ShadowItem::TextRun(ref pending_text_run) => {
+                                self.create_shadow_prim(
+                                    &pending_shadow,
+                                    pending_text_run,
+                                    blur_is_noop,
+                                )
+                            }
+                            // The only remaining variant is another shadow, which
+                            // does not itself get a shadow primitive.
+                            _ => {
+                                continue;
+                            }
+                        };
+
+                        // With a no-op blur the shadow primitive goes straight on to
+                        // the draw list; otherwise it is collected for the blurred
+                        // shadow picture created below.
+                        if blur_is_noop {
+                            self.add_primitive_to_draw_list(
+                                instance,
+                                info.rect,
+                                spatial_node_index,
+                                info.flags,
+                            );
+                        } else {
+                            prim_list.add_prim(
+                                instance,
+                                info.rect,
+                                spatial_node_index,
+                                info.flags,
+                                &mut self.prim_instances,
+                                &self.clip_tree_builder,
+                            );
+                        }
+                    }
+
+                    // No point in adding a shadow here if there were no primitives
+                    // added to the shadow.
+                    if !prim_list.is_empty() {
+                        // Create a picture that the shadow primitives will be added to.
+                        // A no-op blur never reaches this point: in that case the shadow
+                        // primitives were added directly to the draw list above, leaving
+                        // `prim_list` empty, which is what the assert below checks.
+                        assert!(!blur_filter.is_noop());
+                        let composite_mode = Some(PictureCompositeMode::Filter(blur_filter));
+                        let composite_mode_key = composite_mode.clone().into();
+                        let raster_space = RasterSpace::Screen;
+
+                        // Create the primitive to draw the shadow picture into the scene.
+                        let shadow_pic_index = PictureIndex(self.prim_store.pictures
+                            .alloc()
+                            .init(PicturePrimitive::new_image(
+                                composite_mode,
+                                Picture3DContext::Out,
+                                PrimitiveFlags::IS_BACKFACE_VISIBLE,
+                                prim_list,
+                                pending_shadow.spatial_node_index,
+                                raster_space,
+                                PictureFlags::empty(),
+                            ))
+                        );
+
+                        let shadow_pic_key = PictureKey::new(
+                            Picture { composite_mode_key, raster_space },
+                        );
+
+                        let shadow_prim_data_handle = self.interners
+                            .picture
+                            .intern(&shadow_pic_key, || ());
+
+                        let clip_node_id = self.clip_tree_builder.build_clip_set(api::ClipChainId::INVALID);
+
+                        let shadow_prim_instance = PrimitiveInstance::new(
+                            PrimitiveInstanceKind::Picture {
+                                data_handle: shadow_prim_data_handle,
+                                pic_index: shadow_pic_index,
+                                segment_instance_index: SegmentInstanceIndex::INVALID,
+                            },
+                            self.clip_tree_builder.build_for_picture(clip_node_id),
+                        );
+
+                        // Add the shadow primitive. This must be done before pushing this
+                        // picture on to the shadow stack, to avoid infinite recursion!
+                        self.add_primitive_to_draw_list(
+                            shadow_prim_instance,
+                            LayoutRect::zero(),
+                            pending_shadow.spatial_node_index,
+                            PrimitiveFlags::IS_BACKFACE_VISIBLE,
+                        );
+                    }
+
+                    // NOTE(review): presumably balances the `push_clip_chain` call
+                    // made in `push_shadow` for this shadow - confirm.
+                    self.clip_tree_builder.pop_clip();
+                }
+                ShadowItem::Image(pending_image) => {
+                    self.add_shadow_prim_to_draw_list(
+                        pending_image,
+                    )
+                },
+                ShadowItem::LineDecoration(pending_line_dec) => {
+                    self.add_shadow_prim_to_draw_list(
+                        pending_line_dec,
+                    )
+                },
+                ShadowItem::NormalBorder(pending_border) => {
+                    self.add_shadow_prim_to_draw_list(
+                        pending_border,
+                    )
+                },
+                ShadowItem::Primitive(pending_primitive) => {
+                    self.add_shadow_prim_to_draw_list(
+                        pending_primitive,
+                    )
+                },
+                ShadowItem::TextRun(pending_text_run) => {
+                    self.add_shadow_prim_to_draw_list(
+                        pending_text_run,
+                    )
+                },
+            }
+        }
+
+        // Put the (now drained) queue back in place of the temporary empty one.
+        debug_assert!(items.is_empty());
+        self.pending_shadow_items = items;
+    }
+
+    /// Build the shadow version of `pending_primitive` for `pending_shadow`.
+    ///
+    /// Returns the created primitive instance together with the translated
+    /// primitive info and the spatial node index of the source primitive.
+    /// Panics if the raster space stack is empty.
+    fn create_shadow_prim<P>(
+        &mut self,
+        pending_shadow: &PendingShadow,
+        pending_primitive: &PendingPrimitive<P>,
+        blur_is_noop: bool,
+    ) -> (PrimitiveInstance, LayoutPrimitiveInfo, SpatialNodeIndex)
+    where
+        P: InternablePrimitive + CreateShadow,
+        Interners: AsMut<Interner<P>>,
+    {
+        // Both the local rect and the clip rect move by the shadow offset. The
+        // pending primitive was already snapped, but the shadow still has to be
+        // snapped after translation. The size is not a concern: the shadow
+        // shares the raster space of the primitive, so it is already rounded.
+        let offset = pending_shadow.shadow.offset;
+        let mut shadow_info = pending_primitive.info.clone();
+        shadow_info.rect = shadow_info.rect.translate(offset);
+        shadow_info.clip_rect = shadow_info.clip_rect.translate(offset);
+
+        let clip_set = self.clip_tree_builder.build_for_prim(
+            pending_primitive.clip_node_id,
+            &shadow_info,
+            &[],
+            &mut self.interners,
+        );
+
+        // Derive the shadow primitive from the source primitive, using the
+        // raster space currently on top of the stack.
+        let raster_space = self.raster_space_stack.last().cloned().unwrap();
+        let shadow_prim = pending_primitive.prim.create_shadow(
+            &pending_shadow.shadow,
+            blur_is_noop,
+            raster_space,
+        );
+
+        let node_index = pending_primitive.spatial_node_index;
+        let instance = self.create_primitive(&shadow_info, node_index, clip_set, shadow_prim);
+
+        (instance, shadow_info, node_index)
+    }
+
+    /// Add a primitive that was queued inside a shadow context to the draw
+    /// list as a normal primitive. Primitives reporting themselves as not
+    /// visible are dropped.
+    fn add_shadow_prim_to_draw_list<P>(
+        &mut self,
+        pending_primitive: PendingPrimitive<P>,
+    ) where
+        P: InternablePrimitive + IsVisible,
+        Interners: AsMut<Interner<P>>,
+    {
+        // Only primitives with alpha > 0 are worth adding to the parent picture.
+        if !pending_primitive.prim.is_visible() {
+            return;
+        }
+
+        let clip_set = self.clip_tree_builder.build_for_prim(
+            pending_primitive.clip_node_id,
+            &pending_primitive.info,
+            &[],
+            &mut self.interners,
+        );
+
+        self.add_prim_to_draw_list(
+            &pending_primitive.info,
+            pending_primitive.spatial_node_index,
+            clip_set,
+            pending_primitive.prim,
+        );
+    }
+
+    /// Add a `PrimitiveKeyKind::Clear` primitive, requesting a tile cache
+    /// barrier both before and after it.
+    pub fn add_clear_rectangle(
+        &mut self,
+        spatial_node_index: SpatialNodeIndex,
+        clip_node_id: ClipNodeId,
+        info: &LayoutPrimitiveInfo,
+    ) {
+        // To be composited correctly, a clear prim has to live in a picture
+        // cache slice of its own - hence the barrier on either side of it.
+        self.add_tile_cache_barrier_if_needed(SliceFlags::empty());
+        self.add_primitive(
+            spatial_node_index,
+            clip_node_id,
+            info,
+            Vec::new(),
+            PrimitiveKeyKind::Clear,
+        );
+        self.add_tile_cache_barrier_if_needed(SliceFlags::empty());
+    }
+
+    /// Add a line decoration primitive of the given orientation, style and
+    /// color, covering `info.rect`.
+    pub fn add_line(
+        &mut self,
+        spatial_node_index: SpatialNodeIndex,
+        clip_node_id: ClipNodeId,
+        info: &LayoutPrimitiveInfo,
+        wavy_line_thickness: f32,
+        orientation: LineOrientation,
+        color: ColorF,
+        style: LineStyle,
+    ) {
+        // The render task cache key of a line decoration depends on neither
+        // the device pixel ratio nor the transform, so it can be constructed
+        // right here during scene building. There is no key when no
+        // decoration size is returned.
+        let cache_key = get_line_decoration_size(
+            &info.rect.size(),
+            orientation,
+            style,
+            wavy_line_thickness,
+        )
+        .map(|decoration_size| LineDecorationCacheKey {
+            style,
+            orientation,
+            wavy_line_thickness: Au::from_f32_px(wavy_line_thickness),
+            size: decoration_size.to_au(),
+        });
+
+        let line_decoration = LineDecoration {
+            cache_key,
+            color: color.into(),
+        };
+
+        self.add_primitive(
+            spatial_node_index,
+            clip_node_id,
+            info,
+            Vec::new(),
+            line_decoration,
+        );
+    }
+
+    /// Add the primitive(s) for a border display item.
+    ///
+    /// Nine-patch borders become a single non-shadowable primitive whose kind
+    /// depends on the border source (image, linear, radial or conic gradient).
+    /// Normal borders are forwarded to `add_normal_border`.
+    pub fn add_border(
+        &mut self,
+        spatial_node_index: SpatialNodeIndex,
+        clip_node_id: ClipNodeId,
+        info: &LayoutPrimitiveInfo,
+        border_item: &BorderDisplayItem,
+        gradient_stops: ItemRange<GradientStop>,
+    ) {
+        match &border_item.details {
+            BorderDetails::NinePatch(border) => {
+                let nine_patch = NinePatchDescriptor {
+                    width: border.width,
+                    height: border.height,
+                    slice: border.slice,
+                    fill: border.fill,
+                    repeat_horizontal: border.repeat_horizontal,
+                    repeat_vertical: border.repeat_vertical,
+                    outset: border.outset.into(),
+                    widths: border_item.widths.into(),
+                };
+
+                // Stretch size shared by all of the gradient sources below.
+                let stretch_size = LayoutSize::new(border.height as f32, border.width as f32);
+
+                match border.source {
+                    NinePatchBorderSource::Image(key, rendering) => {
+                        let request = ImageRequest {
+                            key,
+                            rendering,
+                            tile: None,
+                        };
+
+                        self.add_nonshadowable_primitive(
+                            spatial_node_index,
+                            clip_node_id,
+                            info,
+                            Vec::new(),
+                            ImageBorder { request, nine_patch },
+                        );
+                    }
+                    NinePatchBorderSource::Gradient(gradient) => {
+                        let linear = self.create_linear_gradient_prim(
+                            info,
+                            gradient.start_point,
+                            gradient.end_point,
+                            read_gradient_stops(gradient_stops),
+                            gradient.extend_mode,
+                            stretch_size,
+                            LayoutSize::zero(),
+                            Some(Box::new(nine_patch)),
+                            EdgeAaSegmentMask::all(),
+                        );
+
+                        // No primitive is created for the gradient in some
+                        // cases; there is then nothing to add.
+                        if let Some(prim) = linear {
+                            self.add_nonshadowable_primitive(
+                                spatial_node_index,
+                                clip_node_id,
+                                info,
+                                Vec::new(),
+                                prim,
+                            );
+                        }
+                    }
+                    NinePatchBorderSource::RadialGradient(gradient) => {
+                        let radius = gradient.radius;
+                        let prim = self.create_radial_gradient_prim(
+                            info,
+                            gradient.center,
+                            gradient.start_offset * radius.width,
+                            gradient.end_offset * radius.width,
+                            radius.width / radius.height,
+                            read_gradient_stops(gradient_stops),
+                            gradient.extend_mode,
+                            stretch_size,
+                            LayoutSize::zero(),
+                            Some(Box::new(nine_patch)),
+                        );
+
+                        self.add_nonshadowable_primitive(
+                            spatial_node_index,
+                            clip_node_id,
+                            info,
+                            Vec::new(),
+                            prim,
+                        );
+                    }
+                    NinePatchBorderSource::ConicGradient(gradient) => {
+                        let prim = self.create_conic_gradient_prim(
+                            info,
+                            gradient.center,
+                            gradient.angle,
+                            gradient.start_offset,
+                            gradient.end_offset,
+                            gradient_stops,
+                            gradient.extend_mode,
+                            stretch_size,
+                            LayoutSize::zero(),
+                            Some(Box::new(nine_patch)),
+                        );
+
+                        self.add_nonshadowable_primitive(
+                            spatial_node_index,
+                            clip_node_id,
+                            info,
+                            Vec::new(),
+                            prim,
+                        );
+                    }
+                }
+            }
+            BorderDetails::Normal(border) => {
+                self.add_normal_border(
+                    info,
+                    border,
+                    border_item.widths,
+                    spatial_node_index,
+                    clip_node_id,
+                );
+            }
+        }
+    }
+
+    /// Build a `LinearGradient` primitive description.
+    ///
+    /// Returns `None` when none of the stops has any alpha, since such a
+    /// gradient can't contribute to the scene.
+    pub fn create_linear_gradient_prim(
+        &mut self,
+        info: &LayoutPrimitiveInfo,
+        start_point: LayoutPoint,
+        end_point: LayoutPoint,
+        stops: Vec<GradientStopKey>,
+        extend_mode: ExtendMode,
+        stretch_size: LayoutSize,
+        mut tile_spacing: LayoutSize,
+        nine_patch: Option<Box<NinePatchDescriptor>>,
+        edge_aa_mask: EdgeAaSegmentMask,
+    ) -> Option<LinearGradient> {
+        let mut prim_rect = info.rect;
+        simplify_repeated_primitive(&stretch_size, &mut tile_spacing, &mut prim_rect);
+
+        // A gradient made only of fully transparent stops draws nothing.
+        if !stops.iter().any(|stop| stop.color.a > 0) {
+            return None;
+        }
+
+        // Two consecutive stops sharing the same offset form a hard stop.
+        let has_hard_stops = stops
+            .windows(2)
+            .any(|pair| pair[0].offset == pair[1].offset);
+
+        // A gradient specified in reverse, with its stops also supplied in
+        // reverse, should render the same as the forward version. That needs a
+        // reference orientation for the gradient line; the (somewhat arbitrary)
+        // choice is start < end. Aligned gradient rendering produces the same
+        // result in either orientation, so that case needs no special care.
+        let reverse_stops = if start_point.x == end_point.x {
+            start_point.y > end_point.y
+        } else {
+            start_point.x > end_point.x
+        };
+
+        // Swapping the end points of the gradient line is needed in some cases
+        // for reftests with reversed start/end points to match exactly.
+        let (line_start, line_end) = match reverse_stops {
+            true => (end_point, start_point),
+            false => (start_point, end_point),
+        };
+
+        // Cached gradients are rendered at a limited resolution. That is fine
+        // for most gradients, but produces noticeable artifacts with hard
+        // stops, in which case we fall back to non-cached gradients.
+        let size_limit = gradient::LINEAR_MAX_CACHED_SIZE;
+        let exceeds_limit = stretch_size.width > size_limit || stretch_size.height > size_limit;
+        let caching_causes_artifacts = has_hard_stops && exceeds_limit;
+
+        let is_tiled = prim_rect.width() > stretch_size.width
+            || prim_rect.height() > stretch_size.height;
+
+        // SWGL has a fast-path that renders gradients faster than it can sample
+        // from the texture cache, so caching is disabled in that configuration
+        // (unless tiled). On hardware, cached gradients are faster.
+        let cached = (!self.config.is_software || is_tiled) && !caching_causes_artifacts;
+
+        Some(LinearGradient {
+            extend_mode,
+            start_point: line_start.into(),
+            end_point: line_end.into(),
+            stretch_size: stretch_size.into(),
+            tile_spacing: tile_spacing.into(),
+            stops,
+            reverse_stops,
+            nine_patch,
+            cached,
+            edge_aa_mask,
+        })
+    }
+
+    /// Build a `RadialGradient` primitive description from the given center,
+    /// radii, aspect ratio and stops.
+    pub fn create_radial_gradient_prim(
+        &mut self,
+        info: &LayoutPrimitiveInfo,
+        center: LayoutPoint,
+        start_radius: f32,
+        end_radius: f32,
+        ratio_xy: f32,
+        stops: Vec<GradientStopKey>,
+        extend_mode: ExtendMode,
+        stretch_size: LayoutSize,
+        mut tile_spacing: LayoutSize,
+        nine_patch: Option<Box<NinePatchDescriptor>>,
+    ) -> RadialGradient {
+        // Only the (possibly adjusted) tile spacing is used afterwards; the
+        // rect is a scratch copy.
+        let mut scratch_rect = info.rect;
+        simplify_repeated_primitive(&stretch_size, &mut tile_spacing, &mut scratch_rect);
+
+        RadialGradient {
+            extend_mode,
+            center: center.into(),
+            params: RadialGradientParams {
+                start_radius,
+                end_radius,
+                ratio_xy,
+            },
+            stretch_size: stretch_size.into(),
+            tile_spacing: tile_spacing.into(),
+            nine_patch,
+            stops,
+        }
+    }
+
+    /// Build a `ConicGradient` primitive description, converting the display
+    /// list gradient stops into `GradientStopKey`s.
+    pub fn create_conic_gradient_prim(
+        &mut self,
+        info: &LayoutPrimitiveInfo,
+        center: LayoutPoint,
+        angle: f32,
+        start_offset: f32,
+        end_offset: f32,
+        stops: ItemRange<GradientStop>,
+        extend_mode: ExtendMode,
+        stretch_size: LayoutSize,
+        mut tile_spacing: LayoutSize,
+        nine_patch: Option<Box<NinePatchDescriptor>>,
+    ) -> ConicGradient {
+        // Only the (possibly adjusted) tile spacing is used afterwards; the
+        // rect is a scratch copy.
+        let mut scratch_rect = info.rect;
+        simplify_repeated_primitive(&stretch_size, &mut tile_spacing, &mut scratch_rect);
+
+        let stop_keys = stops
+            .iter()
+            .map(|stop| GradientStopKey {
+                offset: stop.offset,
+                color: stop.color.into(),
+            })
+            .collect();
+
+        ConicGradient {
+            extend_mode,
+            center: center.into(),
+            params: ConicGradientParams {
+                angle,
+                start_offset,
+                end_offset,
+            },
+            stretch_size: stretch_size.into(),
+            tile_spacing: tile_spacing.into(),
+            nine_patch,
+            stops: stop_keys,
+        }
+    }
+
+    /// Add a text run primitive for the glyphs in `glyph_range`.
+    ///
+    /// Nothing is added when the font instance key is unknown (a warning is
+    /// logged) or when the font size is not greater than zero. Panics if the
+    /// raster space stack is empty.
+    pub fn add_text(
+        &mut self,
+        spatial_node_index: SpatialNodeIndex,
+        clip_node_id: ClipNodeId,
+        prim_info: &LayoutPrimitiveInfo,
+        font_instance_key: &FontInstanceKey,
+        text_color: &ColorF,
+        glyph_range: ItemRange<GlyphInstance>,
+        glyph_options: Option<GlyphOptions>,
+    ) {
+        let offset = self.current_offset(spatial_node_index);
+
+        let text_run = {
+            let shared_key = self.fonts.instance_keys.map_key(font_instance_key);
+            let font_instance = if let Some(instance) = self.fonts.instances.get_font_instance(shared_key) {
+                instance
+            } else {
+                warn!("Unknown font instance key");
+                debug!("key={:?} shared={:?}", font_instance_key, shared_key);
+                return;
+            };
+
+            // Trivial early out: nothing to draw for a non-positive font size.
+            if font_instance.size <= FontSize::zero() {
+                return;
+            }
+
+            // TODO(gw): Use a proper algorithm to select
+            // whether this item should be rendered with
+            // subpixel AA!
+            let mut flags = font_instance.flags;
+            let mut render_mode = self.config
+                .default_font_render_mode
+                .limit_by(font_instance.render_mode);
+            if let Some(options) = glyph_options {
+                flags |= options.flags;
+                render_mode = render_mode.limit_by(options.render_mode);
+            }
+
+            let font = FontInstance::new(
+                font_instance,
+                (*text_color).into(),
+                render_mode,
+                flags,
+            );
+
+            // Each glyph position is shifted by the primitive origin minus
+            // the current offset.
+            //
+            // TODO(gw): It'd be nice not to have to allocate here for creating
+            //           the primitive key, when the common case is that the
+            //           hash will match and we won't end up creating a new
+            //           primitive template.
+            let prim_offset = prim_info.rect.min.to_vector() - offset;
+            let glyphs = glyph_range
+                .iter()
+                .map(|glyph| GlyphInstance {
+                    index: glyph.index,
+                    point: glyph.point - prim_offset,
+                })
+                .collect();
+
+            TextRun {
+                glyphs: Arc::new(glyphs),
+                font,
+                shadow: false,
+                // The currently requested raster space; the stack is
+                // maintained by push/pop stacking context.
+                requested_raster_space: self.raster_space_stack
+                    .last()
+                    .cloned()
+                    .unwrap(),
+            }
+        };
+
+        self.add_primitive(
+            spatial_node_index,
+            clip_node_id,
+            prim_info,
+            Vec::new(),
+            text_run,
+        );
+    }
+
+    /// Add an image primitive. The primitive rect and tile spacing are first
+    /// passed through `simplify_repeated_primitive`, and the primitive is
+    /// added with the resulting rect.
+    pub fn add_image(
+        &mut self,
+        spatial_node_index: SpatialNodeIndex,
+        clip_node_id: ClipNodeId,
+        info: &LayoutPrimitiveInfo,
+        stretch_size: LayoutSize,
+        mut tile_spacing: LayoutSize,
+        image_key: ImageKey,
+        image_rendering: ImageRendering,
+        alpha_type: AlphaType,
+        color: ColorF,
+    ) {
+        let mut adjusted_rect = info.rect;
+        simplify_repeated_primitive(&stretch_size, &mut tile_spacing, &mut adjusted_rect);
+
+        // Same primitive info as the caller's, except for the adjusted rect.
+        let adjusted_info = LayoutPrimitiveInfo {
+            rect: adjusted_rect,
+            ..*info
+        };
+
+        let image = Image {
+            key: image_key,
+            tile_spacing: tile_spacing.into(),
+            stretch_size: stretch_size.into(),
+            color: color.into(),
+            image_rendering,
+            alpha_type,
+        };
+
+        self.add_primitive(
+            spatial_node_index,
+            clip_node_id,
+            &adjusted_info,
+            Vec::new(),
+            image,
+        );
+    }
+
+    /// Add a (non-shadowable) YUV image primitive. The image keys of the
+    /// planes are stored in a fixed three-entry array, with unused slots
+    /// filled with `ImageKey::DUMMY`.
+    pub fn add_yuv_image(
+        &mut self,
+        spatial_node_index: SpatialNodeIndex,
+        clip_node_id: ClipNodeId,
+        info: &LayoutPrimitiveInfo,
+        yuv_data: YuvData,
+        color_depth: ColorDepth,
+        color_space: YuvColorSpace,
+        color_range: ColorRange,
+        image_rendering: ImageRendering,
+    ) {
+        let format = yuv_data.get_format();
+        let yuv_key = match yuv_data {
+            // One plane.
+            YuvData::InterleavedYCbCr(plane_0) => {
+                [plane_0, ImageKey::DUMMY, ImageKey::DUMMY]
+            }
+            // Two planes.
+            YuvData::NV12(plane_0, plane_1) | YuvData::P010(plane_0, plane_1) => {
+                [plane_0, plane_1, ImageKey::DUMMY]
+            }
+            // Three planes.
+            YuvData::PlanarYCbCr(plane_0, plane_1, plane_2) => {
+                [plane_0, plane_1, plane_2]
+            }
+        };
+
+        let yuv_image = YuvImage {
+            color_depth,
+            yuv_key,
+            format,
+            color_space,
+            color_range,
+            image_rendering,
+        };
+
+        self.add_nonshadowable_primitive(
+            spatial_node_index,
+            clip_node_id,
+            info,
+            Vec::new(),
+            yuv_image,
+        );
+    }
+
+    /// Append `prim` to the children of the nearest enclosing 3D root.
+    ///
+    /// The stacking context stack is searched from the top down for a context
+    /// that carries root data. Panics if a context outside of any 3D context
+    /// is reached first; if the stack is exhausted without finding a root,
+    /// the primitive is dropped.
+    fn add_primitive_instance_to_3d_root(
+        &mut self,
+        prim: ExtendedPrimitiveInstance,
+    ) {
+        let root_children = self.sc_stack
+            .iter_mut()
+            .rev()
+            .find_map(|sc| match sc.context_3d {
+                Picture3DContext::In { root_data: Some(ref mut prims), .. } => Some(prims),
+                // Inside a 3D context, but not its root: keep looking.
+                Picture3DContext::In { .. } => None,
+                Picture3DContext::Out => panic!("Unable to find 3D root"),
+            });
+
+        if let Some(children) = root_children {
+            children.push(prim);
+        }
+    }
+
+    #[allow(dead_code)]
+    pub fn add_backdrop_filter(
+        &mut self,
+        spatial_node_index: SpatialNodeIndex,
+        clip_node_id: ClipNodeId,
+        info: &LayoutPrimitiveInfo,
+        filters: Vec<Filter>,
+        filter_datas: Vec<FilterData>,
+        filter_primitives: Vec<FilterPrimitive>,
+    ) {
+        // We don't know the spatial node for a backdrop filter, as it's whatever is the
+        // backdrop root, but we can't know this if the root is a picture cache slice
+        // (which is the common case). It will get resolved later during `finalize_picture`.
+        let filter_spatial_node_index = SpatialNodeIndex::UNKNOWN;
+
+        self.make_current_slice_atomic_if_required();
+
+        // Ensure we create a clip-chain for the capture primitive that matches
+        // the render primitive, otherwise one might get culled while the other
+        // is considered visible.
+        let clip_leaf_id = self.clip_tree_builder.build_for_prim(
+            clip_node_id,
+            info,
+            &[],
+            &mut self.interners,
+        );
+
+        // Create the backdrop prim - this is a placeholder which sets the size of the
+        // resolve picture that reads from the backdrop root
+        let backdrop_capture_instance = self.create_primitive(
+            info,
+            spatial_node_index,
+            clip_leaf_id,
+            BackdropCapture {
+            },
+        );
+
+        // Create a prim_list for this backdrop prim and add to a picture chain builder, which
+        // is needed for the call to `wrap_prim_with_filters` below
+        let mut prim_list = PrimitiveList::empty();
+        prim_list.add_prim(
+            backdrop_capture_instance,
+            info.rect,
+            spatial_node_index,
+            info.flags,
+            &mut self.prim_instances,
+            &self.clip_tree_builder,
+        );
+
+        let mut source = PictureChainBuilder::from_prim_list(
+            prim_list,
+            info.flags,
+            filter_spatial_node_index,
+            RasterSpace::Screen,
+            true,
+        );
+
+        // Wrap the backdrop primitive picture with the filters that were specified. This
+        // produces a picture chain with 1+ pictures with the filter composite modes set.
+        source = self.wrap_prim_with_filters(
+            source,
+            clip_node_id,
+            filters,
+            filter_primitives,
+            filter_datas,
+            Some(false),
+        );
+
+        // If all the filters were no-ops (e.g. opacity(1)) then we don't get a picture here
+        // and we can skip adding the backdrop-filter.
+        if source.has_picture() {
+            source = source.add_picture(
+                PictureCompositeMode::IntermediateSurface,
+                clip_node_id,
+                Picture3DContext::Out,
+                &mut self.interners,
+                &mut self.prim_store,
+                &mut self.prim_instances,
+                &mut self.clip_tree_builder,
+            );
+
+            let filtered_instance = source.finalize(
+                clip_node_id,
+                &mut self.interners,
+                &mut self.prim_store,
+                &mut self.clip_tree_builder,
+            );
+
+            // Extract the pic index for the intermediate surface. We need to
+            // supply this to the capture prim below.
+            let output_pic_index = match filtered_instance.kind {
+                PrimitiveInstanceKind::Picture { pic_index, .. } => pic_index,
+                _ => panic!("bug: not a picture"),
+            };
+
+            // Find which stacking context (or root tile cache) to add the
+            // backdrop-filter chain to
+            let sc_index = self.sc_stack.iter().rposition(|sc| {
+                !sc.flags.contains(StackingContextFlags::WRAPS_BACKDROP_FILTER)
+            });
+
+            match sc_index {
+                Some(sc_index) => {
+                    self.sc_stack[sc_index].prim_list.add_prim(
+                        filtered_instance,
+                        info.rect,
+                        filter_spatial_node_index,
+                        info.flags,
+                        &mut self.prim_instances,
+                        &self.clip_tree_builder,
+                    );
+                }
+                None => {
+                    self.tile_cache_builder.add_prim(
+                        filtered_instance,
+                        info.rect,
+                        filter_spatial_node_index,
+                        info.flags,
+                        self.spatial_tree,
+                        self.interners,
+                        &self.quality_settings,
+                        &mut self.prim_instances,
+                        &self.clip_tree_builder,
+                    );
+                }
+            }
+
+            // Add the prim that renders the result of the backdrop filter chain
+            let mut backdrop_render_instance = self.create_primitive(
+                info,
+                spatial_node_index,
+                clip_leaf_id,
+                BackdropRender {
+                },
+            );
+
+            // Set up the picture index for the backdrop-filter output in the prim
+            // that will draw it
+            match backdrop_render_instance.kind {
+                PrimitiveInstanceKind::BackdropRender { ref mut pic_index, .. } => {
+                    assert_eq!(*pic_index, PictureIndex::INVALID);
+                    *pic_index = output_pic_index;
+                }
+                _ => panic!("bug: unexpected prim kind"),
+            }
+
+            self.add_primitive_to_draw_list(
+                backdrop_render_instance,
+                info.rect,
+                spatial_node_index,
+                info.flags,
+            );
+        }
+    }
+
+    #[must_use]
+    fn wrap_prim_with_filters(
+        &mut self,
+        mut source: PictureChainBuilder,
+        clip_node_id: ClipNodeId,
+        mut filter_ops: Vec<Filter>,
+        mut filter_primitives: Vec<FilterPrimitive>,
+        filter_datas: Vec<FilterData>,
+        should_inflate_override: Option<bool>,
+    ) -> PictureChainBuilder {
+        // TODO(cbrewster): Currently CSS and SVG filters live side by side in WebRender, but unexpected results will
+        // happen if they are used simultaneously. Gecko only provides either filter ops or filter primitives.
+        // At some point, these two should be combined and CSS filters should be expressed in terms of SVG filters.
+        assert!(filter_ops.is_empty() || filter_primitives.is_empty(),
+            "Filter ops and filter primitives are not allowed on the same stacking context.");
+
+        // For each filter, create a new image with that composite mode.
+        let mut current_filter_data_index = 0;
+        for filter in &mut filter_ops {
+            let composite_mode = match filter {
+                Filter::ComponentTransfer => {
+                    let filter_data =
+                        &filter_datas[current_filter_data_index];
+                    let filter_data = filter_data.sanitize();
+                    current_filter_data_index = current_filter_data_index + 1;
+                    if filter_data.is_identity() {
+                        continue
+                    } else {
+                        let filter_data_key = SFilterDataKey {
+                            data:
+                                SFilterData {
+                                    r_func: SFilterDataComponent::from_functype_values(
+                                        filter_data.func_r_type, &filter_data.r_values),
+                                    g_func: SFilterDataComponent::from_functype_values(
+                                        filter_data.func_g_type, &filter_data.g_values),
+                                    b_func: SFilterDataComponent::from_functype_values(
+                                        filter_data.func_b_type, &filter_data.b_values),
+                                    a_func: SFilterDataComponent::from_functype_values(
+                                        filter_data.func_a_type, &filter_data.a_values),
+                                },
+                        };
+
+                        let handle = self.interners
+                            .filter_data
+                            .intern(&filter_data_key, || ());
+                        PictureCompositeMode::ComponentTransferFilter(handle)
+                    }
+                }
+                _ => {
+                    if filter.is_noop() {
+                        continue;
+                    } else {
+                        let mut filter = filter.clone();
+
+                        // backdrop-filter spec says that blurs should assume edgeMode=Duplicate
+                        // We can do this by not inflating the bounds, which means the blur
+                        // shader will duplicate pixels outside the sample rect
+                        if let Some(should_inflate_override) = should_inflate_override {
+                            if let Filter::Blur { ref mut should_inflate, .. } = filter {
+                                *should_inflate = should_inflate_override;
+                            }
+                        }
+
+                        PictureCompositeMode::Filter(filter)
+                    }
+                }
+            };
+
+            source = source.add_picture(
+                composite_mode,
+                clip_node_id,
+                Picture3DContext::Out,
+                &mut self.interners,
+                &mut self.prim_store,
+                &mut self.prim_instances,
+                &mut self.clip_tree_builder,
+            );
+        }
+
+        if !filter_primitives.is_empty() {
+            let filter_datas = filter_datas.iter()
+                .map(|filter_data| filter_data.sanitize())
+                .map(|filter_data| {
+                    SFilterData {
+                        r_func: SFilterDataComponent::from_functype_values(
+                            filter_data.func_r_type, &filter_data.r_values),
+                        g_func: SFilterDataComponent::from_functype_values(
+                            filter_data.func_g_type, &filter_data.g_values),
+                        b_func: SFilterDataComponent::from_functype_values(
+                            filter_data.func_b_type, &filter_data.b_values),
+                        a_func: SFilterDataComponent::from_functype_values(
+                            filter_data.func_a_type, &filter_data.a_values),
+                    }
+                })
+                .collect();
+
+            // Sanitize filter inputs
+            for primitive in &mut filter_primitives {
+                primitive.sanitize();
+            }
+
+            let composite_mode = PictureCompositeMode::SvgFilter(
+                filter_primitives,
+                filter_datas,
+            );
+
+            source = source.add_picture(
+                composite_mode,
+                clip_node_id,
+                Picture3DContext::Out,
+                &mut self.interners,
+                &mut self.prim_store,
+                &mut self.prim_instances,
+                &mut self.clip_tree_builder,
+            );
+        }
+
+        source
+    }
+}
+
+
+pub trait CreateShadow {
+    fn create_shadow(
+        &self,
+        shadow: &Shadow,
+        blur_is_noop: bool,
+        current_raster_space: RasterSpace,
+    ) -> Self;
+}
+
+pub trait IsVisible {
+    fn is_visible(&self) -> bool;
+}
+
+/// A primitive instance + some extra information about the primitive. This is
+/// stored when constructing 3d rendering contexts, which involve cutting
+/// primitive lists.
+struct ExtendedPrimitiveInstance {
+    instance: PrimitiveInstance,
+    spatial_node_index: SpatialNodeIndex,
+    flags: PrimitiveFlags,
+}
+
+/// Internal tracking information about the currently pushed stacking context.
+/// Used to track what operations need to happen when a stacking context is popped.
+struct StackingContextInfo {
+    /// If true, pop an entry from the containing block stack.
+    pop_containing_block: bool,
+    /// If true, pop an entry from the flattened stacking context stack.
+    pop_stacking_context: bool,
+    /// If true, set a tile cache barrier when popping the stacking context.
+    set_tile_cache_barrier: bool,
+}
+
+/// Properties of a stacking context that are maintained
+/// during creation of the scene. These structures are
+/// not persisted after the initial scene build.
+struct FlattenedStackingContext {
+    /// The list of primitive instances added to this stacking context.
+    prim_list: PrimitiveList,
+
+    /// Primitive instance flags for compositing this stacking context
+    prim_flags: PrimitiveFlags,
+
+    /// The positioning node for this stacking context
+    spatial_node_index: SpatialNodeIndex,
+
+    /// The clip chain for this stacking context
+    clip_node_id: ClipNodeId,
+
+    /// The list of filters / mix-blend-mode for this
+    /// stacking context.
+    composite_ops: CompositeOps,
+
+    /// Bitfield of reasons this stacking context needs to
+    /// be an offscreen surface.
+    blit_reason: BlitReason,
+
+    /// CSS transform-style property.
+    transform_style: TransformStyle,
+
+    /// Defines the relationship to a preserve-3D hierarchy.
+    context_3d: Picture3DContext<ExtendedPrimitiveInstance>,
+
+    /// Flags identifying the type of container (among other things) this stacking context is
+    flags: StackingContextFlags,
+
+    /// Requested raster space for this stacking context
+    raster_space: RasterSpace,
+}
+
+impl FlattenedStackingContext {
+    /// Return true if the stacking context has a valid preserve-3d property
+    pub fn is_3d(&self) -> bool {
+        self.transform_style == TransformStyle::Preserve3D && self.composite_ops.is_empty()
+    }
+
+    /// Return true if the stacking context isn't needed.
+    pub fn is_redundant(
+        context_3d: &Picture3DContext<ExtendedPrimitiveInstance>,
+        composite_ops: &CompositeOps,
+        blit_reason: BlitReason,
+        parent: Option<&FlattenedStackingContext>,
+        prim_flags: PrimitiveFlags,
+    ) -> bool {
+        // Any 3d context is required
+        if let Picture3DContext::In { .. } = context_3d {
+            return false;
+        }
+
+        // If any filters are present that affect the output
+        if composite_ops.has_valid_filters() {
+            return false;
+        }
+
+        // If a mix-blend is active, we'll need to apply it in most cases
+        if composite_ops.mix_blend_mode.is_some() {
+            match parent {
+                Some(ref parent) => {
+                    // However, if the parent stacking context is empty, then the mix-blend
+                    // is a no-op, and we can skip it
+                    if !parent.prim_list.is_empty() {
+                        return false;
+                    }
+                }
+                None => {
+                    // TODO(gw): For now, we apply mix-blend ops that may be no-ops on a root
+                    //           level picture cache slice. We could apply a similar optimization
+                    //           to above with a few extra checks here, but it's probably quite rare.
+                    return false;
+                }
+            }
+        }
+
+        // If we need to isolate in a surface due to clipping / mix-blend-mode
+        if !blit_reason.is_empty() {
+            return false;
+        }
+
+        // If backface visibility is explicitly set.
+        if !prim_flags.contains(PrimitiveFlags::IS_BACKFACE_VISIBLE) {
+            return false;
+        }
+
+        // It is redundant!
+        true
+    }
+
+    /// Cut the sequence of the immediate children recorded so far and generate a picture from them.
+    pub fn cut_item_sequence(
+        &mut self,
+        prim_store: &mut PrimitiveStore,
+        interners: &mut Interners,
+        composite_mode: Option<PictureCompositeMode>,
+        flat_items_context_3d: Picture3DContext<OrderedPictureChild>,
+        clip_tree_builder: &mut ClipTreeBuilder,
+    ) -> Option<(PictureIndex, PrimitiveInstance)> {
+        if self.prim_list.is_empty() {
+            return None
+        }
+
+        let pic_index = PictureIndex(prim_store.pictures
+            .alloc()
+            .init(PicturePrimitive::new_image(
+                composite_mode.clone(),
+                flat_items_context_3d,
+                self.prim_flags,
+                mem::replace(&mut self.prim_list, PrimitiveList::empty()),
+                self.spatial_node_index,
+                self.raster_space,
+                PictureFlags::empty(),
+            ))
+        );
+
+        let prim_instance = create_prim_instance(
+            pic_index,
+            composite_mode.into(),
+            self.raster_space,
+            self.clip_node_id,
+            interners,
+            clip_tree_builder,
+        );
+
+        Some((pic_index, prim_instance))
+    }
+}
+
+/// A primitive that is added while a shadow context is
+/// active is stored as a pending primitive and only
+/// added to pictures during pop_all_shadows.
+pub struct PendingPrimitive<T> {
+    spatial_node_index: SpatialNodeIndex,
+    clip_node_id: ClipNodeId,
+    info: LayoutPrimitiveInfo,
+    prim: T,
+}
+
+/// As shadows are pushed, they are stored as pending
+/// shadows, and handled at once during pop_all_shadows.
+pub struct PendingShadow {
+    shadow: Shadow,
+    should_inflate: bool,
+    spatial_node_index: SpatialNodeIndex,
+}
+
+pub enum ShadowItem {
+    Shadow(PendingShadow),
+    Image(PendingPrimitive<Image>),
+    LineDecoration(PendingPrimitive<LineDecoration>),
+    NormalBorder(PendingPrimitive<NormalBorderPrim>),
+    Primitive(PendingPrimitive<PrimitiveKeyKind>),
+    TextRun(PendingPrimitive<TextRun>),
+}
+
+impl From<PendingPrimitive<Image>> for ShadowItem {
+    fn from(image: PendingPrimitive<Image>) -> Self {
+        ShadowItem::Image(image)
+    }
+}
+
+impl From<PendingPrimitive<LineDecoration>> for ShadowItem {
+    fn from(line_dec: PendingPrimitive<LineDecoration>) -> Self {
+        ShadowItem::LineDecoration(line_dec)
+    }
+}
+
+impl From<PendingPrimitive<NormalBorderPrim>> for ShadowItem {
+    fn from(border: PendingPrimitive<NormalBorderPrim>) -> Self {
+        ShadowItem::NormalBorder(border)
+    }
+}
+
+impl From<PendingPrimitive<PrimitiveKeyKind>> for ShadowItem {
+    fn from(container: PendingPrimitive<PrimitiveKeyKind>) -> Self {
+        ShadowItem::Primitive(container)
+    }
+}
+
+impl From<PendingPrimitive<TextRun>> for ShadowItem {
+    fn from(text_run: PendingPrimitive<TextRun>) -> Self {
+        ShadowItem::TextRun(text_run)
+    }
+}
+
+fn create_prim_instance(
+    pic_index: PictureIndex,
+    composite_mode_key: PictureCompositeKey,
+    raster_space: RasterSpace,
+    clip_node_id: ClipNodeId,
+    interners: &mut Interners,
+    clip_tree_builder: &mut ClipTreeBuilder,
+) -> PrimitiveInstance {
+    let pic_key = PictureKey::new(
+        Picture {
+            composite_mode_key,
+            raster_space,
+        },
+    );
+
+    let data_handle = interners
+        .picture
+        .intern(&pic_key, || ());
+
+    PrimitiveInstance::new(
+        PrimitiveInstanceKind::Picture {
+            data_handle,
+            pic_index,
+            segment_instance_index: SegmentInstanceIndex::INVALID,
+        },
+        clip_tree_builder.build_for_picture(
+            clip_node_id,
+        ),
+    )
+}
+
+fn filter_ops_for_compositing(
+    input_filters: ItemRange<FilterOp>,
+) -> Vec<Filter> {
+    // TODO(gw): Now that we resolve these later on,
+    //           we could probably make it a bit
+    //           more efficient than cloning these here.
+    input_filters.iter().map(|filter| filter.into()).collect()
+}
+
+fn filter_datas_for_compositing(
+    input_filter_datas: &[TempFilterData],
+) -> Vec<FilterData> {
+    // TODO(gw): Now that we resolve these later on,
+    //           we could probably make it a bit
+    //           more efficient than cloning these here.
+    let mut filter_datas = vec![];
+    for temp_filter_data in input_filter_datas {
+        let func_types : Vec<ComponentTransferFuncType> = temp_filter_data.func_types.iter().collect();
+        debug_assert!(func_types.len() == 4);
+        filter_datas.push( FilterData {
+            func_r_type: func_types[0],
+            r_values: temp_filter_data.r_values.iter().collect(),
+            func_g_type: func_types[1],
+            g_values: temp_filter_data.g_values.iter().collect(),
+            func_b_type: func_types[2],
+            b_values: temp_filter_data.b_values.iter().collect(),
+            func_a_type: func_types[3],
+            a_values: temp_filter_data.a_values.iter().collect(),
+        });
+    }
+    filter_datas
+}
+
+fn filter_primitives_for_compositing(
+    input_filter_primitives: ItemRange<FilterPrimitive>,
+) -> Vec<FilterPrimitive> {
+    // Resolve these in the flattener?
+    // TODO(gw): Now that we resolve these later on,
+    //           we could probably make it a bit
+    //           more efficient than cloning these here.
+    input_filter_primitives.iter().map(|primitive| primitive).collect()
+}
+
+fn process_repeat_size(
+    snapped_rect: &LayoutRect,
+    unsnapped_rect: &LayoutRect,
+    repeat_size: LayoutSize,
+) -> LayoutSize {
+    // FIXME(aosmond): The tile size is calculated based on several parameters
+    // during display list building. It may produce a slightly different result
+    // than the bounds due to floating point error accumulation, even though in
+    // theory they should be the same. We do a fuzzy check here to paper over
+    // that. It may make more sense to push the original parameters into scene
+    // building and let it do a saner calculation with more information (e.g.
+    // the snapped values).
+    const EPSILON: f32 = 0.001;
+    LayoutSize::new(
+        if repeat_size.width.approx_eq_eps(&unsnapped_rect.width(), &EPSILON) {
+            snapped_rect.width()
+        } else {
+            repeat_size.width
+        },
+        if repeat_size.height.approx_eq_eps(&unsnapped_rect.height(), &EPSILON) {
+            snapped_rect.height()
+        } else {
+            repeat_size.height
+        },
+    )
+}
+
+fn read_gradient_stops(stops: ItemRange<GradientStop>) -> Vec<GradientStopKey> {
+    stops.iter().map(|stop| {
+        GradientStopKey {
+            offset: stop.offset,
+            color: stop.color.into(),
+        }
+    }).collect()
+}
diff --git a/gfx/wr/webrender/src/screen_capture.rs b/gfx/wr/webrender/src/screen_capture.rs
new file mode 100644
index 0000000000..3cc500f3fa
--- /dev/null
+++ b/gfx/wr/webrender/src/screen_capture.rs
@@ -0,0 +1,495 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! Screen capture infrastructure for the Gecko Profiler and Composition Recorder.
+
+use std::collections::HashMap;
+
+use api::{ImageFormat, ImageBufferKind};
+use api::units::*;
+use gleam::gl::GlType;
+
+use crate::device::{Device, PBO, DrawTarget, ReadTarget, Texture, TextureFilter};
+use crate::internal_types::RenderTargetInfo;
+use crate::renderer::Renderer;
+use crate::util::round_up_to_multiple;
+
+/// A handle to a screenshot that is being asynchronously captured and scaled.
+#[repr(C)]
+#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
+pub struct AsyncScreenshotHandle(usize);
+
+/// A handle to a recorded frame that was captured.
+#[repr(C)]
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub struct RecordedFrameHandle(usize);
+
+/// An asynchronously captured screenshot bound to a PBO which has not yet been mapped for copying.
+struct AsyncScreenshot {
+    /// The PBO that will contain the screenshot data.
+    pbo: PBO,
+    /// The size of the screenshot.
+    screenshot_size: DeviceIntSize,
+    /// The stride of the data in the PBO.
+    buffer_stride: usize,
+    /// The image format of the screenshot.
+    image_format: ImageFormat,
+}
+
+/// How the `AsyncScreenshotGrabber` captures frames.
+#[derive(Debug, Eq, PartialEq)]
+enum AsyncScreenshotGrabberMode {
+    /// Capture screenshots for the Gecko profiler.
+    ///
+    /// This mode will asynchronously scale the screenshots captured.
+    ProfilerScreenshots,
+
+    /// Capture screenshots for the CompositionRecorder.
+    ///
+    /// This mode does not scale the captured screenshots.
+    CompositionRecorder,
+}
+
+/// Renderer infrastructure for capturing screenshots and scaling them asynchronously.
+pub(in crate) struct AsyncScreenshotGrabber {
+    /// The textures used to scale screenshots.
+    scaling_textures: Vec<Texture>,
+    /// PBOs available to be used for screenshot readback.
+    available_pbos: Vec<PBO>,
+    /// PBOs containing screenshots that are awaiting readback.
+    awaiting_readback: HashMap<AsyncScreenshotHandle, AsyncScreenshot>,
+    /// The handle for the next PBO that will be inserted into `awaiting_readback`.
+    next_pbo_handle: usize,
+    /// The mode the grabber operates in.
+    mode: AsyncScreenshotGrabberMode,
+}
+
+impl Default for AsyncScreenshotGrabber {
+    fn default() -> Self {
+        AsyncScreenshotGrabber {
+            scaling_textures: Vec::new(),
+            available_pbos: Vec::new(),
+            awaiting_readback: HashMap::new(),
+            next_pbo_handle: 1,
+            mode: AsyncScreenshotGrabberMode::ProfilerScreenshots,
+        }
+    }
+}
+
+impl AsyncScreenshotGrabber {
+    /// Create a new AsyncScreenshotGrabber for the composition recorder.
+    pub fn new_composition_recorder() -> Self {
+        let mut recorder = Self::default();
+        recorder.mode = AsyncScreenshotGrabberMode::CompositionRecorder;
+
+        recorder
+    }
+
+    /// Deinitialize the allocated textures and PBOs.
+    pub fn deinit(self, device: &mut Device) {
+        for texture in self.scaling_textures {
+            device.delete_texture(texture);
+        }
+
+        for pbo in self.available_pbos {
+            device.delete_pbo(pbo);
+        }
+
+        for (_, async_screenshot) in self.awaiting_readback {
+            device.delete_pbo(async_screenshot.pbo);
+        }
+    }
+
+    /// Take a screenshot and scale it asynchronously.
+    ///
+    /// The returned handle can be used to access the mapped screenshot data via
+    /// `map_and_recycle_screenshot`.
+    /// The returned size is the size of the screenshot.
+    pub fn get_screenshot(
+        &mut self,
+        device: &mut Device,
+        window_rect: DeviceIntRect,
+        buffer_size: DeviceIntSize,
+        image_format: ImageFormat,
+    ) -> (AsyncScreenshotHandle, DeviceIntSize) {
+        let screenshot_size = match self.mode {
+            AsyncScreenshotGrabberMode::ProfilerScreenshots => {
+                assert_ne!(window_rect.width(), 0);
+                assert_ne!(window_rect.height(), 0);
+
+                let scale = (buffer_size.width as f32 / window_rect.width() as f32)
+                    .min(buffer_size.height as f32 / window_rect.height() as f32);
+
+                (window_rect.size().to_f32() * scale).round().to_i32()
+            }
+
+            AsyncScreenshotGrabberMode::CompositionRecorder => {
+                assert_eq!(buffer_size, window_rect.size());
+                buffer_size
+            }
+        };
+
+        assert!(screenshot_size.width <= buffer_size.width);
+        assert!(screenshot_size.height <= buffer_size.height);
+
+        // To ensure that we hit the fast path when reading from a
+        // framebuffer we must ensure that the width of the area we read
+        // is a multiple of the device's optimal pixel-transfer stride.
+        // The read_size should therefore be the screenshot_size with the width
+        // increased to a suitable value. We will also pass this value to
+        // scale_screenshot() as the min_texture_size, to ensure the texture is
+        // large enough to read from. In CompositionRecorder mode we read
+        // directly from the default framebuffer so are unable to choose this size.
+        let read_size = match self.mode {
+            AsyncScreenshotGrabberMode::ProfilerScreenshots => {
+                let stride = (screenshot_size.width * image_format.bytes_per_pixel()) as usize;
+                let rounded = round_up_to_multiple(stride, device.required_pbo_stride().num_bytes(image_format));
+                let optimal_width = rounded as i32 / image_format.bytes_per_pixel();
+
+                DeviceIntSize::new(
+                    optimal_width,
+                    screenshot_size.height,
+                )
+            }
+            AsyncScreenshotGrabberMode::CompositionRecorder => buffer_size,
+        };
+        let required_size = read_size.area() as usize * image_format.bytes_per_pixel() as usize;
+
+        // Find an available PBO with the required size, creating a new one if necessary.
+        let pbo = {
+            let mut reusable_pbo = None;
+            while let Some(pbo) = self.available_pbos.pop() {
+                if pbo.get_reserved_size() != required_size {
+                    device.delete_pbo(pbo);
+                } else {
+                    reusable_pbo = Some(pbo);
+                    break;
+                }
+            };
+
+            reusable_pbo.unwrap_or_else(|| device.create_pbo_with_size(required_size))
+        };
+        assert_eq!(pbo.get_reserved_size(), required_size);
+
+        let read_target = match self.mode {
+            AsyncScreenshotGrabberMode::ProfilerScreenshots => {
+                self.scale_screenshot(
+                    device,
+                    ReadTarget::Default,
+                    window_rect,
+                    buffer_size,
+                    read_size,
+                    screenshot_size,
+                    image_format,
+                    0,
+                );
+
+                ReadTarget::from_texture(&self.scaling_textures[0])
+            }
+
+            AsyncScreenshotGrabberMode::CompositionRecorder => ReadTarget::Default,
+        };
+
+        device.read_pixels_into_pbo(
+            read_target,
+            DeviceIntRect::from_size(read_size),
+            image_format,
+            &pbo,
+        );
+
+        let handle = AsyncScreenshotHandle(self.next_pbo_handle);
+        self.next_pbo_handle += 1;
+
+        self.awaiting_readback.insert(
+            handle,
+            AsyncScreenshot {
+                pbo,
+                screenshot_size,
+                buffer_stride: (read_size.width * image_format.bytes_per_pixel()) as usize,
+                image_format,
+            },
+        );
+
+        (handle, screenshot_size)
+    }
+
+    /// Take the screenshot in the given `ReadTarget` and scale it to `dest_size` recursively.
+    ///
+    /// `level` selects the destination, `scaling_textures[level]`. While the source rect is
+    /// more than twice as wide as `dest_size`, the call first recurses into `level + 1` with
+    /// `dest_size * 2`, so each blit after the innermost one exactly halves the image.
+    ///
+    /// The texture for a level is `buffer_size * 2^level`, with each dimension raised to at
+    /// least `min_texture_size`; it is (re)created here if it is missing or has another size.
+    ///
+    /// After the outermost call (`level == 0`) returns, the final screenshot is in
+    /// `scaling_textures[0]`. The level-0 blit is Y-inverted when the device's surface origin
+    /// is not top-left. Panics if the grabber is not in `ProfilerScreenshots` mode.
+    fn scale_screenshot(
+        &mut self,
+        device: &mut Device,
+        read_target: ReadTarget,
+        read_target_rect: DeviceIntRect,
+        buffer_size: DeviceIntSize,
+        min_texture_size: DeviceIntSize,
+        dest_size: DeviceIntSize,
+        image_format: ImageFormat,
+        level: usize,
+    ) {
+        assert_eq!(self.mode, AsyncScreenshotGrabberMode::ProfilerScreenshots);
+
+        let texture_size = {
+            let size = buffer_size * (1 << level); // doubles with every level
+            DeviceIntSize::new(
+                size.width.max(min_texture_size.width),
+                size.height.max(min_texture_size.height),
+            )
+        };
+
+        // If we haven't created a texture for this level, or the existing
+        // texture is the wrong size, then create a new one.
+        if level == self.scaling_textures.len() || self.scaling_textures[level].get_dimensions() != texture_size {
+            let texture = device.create_texture(
+                ImageBufferKind::Texture2D,
+                image_format,
+                texture_size.width,
+                texture_size.height,
+                TextureFilter::Linear,
+                Some(RenderTargetInfo { has_depth: false }),
+            );
+            if level == self.scaling_textures.len() {
+                self.scaling_textures.push(texture); // first use of this level
+            } else {
+                let old_texture = std::mem::replace(&mut self.scaling_textures[level], texture);
+                device.delete_texture(old_texture); // hand the wrongly sized texture back to the device
+            }
+        }
+        assert_eq!(self.scaling_textures[level].get_dimensions(), texture_size);
+
+        let (read_target, read_target_rect) = if read_target_rect.width() > 2 * dest_size.width {
+            self.scale_screenshot(
+                device,
+                read_target,
+                read_target_rect,
+                buffer_size,
+                min_texture_size,
+                dest_size * 2, // the next level up produces an image twice this size
+                image_format,
+                level + 1,
+            );
+
+            (
+                ReadTarget::from_texture(&self.scaling_textures[level + 1]),
+                DeviceIntRect::from_size(dest_size * 2),
+            )
+        } else {
+            (read_target, read_target_rect) // innermost level: read straight from the caller's target
+        };
+
+        let draw_target = DrawTarget::from_texture(&self.scaling_textures[level], false);
+
+        let draw_target_rect = draw_target
+            .to_framebuffer_rect(DeviceIntRect::from_size(dest_size));
+
+        let read_target_rect = device_rect_as_framebuffer_rect(&read_target_rect);
+
+        if level == 0 && !device.surface_origin_is_top_left() {
+            device.blit_render_target_invert_y(
+                read_target,
+                read_target_rect,
+                draw_target,
+                draw_target_rect,
+            );
+        } else {
+            device.blit_render_target(
+                read_target,
+                read_target_rect,
+                draw_target,
+                draw_target_rect,
+                TextureFilter::Linear,
+            );
+        }
+    }
+
+    /// Map the screenshot given by `handle`, copy it row by row into `dst_buffer` (rows start every
+    /// `dst_stride` bytes) and retire its PBO. Returns `false` for an unknown handle or failed map.
+    pub fn map_and_recycle_screenshot(
+        &mut self,
+        device: &mut Device,
+        handle: AsyncScreenshotHandle,
+        dst_buffer: &mut [u8],
+        dst_stride: usize,
+    ) -> bool {
+        let AsyncScreenshot {
+            pbo,
+            screenshot_size,
+            buffer_stride,
+            image_format,
+        } = match self.awaiting_readback.remove(&handle) {
+            Some(screenshot) => screenshot,
+            None => return false, // unknown handle, or one that was already mapped and removed
+        };
+
+        let gl_type = device.gl().get_type();
+
+        let success = if let Some(bound_pbo) = device.map_pbo_for_readback(&pbo) {
+            let src_buffer = &bound_pbo.data;
+            let src_stride = buffer_stride;
+            let src_width =
+                screenshot_size.width as usize * image_format.bytes_per_pixel() as usize;
+
+            for (src_slice, dst_slice) in self
+                .iter_src_buffer_chunked(gl_type, src_buffer, src_stride)
+                .zip(dst_buffer.chunks_mut(dst_stride))
+                .take(screenshot_size.height as usize) // copy at most one row per screenshot line
+            {
+                dst_slice[.. src_width].copy_from_slice(&src_slice[.. src_width]); // panics if either row is shorter than src_width
+            }
+
+            true
+        } else {
+            false
+        };
+
+        match self.mode {
+            AsyncScreenshotGrabberMode::ProfilerScreenshots => self.available_pbos.push(pbo), // kept for reuse
+            AsyncScreenshotGrabberMode::CompositionRecorder => device.delete_pbo(pbo),
+        }
+
+        success
+    }
+
+    /// Split `src_buffer` into rows of `src_stride` bytes. The rows are yielded in reverse
+    /// order when the capture is stored upside down, and in buffer order otherwise.
+    fn iter_src_buffer_chunked<'a>(
+        &self,
+        gl_type: GlType,
+        src_buffer: &'a [u8],
+        src_stride: usize,
+    ) -> Box<dyn Iterator<Item = &'a [u8]> + 'a> {
+        // GLES on Windows is treated as ANGLE.
+        let on_angle = cfg!(windows) && gl_type == GlType::Gles;
+        let rows = src_buffer.chunks(src_stride);
+
+        match self.mode {
+            // Composition recorder frames captured without ANGLE are upside down, so
+            // walk the rows in reverse to flip them right side up.
+            AsyncScreenshotGrabberMode::CompositionRecorder if !on_angle => Box::new(rows.rev()),
+            // ANGLE recordings and every `ProfilerScreenshots` capture are treated as
+            // already being right side up.
+            _ => Box::new(rows),
+        }
+    }
+}
+
+// Renderer entry points for screen capture (composition recorder and profiler).
+impl Renderer {
+    /// Capture the current frame for the Composition Recorder.
+    ///
+    /// On success, yields a handle that `map_recorded_frame` accepts for copying the
+    /// frame into a buffer, together with the size of the frame (the device size).
+    /// Yields `None` when `device_size()` does.
+    pub fn record_frame(
+        &mut self,
+        image_format: ImageFormat,
+    ) -> Option<(RecordedFrameHandle, DeviceIntSize)> {
+        let frame_size = self.device_size()?;
+        let frame_rect = DeviceIntRect::from_size(frame_size);
+
+        // The recorder is created lazily, the first time a frame is recorded.
+        let recorder = self
+            .async_frame_recorder
+            .get_or_insert_with(AsyncScreenshotGrabber::new_composition_recorder);
+
+        self.device.begin_frame();
+        let (screenshot_handle, _) =
+            recorder.get_screenshot(&mut self.device, frame_rect, frame_size, image_format);
+        self.device.end_frame();
+
+        // Re-wrap the grabber's handle in the recorder-specific handle type.
+        let recorded_handle = RecordedFrameHandle(screenshot_handle.0);
+        Some((recorded_handle, frame_size))
+    }
+
+    /// Copy the recorded frame `handle` into `dst_buffer`. Returns whether the copy succeeded.
+    pub fn map_recorded_frame(
+        &mut self,
+        handle: RecordedFrameHandle,
+        dst_buffer: &mut [u8],
+        dst_stride: usize,
+    ) -> bool {
+        let recorder = match self.async_frame_recorder.as_mut() {
+            Some(recorder) => recorder,
+            // No recorder exists (never created, or already released).
+            None => return false,
+        };
+
+        // Convert back to the handle type the grabber understands.
+        let screenshot_handle = AsyncScreenshotHandle(handle.0);
+        recorder
+            .map_and_recycle_screenshot(&mut self.device, screenshot_handle, dst_buffer, dst_stride)
+    }
+
+    /// Free the composition recorder's data structures, if a recorder exists.
+    pub fn release_composition_recorder_structures(&mut self) {
+        if let Some(recorder) = self.async_frame_recorder.take() {
+            self.device.begin_frame();
+            recorder.deinit(&mut self.device);
+            self.device.end_frame();
+        }
+    }
+
+    /// Request an asynchronous, scaled screenshot.
+    ///
+    /// Returns a handle together with the size of the screenshot. Pass the handle
+    /// to `map_and_recycle_screenshot` to access the mapped screenshot data.
+    ///
+    /// The profiler's screenshot grabber is created on first use.
+    pub fn get_screenshot_async(
+        &mut self,
+        window_rect: DeviceIntRect,
+        buffer_size: DeviceIntSize,
+        image_format: ImageFormat,
+    ) -> (AsyncScreenshotHandle, DeviceIntSize) {
+        // The grabber is created lazily, on the first screenshot request.
+        let grabber = self
+            .async_screenshots
+            .get_or_insert_with(AsyncScreenshotGrabber::default);
+
+        self.device.begin_frame();
+        let handle_and_size =
+            grabber.get_screenshot(&mut self.device, window_rect, buffer_size, image_format);
+        self.device.end_frame();
+        handle_and_size
+    }
+
+    /// Map the screenshot identified by `handle` and copy its contents into
+    /// `dst_buffer`. Returns whether the screenshot could be copied.
+    pub fn map_and_recycle_screenshot(
+        &mut self,
+        handle: AsyncScreenshotHandle,
+        dst_buffer: &mut [u8],
+        dst_stride: usize,
+    ) -> bool {
+        match self.async_screenshots.as_mut() {
+            Some(grabber) => grabber.map_and_recycle_screenshot(
+                &mut self.device,
+                handle,
+                dst_buffer,
+                dst_stride,
+            ),
+            // No grabber exists (never created, or already released).
+            None => false,
+        }
+    }
+
+    /// Free the screenshot grabbing structures that the profiler was using, if any.
+    pub fn release_profiler_structures(&mut self) {
+        if let Some(grabber) = self.async_screenshots.take() {
+            self.device.begin_frame();
+            grabber.deinit(&mut self.device);
+            self.device.end_frame();
+        }
+    }
+}
diff --git a/gfx/wr/webrender/src/segment.rs b/gfx/wr/webrender/src/segment.rs
new file mode 100644
index 0000000000..55e76f33f2
--- /dev/null
+++ b/gfx/wr/webrender/src/segment.rs
@@ -0,0 +1,1374 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//!  Primitive segmentation
+//!
+//! # Overview
+//!
+//! Segmenting is the process of breaking rectangular primitives into smaller rectangular
+//! primitives in order to extract parts that could benefit from fast paths.
+//!
+//! Typically this is used to allow fully opaque segments to be rendered in the opaque
+//! pass. For example when an opaque rectangle has a non-axis-aligned transform applied,
+//! we usually have to apply some anti-aliasing around the edges which requires alpha
+//! blending. By segmenting the edges out of the center of the primitive, we can keep a
+//! large amount of pixels in the opaque pass.
+//! Segmenting also lets us avoid rasterizing parts of clip masks that we know to have
+//! no effect or to be fully masking. For example, by segmenting the corners of a rounded
+//! rectangle clip, we can optimize both rendering the mask and the primitive by only
+//! rasterizing the corners in the mask and not applying any clipping to the segments of
+//! the primitive that don't overlap the borders.
+//!
+//! It is a flexible system in the sense that different sources of segmentation (for
+//! example two rounded rectangle clips) can affect the segmentation, and the possibility
+//! to segment some effects such as specific clip kinds does not necessarily mean the
+//! primitive will actually be segmented.
+//!
+//! ## Segments and clipping
+//!
+//! Segments of a primitive can be either not clipped, fully clipped, or partially clipped.
+//! In the first two cases we don't need a clip mask. For each partially masked segment, a
+//! mask is rasterized using a render task. All of the interesting steps happen during frame
+//! building.
+//!
+//! - The first step is to determine the segmentation and write the associated GPU data.
+//!   See `PrimitiveInstance::build_segments_if_needed` and `write_brush_segment_description`
+//!   in `prim_store/mod.rs` which uses the segment builder of this module.
+//! - The second step is to generate the mask render tasks.
+//!   See `BrushSegment::update_clip_task` and `RenderTask::new_mask`. For each segment that
+//!   needs a mask, the contribution of all clips that affect the segment is added to the
+//!   mask's render task.
+//! - Segments are assigned to batches (See `batch.rs`). Segments of a given primitive can
+//!   be assigned to different batches.
+//!
+//! See also the [`clip` module documentation][clip.rs] for details about how clipping
+//! information is represented.
+//!
+//!
+//! [clip.rs]: ../clip/index.html
+//!
+
+use api::{BorderRadius, ClipMode};
+use api::units::*;
+use std::{cmp, usize};
+use crate::util::{extract_inner_rect_safe};
+use smallvec::SmallVec;
+
+// Cap on segments per primitive: too many causes a lot of clip mask overhead and may exceed the
+// maximum row size of the GPU cache. Above this, `build` emits one masked segment instead.
+const MAX_SEGMENTS: usize = 64;
+
+bitflags! {
+    // Note: This can use up to 4 bits due to how it will be packed in
+    // the instance data.
+
+    /// Each bit of the edge AA mask is:
+    /// 0, when the edge of the primitive needs to be considered for AA
+    /// 1, when the edge of the segment needs to be considered for AA
+    ///
+    /// *Note*: the bit values have to match the shader logic in
+    /// the `write_transform_vertex()` function.
+    #[cfg_attr(feature = "capture", derive(Serialize))]
+    #[cfg_attr(feature = "replay", derive(Deserialize))]
+    #[derive(MallocSizeOf)]
+    pub struct EdgeAaSegmentMask: u8 {
+        /// Bit for the left edge.
+        const LEFT = 0x1;
+        /// Bit for the top edge.
+        const TOP = 0x2;
+        /// Bit for the right edge.
+        const RIGHT = 0x4;
+        /// Bit for the bottom edge.
+        const BOTTOM = 0x8;
+    }
+}
+
+bitflags! {
+    pub struct ItemFlags: u8 { // per-`Item` state used while `SegmentBuilder::build` sweeps the events
+        const X_ACTIVE = 0x1; // set by the item's BeginClip x-event, cleared by its EndClip x-event
+        const Y_ACTIVE = 0x2; // same, for the item's y-events
+        const HAS_MASK = 0x4; // the item was created with `has_mask == true`
+    }
+}
+
+// The segment builder outputs a list of these segments.
+#[derive(Debug, PartialEq)]
+pub struct Segment {
+    pub rect: LayoutRect, // area covered by the segment
+    pub has_mask: bool, // whether the segment needs a clip mask
+    pub edge_flags: EdgeAaSegmentMask, // `build` sets a bit for each side that has no emitted neighbour
+    pub region_x: usize, // presumably the number of region boundaries passed on the x axis — confirm
+    pub region_y: usize, // presumably the number of region boundaries passed on the y axis — confirm
+}
+
+// The segment builder creates a list of x/y axis events that are used to build a
+// segment list. Right now, we don't bother providing a list of *which* clip regions
+// are active for a given segment. Instead, if there is any clip mask present in a
+// segment, we will just end up drawing each of the masks to that segment clip. This
+// is a fairly rare case, but we can detect this in the future and only apply clip
+// masks that are relevant to each segment region.
+// TODO(gw): Provide clip region info with each segment.
+//
+// NOTE(review): the derived `PartialOrd` compares variants in declaration order, which
+// disagrees with the hand-written `Ord` impl for this type — confirm which is intended.
+#[derive(Copy, Clone, Debug, Eq, PartialEq, PartialOrd)]
+enum EventKind {
+    // Beginning of a clip (rounded) rect.
+    BeginClip,
+    // End of a clip (rounded) rect.
+    EndClip,
+    // Begin the next region in the primitive.
+    BeginRegion,
+}
+
+// When two events share a coordinate, clip ends must be handled before region
+// starts, and region starts before clip starts, i.e. the order is
+// EndClip < BeginRegion < BeginClip. This ensures that we're able to detect
+// which regions are active for a given segment.
+impl Ord for EventKind {
+    fn cmp(&self, other: &EventKind) -> cmp::Ordering {
+        use self::EventKind::{BeginClip, BeginRegion, EndClip};
+        use std::cmp::Ordering::{Equal, Greater, Less};
+
+        match (*self, *other) {
+            // Two clip events of the same kind are interchangeable.
+            (BeginClip, BeginClip) |
+            (EndClip, EndClip) => Equal,
+
+            // The left-hand kind comes earlier in the order above.
+            (EndClip, BeginRegion) |
+            (EndClip, BeginClip) |
+            (BeginRegion, BeginClip) => Less,
+
+            // The left-hand kind comes later in the order above.
+            (BeginRegion, EndClip) |
+            (BeginClip, EndClip) |
+            (BeginClip, BeginRegion) => Greater,
+
+            // Comparing two region starts is treated as a bug.
+            (BeginRegion, BeginRegion) => {
+                panic!("bug: regions must be non-overlapping")
+            }
+        }
+    }
+}
+
+// An x/y event where we will create a vertex in the segment builder.
+// NOTE(review): the derived `PartialOrd` is field-wise and differs from `Ord` — confirm.
+#[derive(Debug, Eq, PartialEq, PartialOrd)]
+struct Event {
+    value: Au, // coordinate on the swept axis, in app units
+    item_index: ItemIndex, // the item that begins/ends here; `usize::MAX` for region events
+    kind: EventKind,
+}
+
+impl Ord for Event {
+    fn cmp(&self, other: &Event) -> cmp::Ordering {
+        // Order by position; consult the kind only to break ties. `then_with` is lazy, so
+        // `EventKind::cmp` (which panics for two regions) never runs for distinct positions.
+        self.value.cmp(&other.value).then_with(|| self.kind.cmp(&other.kind))
+    }
+}
+
+impl Event {
+    // Event marking the coordinate where clip item `index` begins.
+    fn begin(value: f32, index: usize) -> Event {
+        let value = Au::from_f32_px(value);
+        let item_index = ItemIndex(index);
+        Event { value, item_index, kind: EventKind::BeginClip }
+    }
+
+    // Event marking the coordinate where clip item `index` ends.
+    fn end(value: f32, index: usize) -> Event {
+        let value = Au::from_f32_px(value);
+        let item_index = ItemIndex(index);
+        Event { value, item_index, kind: EventKind::EndClip }
+    }
+
+    // Event marking a region boundary. It belongs to no item, so it carries
+    // `usize::MAX` as its item index.
+    fn region(value: f32) -> Event {
+        let value = Au::from_f32_px(value);
+        let item_index = ItemIndex(usize::MAX);
+        Event { value, item_index, kind: EventKind::BeginRegion }
+    }
+
+    // Apply this event to the sweep state: clip events switch `flag` on or
+    // off for their item, region events advance the `region` counter.
+    fn update(
+        &self,
+        flag: ItemFlags,
+        items: &mut [Item],
+        region: &mut usize,
+    ) {
+        match self.kind {
+            // Region boundaries belong to no item; just count them.
+            EventKind::BeginRegion => *region += 1,
+            // Clip events toggle `flag` on the item they refer to.
+            EventKind::BeginClip | EventKind::EndClip => {
+                let entering = self.kind == EventKind::BeginClip;
+                let item = &mut items[self.item_index.0];
+                item.flags.set(flag, entering);
+            }
+        }
+    }
+}
+
+// An item that provides some kind of clip region (either
+// a clip in/out rect, or a mask region).
+#[derive(Debug)]
+struct Item {
+    rect: LayoutRect,
+    mode: Option<ClipMode>, // `None` for the masked pieces pushed by `push_mask_region`
+    flags: ItemFlags, // HAS_MASK is fixed at creation; X_ACTIVE / Y_ACTIVE are toggled by events
+}
+
+impl Item {
+    // Create an item covering `rect`.
+    //
+    // `has_mask` is recorded as `ItemFlags::HAS_MASK`; the X/Y activity
+    // flags always start out cleared.
+    fn new(
+        rect: LayoutRect,
+        mode: Option<ClipMode>,
+        has_mask: bool,
+    ) -> Item {
+        let flags = match has_mask {
+            true => ItemFlags::HAS_MASK,
+            false => ItemFlags::empty(),
+        };
+
+        // Field-init shorthand: every local has the name of its field.
+        Item { rect, mode, flags }
+    }
+}
+
+#[derive(Copy, Clone, Debug, Eq, PartialEq, PartialOrd)]
+struct ItemIndex(usize); // position of an item in `SegmentBuilder::items`; region events use `usize::MAX`
+
+// The main public interface to the segment module.
+pub struct SegmentBuilder {
+    items: Vec<Item>, // clip / mask items pushed since the last `initialize`
+    inner_rect: Option<LayoutRect>, // when set, its edges become region boundaries in `build`
+    bounding_rect: Option<LayoutRect>, // local rect narrowed by every `ClipMode::Clip` rect pushed
+    has_interesting_clips: bool, // false while only the two clips pushed by `initialize` exist
+
+    #[cfg(debug_assertions)]
+    initialized: bool, // debug-only guard: `build` asserts that `initialize` ran first
+}
+
+impl SegmentBuilder {
+    // Create an empty segment builder. The primitive local rect and the
+    // associated local clip rect are supplied later, via `initialize`.
+    pub fn new() -> SegmentBuilder {
+        SegmentBuilder {
+            items: Vec::with_capacity(4),
+            bounding_rect: None,
+            inner_rect: None,
+            has_interesting_clips: false,
+            #[cfg(debug_assertions)]
+            initialized: false,
+        }
+    }
+
+    pub fn initialize( // Reset the builder and seed it with the primitive's rect and local clip rect.
+        &mut self,
+        local_rect: LayoutRect,
+        inner_rect: Option<LayoutRect>,
+        local_clip_rect: LayoutRect,
+    ) {
+        self.items.clear(); // drop any items left from earlier use of this builder
+        self.inner_rect = inner_rect;
+        self.bounding_rect = Some(local_rect);
+
+        self.push_clip_rect(local_rect, None, ClipMode::Clip);
+        self.push_clip_rect(local_clip_rect, None, ClipMode::Clip); // also narrows `bounding_rect`
+
+        // This must be set after the push_clip_rect calls above (which set it to
+        // true), since we want to skip segment building if those are the only clips.
+        self.has_interesting_clips = false;
+
+        #[cfg(debug_assertions)]
+        {
+            self.initialized = true;
+        }
+    }
+
+    // Push a region defined by an inner and outer rect where there is a mask
+    // required. This ensures that segments which intersect with these areas will
+    // get a clip mask task allocated. The ring between the two rects is pushed as
+    // eight masked items; the inner rect itself is pushed (unmasked) only when
+    // `inner_clip_mode` is given. Currently used to mark where a box-shadow region
+    // can affect the pixels of a clip-mask; may suit dashed/dotted borders later.
+    pub fn push_mask_region(
+        &mut self,
+        outer_rect: LayoutRect,
+        inner_rect: LayoutRect,
+        inner_clip_mode: Option<ClipMode>,
+    ) {
+        self.has_interesting_clips = true;
+
+        if inner_rect.is_empty() { // no hole: the whole outer rect becomes one masked item
+            self.items.push(Item::new(
+                outer_rect,
+                None,
+                true
+            ));
+            return;
+        }
+
+        debug_assert!(outer_rect.contains_box(&inner_rect));
+
+        let p0 = outer_rect.min;
+        let p1 = inner_rect.min;
+        let p2 = inner_rect.max;
+        let p3 = outer_rect.max;
+
+        let segments = &[
+            LayoutRect { // top-left corner
+                min: LayoutPoint::new(p0.x, p0.y),
+                max: LayoutPoint::new(p1.x, p1.y),
+            },
+            LayoutRect { // top-right corner
+                min: LayoutPoint::new(p2.x, p0.y),
+                max: LayoutPoint::new(p3.x, p1.y),
+            },
+            LayoutRect { // bottom-right corner
+                min: LayoutPoint::new(p2.x, p2.y),
+                max: LayoutPoint::new(p3.x, p3.y),
+            },
+            LayoutRect { // bottom-left corner
+                min: LayoutPoint::new(p0.x, p2.y),
+                max: LayoutPoint::new(p1.x, p3.y),
+            },
+            LayoutRect { // top edge
+                min: LayoutPoint::new(p1.x, p0.y),
+                max: LayoutPoint::new(p2.x, p1.y),
+            },
+            LayoutRect { // right edge
+                min: LayoutPoint::new(p2.x, p1.y),
+                max: LayoutPoint::new(p3.x, p2.y),
+            },
+            LayoutRect { // bottom edge
+                min: LayoutPoint::new(p1.x, p2.y),
+                max: LayoutPoint::new(p2.x, p3.y),
+            },
+            LayoutRect { // left edge
+                min: LayoutPoint::new(p0.x, p1.y),
+                max: LayoutPoint::new(p1.x, p2.y),
+            },
+        ];
+
+        self.items.reserve(segments.len() + 1); // the ring pieces plus the optional inner rect
+
+        for segment in segments {
+            self.items.push(Item::new(
+                *segment,
+                None,
+                true
+            ));
+        }
+
+        if inner_clip_mode.is_some() {
+            self.items.push(Item::new(
+                inner_rect,
+                inner_clip_mode,
+                false,
+            ));
+        }
+    }
+
+    // Push a clip region into the segment builder. If `radius` is None, it's a simple
+    // rect; otherwise corners get masked items and the rest unmasked ones when possible.
+    pub fn push_clip_rect(
+        &mut self,
+        rect: LayoutRect,
+        radius: Option<BorderRadius>,
+        mode: ClipMode,
+    ) {
+        self.has_interesting_clips = true;
+
+        // Keep track of a minimal bounding rect for the set of
+        // segments that will be generated.
+        if mode == ClipMode::Clip {
+            self.bounding_rect = self.bounding_rect.and_then(|bounding_rect| {
+                bounding_rect.intersection(&rect)
+            });
+        }
+        let mode = Some(mode);
+
+        match radius {
+            Some(radius) => {
+                // For a rounded rect, try to create a nine-patch where there
+                // is a clip item for each corner, inner and edge region.
+                match extract_inner_rect_safe(&rect, &radius) {
+                    Some(inner) => {
+                        let p0 = rect.min;
+                        let p1 = inner.min;
+                        let p2 = inner.max;
+                        let p3 = rect.max;
+
+                        self.items.reserve(9); // four corners, four edges and the centre
+
+                        let corner_segments = &[
+                            LayoutRect { // top-left corner
+                                min: LayoutPoint::new(p0.x, p0.y),
+                                max: LayoutPoint::new(p1.x, p1.y),
+                            },
+                            LayoutRect { // top-right corner
+                                min: LayoutPoint::new(p2.x, p0.y),
+                                max: LayoutPoint::new(p3.x, p1.y),
+                            },
+                            LayoutRect { // bottom-right corner
+                                min: LayoutPoint::new(p2.x, p2.y),
+                                max: LayoutPoint::new(p3.x, p3.y),
+                            },
+                            LayoutRect { // bottom-left corner
+                                min: LayoutPoint::new(p0.x, p2.y),
+                                max: LayoutPoint::new(p1.x, p3.y),
+                            },
+                        ];
+
+                        for segment in corner_segments {
+                            self.items.push(Item::new(
+                                *segment,
+                                mode,
+                                true
+                            ));
+                        }
+
+                        let other_segments = &[
+                            LayoutRect { // top edge
+                                min: LayoutPoint::new(p1.x, p0.y),
+                                max: LayoutPoint::new(p2.x, p1.y),
+                            },
+                            LayoutRect { // right edge
+                                min: LayoutPoint::new(p2.x, p1.y),
+                                max: LayoutPoint::new(p3.x, p2.y),
+                            },
+                            LayoutRect { // bottom edge
+                                min: LayoutPoint::new(p1.x, p2.y),
+                                max: LayoutPoint::new(p2.x, p3.y),
+                            },
+                            LayoutRect { // left edge
+                                min: LayoutPoint::new(p0.x, p1.y),
+                                max: LayoutPoint::new(p1.x, p2.y),
+                            },
+                            LayoutRect { // centre (the extracted inner rect)
+                                min: LayoutPoint::new(p1.x, p1.y),
+                                max: LayoutPoint::new(p2.x, p2.y),
+                            },
+                        ];
+
+                        for segment in other_segments {
+                            self.items.push(Item::new(
+                                *segment,
+                                mode,
+                                false,
+                            ));
+                        }
+                    }
+                    None => {
+                        // If we get here, we could not extract an inner rectangle
+                        // for this clip region. This can occur in cases such as
+                        // a rounded rect where the top-left and bottom-left radii
+                        // result in overlapping rects. In that case, just create
+                        // a single clip region for the entire rounded rect.
+                        self.items.push(Item::new(
+                            rect,
+                            mode,
+                            true,
+                        ))
+                    }
+                }
+            }
+            None => {
+                // For a simple rect, just create one clipping item.
+                self.items.push(Item::new(
+                    rect,
+                    mode,
+                    false,
+                ))
+            }
+        }
+    }
+
+    // Produce the list of segments, passing each one to `f`. Requires a prior `initialize`.
+    pub fn build<F>(&mut self, mut f: F) where F: FnMut(&Segment) {
+        #[cfg(debug_assertions)]
+        debug_assert!(self.initialized);
+
+        #[cfg(debug_assertions)]
+        {
+            self.initialized = false;
+        }
+
+        let bounding_rect = match self.bounding_rect {
+            Some(bounding_rect) => bounding_rect,
+            None => return, // no bounding rect: nothing is emitted
+        };
+
+        if !self.has_interesting_clips {
+            // There were no additional clips added, so don't bother building segments.
+            // Just emit a single segment for the bounding rect of the primitive.
+            f(&Segment {
+                edge_flags: EdgeAaSegmentMask::all(),
+                region_x: 0,
+                region_y: 0,
+                has_mask: false,
+                rect: bounding_rect,
+            });
+            return
+        }
+
+        // First, filter out any items that don't intersect
+        // with the visible bounding rect.
+        self.items.retain(|item| item.rect.intersects(&bounding_rect));
+
+        // Create events for each item: one begin and one end event per axis.
+        let mut x_events : SmallVec<[Event; 4]> = SmallVec::new();
+        let mut y_events : SmallVec<[Event; 4]> = SmallVec::new();
+
+        for (item_index, item) in self.items.iter().enumerate() {
+            let p0 = item.rect.min;
+            let p1 = item.rect.max;
+
+            x_events.push(Event::begin(p0.x, item_index));
+            x_events.push(Event::end(p1.x, item_index));
+            y_events.push(Event::begin(p0.y, item_index));
+            y_events.push(Event::end(p1.y, item_index));
+        }
+
+        // Add the region events, if provided.
+        if let Some(inner_rect) = self.inner_rect {
+            x_events.push(Event::region(inner_rect.min.x));
+            x_events.push(Event::region(inner_rect.max.x));
+
+            y_events.push(Event::region(inner_rect.min.y));
+            y_events.push(Event::region(inner_rect.max.y));
+        }
+
+        // Get the minimal bounding rect in app units. We will
+        // work in fixed point in order to avoid float precision
+        // error while handling events.
+        let p0 = LayoutPointAu::new(
+            Au::from_f32_px(bounding_rect.min.x),
+            Au::from_f32_px(bounding_rect.min.y),
+        );
+
+        let p1 = LayoutPointAu::new(
+            Au::from_f32_px(bounding_rect.max.x),
+            Au::from_f32_px(bounding_rect.max.y),
+        );
+
+        // Sort the events in ascending order.
+        x_events.sort();
+        y_events.sort();
+
+        // Generate segments from the event lists, by sweeping the y-axis
+        // and then the x-axis for each event. This can generate a significant
+        // number of segments, but most importantly, it ensures that there are
+        // no t-junctions in the generated segments. It's probably possible
+        // to come up with more efficient segmentation algorithms, at least
+        // for simple / common cases.
+
+        // Each coordinate is clamped to the bounds of the minimal
+        // bounding rect. This ensures that we don't generate segments
+        // outside that bounding rect, but does allow correctly handling
+        // clips where the clip region starts outside the minimal
+        // rect but still intersects with it.
+
+        let mut prev_y = clamp(p0.y, y_events[0].value, p1.y); // indexing requires at least one event
+        let mut region_y = 0;
+        let mut segments : SmallVec<[_; 16]> = SmallVec::new(); // row-major grid of `Option<Segment>`
+        let mut x_count = 0;
+        let mut y_count = 0;
+
+        for ey in &y_events {
+            let cur_y = clamp(p0.y, ey.value, p1.y);
+
+            if cur_y != prev_y {
+                let mut prev_x = clamp(p0.x, x_events[0].value, p1.x);
+                let mut region_x = 0;
+
+                for ex in &x_events {
+                    let cur_x = clamp(p0.x, ex.value, p1.x);
+
+                    if cur_x != prev_x {
+                        segments.push(emit_segment_if_needed(
+                            prev_x,
+                            prev_y,
+                            cur_x,
+                            cur_y,
+                            region_x,
+                            region_y,
+                            &self.items,
+                        ));
+
+                        prev_x = cur_x;
+                        if y_count == 0 { // count the columns once, while sweeping the first row
+                            x_count += 1;
+                        }
+                    }
+
+                    ex.update(
+                        ItemFlags::X_ACTIVE,
+                        &mut self.items,
+                        &mut region_x,
+                    );
+                }
+
+                prev_y = cur_y;
+                y_count += 1;
+            }
+
+            ey.update(
+                ItemFlags::Y_ACTIVE,
+                &mut self.items,
+                &mut region_y,
+            );
+        }
+
+        // If we created more than `MAX_SEGMENTS` grid cells (empty ones included), just bail out
+        // and draw it as a single primitive with a single mask, to avoid overhead of excessive
+        // amounts of segments. This can only happen in pathological cases, for example a cascade
+        // of a dozen or more overlapping and intersecting rounded clips.
+        if segments.len() > MAX_SEGMENTS {
+            f(&Segment {
+                edge_flags: EdgeAaSegmentMask::all(),
+                region_x: 0,
+                region_y: 0,
+                has_mask: true,
+                rect: bounding_rect,
+            });
+            return
+        }
+
+        // Run user supplied closure for each valid segment. A side is flagged when it
+        // lies on the border of the grid or when the neighbouring cell holds no segment.
+        debug_assert_eq!(segments.len(), x_count * y_count);
+        for y in 0 .. y_count {
+            for x in 0 .. x_count {
+                let mut edge_flags = EdgeAaSegmentMask::empty();
+
+                if x == 0 || segments[y * x_count + x - 1].is_none() {
+                    edge_flags |= EdgeAaSegmentMask::LEFT;
+                }
+                if x == x_count-1 || segments[y * x_count + x + 1].is_none() {
+                    edge_flags |= EdgeAaSegmentMask::RIGHT;
+                }
+                if y == 0 || segments[(y-1) * x_count + x].is_none() {
+                    edge_flags |= EdgeAaSegmentMask::TOP;
+                }
+                if y == y_count-1 || segments[(y+1) * x_count + x].is_none() {
+                    edge_flags |= EdgeAaSegmentMask::BOTTOM;
+                }
+
+                if let Some(ref mut segment) = segments[y * x_count + x] {
+                    segment.edge_flags = edge_flags;
+                    f(segment);
+                }
+            }
+        }
+    }
+}
+
+fn clamp(low: Au, value: Au, high: Au) -> Au {
+    std::cmp::min(std::cmp::max(value, low), high) // lift to `low` first, then cap at `high`
+}
+
+fn emit_segment_if_needed(
+    x0: Au,
+    y0: Au,
+    x1: Au,
+    y1: Au,
+    region_x: usize,
+    region_y: usize,
+    items: &[Item],
+) -> Option<Segment> {
+    // Callers are expected to pass rects with a positive width and height.
+    debug_assert!(x1 > x0);
+    debug_assert!(y1 > y0);
+
+    // TODO(gw): Don't scan the whole list of items for
+    //           each segment rect. Store active list
+    //           in a hash set or similar if this ever
+    //           shows up in a profile.
+    let active_on_both_axes = ItemFlags::X_ACTIVE | ItemFlags::Y_ACTIVE;
+    let mut needs_mask = false;
+
+    for item in items.iter().filter(|item| item.flags.contains(active_on_both_axes)) {
+        let item_has_mask = item.flags.contains(ItemFlags::HAS_MASK);
+        // A mask-less clip-out item that is active on both axes removes the segment.
+        if !item_has_mask && item.mode == Some(ClipMode::ClipOut) {
+            return None;
+        }
+
+        // Otherwise remember whether any active item carries a mask.
+        needs_mask |= item_has_mask;
+    }
+
+    // Convert the app-unit corners to float pixels for the output rect.
+    let min_corner = LayoutPoint::new(x0.to_f32_px(), y0.to_f32_px());
+    let max_corner = LayoutPoint::new(x1.to_f32_px(), y1.to_f32_px());
+
+    Some(Segment {
+        rect: LayoutRect {
+            min: min_corner,
+            max: max_corner,
+        },
+        has_mask: needs_mask,
+        // Left empty here; the caller assigns edge flags once all segments exist.
+        edge_flags: EdgeAaSegmentMask::empty(),
+        region_x,
+        region_y,
+    })
+}
+
+#[cfg(test)]
+mod test {
+    use api::{BorderRadius, ClipMode};
+    use api::units::{LayoutPoint, LayoutRect};
+    use super::{Segment, SegmentBuilder, EdgeAaSegmentMask};
+    use std::cmp;
+
+    fn rect(x0: f32, y0: f32, x1: f32, y1: f32) -> LayoutRect {
+        // (x0, y0) becomes the `min` corner and (x1, y1) the `max` corner.
+        let min = LayoutPoint::new(x0, y0);
+        let max = LayoutPoint::new(x1, y1);
+        LayoutRect { min, max }
+    }
+
+    fn seg( // Builds an expected `Segment` that lives in region (0, 0).
+        x0: f32,
+        y0: f32,
+        x1: f32,
+        y1: f32,
+        has_mask: bool,
+        edge_flags: Option<EdgeAaSegmentMask>,
+    ) -> Segment {
+        seg_region(x0, y0, x1, y1, 0, 0, has_mask, edge_flags) // region_x = 0, region_y = 0
+    }
+
+    fn seg_region(
+        x0: f32,
+        y0: f32,
+        x1: f32,
+        y1: f32,
+        region_x: usize,
+        region_y: usize,
+        has_mask: bool,
+        edge_flags: Option<EdgeAaSegmentMask>,
+    ) -> Segment {
+        // Builds an expected `Segment`; `None` stands for an empty edge mask.
+        let edge_flags = edge_flags.unwrap_or_else(EdgeAaSegmentMask::empty);
+        let bounds = rect(x0, y0, x1, y1);
+        Segment {
+            rect: bounds,
+            has_mask,
+            edge_flags,
+            region_x,
+            region_y,
+        }
+    }
+
+    fn segment_sorter(s0: &Segment, s1: &Segment) -> cmp::Ordering {
+        // Orders segments lexicographically by (min.x, min.y, max.x, max.y).
+        // The `unwrap` panics if the two keys are incomparable (e.g. a NaN coordinate).
+        let key = |s: &Segment| {
+            let r = &s.rect;
+            (r.min.x, r.min.y, r.max.x, r.max.y)
+        };
+
+        key(s0).partial_cmp(&key(s1)).unwrap()
+    }
+
+    fn seg_test(
+        local_rect: LayoutRect,
+        inner_rect: Option<LayoutRect>,
+        local_clip_rect: LayoutRect,
+        clips: &[(LayoutRect, Option<BorderRadius>, ClipMode)],
+        expected_segments: &mut [Segment]
+    ) {
+        // Feed the primitive rect, its local clip and every extra clip to a fresh builder.
+        let mut builder = SegmentBuilder::new();
+        builder.initialize(local_rect, inner_rect, local_clip_rect);
+        builder.push_clip_rect(local_rect, None, ClipMode::Clip);
+        builder.push_clip_rect(local_clip_rect, None, ClipMode::Clip);
+        for clip in clips {
+            builder.push_clip_rect(clip.0, clip.1, clip.2);
+        }
+
+        // Copy out every segment the builder reports.
+        let mut actual = Vec::new();
+        builder.build(|segment| actual.push(Segment { ..*segment }));
+
+        // Sort both sides so the comparison does not depend on emission order.
+        actual.sort_by(segment_sorter);
+        expected_segments.sort_by(segment_sorter);
+        assert_eq!(
+            actual.len(),
+            expected_segments.len(),
+            "segments\n{:?}\nexpected\n{:?}\n",
+            actual,
+            expected_segments
+        );
+
+        // Same length, so compare pairwise.
+        actual
+            .iter()
+            .zip(expected_segments.iter())
+            .for_each(|(segment, expected)| assert_eq!(segment, expected));
+    }
+
+    #[test]
+    fn segment_empty() { // Zero-sized primitive and clip: no segments are expected.
+        seg_test(
+            rect(0.0, 0.0, 0.0, 0.0),
+            None,
+            rect(0.0, 0.0, 0.0, 0.0),
+            &[],
+            &mut [],
+        );
+    }
+
+    #[test]
+    fn segment_single() { // Clip equals the primitive: one unmasked segment with all four edge flags.
+        seg_test(
+            rect(10.0, 20.0, 30.0, 40.0),
+            None,
+            rect(10.0, 20.0, 30.0, 40.0),
+            &[],
+            &mut [
+                seg(10.0, 20.0, 30.0, 40.0, false,
+                    Some(EdgeAaSegmentMask::LEFT |
+                         EdgeAaSegmentMask::TOP |
+                         EdgeAaSegmentMask::RIGHT |
+                         EdgeAaSegmentMask::BOTTOM
+                    )
+                ),
+            ],
+        );
+    }
+
+    #[test]
+    fn segment_single_clip() { // Clip trims the max edges of the primitive: the segment is the clipped rect.
+        seg_test(
+            rect(10.0, 20.0, 30.0, 40.0),
+            None,
+            rect(10.0, 20.0, 25.0, 35.0),
+            &[],
+            &mut [
+                seg(10.0, 20.0, 25.0, 35.0, false,
+                    Some(EdgeAaSegmentMask::LEFT |
+                         EdgeAaSegmentMask::TOP |
+                         EdgeAaSegmentMask::RIGHT |
+                         EdgeAaSegmentMask::BOTTOM
+                    )
+                ),
+            ],
+        );
+    }
+
+    #[test]
+    fn segment_inner_clip() { // Clip lies strictly inside the primitive: the segment is the clip rect.
+        seg_test(
+            rect(10.0, 20.0, 30.0, 40.0),
+            None,
+            rect(15.0, 25.0, 25.0, 35.0),
+            &[],
+            &mut [
+                seg(15.0, 25.0, 25.0, 35.0, false,
+                    Some(EdgeAaSegmentMask::LEFT |
+                         EdgeAaSegmentMask::TOP |
+                         EdgeAaSegmentMask::RIGHT |
+                         EdgeAaSegmentMask::BOTTOM
+                    )
+                ),
+            ],
+        );
+    }
+
+    #[test]
+    fn segment_outer_clip() { // Clip fully contains the primitive: the segment is the primitive rect.
+        seg_test(
+            rect(15.0, 25.0, 25.0, 35.0),
+            None,
+            rect(10.0, 20.0, 30.0, 40.0),
+            &[],
+            &mut [
+                seg(15.0, 25.0, 25.0, 35.0, false,
+                    Some(EdgeAaSegmentMask::LEFT |
+                         EdgeAaSegmentMask::TOP |
+                         EdgeAaSegmentMask::RIGHT |
+                         EdgeAaSegmentMask::BOTTOM
+                    )
+                ),
+            ],
+        );
+    }
+
+    #[test]
+    fn segment_clip_int() { // Clip partially overlaps the primitive: the segment is their intersection.
+        seg_test(
+            rect(10.0, 20.0, 30.0, 40.0),
+            None,
+            rect(20.0, 10.0, 40.0, 30.0),
+            &[],
+            &mut [
+                seg(20.0, 20.0, 30.0, 30.0, false,
+                    Some(EdgeAaSegmentMask::LEFT |
+                         EdgeAaSegmentMask::TOP |
+                         EdgeAaSegmentMask::RIGHT |
+                         EdgeAaSegmentMask::BOTTOM
+                    )
+                ),
+            ],
+        );
+    }
+
+    #[test]
+    fn segment_clip_disjoint() { // Clip only touches the primitive at x = 30: no segments are expected.
+        seg_test(
+            rect(10.0, 20.0, 30.0, 40.0),
+            None,
+            rect(30.0, 20.0, 50.0, 40.0),
+            &[],
+            &mut [],
+        );
+    }
+
+    #[test]
+    fn segment_clips() { // Two Clip rects that only share the x = 40 edge: no segments are expected.
+        seg_test(
+            rect(0.0, 0.0, 100.0, 100.0),
+            None,
+            rect(-1000.0, -1000.0, 1000.0, 1000.0),
+            &[
+                (rect(20.0, 20.0, 40.0, 40.0), None, ClipMode::Clip),
+                (rect(40.0, 20.0, 60.0, 40.0), None, ClipMode::Clip),
+            ],
+            &mut [
+            ],
+        );
+    }
+
+    #[test]
+    fn segment_rounded_clip() { // Rounded Clip: four masked corners, four unmasked edges, one unmasked centre.
+        seg_test(
+            rect(0.0, 0.0, 100.0, 100.0),
+            None,
+            rect(-1000.0, -1000.0, 1000.0, 1000.0),
+            &[
+                (rect(20.0, 20.0, 60.0, 60.0), Some(BorderRadius::uniform(10.0)), ClipMode::Clip),
+            ],
+            &mut [
+                // corners
+                seg(20.0, 20.0, 30.0, 30.0, true, Some(EdgeAaSegmentMask::LEFT | EdgeAaSegmentMask::TOP)),
+                seg(20.0, 50.0, 30.0, 60.0, true, Some(EdgeAaSegmentMask::LEFT | EdgeAaSegmentMask::BOTTOM)),
+                seg(50.0, 20.0, 60.0, 30.0, true, Some(EdgeAaSegmentMask::RIGHT | EdgeAaSegmentMask::TOP)),
+                seg(50.0, 50.0, 60.0, 60.0, true, Some(EdgeAaSegmentMask::RIGHT | EdgeAaSegmentMask::BOTTOM)),
+
+                // inner
+                seg(30.0, 30.0, 50.0, 50.0, false, None),
+
+                // edges
+                seg(30.0, 20.0, 50.0, 30.0, false, Some(EdgeAaSegmentMask::TOP)),
+                seg(30.0, 50.0, 50.0, 60.0, false, Some(EdgeAaSegmentMask::BOTTOM)),
+                seg(20.0, 30.0, 30.0, 50.0, false, Some(EdgeAaSegmentMask::LEFT)),
+                seg(50.0, 30.0, 60.0, 50.0, false, Some(EdgeAaSegmentMask::RIGHT)),
+            ],
+        );
+    }
+
+    #[test]
+    fn segment_clip_out() { // Rectangular ClipOut removes the middle cell: the eight surrounding segments remain.
+        seg_test(
+            rect(0.0, 0.0, 100.0, 100.0),
+            None,
+            rect(-1000.0, -1000.0, 2000.0, 2000.0),
+            &[
+                (rect(20.0, 20.0, 60.0, 60.0), None, ClipMode::ClipOut),
+            ],
+            &mut [
+                seg(0.0, 0.0, 20.0, 20.0, false, Some(EdgeAaSegmentMask::TOP | EdgeAaSegmentMask::LEFT)),
+                seg(20.0, 0.0, 60.0, 20.0, false, Some(EdgeAaSegmentMask::TOP | EdgeAaSegmentMask::BOTTOM)),
+                seg(60.0, 0.0, 100.0, 20.0, false, Some(EdgeAaSegmentMask::TOP | EdgeAaSegmentMask::RIGHT)),
+
+                seg(0.0, 20.0, 20.0, 60.0, false, Some(EdgeAaSegmentMask::LEFT | EdgeAaSegmentMask::RIGHT)),
+                seg(60.0, 20.0, 100.0, 60.0, false, Some(EdgeAaSegmentMask::RIGHT | EdgeAaSegmentMask::LEFT)),
+
+                seg(0.0, 60.0, 20.0, 100.0, false, Some(EdgeAaSegmentMask::LEFT | EdgeAaSegmentMask::BOTTOM)),
+                seg(20.0, 60.0, 60.0, 100.0, false, Some(EdgeAaSegmentMask::BOTTOM | EdgeAaSegmentMask::TOP)),
+                seg(60.0, 60.0, 100.0, 100.0, false, Some(EdgeAaSegmentMask::BOTTOM | EdgeAaSegmentMask::RIGHT)),
+            ],
+        );
+    }
+
+    #[test]
+    fn segment_rounded_clip_out() { // Rounded ClipOut: inside the clip rect only the four corner cells survive, masked.
+        seg_test(
+            rect(0.0, 0.0, 100.0, 100.0),
+            None,
+            rect(-1000.0, -1000.0, 2000.0, 2000.0),
+            &[
+                (rect(20.0, 20.0, 60.0, 60.0), Some(BorderRadius::uniform(10.0)), ClipMode::ClipOut),
+            ],
+            &mut [
+                // top row
+                seg(0.0, 0.0, 20.0, 20.0, false, Some(EdgeAaSegmentMask::TOP | EdgeAaSegmentMask::LEFT)),
+                seg(20.0, 0.0, 30.0, 20.0, false, Some(EdgeAaSegmentMask::TOP)),
+                seg(30.0, 0.0, 50.0, 20.0, false, Some(EdgeAaSegmentMask::TOP | EdgeAaSegmentMask::BOTTOM)),
+                seg(50.0, 0.0, 60.0, 20.0, false, Some(EdgeAaSegmentMask::TOP)),
+                seg(60.0, 0.0, 100.0, 20.0, false, Some(EdgeAaSegmentMask::TOP | EdgeAaSegmentMask::RIGHT)),
+
+                // left
+                seg(0.0, 20.0, 20.0, 30.0, false, Some(EdgeAaSegmentMask::LEFT)),
+                seg(0.0, 30.0, 20.0, 50.0, false, Some(EdgeAaSegmentMask::LEFT | EdgeAaSegmentMask::RIGHT)),
+                seg(0.0, 50.0, 20.0, 60.0, false, Some(EdgeAaSegmentMask::LEFT)),
+
+                // right
+                seg(60.0, 20.0, 100.0, 30.0, false, Some(EdgeAaSegmentMask::RIGHT)),
+                seg(60.0, 30.0, 100.0, 50.0, false, Some(EdgeAaSegmentMask::RIGHT | EdgeAaSegmentMask::LEFT)),
+                seg(60.0, 50.0, 100.0, 60.0, false, Some(EdgeAaSegmentMask::RIGHT)),
+
+                // bottom row
+                seg(0.0, 60.0, 20.0, 100.0, false, Some(EdgeAaSegmentMask::LEFT | EdgeAaSegmentMask::BOTTOM)),
+                seg(20.0, 60.0, 30.0, 100.0, false, Some(EdgeAaSegmentMask::BOTTOM)),
+                seg(30.0, 60.0, 50.0, 100.0, false, Some(EdgeAaSegmentMask::BOTTOM | EdgeAaSegmentMask::TOP)),
+                seg(50.0, 60.0, 60.0, 100.0, false, Some(EdgeAaSegmentMask::BOTTOM)),
+                seg(60.0, 60.0, 100.0, 100.0, false, Some(EdgeAaSegmentMask::RIGHT | EdgeAaSegmentMask::BOTTOM)),
+
+                // inner corners
+                seg(20.0, 20.0, 30.0, 30.0, true, Some(EdgeAaSegmentMask::RIGHT | EdgeAaSegmentMask::BOTTOM)),
+                seg(20.0, 50.0, 30.0, 60.0, true, Some(EdgeAaSegmentMask::TOP | EdgeAaSegmentMask::RIGHT)),
+                seg(50.0, 20.0, 60.0, 30.0, true, Some(EdgeAaSegmentMask::LEFT | EdgeAaSegmentMask::BOTTOM)),
+                seg(50.0, 50.0, 60.0, 60.0, true, Some(EdgeAaSegmentMask::LEFT | EdgeAaSegmentMask::TOP)),
+            ],
+        );
+    }
+
+    #[test]
+    fn segment_clip_in_clip_out() { // Clip plus an overlapping ClipOut: the cell both rects cover is dropped.
+        seg_test(
+            rect(0.0, 0.0, 100.0, 100.0),
+            None,
+            rect(-1000.0, -1000.0, 2000.0, 2000.0),
+            &[
+                (rect(20.0, 20.0, 60.0, 60.0), None, ClipMode::Clip),
+                (rect(50.0, 50.0, 80.0, 80.0), None, ClipMode::ClipOut),
+            ],
+            &mut [
+                seg(20.0, 20.0, 50.0, 50.0, false, Some(EdgeAaSegmentMask::LEFT | EdgeAaSegmentMask::TOP)),
+                seg(50.0, 20.0, 60.0, 50.0, false, Some(EdgeAaSegmentMask::TOP | EdgeAaSegmentMask::RIGHT | EdgeAaSegmentMask::BOTTOM)),
+                seg(20.0, 50.0, 50.0, 60.0, false, Some(EdgeAaSegmentMask::LEFT | EdgeAaSegmentMask::BOTTOM | EdgeAaSegmentMask::RIGHT)),
+            ],
+        );
+    }
+
+    #[test]
+    fn segment_rounded_clip_overlap() { // ClipOut over the (0, 0)-(10, 10) corner of a rounded Clip: that corner is dropped.
+        seg_test(
+            rect(0.0, 0.0, 100.0, 100.0),
+            None,
+            rect(0.0, 0.0, 100.0, 100.0),
+            &[
+                (rect(0.0, 0.0, 10.0, 10.0), None, ClipMode::ClipOut),
+                (rect(0.0, 0.0, 100.0, 100.0), Some(BorderRadius::uniform(10.0)), ClipMode::Clip),
+            ],
+            &mut [
+                // corners
+                seg(0.0, 90.0, 10.0, 100.0, true, Some(EdgeAaSegmentMask::LEFT | EdgeAaSegmentMask::BOTTOM)),
+                seg(90.0, 0.0, 100.0, 10.0, true, Some(EdgeAaSegmentMask::RIGHT | EdgeAaSegmentMask::TOP)),
+                seg(90.0, 90.0, 100.0, 100.0, true, Some(EdgeAaSegmentMask::RIGHT | EdgeAaSegmentMask::BOTTOM)),
+
+                // inner
+                seg(10.0, 10.0, 90.0, 90.0, false, None),
+
+                // edges
+                seg(10.0, 0.0, 90.0, 10.0, false, Some(EdgeAaSegmentMask::TOP | EdgeAaSegmentMask::LEFT)),
+                seg(10.0, 90.0, 90.0, 100.0, false, Some(EdgeAaSegmentMask::BOTTOM)),
+                seg(0.0, 10.0, 10.0, 90.0, false, Some(EdgeAaSegmentMask::LEFT | EdgeAaSegmentMask::TOP)),
+                seg(90.0, 10.0, 100.0, 90.0, false, Some(EdgeAaSegmentMask::RIGHT)),
+            ],
+        );
+    }
+
+    #[test]
+    fn segment_rounded_clip_overlap_reverse() { // Rect Clip inset by the corner radius: a single unmasked segment remains.
+        seg_test(
+            rect(0.0, 0.0, 100.0, 100.0),
+            None,
+            rect(0.0, 0.0, 100.0, 100.0),
+            &[
+                (rect(10.0, 10.0, 90.0, 90.0), None, ClipMode::Clip),
+                (rect(0.0, 0.0, 100.0, 100.0), Some(BorderRadius::uniform(10.0)), ClipMode::Clip),
+            ],
+            &mut [
+                seg(10.0, 10.0, 90.0, 90.0, false,
+                    Some(EdgeAaSegmentMask::LEFT |
+                         EdgeAaSegmentMask::TOP |
+                         EdgeAaSegmentMask::RIGHT |
+                         EdgeAaSegmentMask::BOTTOM
+                    )
+                ),
+            ],
+        );
+    }
+
+    #[test]
+    fn segment_clip_in_clip_out_overlap() { // Identical Clip and ClipOut rects: no segments are expected.
+        seg_test(
+            rect(0.0, 0.0, 100.0, 100.0),
+            None,
+            rect(0.0, 0.0, 100.0, 100.0),
+            &[
+                (rect(10.0, 10.0, 90.0, 90.0), None, ClipMode::Clip),
+                (rect(10.0, 10.0, 90.0, 90.0), None, ClipMode::ClipOut),
+            ],
+            &mut [
+            ],
+        );
+    }
+
+    #[test]
+    fn segment_event_order() { // ClipOut starting on the primitive's own min edges: only the y = 90..100 strip remains.
+        seg_test(
+            rect(0.0, 0.0, 100.0, 100.0),
+            None,
+            rect(0.0, 0.0, 100.0, 100.0),
+            &[
+                (rect(0.0, 0.0, 100.0, 90.0), None, ClipMode::ClipOut),
+            ],
+            &mut [
+                seg(0.0, 90.0, 100.0, 100.0, false, Some(
+                    EdgeAaSegmentMask::LEFT |
+                    EdgeAaSegmentMask::RIGHT |
+                    EdgeAaSegmentMask::BOTTOM |
+                    EdgeAaSegmentMask::TOP
+                )),
+            ],
+        );
+    }
+
+    #[test]
+    fn segment_region_simple() { // Inner rect with no extra clips: the primitive splits into a 3x3 grid of regions.
+        seg_test(
+            rect(0.0, 0.0, 100.0, 100.0),
+            Some(rect(20.0, 40.0, 60.0, 80.0)),
+            rect(0.0, 0.0, 100.0, 100.0),
+            &[
+            ],
+            &mut [
+                seg_region(
+                    0.0, 0.0,
+                    20.0, 40.0,
+                    0, 0,
+                    false,
+                    Some(EdgeAaSegmentMask::LEFT | EdgeAaSegmentMask::TOP)
+                ),
+
+                seg_region(
+                    20.0, 0.0,
+                    60.0, 40.0,
+                    1, 0,
+                    false,
+                    Some(EdgeAaSegmentMask::TOP)
+                ),
+
+                seg_region(
+                    60.0, 0.0,
+                    100.0, 40.0,
+                    2, 0,
+                    false,
+                    Some(EdgeAaSegmentMask::TOP | EdgeAaSegmentMask::RIGHT)
+                ),
+
+                seg_region(
+                    0.0, 40.0,
+                    20.0, 80.0,
+                    0, 1,
+                    false,
+                    Some(EdgeAaSegmentMask::LEFT)
+                ),
+
+                seg_region(
+                    20.0, 40.0,
+                    60.0, 80.0,
+                    1, 1,
+                    false,
+                    None,
+                ),
+
+                seg_region(
+                    60.0, 40.0,
+                    100.0, 80.0,
+                    2, 1,
+                    false,
+                    Some(EdgeAaSegmentMask::RIGHT)
+                ),
+
+                seg_region(
+                    0.0, 80.0,
+                    20.0, 100.0,
+                    0, 2,
+                    false,
+                    Some(EdgeAaSegmentMask::LEFT | EdgeAaSegmentMask::BOTTOM)
+                ),
+
+                seg_region(
+                    20.0, 80.0,
+                    60.0, 100.0,
+                    1, 2,
+                    false,
+                    Some(EdgeAaSegmentMask::BOTTOM),
+                ),
+
+                seg_region(
+                    60.0, 80.0,
+                    100.0, 100.0,
+                    2, 2,
+                    false,
+                    Some(EdgeAaSegmentMask::RIGHT | EdgeAaSegmentMask::BOTTOM)
+                ),
+
+            ],
+        );
+    }
+
+    #[test]
+    fn segment_region_clip() { // Inner rect plus a ClipOut of y < 90: only three segments in region row 2 remain.
+        seg_test(
+            rect(0.0, 0.0, 100.0, 100.0),
+            Some(rect(20.0, 40.0, 60.0, 80.0)),
+            rect(0.0, 0.0, 100.0, 100.0),
+            &[
+                (rect(0.0, 0.0, 100.0, 90.0), None, ClipMode::ClipOut),
+            ],
+            &mut [
+                seg_region(
+                    0.0, 90.0,
+                    20.0, 100.0,
+                    0, 2,
+                    false,
+                    Some(EdgeAaSegmentMask::LEFT | EdgeAaSegmentMask::BOTTOM | EdgeAaSegmentMask::TOP)
+                ),
+
+                seg_region(
+                    20.0, 90.0,
+                    60.0, 100.0,
+                    1, 2,
+                    false,
+                    Some(EdgeAaSegmentMask::BOTTOM | EdgeAaSegmentMask::TOP),
+                ),
+
+                seg_region(
+                    60.0, 90.0,
+                    100.0, 100.0,
+                    2, 2,
+                    false,
+                    Some(EdgeAaSegmentMask::RIGHT | EdgeAaSegmentMask::BOTTOM | EdgeAaSegmentMask::TOP)
+                ),
+
+            ],
+        );
+    }
+
+    #[test]
+    fn segment_region_clip2() { // Inner rect plus a ClipOut covering (20, 20)-(100, 100): five segments remain.
+        seg_test(
+            rect(0.0, 0.0, 100.0, 100.0),
+            Some(rect(20.0, 20.0, 80.0, 80.0)),
+            rect(0.0, 0.0, 100.0, 100.0),
+            &[
+                (rect(20.0, 20.0, 100.0, 100.0), None, ClipMode::ClipOut),
+            ],
+            &mut [
+                seg_region(
+                    0.0, 0.0,
+                    20.0, 20.0,
+                    0, 0,
+                    false,
+                    Some(EdgeAaSegmentMask::LEFT | EdgeAaSegmentMask::TOP)
+                ),
+
+                seg_region(
+                    20.0, 0.0,
+                    80.0, 20.0,
+                    1, 0,
+                    false,
+                    Some(EdgeAaSegmentMask::TOP | EdgeAaSegmentMask::BOTTOM),
+                ),
+
+                seg_region(
+                    80.0, 0.0,
+                    100.0, 20.0,
+                    2, 0,
+                    false,
+                    Some(EdgeAaSegmentMask::RIGHT | EdgeAaSegmentMask::TOP | EdgeAaSegmentMask::BOTTOM)
+                ),
+
+                seg_region(
+                    0.0, 20.0,
+                    20.0, 80.0,
+                    0, 1,
+                    false,
+                    Some(EdgeAaSegmentMask::LEFT | EdgeAaSegmentMask::RIGHT)
+                ),
+
+                seg_region(
+                    0.0, 80.0,
+                    20.0, 100.0,
+                    0, 2,
+                    false,
+                    Some(EdgeAaSegmentMask::LEFT | EdgeAaSegmentMask::BOTTOM | EdgeAaSegmentMask::RIGHT)
+                ),
+            ],
+        );
+    }
+
+    #[test]
+    fn segment_region_clip3() { // Inner rect plus a Clip straddling its (20, 20) corner: four segments, regions 0..=1 on each axis.
+        seg_test(
+            rect(0.0, 0.0, 100.0, 100.0),
+            Some(rect(20.0, 20.0, 80.0, 80.0)),
+            rect(0.0, 0.0, 100.0, 100.0),
+            &[
+                (rect(10.0, 10.0, 30.0, 30.0), None, ClipMode::Clip),
+            ],
+            &mut [
+                seg_region(
+                    10.0, 10.0,
+                    20.0, 20.0,
+                    0, 0,
+                    false,
+                    Some(EdgeAaSegmentMask::TOP | EdgeAaSegmentMask::LEFT),
+                ),
+
+                seg_region(
+                    20.0, 10.0,
+                    30.0, 20.0,
+                    1, 0,
+                    false,
+                    Some(EdgeAaSegmentMask::TOP | EdgeAaSegmentMask::RIGHT),
+                ),
+
+                seg_region(
+                    10.0, 20.0,
+                    20.0, 30.0,
+                    0, 1,
+                    false,
+                    Some(EdgeAaSegmentMask::BOTTOM | EdgeAaSegmentMask::LEFT),
+                ),
+
+                seg_region(
+                    20.0, 20.0,
+                    30.0, 30.0,
+                    1, 1,
+                    false,
+                    Some(EdgeAaSegmentMask::BOTTOM | EdgeAaSegmentMask::RIGHT),
+                ),
+            ],
+        );
+    }
+}
diff --git a/gfx/wr/webrender/src/space.rs b/gfx/wr/webrender/src/space.rs
new file mode 100644
index 0000000000..518bee6a67
--- /dev/null
+++ b/gfx/wr/webrender/src/space.rs
@@ -0,0 +1,269 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+
+//! Utilities to deal with coordinate spaces.
+
+use std::fmt;
+
+use euclid::{Transform3D, Box2D, Point2D, Vector2D};
+
+use api::units::*;
+use crate::spatial_tree::{SpatialTree, CoordinateSpaceMapping, SpatialNodeIndex, VisibleFace, SpatialNodeContainer};
+use crate::util::project_rect;
+use crate::util::{MatrixHelpers, ScaleOffset, RectHelpers, PointHelpers};
+
+
+#[derive(Debug, Clone)]
+pub struct SpaceMapper<F, T> { // Maps rects, points and vectors from unit `F` to unit `T` (and rects back).
+    kind: CoordinateSpaceMapping<F, T>, // current mapping; starts as `Local`
+    pub ref_spatial_node_index: SpatialNodeIndex, // reference node, set at construction
+    pub current_target_spatial_node_index: SpatialNodeIndex, // last target given to `set_target_spatial_node`
+    pub bounds: Box2D<f32, T>, // handed to `project_rect` when mapping through a full transform
+    visible_face: VisibleFace, // reset to `Front` whenever the target changes
+}
+
+impl<F, T> SpaceMapper<F, T> where F: fmt::Debug {
+    pub fn new(
+        ref_spatial_node_index: SpatialNodeIndex,
+        bounds: Box2D<f32, T>,
+    ) -> Self {
+        Self {
+            bounds,
+            visible_face: VisibleFace::Front,
+            kind: CoordinateSpaceMapping::Local, // identity until a different target is set
+            current_target_spatial_node_index: ref_spatial_node_index, // initially targets itself
+            ref_spatial_node_index,
+        }
+    }
+
+    pub fn new_with_target(
+        ref_spatial_node_index: SpatialNodeIndex,
+        target_node_index: SpatialNodeIndex,
+        bounds: Box2D<f32, T>,
+        spatial_tree: &SpatialTree,
+    ) -> Self {
+        let mut this = Self::new(ref_spatial_node_index, bounds); // starts out as a `Local` mapper
+        this.set_target_spatial_node(target_node_index, spatial_tree); // then point it at the target
+        this
+    }
+
+    pub fn set_target_spatial_node( // Recomputes `kind` (and `visible_face`) for a new target node.
+        &mut self,
+        target_node_index: SpatialNodeIndex,
+        spatial_tree: &SpatialTree,
+    ) {
+        if target_node_index == self.current_target_spatial_node_index { // unchanged target: keep the cached mapping
+            return
+        }
+
+        let ref_spatial_node = spatial_tree.get_spatial_node(self.ref_spatial_node_index);
+        let target_spatial_node = spatial_tree.get_spatial_node(target_node_index);
+        self.visible_face = VisibleFace::Front; // default; only the Transform branch hands it out mutably
+
+        self.kind = if self.ref_spatial_node_index == target_node_index { // same node: no mapping needed
+            CoordinateSpaceMapping::Local
+        } else if ref_spatial_node.coordinate_system_id == target_spatial_node.coordinate_system_id { // same coordinate system
+            let scale_offset = ref_spatial_node.content_transform
+                .inverse()
+                .accumulate(&target_spatial_node.content_transform);
+            CoordinateSpaceMapping::ScaleOffset(scale_offset)
+        } else { // different coordinate systems: use the full relative transform
+            let transform = spatial_tree
+                .get_relative_transform_with_face(
+                    target_node_index,
+                    self.ref_spatial_node_index,
+                    Some(&mut self.visible_face), // NOTE(review): presumably written by the callee - confirm
+                )
+                .into_transform()
+                .with_source::<F>()
+                .with_destination::<T>();
+            CoordinateSpaceMapping::Transform(transform)
+        };
+
+        self.current_target_spatial_node_index = target_node_index; // lets the next call with this target return early
+    }
+
+    pub fn get_transform(&self) -> Transform3D<f32, F, T> {
+        // Express whichever mapping is currently stored as a `Transform3D`.
+        match self.kind {
+            // No mapping at all: the identity transform.
+            CoordinateSpaceMapping::Local => Transform3D::identity(),
+            // Let the scale-offset build its equivalent transform.
+            CoordinateSpaceMapping::ScaleOffset(ref scale_offset) => {
+                scale_offset.to_transform()
+            }
+            // Already a transform; hand back a copy of it.
+            CoordinateSpaceMapping::Transform(matrix) => matrix,
+        }
+    }
+
+    pub fn unmap(&self, rect: &Box2D<f32, T>) -> Option<Box2D<f32, F>> {
+        // Reverse direction of `map`: takes a rect in the `T` unit and returns it in the `F` unit.
+        match self.kind {
+            // No mapping: only the unit tag changes.
+            CoordinateSpaceMapping::Local => Some(rect.cast_unit()),
+            CoordinateSpaceMapping::ScaleOffset(ref scale_offset) => {
+                Some(scale_offset.unmap_rect(rect))
+            }
+            CoordinateSpaceMapping::Transform(ref transform) => {
+                transform.inverse_rect_footprint(rect)
+            }
+        }
+    }
+
+    pub fn map(&self, rect: &Box2D<f32, F>) -> Option<Box2D<f32, T>> {
+        // Takes `rect` from the `F` unit to the `T` unit using the current mapping.
+        // Returns `None` only when projecting through a full transform fails.
+        match self.kind {
+            // No mapping: only the unit tag changes.
+            CoordinateSpaceMapping::Local => Some(rect.cast_unit()),
+            // A scale-offset mapping always yields a rect.
+            CoordinateSpaceMapping::ScaleOffset(ref scale_offset) => {
+                Some(scale_offset.map_rect(rect))
+            }
+            // Project within `self.bounds`; on failure, log the offending rect
+            // and report the failure to the caller as `None`.
+            CoordinateSpaceMapping::Transform(ref transform) => {
+                let projected = project_rect(transform, rect, &self.bounds);
+                if projected.is_none() {
+                    warn!("parent relative transform can't transform the primitive rect for {:?}", rect);
+                }
+                projected
+            }
+        }
+    }
+
+    // Attempt to return a rect that is contained in the mapped rect.
+    // Yields `None` when the current mapping is a full transform.
+    pub fn map_inner_bounds(&self, rect: &Box2D<f32, F>) -> Option<Box2D<f32, T>> {
+        match self.kind {
+            // No mapping: only the unit tag changes.
+            CoordinateSpaceMapping::Local => Some(rect.cast_unit()),
+            // Same computation as `map` uses for a scale-offset mapping.
+            CoordinateSpaceMapping::ScaleOffset(ref scale_offset) => {
+                Some(scale_offset.map_rect(rect))
+            }
+            // We could figure out a rect that is contained in the transformed rect but
+            // for now we do the simple thing here and bail out, leaving it to the
+            // caller to cope without inner bounds.
+            CoordinateSpaceMapping::Transform(..) => None,
+        }
+    }
+
+    // Maps a point from the source (`F`) unit into the target (`T`) unit.
+    pub fn map_point(&self, p: Point2D<f32, F>) -> Option<Point2D<f32, T>> {
+        match self.kind {
+            // No mapping: only the unit tag changes.
+            CoordinateSpaceMapping::Local => Some(p.cast_unit()),
+            CoordinateSpaceMapping::ScaleOffset(ref scale_offset) => {
+                Some(scale_offset.map_point(&p))
+            }
+            // The only arm whose result is not wrapped in `Some` here.
+            CoordinateSpaceMapping::Transform(ref transform) => {
+                transform.transform_point2d(p)
+            }
+        }
+    }
+
+    pub fn map_vector(&self, v: Vector2D<f32, F>) -> Vector2D<f32, T> {
+        // Vector counterpart of `map_point`; returns the vector directly, not an `Option`.
+        match self.kind {
+            // No mapping: only the unit tag changes.
+            CoordinateSpaceMapping::Local => v.cast_unit(),
+            CoordinateSpaceMapping::ScaleOffset(ref scale_offset) => {
+                scale_offset.map_vector(&v)
+            }
+            CoordinateSpaceMapping::Transform(ref transform) => {
+                transform.transform_vector2d(v)
+            }
+        }
+    }
+}
+
+
+#[derive(Clone, Debug)]
+pub struct SpaceSnapper { // Snaps rects and points using a cached scale-offset between two spatial nodes.
+    ref_spatial_node_index: SpatialNodeIndex, // reference node, set at construction
+    current_target_spatial_node_index: SpatialNodeIndex, // `INVALID` until a target has been set
+    snapping_transform: Option<ScaleOffset>, // `None` unless both nodes provide a snapping transform
+    raster_pixel_scale: RasterPixelScale, // scale applied on top of the node-to-node scale-offset
+}
+
+impl SpaceSnapper {
+    pub fn new(
+        ref_spatial_node_index: SpatialNodeIndex,
+        raster_pixel_scale: RasterPixelScale,
+    ) -> Self {
+        Self {
+            raster_pixel_scale,
+            ref_spatial_node_index,
+            snapping_transform: None, // computed once a target is set
+            current_target_spatial_node_index: SpatialNodeIndex::INVALID, // no target yet
+        }
+    }
+
+    pub fn new_with_target<S: SpatialNodeContainer>(
+        ref_spatial_node_index: SpatialNodeIndex,
+        target_node_index: SpatialNodeIndex,
+        raster_pixel_scale: RasterPixelScale,
+        spatial_tree: &S,
+    ) -> Self {
+        // Start from the state `new` produces: the target is `INVALID` and
+        // there is no snapping transform yet. This mirrors how
+        // `SpaceMapper::new_with_target` builds on its own `new`.
+        let mut this = Self::new(ref_spatial_node_index, raster_pixel_scale);
+
+        // Then compute the snapping transform for the requested target.
+        this.set_target_spatial_node(target_node_index, spatial_tree);
+
+        this
+    }
+
+    pub fn set_target_spatial_node<S: SpatialNodeContainer>(
+        &mut self,
+        target_node_index: SpatialNodeIndex,
+        spatial_tree: &S,
+    ) {
+        // The cached transform is still valid if the target did not change.
+        if self.current_target_spatial_node_index == target_node_index {
+            return;
+        }
+
+        let ref_snap = spatial_tree.get_node_info(self.ref_spatial_node_index).snapping_transform;
+        let target_snap = spatial_tree.get_node_info(target_node_index).snapping_transform;
+        self.current_target_spatial_node_index = target_node_index;
+
+        // Snapping is only possible when both nodes provide a snapping transform;
+        // otherwise `snap_rect` / `snap_point` return their input unchanged.
+        self.snapping_transform = if let (Some(from_ref), Some(to_target)) = (ref_snap, target_snap) {
+            let relative = from_ref.inverse().accumulate(&to_target);
+            Some(relative.scale(self.raster_pixel_scale.0))
+        } else {
+            None
+        };
+    }
+
+    pub fn snap_rect<F>(&self, rect: &Box2D<f32, F>) -> Box2D<f32, F> where F: fmt::Debug {
+        debug_assert!(self.current_target_spatial_node_index != SpatialNodeIndex::INVALID);
+        let scale_offset = match self.snapping_transform {
+            Some(ref scale_offset) => scale_offset,
+            None => return *rect, // no snapping transform: hand the rect back untouched
+        };
+        // Snap in device space, then bring the result back into the source space.
+        let snapped_device_rect: DeviceRect = scale_offset.map_rect(rect).snap();
+        scale_offset.unmap_rect(&snapped_device_rect)
+    }
+
+    pub fn snap_point<F>(&self, point: &Point2D<f32, F>) -> Point2D<f32, F> where F: fmt::Debug {
+        debug_assert!(self.current_target_spatial_node_index != SpatialNodeIndex::INVALID);
+        let scale_offset = match self.snapping_transform {
+            Some(ref scale_offset) => scale_offset,
+            None => return *point, // no snapping transform: hand the point back untouched
+        };
+        // Snap in device space, then bring the result back into the source space.
+        let snapped_device_point: DevicePoint = scale_offset.map_point(point).snap();
+        scale_offset.unmap_point(&snapped_device_point)
+    }
+}
diff --git a/gfx/wr/webrender/src/spatial_node.rs b/gfx/wr/webrender/src/spatial_node.rs
new file mode 100644
index 0000000000..9a2039e37b
--- /dev/null
+++ b/gfx/wr/webrender/src/spatial_node.rs
@@ -0,0 +1,995 @@
+
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{ExternalScrollId, PipelineId, PropertyBinding, PropertyBindingId, ReferenceFrameKind};
+use api::{APZScrollGeneration, HasScrollLinkedEffect, SampledScrollOffset};
+use api::{TransformStyle, StickyOffsetBounds, SpatialTreeItemKey};
+use api::units::*;
+use crate::internal_types::PipelineInstanceId;
+use crate::spatial_tree::{CoordinateSystem, SpatialNodeIndex, TransformUpdateState};
+use crate::spatial_tree::{CoordinateSystemId};
+use euclid::{Vector2D, SideOffsets2D};
+use crate::scene::SceneProperties;
+use crate::util::{LayoutFastTransform, MatrixHelpers, ScaleOffset, TransformedRectKind, PointHelpers};
+
+/// The kind of a spatial node uid. These are required because we currently create external
+/// nodes during DL building, but the internal nodes aren't created until scene building.
+/// TODO(gw): The internal scroll and reference frames are not used in any important way
+///           by Gecko - they were primarily useful for Servo. So we should plan to remove
+///           them completely.
+#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum SpatialNodeUidKind {
+    /// The root node of the entire spatial tree
+    Root,
+    /// Internal scroll frame created during scene building for each iframe
+    InternalScrollFrame,
+    /// Internal reference frame created during scene building for each iframe
+    InternalReferenceFrame,
+    /// A normal spatial node uid, defined by a caller provided unique key
+    External {
+        /// The caller provided key identifying this node
+        key: SpatialTreeItemKey,
+    },
+}
+
+/// A unique identifier for a spatial node, that is stable across display lists.
+///
+/// A uid is the combination of a kind, the pipeline id that namespaces it, and
+/// the instance of that pipeline.
+#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct SpatialNodeUid {
+    /// The unique key for a given pipeline for this uid
+    pub kind: SpatialNodeUidKind,
+    /// Pipeline id to namespace key kinds
+    pub pipeline_id: PipelineId,
+    /// Instance of this pipeline id
+    pub instance_id: PipelineInstanceId,
+}
+
+impl SpatialNodeUid {
+    /// Uid of the root node of the spatial tree. It is built with a dummy
+    /// pipeline id and pipeline instance 0.
+    pub fn root() -> Self {
+        Self {
+            kind: SpatialNodeUidKind::Root,
+            pipeline_id: PipelineId::dummy(),
+            instance_id: PipelineInstanceId::new(0),
+        }
+    }
+
+    /// Uid of the internal scroll frame of the given pipeline instance.
+    pub fn root_scroll_frame(
+        pipeline_id: PipelineId,
+        instance_id: PipelineInstanceId,
+    ) -> Self {
+        let kind = SpatialNodeUidKind::InternalScrollFrame;
+        Self { kind, pipeline_id, instance_id }
+    }
+
+    /// Uid of the internal reference frame of the given pipeline instance.
+    pub fn root_reference_frame(
+        pipeline_id: PipelineId,
+        instance_id: PipelineInstanceId,
+    ) -> Self {
+        let kind = SpatialNodeUidKind::InternalReferenceFrame;
+        Self { kind, pipeline_id, instance_id }
+    }
+
+    /// Uid of a node identified by a caller provided `key`, namespaced by the
+    /// given pipeline instance.
+    pub fn external(
+        key: SpatialTreeItemKey,
+        pipeline_id: PipelineId,
+        instance_id: PipelineInstanceId,
+    ) -> Self {
+        let kind = SpatialNodeUidKind::External { key };
+        Self { kind, pipeline_id, instance_id }
+    }
+}
+
+/// Defines the content of a spatial node. If the values in the descriptor don't
+/// change, that means the rest of the fields in a spatial node will end up with
+/// the same result.
+#[derive(Clone, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct SpatialNodeDescriptor {
+    /// The type of this node and any data associated with that node type.
+    pub node_type: SpatialNodeType,
+
+    /// Pipeline that this node belongs to
+    pub pipeline_id: PipelineId,
+}
+
+/// The type of a spatial node, together with the data specific to that type.
+#[derive(Clone, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum SpatialNodeType {
+    /// A special kind of node that adjusts its position based on the position
+    /// of its parent node and a given set of sticky positioning offset bounds.
+    /// Sticky positioning is described in the CSS Positioned Layout Module Level 3 here:
+    /// https://www.w3.org/TR/css-position-3/#sticky-pos
+    StickyFrame(StickyFrameInfo),
+
+    /// Transforms its content, but doesn't clip it. Can also be adjusted
+    /// by scroll events or setting scroll offsets.
+    ScrollFrame(ScrollFrameInfo),
+
+    /// A reference frame establishes a new coordinate space in the tree.
+    ReferenceFrame(ReferenceFrameInfo),
+}
+
+/// Information about a spatial node that can be queried during either scene or
+/// frame building.
+pub struct SpatialNodeInfo<'a> {
+    /// The type of this node and any data associated with that node type.
+    pub node_type: &'a SpatialNodeType,
+
+    /// Parent spatial node. If this is None, we are the root node.
+    pub parent: Option<SpatialNodeIndex>,
+
+    /// Snapping scale/offset relative to the coordinate system. If None, then
+    /// we should not snap entities bound to this spatial node.
+    pub snapping_transform: Option<ScaleOffset>,
+}
+
+/// Scene building specific representation of a spatial node, which is a much
+/// lighter subset of a full spatial node constructed and used for frame building.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(PartialEq)]
+pub struct SceneSpatialNode {
+    /// Snapping scale/offset relative to the coordinate system. If None, then
+    /// we should not snap entities bound to this spatial node. The
+    /// constructors in this file initialise it to None.
+    pub snapping_transform: Option<ScaleOffset>,
+
+    /// Parent spatial node. If this is None, we are the root node.
+    pub parent: Option<SpatialNodeIndex>,
+
+    /// Descriptor describing how this spatial node behaves
+    pub descriptor: SpatialNodeDescriptor,
+
+    /// If true, this spatial node is known to exist in the root coordinate
+    /// system in all cases (it has no animated or complex transforms)
+    pub is_root_coord_system: bool,
+}
+
+impl SceneSpatialNode {
+    /// Creates a scene spatial node holding a reference frame assembled from
+    /// the supplied transform parameters.
+    pub fn new_reference_frame(
+        parent_index: Option<SpatialNodeIndex>,
+        transform_style: TransformStyle,
+        source_transform: PropertyBinding<LayoutTransform>,
+        kind: ReferenceFrameKind,
+        origin_in_parent_reference_frame: LayoutVector2D,
+        pipeline_id: PipelineId,
+        is_root_coord_system: bool,
+        is_pipeline_root: bool,
+    ) -> Self {
+        let node_type = SpatialNodeType::ReferenceFrame(ReferenceFrameInfo {
+            transform_style,
+            source_transform,
+            kind,
+            origin_in_parent_reference_frame,
+            is_pipeline_root,
+        });
+
+        Self::new(pipeline_id, parent_index, node_type, is_root_coord_system)
+    }
+
+    /// Creates a scene spatial node holding a scroll frame parented to
+    /// `parent_index`.
+    pub fn new_scroll_frame(
+        pipeline_id: PipelineId,
+        parent_index: SpatialNodeIndex,
+        external_id: ExternalScrollId,
+        frame_rect: &LayoutRect,
+        content_size: &LayoutSize,
+        frame_kind: ScrollFrameKind,
+        external_scroll_offset: LayoutVector2D,
+        offset_generation: APZScrollGeneration,
+        has_scroll_linked_effect: HasScrollLinkedEffect,
+        is_root_coord_system: bool,
+    ) -> Self {
+        // How far the content extends beyond the frame on each axis, clamped
+        // so that it never goes negative.
+        let overflow = LayoutSize::new(
+            (content_size.width - frame_rect.width()).max(0.0),
+            (content_size.height - frame_rect.height()).max(0.0),
+        );
+
+        let scroll_info = ScrollFrameInfo::new(
+            *frame_rect,
+            overflow,
+            external_id,
+            frame_kind,
+            external_scroll_offset,
+            offset_generation,
+            has_scroll_linked_effect,
+        );
+
+        Self::new(
+            pipeline_id,
+            Some(parent_index),
+            SpatialNodeType::ScrollFrame(scroll_info),
+            is_root_coord_system,
+        )
+    }
+
+    /// Creates a scene spatial node holding a sticky frame parented to
+    /// `parent_index`.
+    pub fn new_sticky_frame(
+        parent_index: SpatialNodeIndex,
+        sticky_frame_info: StickyFrameInfo,
+        pipeline_id: PipelineId,
+        is_root_coord_system: bool,
+    ) -> Self {
+        let node_type = SpatialNodeType::StickyFrame(sticky_frame_info);
+
+        Self::new(pipeline_id, Some(parent_index), node_type, is_root_coord_system)
+    }
+
+    /// Shared constructor: wraps the node type in a descriptor and starts
+    /// without a snapping transform.
+    fn new(
+        pipeline_id: PipelineId,
+        parent_index: Option<SpatialNodeIndex>,
+        node_type: SpatialNodeType,
+        is_root_coord_system: bool,
+    ) -> Self {
+        let descriptor = SpatialNodeDescriptor {
+            pipeline_id,
+            node_type,
+        };
+
+        Self {
+            snapping_transform: None,
+            parent: parent_index,
+            descriptor,
+            is_root_coord_system,
+        }
+    }
+}
+
+/// Contains information common among all types of SpatialTree nodes.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct SpatialNode {
+    /// The scale/offset of the viewport for this spatial node, relative to the
+    /// coordinate system. Includes any accumulated scrolling offsets from nodes
+    /// between our reference frame and this node.
+    pub viewport_transform: ScaleOffset,
+
+    /// Content scale/offset relative to the coordinate system.
+    pub content_transform: ScaleOffset,
+
+    /// Snapping scale/offset relative to the coordinate system. If None, then
+    /// we should not snap entities bound to this spatial node.
+    pub snapping_transform: Option<ScaleOffset>,
+
+    /// The axis-aligned coordinate system id of this node.
+    pub coordinate_system_id: CoordinateSystemId,
+
+    /// The current transform kind of this node.
+    pub transform_kind: TransformedRectKind,
+
+    /// Pipeline that this node belongs to
+    pub pipeline_id: PipelineId,
+
+    /// Parent node. If this is None, we are the root node.
+    pub parent: Option<SpatialNodeIndex>,
+
+    /// Child nodes
+    pub children: Vec<SpatialNodeIndex>,
+
+    /// The type of this node and any data associated with that node type.
+    pub node_type: SpatialNodeType,
+
+    /// True if this node is transformed by an invertible transform.  If not, display items
+    /// transformed by this node will not be displayed and display items not transformed by this
+    /// node will not be clipped by clips that are transformed by this node.
+    pub invertible: bool,
+
+    /// Whether this specific node is currently being async zoomed.
+    /// Should be set when a SetIsTransformAsyncZooming FrameMsg is received.
+    pub is_async_zooming: bool,
+
+    /// Whether this node or any of its ancestors is being pinch zoomed.
+    /// This is calculated in update(). This will be used to decide whether
+    /// to override corresponding picture's raster space as an optimisation.
+    pub is_ancestor_or_self_zooming: bool,
+}
+
+/// Snaps `offset` after scaling it by `scale`, then converts the snapped value
+/// back into the offset's own units.
+///
+/// The scaled offset is treated as a world-space point, i.e. the raster scale
+/// is assumed to be 1.0. That may not always be correct if there are
+/// intermediate surfaces used, however those are either cases where snapping
+/// is not important (e.g. has perspective or is not axis aligned), or an edge
+/// case (e.g. SVG filters) which we can accept imperfection for now.
+///
+/// An axis whose scale is zero cannot be mapped back, so its offset component
+/// is returned unsnapped.
+fn snap_offset<OffsetUnits, ScaleUnits>(
+    offset: Vector2D<f32, OffsetUnits>,
+    scale: Vector2D<f32, ScaleUnits>,
+) -> Vector2D<f32, OffsetUnits> {
+    let snapped = WorldPoint::new(offset.x * scale.x, offset.y * scale.y).snap();
+
+    // Undo the scale on one axis, falling back to the original component when
+    // the scale is exactly zero.
+    let unscale = |snapped_component: f32, scale_component: f32, original: f32| {
+        if scale_component == 0.0 {
+            original
+        } else {
+            snapped_component / scale_component
+        }
+    };
+
+    Vector2D::new(
+        unscale(snapped.x, scale.x, offset.x),
+        unscale(snapped.y, scale.y, offset.y),
+    )
+}
+
+impl SpatialNode {
+    /// Appends `child` to this node's list of children.
+    pub fn add_child(&mut self, child: SpatialNodeIndex) {
+        self.children.push(child);
+    }
+
+    /// Replaces the sampled scroll offsets of a scroll frame.
+    ///
+    /// Each incoming offset is negated and has the frame's external scroll
+    /// offset subtracted before being stored. Returns true only when the
+    /// stored offsets actually changed; returns false (after logging a
+    /// warning) when this node is not a scroll frame.
+    pub fn set_scroll_offsets(&mut self, mut offsets: Vec<SampledScrollOffset>) -> bool {
+        debug_assert!(offsets.len() > 0);
+
+        let scrolling = if let SpatialNodeType::ScrollFrame(ref mut scrolling) = self.node_type {
+            scrolling
+        } else {
+            warn!("Tried to scroll a non-scroll node.");
+            return false;
+        };
+
+        let external_offset = scrolling.external_scroll_offset;
+        for sample in &mut offsets {
+            sample.offset = -sample.offset - external_offset;
+        }
+
+        let changed = scrolling.offsets != offsets;
+        if changed {
+            scrolling.offsets = offsets;
+        }
+        changed
+    }
+
+    /// Flags this node as non-invertible, resets its viewport and content
+    /// transforms to identity, and adopts the coordinate system id currently
+    /// recorded in `state`.
+    pub fn mark_uninvertible(&mut self, state: &TransformUpdateState) {
+        self.coordinate_system_id = state.current_coordinate_system_id;
+        self.content_transform = ScaleOffset::identity();
+        self.viewport_transform = ScaleOffset::identity();
+        self.invertible = false;
+    }
+
+    /// Updates this node from the parent state at the top of `state_stack`.
+    ///
+    /// Refreshes the zooming flag, then recomputes the transform. The node is
+    /// marked uninvertible if either the parent state or the node's own
+    /// resulting transform is not invertible.
+    ///
+    /// Panics if `state_stack` is empty.
+    pub fn update(
+        &mut self,
+        state_stack: &[TransformUpdateState],
+        coord_systems: &mut Vec<CoordinateSystem>,
+        scene_properties: &SceneProperties,
+    ) {
+        let parent_state = state_stack.last().unwrap();
+
+        self.is_ancestor_or_self_zooming =
+            self.is_async_zooming || parent_state.is_ancestor_or_self_zooming;
+
+        // Only compute a transform when every ancestor was rendered; if any
+        // of them was not, we are not rendered either.
+        if parent_state.invertible {
+            self.update_transform(state_stack, coord_systems, scene_properties);
+
+            if self.invertible {
+                return;
+            }
+        }
+
+        self.mark_uninvertible(parent_state);
+    }
+
+    /// Recomputes this node's transforms and coordinate system from the parent
+    /// state at the top of `state_stack`.
+    ///
+    /// Reference frames resolve their source transform against
+    /// `scene_properties` and then either accumulate it into the parent's
+    /// scale/offset or push a new `CoordinateSystem` onto `coord_systems`.
+    /// All other node types only add snapped scroll/sticky offsets to the
+    /// parent's scale/offset.
+    ///
+    /// Writes `invertible`, `coordinate_system_id`, `viewport_transform`,
+    /// `content_transform` and `transform_kind` (and `current_offset` for
+    /// sticky frames).
+    ///
+    /// Panics if `state_stack` is empty, or if a reference frame marked as
+    /// `is_2d_scale_translation` resolves to a transform that is not one.
+    pub fn update_transform(
+        &mut self,
+        state_stack: &[TransformUpdateState],
+        coord_systems: &mut Vec<CoordinateSystem>,
+        scene_properties: &SceneProperties,
+    ) {
+        let state = state_stack.last().unwrap();
+
+        // Start by assuming we're invertible
+        self.invertible = true;
+
+        match self.node_type {
+            SpatialNodeType::ReferenceFrame(ref mut info) => {
+                let mut cs_scale_offset = ScaleOffset::identity();
+                let mut coordinate_system_id = state.current_coordinate_system_id;
+
+                // Resolve the transform against any property bindings.
+                let source_transform = {
+                    let source_transform = scene_properties.resolve_layout_transform(&info.source_transform);
+                    if let ReferenceFrameKind::Transform { is_2d_scale_translation: true, .. } = info.kind {
+                        assert!(source_transform.is_2d_scale_translation(), "Reference frame was marked as only having 2d scale or translation");
+                    }
+
+                    LayoutFastTransform::from(source_transform)
+                };
+
+                // Do a change-basis operation on the perspective matrix using
+                // the scroll offset.
+                let source_transform = match info.kind {
+                    ReferenceFrameKind::Perspective { scrolling_relative_to: Some(external_id) } => {
+                        let mut scroll_offset = LayoutVector2D::zero();
+
+                        // Walk up the state stack, summing scroll offsets until we
+                        // reach the state whose external id matches the scroll frame
+                        // this perspective is relative to.
+                        for parent_state in state_stack.iter().rev() {
+                            if let Some(parent_external_id) = parent_state.external_id {
+                                if parent_external_id == external_id {
+                                    break;
+                                }
+                            }
+
+                            scroll_offset += parent_state.scroll_offset;
+                        }
+
+                        // Do a change-basis operation on the
+                        // perspective matrix using the scroll offset.
+                        source_transform
+                            .pre_translate(scroll_offset)
+                            .then_translate(-scroll_offset)
+                    }
+                    ReferenceFrameKind::Perspective { scrolling_relative_to: None } |
+                    ReferenceFrameKind::Transform { .. } => source_transform,
+                };
+
+                let resolved_transform =
+                    LayoutFastTransform::with_vector(info.origin_in_parent_reference_frame)
+                        .pre_transform(&source_transform);
+
+                // The transformation for this viewport in world coordinates is the transformation for
+                // our parent reference frame, plus any accumulated scrolling offsets from nodes
+                // between our reference frame and this node. Finally, we also include
+                // whatever local transformation this reference frame provides.
+                let relative_transform = resolved_transform
+                    .then_translate(snap_offset(state.parent_accumulated_scroll_offset, state.coordinate_system_relative_scale_offset.scale))
+                    .to_transform()
+                    .with_destination::<LayoutPixel>();
+
+                // A new coordinate system is needed whenever this frame's transform
+                // style disagrees with the parent's preserve-3d state.
+                let mut reset_cs_id = match info.transform_style {
+                    TransformStyle::Preserve3D => !state.preserves_3d,
+                    TransformStyle::Flat => state.preserves_3d,
+                };
+
+                // We reset the coordinate system upon either crossing the preserve-3d context boundary,
+                // or simply a 3D transformation.
+                if !reset_cs_id {
+                    // Try to update our compatible coordinate system transform. If we cannot, start a new
+                    // incompatible coordinate system.
+                    match ScaleOffset::from_transform(&relative_transform) {
+                        Some(ref scale_offset) => {
+                            // We generally do not want to snap animated transforms as it causes jitter.
+                            // However, we do want to snap the visual viewport offset when scrolling.
+                            // This may still cause jitter when zooming, unfortunately.
+                            let mut maybe_snapped = scale_offset.clone();
+                            if let ReferenceFrameKind::Transform { should_snap: true, .. } = info.kind {
+                                maybe_snapped.offset = snap_offset(
+                                    scale_offset.offset,
+                                    state.coordinate_system_relative_scale_offset.scale,
+                                );
+                            }
+                            cs_scale_offset =
+                                state.coordinate_system_relative_scale_offset.accumulate(&maybe_snapped);
+                        }
+                        None => reset_cs_id = true,
+                    }
+                }
+                if reset_cs_id {
+                    // If we break 2D axis alignment or have a perspective component, we need to start a
+                    // new incompatible coordinate system with which we cannot share clips without masking.
+                    let transform = relative_transform.then(
+                        &state.coordinate_system_relative_scale_offset.to_transform()
+                    );
+
+                    // Push that new coordinate system and record the new id.
+                    let coord_system = {
+                        let parent_system = &coord_systems[state.current_coordinate_system_id.0 as usize];
+                        let mut cur_transform = transform;
+                        if parent_system.should_flatten {
+                            cur_transform.flatten_z_output();
+                        }
+                        let world_transform = cur_transform.then(&parent_system.world_transform);
+                        // A zero or NaN determinant means the world transform cannot
+                        // be inverted.
+                        let determinant = world_transform.determinant();
+                        self.invertible = determinant != 0.0 && !determinant.is_nan();
+
+                        CoordinateSystem {
+                            transform,
+                            world_transform,
+                            should_flatten: match (info.transform_style, info.kind) {
+                                (TransformStyle::Flat, ReferenceFrameKind::Transform { .. }) => true,
+                                (_, _) => false,
+                            },
+                            parent: Some(state.current_coordinate_system_id),
+                        }
+                    };
+                    coordinate_system_id = CoordinateSystemId(coord_systems.len() as u32);
+                    coord_systems.push(coord_system);
+                }
+
+                // Ensure that the current coordinate system ID is propagated to child
+                // nodes, even if we encounter a node that is not invertible. This ensures
+                // that the invariant in get_relative_transform is not violated.
+                self.coordinate_system_id = coordinate_system_id;
+                self.viewport_transform = cs_scale_offset;
+                self.content_transform = cs_scale_offset;
+            }
+            _ => {
+                // We calculate this here to avoid a double-borrow later.
+                let sticky_offset = self.calculate_sticky_offset(
+                    &state.nearest_scrolling_ancestor_offset,
+                    &state.nearest_scrolling_ancestor_viewport,
+                );
+
+                // The transformation for the bounds of our viewport is the parent reference frame
+                // transform, plus any accumulated scroll offset from our parents, plus any offset
+                // provided by our own sticky positioning.
+                let accumulated_offset = state.parent_accumulated_scroll_offset + sticky_offset;
+                self.viewport_transform = state.coordinate_system_relative_scale_offset
+                    .offset(snap_offset(accumulated_offset, state.coordinate_system_relative_scale_offset.scale).to_untyped());
+
+                // The transformation for any content inside of us is the viewport transformation, plus
+                // whatever scrolling offset we supply as well.
+                let added_offset = accumulated_offset + self.scroll_offset();
+                self.content_transform = state.coordinate_system_relative_scale_offset
+                    .offset(snap_offset(added_offset, state.coordinate_system_relative_scale_offset.scale).to_untyped());
+
+                // Remember the sticky offset so it can be handed on to children.
+                if let SpatialNodeType::StickyFrame(ref mut info) = self.node_type {
+                    info.current_offset = sticky_offset;
+                }
+
+                self.coordinate_system_id = state.current_coordinate_system_id;
+            }
+        }
+
+        // Coordinate system 0 is treated as axis-aligned; every other id is
+        // treated as complex.
+        //TODO: remove the field entirely?
+        self.transform_kind = if self.coordinate_system_id.0 == 0 {
+            TransformedRectKind::AxisAligned
+        } else {
+            TransformedRectKind::Complex
+        };
+    }
+
+    /// Computes the extra offset to apply to a sticky frame.
+    ///
+    /// `viewport_scroll_offset` and `viewport_rect` are the scroll offset and
+    /// viewport of the nearest scrolling ancestor, as recorded in the parent
+    /// update state. The returned offset is in addition to
+    /// `previously_applied_offset`; the sum of the two is clamped to the
+    /// frame's vertical and horizontal offset bounds.
+    ///
+    /// Returns a zero vector when this node is not a sticky frame or when the
+    /// sticky frame has no margins set.
+    fn calculate_sticky_offset(
+        &self,
+        viewport_scroll_offset: &LayoutVector2D,
+        viewport_rect: &LayoutRect,
+    ) -> LayoutVector2D {
+        let info = match self.node_type {
+            SpatialNodeType::StickyFrame(ref info) => info,
+            _ => return LayoutVector2D::zero(),
+        };
+
+        // Without any margin there is no edge to stick to.
+        if info.margins.top.is_none() && info.margins.bottom.is_none() &&
+            info.margins.left.is_none() && info.margins.right.is_none() {
+            return LayoutVector2D::zero();
+        }
+
+        // The viewport and margins of the item establishes the maximum amount that it can
+        // be offset in order to keep it on screen. Since we care about the relationship
+        // between the scrolled content and unscrolled viewport we adjust the viewport's
+        // position by the scroll offset in order to work with their relative positions on the
+        // page.
+        let mut sticky_rect = info.frame_rect.translate(*viewport_scroll_offset);
+
+        let mut sticky_offset = LayoutVector2D::zero();
+        if let Some(margin) = info.margins.top {
+            let top_viewport_edge = viewport_rect.min.y + margin;
+            if sticky_rect.min.y < top_viewport_edge {
+                // If the sticky rect is positioned above the top edge of the viewport (plus margin)
+                // we move it down so that it is fully inside the viewport.
+                sticky_offset.y = top_viewport_edge - sticky_rect.min.y;
+            } else if info.previously_applied_offset.y > 0.0 &&
+                sticky_rect.min.y > top_viewport_edge {
+                // However, if the sticky rect is positioned *below* the top edge of the viewport
+                // and there is already some offset applied to the sticky rect's position, then
+                // we need to move it up so that it remains at the correct position. This
+                // makes sticky_offset.y negative and effectively reduces the amount of the
+                // offset that was already applied. We limit the reduction so that it can, at most,
+                // cancel out the already-applied offset, but should never end up adjusting the
+                // position the other way.
+                sticky_offset.y = top_viewport_edge - sticky_rect.min.y;
+                sticky_offset.y = sticky_offset.y.max(-info.previously_applied_offset.y);
+            }
+        }
+
+        // If we don't have a sticky-top offset (sticky_offset.y + info.previously_applied_offset.y
+        // == 0), or if we have a previously-applied bottom offset (previously_applied_offset.y < 0)
+        // then we check for handling the bottom margin case. Note that the "don't have a sticky-top
+        // offset" case includes the case where we *had* a sticky-top offset but we reduced it to
+        // zero in the above block.
+        if sticky_offset.y + info.previously_applied_offset.y <= 0.0 {
+            if let Some(margin) = info.margins.bottom {
+                // If sticky_offset.y is nonzero that means we must have set it
+                // in the sticky-top handling code above, so this item must have
+                // both top and bottom sticky margins. We adjust the item's rect
+                // by the top-sticky offset, and then combine any offset from
+                // the bottom-sticky calculation into sticky_offset below.
+                sticky_rect.min.y += sticky_offset.y;
+                sticky_rect.max.y += sticky_offset.y;
+
+                // Same as the above case, but inverted for bottom-sticky items. Here
+                // we adjust items upwards, resulting in a negative sticky_offset.y,
+                // or reduce the already-present upward adjustment, resulting in a positive
+                // sticky_offset.y.
+                let bottom_viewport_edge = viewport_rect.max.y - margin;
+                if sticky_rect.max.y > bottom_viewport_edge {
+                    sticky_offset.y += bottom_viewport_edge - sticky_rect.max.y;
+                } else if info.previously_applied_offset.y < 0.0 &&
+                    sticky_rect.max.y < bottom_viewport_edge {
+                    sticky_offset.y += bottom_viewport_edge - sticky_rect.max.y;
+                    sticky_offset.y = sticky_offset.y.min(-info.previously_applied_offset.y);
+                }
+            }
+        }
+
+        // Same as above, but for the x-axis.
+        if let Some(margin) = info.margins.left {
+            let left_viewport_edge = viewport_rect.min.x + margin;
+            if sticky_rect.min.x < left_viewport_edge {
+                sticky_offset.x = left_viewport_edge - sticky_rect.min.x;
+            } else if info.previously_applied_offset.x > 0.0 &&
+                sticky_rect.min.x > left_viewport_edge {
+                sticky_offset.x = left_viewport_edge - sticky_rect.min.x;
+                sticky_offset.x = sticky_offset.x.max(-info.previously_applied_offset.x);
+            }
+        }
+
+        // Right margin: mirrors the bottom margin handling above.
+        if sticky_offset.x + info.previously_applied_offset.x <= 0.0 {
+            if let Some(margin) = info.margins.right {
+                sticky_rect.min.x += sticky_offset.x;
+                sticky_rect.max.x += sticky_offset.x;
+                let right_viewport_edge = viewport_rect.max.x - margin;
+                if sticky_rect.max.x > right_viewport_edge {
+                    sticky_offset.x += right_viewport_edge - sticky_rect.max.x;
+                } else if info.previously_applied_offset.x < 0.0 &&
+                    sticky_rect.max.x < right_viewport_edge {
+                    sticky_offset.x += right_viewport_edge - sticky_rect.max.x;
+                    sticky_offset.x = sticky_offset.x.min(-info.previously_applied_offset.x);
+                }
+            }
+        }
+
+        // The total "sticky offset" (which is the sum that was already applied by
+        // the calling code, stored in info.previously_applied_offset, and the extra amount we
+        // computed as a result of scrolling, stored in sticky_offset) needs to be
+        // clamped to the provided bounds.
+        // The closure clamps (value + adjust) and then subtracts `adjust` again,
+        // so only the newly computed part is returned.
+        let clamp_adjusted = |value: f32, adjust: f32, bounds: &StickyOffsetBounds| {
+            (value + adjust).max(bounds.min).min(bounds.max) - adjust
+        };
+        sticky_offset.y = clamp_adjusted(sticky_offset.y,
+                                         info.previously_applied_offset.y,
+                                         &info.vertical_offset_bounds);
+        sticky_offset.x = clamp_adjusted(sticky_offset.x,
+                                         info.previously_applied_offset.x,
+                                         &info.horizontal_offset_bounds);
+
+        sticky_offset
+    }
+
+    /// Updates `state` so that it describes this node as the parent of the
+    /// children about to be processed.
+    ///
+    /// Always propagates the coordinate system id, the accumulated zooming
+    /// flag and invertibility. The remaining fields depend on the node type:
+    /// sticky and scroll frames add their offsets to the accumulated scroll
+    /// offset, while reference frames reset the accumulated offset and make
+    /// their own content transform the new coordinate-system-relative
+    /// scale/offset.
+    pub fn prepare_state_for_children(&self, state: &mut TransformUpdateState) {
+        state.current_coordinate_system_id = self.coordinate_system_id;
+        // Propagate the accumulated flag rather than `is_async_zooming`:
+        // update() derives each node's flag from the state it receives, so
+        // passing only this node's own flag would drop the zooming state for
+        // descendants more than one level below the zooming node.
+        state.is_ancestor_or_self_zooming = self.is_ancestor_or_self_zooming;
+        state.invertible &= self.invertible;
+
+        // The transformation we are passing is the transformation of the parent
+        // reference frame and the offset is the accumulated offset of all the nodes
+        // between us and the parent reference frame. If we are a reference frame,
+        // we need to reset both these values.
+        match self.node_type {
+            SpatialNodeType::StickyFrame(ref info) => {
+                // We don't translate the combined rect by the sticky offset, because sticky
+                // offsets actually adjust the node position itself, whereas scroll offsets
+                // only apply to contents inside the node.
+                state.parent_accumulated_scroll_offset += info.current_offset;
+                // We want nested sticky items to take into account the shift
+                // we applied as well.
+                state.nearest_scrolling_ancestor_offset += info.current_offset;
+                state.preserves_3d = false;
+                state.external_id = None;
+                state.scroll_offset = info.current_offset;
+            }
+            SpatialNodeType::ScrollFrame(ref scrolling) => {
+                state.parent_accumulated_scroll_offset += scrolling.offset();
+                // A scroll frame becomes the nearest scrolling ancestor of its
+                // children, so these values are replaced rather than accumulated.
+                state.nearest_scrolling_ancestor_offset = scrolling.offset();
+                state.nearest_scrolling_ancestor_viewport = scrolling.viewport_rect;
+                state.preserves_3d = false;
+                state.external_id = Some(scrolling.external_id);
+                state.scroll_offset = scrolling.offset() + scrolling.external_scroll_offset;
+            }
+            SpatialNodeType::ReferenceFrame(ref info) => {
+                state.external_id = None;
+                state.scroll_offset = LayoutVector2D::zero();
+                state.preserves_3d = info.transform_style == TransformStyle::Preserve3D;
+                state.parent_accumulated_scroll_offset = LayoutVector2D::zero();
+                state.coordinate_system_relative_scale_offset = self.content_transform;
+                // Shift the ancestor viewport by the negated frame origin.
+                let translation = -info.origin_in_parent_reference_frame;
+                state.nearest_scrolling_ancestor_viewport =
+                    state.nearest_scrolling_ancestor_viewport
+                       .translate(translation);
+            }
+        }
+    }
+
+    pub fn scroll_offset(&self) -> LayoutVector2D { // zero unless this node is a scroll frame
+        if let SpatialNodeType::ScrollFrame(ref frame) = self.node_type {
+            return frame.offset();
+        }
+        LayoutVector2D::zero()
+    }
+
+    pub fn matches_external_id(&self, external_id: ExternalScrollId) -> bool { // scroll frames only
+        if let SpatialNodeType::ScrollFrame(ref frame) = self.node_type {
+            return frame.external_id == external_id;
+        }
+        false
+    }
+
+    /// Reports whether this node is a reference frame whose `source_transform`
+    /// is a binding keyed by the given property binding id. Every other kind
+    /// of node, and a reference frame holding a plain value, yields false.
+    pub fn is_transform_bound_to_property(&self, id: PropertyBindingId) -> bool {
+        match self.node_type {
+            SpatialNodeType::ReferenceFrame(ref frame) => match frame.source_transform {
+                PropertyBinding::Binding(key, _) => id == key.id,
+                // A plain value is not driven by any property binding.
+                PropertyBinding::Value(..) => false,
+            },
+            SpatialNodeType::StickyFrame(..) | SpatialNodeType::ScrollFrame(..) => false,
+        }
+    }
+}
+
+/// Distinguishes the scroll frame that is implied by a pipeline root from a
+/// scroll frame that is explicitly defined by the display list.
+#[derive(Copy, Clone, Debug, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum ScrollFrameKind {
+    PipelineRoot {
+        is_root_pipeline: bool,
+    },
+    Explicit,
+}
+
+/// Data of a scroll frame spatial node: its viewport, scrollable size and scroll offsets.
+#[derive(Clone, Debug, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ScrollFrameInfo {
+    /// The rectangle of the viewport of this scroll frame. This is important for
+    /// positioning of items inside child StickyFrames.
+    pub viewport_rect: LayoutRect,
+
+    /// Amount that this ScrollFrame can scroll in both directions.
+    pub scrollable_size: LayoutSize,
+
+    /// An external id to identify this scroll frame to API clients. This
+    /// allows setting scroll positions via the API without relying on ClipIds
+    /// which may change between frames.
+    pub external_id: ExternalScrollId,
+
+    /// Stores whether this is a scroll frame added implicitly by WR when adding
+    /// a pipeline (either the root or an iframe). We need to exclude these
+    /// when searching for scroll roots we care about for picture caching.
+    /// TODO(gw): I think we can actually completely remove the implicit
+    ///           scroll frame being added by WR, and rely on the embedder
+    ///           to define scroll frames. However, that involves API changes
+    ///           so we will use this as a temporary hack!
+    pub frame_kind: ScrollFrameKind,
+
+    /// Amount that visual components attached to this scroll node have been
+    /// pre-scrolled in their local coordinates.
+    pub external_scroll_offset: LayoutVector2D,
+
+    /// Queue of sampled scroll offsets, each a pair of a negated scroll offset
+    /// and the scroll generation it was sampled for. The negated offset
+    /// includes the pre-scrolled amount. If, for example, a scroll node was
+    /// pre-scrolled to y=10 (10 pixels down from the initial unscrolled
+    /// position), `external_scroll_offset` would be (0,10) and the sampled
+    /// offset would be (0,-10). If WebRender is then asked to scroll a further
+    /// 10 pixels (without changing the pre-scroll amount in the display list),
+    /// `external_scroll_offset` stays at (0,10) and the offset becomes (0,-20).
+    pub offsets: Vec<SampledScrollOffset>,
+
+    /// The generation of the external_scroll_offset.
+    /// This is used to pick up the most appropriate scroll offset sampled
+    /// off the main thread.
+    pub offset_generation: APZScrollGeneration,
+
+    /// Whether the document containing this scroll frame has any scroll-linked
+    /// effect or not.
+    pub has_scroll_linked_effect: HasScrollLinkedEffect,
+}
+
+/// Construction of scroll frame data and resolution of its current scroll offset.
+impl ScrollFrameInfo {
+    /// Creates the info with one sampled offset seeded from `external_scroll_offset`.
+    pub fn new(
+        viewport_rect: LayoutRect,
+        scrollable_size: LayoutSize,
+        external_id: ExternalScrollId,
+        frame_kind: ScrollFrameKind,
+        external_scroll_offset: LayoutVector2D,
+        offset_generation: APZScrollGeneration,
+        has_scroll_linked_effect: HasScrollLinkedEffect,
+    ) -> ScrollFrameInfo {
+        // For a newly created scroll frame, seeding the samples from
+        // `external_scroll_offset` and `offset_generation` is correct. When an
+        // existing scroll frame is updated and APZ has already sampled async
+        // scroll offsets, this seed is replaced in SpatialTree::set_scroll_offsets
+        // via a RenderBackend::update_document call.
+        let seed = SampledScrollOffset {
+            offset: -external_scroll_offset,
+            generation: offset_generation.clone(),
+        };
+        ScrollFrameInfo {
+            viewport_rect,
+            scrollable_size,
+            external_id,
+            frame_kind,
+            external_scroll_offset,
+            offsets: vec![seed],
+            offset_generation,
+            has_scroll_linked_effect,
+        }
+    }
+
+    /// Resolves the offset to apply from the queued sampled offsets.
+    pub fn offset(&self) -> LayoutVector2D {
+        debug_assert!(!self.offsets.is_empty(), "There should be at least one sampled offset!");
+        let first_queued = self.offsets.first().map_or(LayoutVector2D::zero(), |s| s.offset);
+
+        if self.has_scroll_linked_effect == HasScrollLinkedEffect::No {
+            // If there's no scroll-linked effect, use the one-frame delay offset.
+            return first_queued;
+        }
+
+        // Use the sample of our own generation; when there is none (our generation
+        // is behind the sampled offsets), fall back to the first queued sample.
+        self.offsets
+            .iter()
+            .find(|s| s.generation == self.offset_generation)
+            .map_or(first_queued, |s| s.offset)
+    }
+}
+
+/// Contains information about reference frames.
+#[derive(Copy, Clone, Debug, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ReferenceFrameInfo {
+    /// The source transform and perspective matrices provided by the stacking context
+    /// that forms this reference frame. We maintain the property binding information
+    /// here so that we can resolve the animated transform and update the tree each
+    /// frame.
+    pub source_transform: PropertyBinding<LayoutTransform>,
+    pub transform_style: TransformStyle,
+    pub kind: ReferenceFrameKind,
+
+    /// The original origin of this reference frame, relative to the parent reference frame
+    /// and not including the transform. This is already rolled into the `transform` property,
+    /// but we also store it here to properly transform the viewport for sticky positioning.
+    pub origin_in_parent_reference_frame: LayoutVector2D,
+
+    /// True if this is the root reference frame for a given pipeline. This is only used
+    /// by the hit-test code, perhaps we can change the interface to not require this.
+    pub is_pipeline_root: bool,
+}
+
+#[derive(Clone, Debug, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct StickyFrameInfo { // sticky frame data (see `SpatialNodeType::StickyFrame`)
+    pub frame_rect: LayoutRect,
+    pub margins: SideOffsets2D<Option<f32>, LayoutPixel>,
+    pub vertical_offset_bounds: StickyOffsetBounds,
+    pub horizontal_offset_bounds: StickyOffsetBounds,
+    pub previously_applied_offset: LayoutVector2D,
+    pub current_offset: LayoutVector2D, // zero on construction; propagated to child state
+}
+
+impl StickyFrameInfo {
+    pub fn new( // `current_offset` is not a parameter: it always starts at zero
+        frame_rect: LayoutRect,
+        margins: SideOffsets2D<Option<f32>, LayoutPixel>,
+        vertical_offset_bounds: StickyOffsetBounds,
+        horizontal_offset_bounds: StickyOffsetBounds,
+        previously_applied_offset: LayoutVector2D,
+    ) -> Self {
+        Self {
+            current_offset: LayoutVector2D::zero(),
+            previously_applied_offset,
+            horizontal_offset_bounds,
+            vertical_offset_bounds,
+            margins,
+            frame_rect,
+        }
+    }
+}
+
+#[test]
+fn test_cst_perspective_relative_scroll() {
+    // Verify that, when computing the offset from a perspective transform
+    // to the scroll node it scrolls relative to, any external scroll offset
+    // is ignored. This is because external scroll offsets are not
+    // propagated across reference frame boundaries.
+
+    // It's not currently possible to verify this with a wrench reftest,
+    // since wrench doesn't understand external scroll ids. When wrench
+    // supports this, we could also verify with a reftest.
+
+    use crate::spatial_tree::{SceneSpatialTree, SpatialTree};
+    use euclid::Angle;
+
+    let mut cst = SceneSpatialTree::new();
+    let pipeline_id = PipelineId::dummy();
+    let ext_scroll_id = ExternalScrollId(1, pipeline_id);
+    let transform = LayoutTransform::rotation(0.0, 0.0, 1.0, Angle::degrees(45.0));
+    let pid = PipelineInstanceId::new(0);
+
+    let root = cst.add_reference_frame(
+        cst.root_reference_frame_index(),
+        TransformStyle::Flat,
+        PropertyBinding::Value(LayoutTransform::identity()),
+        ReferenceFrameKind::Transform {
+            is_2d_scale_translation: false,
+            should_snap: false,
+            paired_with_perspective: false,
+        },
+        LayoutVector2D::zero(),
+        pipeline_id,
+        SpatialNodeUid::external(SpatialTreeItemKey::new(0, 0), PipelineId::dummy(), pid),
+    );
+
+    let scroll_frame_1 = cst.add_scroll_frame(
+        root,
+        ext_scroll_id,
+        pipeline_id,
+        &LayoutRect::from_size(LayoutSize::new(100.0, 100.0)),
+        &LayoutSize::new(100.0, 500.0),
+        ScrollFrameKind::Explicit,
+        LayoutVector2D::zero(),
+        APZScrollGeneration::default(),
+        HasScrollLinkedEffect::No,
+        SpatialNodeUid::external(SpatialTreeItemKey::new(0, 1), PipelineId::dummy(), pid),
+    );
+
+    let scroll_frame_2 = cst.add_scroll_frame(
+        scroll_frame_1,
+        ExternalScrollId(2, pipeline_id),
+        pipeline_id,
+        &LayoutRect::from_size(LayoutSize::new(100.0, 100.0)),
+        &LayoutSize::new(100.0, 500.0),
+        ScrollFrameKind::Explicit,
+        LayoutVector2D::new(0.0, 50.0), // the external scroll offset of this frame
+        APZScrollGeneration::default(),
+        HasScrollLinkedEffect::No,
+        SpatialNodeUid::external(SpatialTreeItemKey::new(0, 3), PipelineId::dummy(), pid),
+    );
+
+    let ref_frame = cst.add_reference_frame(
+        scroll_frame_2,
+        TransformStyle::Preserve3D,
+        PropertyBinding::Value(transform),
+        ReferenceFrameKind::Perspective {
+            scrolling_relative_to: Some(ext_scroll_id),
+        },
+        LayoutVector2D::zero(),
+        pipeline_id,
+        SpatialNodeUid::external(SpatialTreeItemKey::new(0, 4), PipelineId::dummy(), pid),
+    );
+
+    let mut st = SpatialTree::new();
+    st.apply_updates(cst.end_frame_and_get_pending_updates());
+    st.update_tree(&SceneProperties::new());
+
+    let world_transform = st.get_world_transform(ref_frame).into_transform().cast_unit();
+    let ref_transform = transform.then_translate(LayoutVector3D::new(0.0, -50.0, 0.0));
+    assert!(world_transform.approx_eq(&ref_transform));
+}
+
diff --git a/gfx/wr/webrender/src/spatial_tree.rs b/gfx/wr/webrender/src/spatial_tree.rs
new file mode 100644
index 0000000000..cc06941fc9
--- /dev/null
+++ b/gfx/wr/webrender/src/spatial_tree.rs
@@ -0,0 +1,1972 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{ExternalScrollId, PropertyBinding, ReferenceFrameKind, TransformStyle, PropertyBindingId};
+use api::{APZScrollGeneration, HasScrollLinkedEffect, PipelineId, SampledScrollOffset, SpatialTreeItemKey};
+use api::units::*;
+use euclid::Transform3D;
+use crate::gpu_types::TransformPalette;
+use crate::internal_types::{FastHashMap, FastHashSet, PipelineInstanceId};
+use crate::print_tree::{PrintableTree, PrintTree, PrintTreePrinter};
+use crate::scene::SceneProperties;
+use crate::spatial_node::{ReferenceFrameInfo, SpatialNode, SpatialNodeType, StickyFrameInfo, SpatialNodeDescriptor};
+use crate::spatial_node::{SpatialNodeUid, ScrollFrameKind, SceneSpatialNode, SpatialNodeInfo, SpatialNodeUidKind};
+use std::{ops, u32};
+use crate::util::{FastTransform, LayoutToWorldFastTransform, MatrixHelpers, ScaleOffset, scale_factors};
+use smallvec::SmallVec;
+use std::collections::hash_map::Entry;
+use crate::util::TransformedRectKind;
+
+
+/// An id that identifies coordinate systems in the SpatialTree. Each
+/// coordinate system has an id and those ids will be shared when the coordinate
+/// systems are the same or are in the same axis-aligned space. This allows
+/// for optimizing mask generation.
+#[derive(Debug, Copy, Clone, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct CoordinateSystemId(pub u32);
+
+/// A node in the hierarchy of coordinate system transforms. `parent` links to
+/// the enclosing coordinate system and is `None` for the root (see `root()`).
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct CoordinateSystem {
+    pub transform: LayoutTransform,
+    pub world_transform: LayoutToWorldTransform,
+    pub should_flatten: bool,
+    pub parent: Option<CoordinateSystemId>,
+}
+
+impl CoordinateSystem {
+    fn root() -> Self { // identity transforms, no parent, no flattening
+        Self {
+            parent: None,
+            should_flatten: false,
+            world_transform: LayoutToWorldTransform::identity(),
+            transform: LayoutTransform::identity(),
+        }
+    }
+}
+
+#[derive(Debug, Copy, Clone, Eq, Hash, MallocSizeOf, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct SpatialNodeIndex(pub u32); // used (as usize) to index the spatial node storage
+
+impl SpatialNodeIndex {
+    /// Sentinel value (`u32::MAX`); `new` debug-asserts that indices stay below it.
+    pub const INVALID: SpatialNodeIndex = SpatialNodeIndex(u32::MAX);
+
+    /// May be set on a cluster / picture during scene building if the spatial node
+    /// is not known at this time. It must be set to a valid value before scene
+    /// building is complete (by `finalize_picture`). In future, we could make this
+    /// type-safe with a wrapper type, so we know when an index may be unknown.
+    pub const UNKNOWN: SpatialNodeIndex = SpatialNodeIndex(u32::MAX - 1);
+}
+
+// Converting CSS pixels to device pixels can leave small rounding errors in a scroll frame's
+// scrollable distance; this epsilon stops such frames being detected as "real" scroll frames.
+const MIN_SCROLLABLE_AMOUNT: f32 = 0.01;
+
+// The minimum viewport width and height (compared in local space) for a scroll frame to be
+// considered for a scroll root.
+const MIN_SCROLL_ROOT_SIZE: f32 = 128.0;
+
+impl SpatialNodeIndex {
+    pub fn new(index: usize) -> Self { // `index` must be smaller than `u32::MAX`
+        debug_assert!(index < u32::MAX as usize);
+        Self(index as u32)
+    }
+}
+
+impl CoordinateSystemId {
+    pub fn root() -> Self { // the root coordinate system always has id 0
+        Self(0)
+    }
+}
+
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub enum VisibleFace { // defaults to `Front`; the `!` operator swaps `Front` and `Back`
+    Front,
+    Back,
+}
+
+impl Default for VisibleFace {
+    fn default() -> Self { // the front face is the default
+        Self::Front
+    }
+}
+
+impl ops::Not for VisibleFace {
+    type Output = VisibleFace;
+    fn not(self) -> Self::Output { // flips to the opposite face
+        match self {
+            Self::Back => Self::Front,
+            Self::Front => Self::Back,
+        }
+    }
+}
+
+/// Gives functions and methods uniform access to the common information about
+/// a spatial node, whether they run during scene building or frame building.
+pub trait SpatialNodeContainer {
+    /// Returns the common information for the spatial node at `index`.
+    fn get_node_info(&self, index: SpatialNodeIndex) -> SpatialNodeInfo;
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+enum StoreElement<T> { // one slot of a `Store`
+    Empty, // freed slot; its index waits in `Store::free_indices` for reuse
+    Occupied(T),
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct Store<T> { // slot storage whose freed indices are recycled by `insert`
+    elements: Vec<StoreElement<T>>,
+    free_indices: Vec<usize>, // indices of `Empty` slots, pushed by `free`
+}
+
+impl<T> Store<T> {
+    /// Creates an empty store.
+    fn new() -> Self {
+        Self { elements: Vec::new(), free_indices: Vec::new() }
+    }
+
+    /// Stores `element` in a recycled free slot if one exists, otherwise in a
+    /// new slot appended at the end; returns the index of the slot used.
+    fn insert(&mut self, element: T) -> usize {
+        let recycled = match self.free_indices.pop() {
+            Some(recycled) => recycled,
+            None => {
+                self.elements.push(StoreElement::Occupied(element));
+                return self.elements.len() - 1;
+            }
+        };
+        let slot = &mut self.elements[recycled];
+        if let StoreElement::Occupied(..) = *slot {
+            panic!("bug: slot already occupied");
+        }
+        *slot = StoreElement::Occupied(element);
+        recycled
+    }
+
+    /// Overwrites the value held in the occupied slot `index`; panics if the
+    /// slot is empty.
+    fn set(&mut self, index: usize, element: T) {
+        match self.elements[index] {
+            StoreElement::Occupied(ref mut current) => *current = element,
+            StoreElement::Empty => panic!("bug: set on empty element!"),
+        }
+    }
+
+    /// Empties slot `index`, queues it for reuse and returns the value it
+    /// held; panics if the slot was already empty.
+    fn free(&mut self, index: usize) -> T {
+        self.free_indices.push(index);
+        match std::mem::replace(&mut self.elements[index], StoreElement::Empty) {
+            StoreElement::Occupied(previous) => previous,
+            StoreElement::Empty => panic!("bug: freeing an empty slot"),
+        }
+    }
+}
+
+impl<T> ops::Index<usize> for Store<T> {
+    type Output = T;
+    fn index(&self, index: usize) -> &Self::Output { // panics when the slot is empty
+        if let StoreElement::Occupied(ref value) = self.elements[index] {
+            return value;
+        }
+        panic!("bug: indexing an empty element!")
+    }
+}
+
+impl<T> ops::IndexMut<usize> for Store<T> {
+    fn index_mut(&mut self, index: usize) -> &mut T { // panics when the slot is empty
+        if let StoreElement::Occupied(ref mut value) = self.elements[index] {
+            return value;
+        }
+        panic!("bug: indexing an empty element!")
+    }
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct SpatialNodeEntry { // bookkeeping kept per uid in `SceneSpatialTree::spatial_node_map`
+    index: usize, // slot of the node in `SceneSpatialTree::spatial_nodes`
+    last_used: u64, // `frame_counter` value when the uid was last added
+}
+
+/// The representation of the spatial tree during scene building, which is
+/// mostly write-only, with a small number of queries for snapping and
+/// picture cache building.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct SceneSpatialTree {
+    /// Nodes which determine the positions (offsets and transforms) for primitives
+    /// and clips.
+    spatial_nodes: Store<SceneSpatialNode>,
+
+    /// Maps each node uid to its slot in `spatial_nodes` and the frame it was last
+    /// used in, so a uid keeps its index across scenes until the entry expires.
+    spatial_node_map: FastHashMap<SpatialNodeUid, SpatialNodeEntry>,
+
+    root_reference_frame_index: SpatialNodeIndex,
+
+    frame_counter: u64, // incremented by `end_frame_and_get_pending_updates`
+    updates: SpatialTreeUpdates, // pending updates, handed out at the end of each frame
+
+    /// The uids added during the current scene build, used to assert that the
+    /// caller never adds a duplicate uid, since that can cause badness (e.g. a
+    /// malformed spatial tree and infinite loops in is_ancestor etc)
+    spatial_nodes_set: FastHashSet<SpatialNodeUid>,
+}
+
+impl SpatialNodeContainer for SceneSpatialTree {
+    /// Looks up the scene node at `index` and exposes its parent, node type
+    /// and snapping transform.
+    fn get_node_info(&self, index: SpatialNodeIndex) -> SpatialNodeInfo {
+        let slot = index.0 as usize;
+        let scene_node = &self.spatial_nodes[slot];
+        let parent = scene_node.parent;
+        let snapping_transform = scene_node.snapping_transform;
+        SpatialNodeInfo { parent, snapping_transform, node_type: &scene_node.descriptor.node_type }
+    }
+}
+
+impl SceneSpatialTree {
+    /// Creates a tree that already contains the root reference frame.
+    pub fn new() -> Self {
+        let root_kind = ReferenceFrameKind::Transform {
+            is_2d_scale_translation: true,
+            should_snap: true,
+            paired_with_perspective: false,
+        };
+        let root_node = SceneSpatialNode::new_reference_frame(
+            None,
+            TransformStyle::Flat,
+            PropertyBinding::Value(LayoutTransform::identity()),
+            root_kind,
+            LayoutVector2D::zero(),
+            PipelineId::dummy(),
+            true, // is_root_coord_system
+            true, // is_pipeline_root
+        );
+
+        let mut scene_tree = SceneSpatialTree {
+            spatial_nodes: Store::new(),
+            spatial_node_map: FastHashMap::default(),
+            root_reference_frame_index: SpatialNodeIndex(0),
+            frame_counter: 0,
+            updates: SpatialTreeUpdates::new(),
+            spatial_nodes_set: FastHashSet::default(),
+        };
+        scene_tree.add_spatial_node(root_node, SpatialNodeUid::root());
+        scene_tree
+    }
+
+    pub fn is_root_coord_system(&self, index: SpatialNodeIndex) -> bool { // flag stored on the node
+        self.spatial_nodes[index.0 as usize].is_root_coord_system
+    }
+
+    /// Completes the current scene build: expires nodes that have not been used recently
+    /// and returns the pending updates to apply to the frame spatial tree.
+    pub fn end_frame_and_get_pending_updates(&mut self) -> SpatialTreeUpdates {
+        self.updates.root_reference_frame_index = self.root_reference_frame_index;
+        self.spatial_nodes_set.clear();
+
+        let current_frame = self.frame_counter;
+
+        // Refresh the root entry so that it can never expire below.
+        self.spatial_node_map.get_mut(&SpatialNodeUid::root()).unwrap().last_used = current_frame;
+
+        // Drop every entry whose last use is more than ten frames old, freeing its
+        // slot and queueing a `Remove` update for it.
+        let (nodes, pending) = (&mut self.spatial_nodes, &mut self.updates);
+        self.spatial_node_map.retain(|_, entry| {
+            let expired = entry.last_used + 10 < current_frame;
+            if expired {
+                nodes.free(entry.index);
+                pending.updates.push(SpatialTreeUpdate::Remove {
+                    index: entry.index,
+                });
+            }
+            !expired
+        });
+
+        // Start the next frame and hand over the accumulated updates.
+        self.frame_counter += 1;
+        std::mem::replace(&mut self.updates, SpatialTreeUpdates::new())
+    }
+
+    /// Returns true when `maybe_parent` is a strict ancestor of `maybe_child`;
+    /// a node is never reported as an ancestor of itself.
+    pub fn is_ancestor(
+        &self,
+        maybe_parent: SpatialNodeIndex,
+        maybe_child: SpatialNodeIndex,
+    ) -> bool {
+        // A node is not its own ancestor.
+        if maybe_child == maybe_parent {
+            return false;
+        }
+
+        // Climb the parent links from the child until the root is reached.
+        let mut cursor = maybe_child;
+        loop {
+            if cursor == self.root_reference_frame_index {
+                return false;
+            }
+            cursor = self.get_node_info(cursor).parent.expect("bug: no parent");
+            if cursor == maybe_parent {
+                return true;
+            }
+        }
+    }
+
+    /// Find the spatial node that is the scroll root for a given spatial node: the
+    /// explicit scroll frame closest to the root that is scrollable and large enough,
+    /// ignoring scroll frames below a reference frame that is not a 2d scale/translation.
+    pub fn find_scroll_root(
+        &self,
+        spatial_node_index: SpatialNodeIndex,
+    ) -> SpatialNodeIndex {
+        let mut real_scroll_root = self.root_reference_frame_index;
+        let mut outermost_scroll_root = self.root_reference_frame_index;
+        let mut node_index = spatial_node_index;
+
+        while node_index != self.root_reference_frame_index {
+            let node = self.get_node_info(node_index);
+            match node.node_type {
+                SpatialNodeType::ReferenceFrame(ref info) => {
+                    match info.kind {
+                        ReferenceFrameKind::Transform { is_2d_scale_translation: true, .. } => {
+                            // We can handle scroll nodes that pass through a 2d scale/translation node
+                        }
+                        ReferenceFrameKind::Transform { is_2d_scale_translation: false, .. } |
+                        ReferenceFrameKind::Perspective { .. } => {
+                            // When a reference frame is encountered, forget any scroll roots
+                            // we have encountered, as they may end up with a non-axis-aligned transform.
+                            real_scroll_root = self.root_reference_frame_index;
+                            outermost_scroll_root = self.root_reference_frame_index;
+                        }
+                    }
+                }
+                SpatialNodeType::StickyFrame(..) => {}
+                SpatialNodeType::ScrollFrame(ref info) => {
+                    match info.frame_kind {
+                        ScrollFrameKind::PipelineRoot { is_root_pipeline } => {
+                            // Once we encounter a pipeline root, there is no need to look further
+                            if is_root_pipeline {
+                                break;
+                            }
+                        }
+                        ScrollFrameKind::Explicit => {
+                            // Store the closest scroll root we find to the root, for use
+                            // later on, even if it's not actually scrollable.
+                            outermost_scroll_root = node_index;
+
+                            // If the scroll root has no scrollable area, we don't want to
+                            // consider it. This helps pages that have a nested scroll root
+                            // within a redundant scroll root to avoid selecting the wrong
+                            // reference spatial node for a picture cache.
+                            if info.scrollable_size.width > MIN_SCROLLABLE_AMOUNT ||
+                               info.scrollable_size.height > MIN_SCROLLABLE_AMOUNT {
+                                // Since we are skipping redundant scroll roots, we may end up
+                                // selecting inner scroll roots that are very small. There is
+                                // no performance benefit to creating a slice for these roots,
+                                // as they are cheap to rasterize. The size comparison is in
+                                // local-space, but makes for a reasonable estimate. The value
+                                // is arbitrary, but is generally small enough to ignore things
+                                // like scroll roots around text input elements.
+                                if info.viewport_rect.width() > MIN_SCROLL_ROOT_SIZE &&
+                                   info.viewport_rect.height() > MIN_SCROLL_ROOT_SIZE {
+                                    // If we've found a root that is scrollable, and a reasonable
+                                    // size, select that as the current root for this node
+                                    real_scroll_root = node_index;
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+            node_index = node.parent.expect("unable to find parent node");
+        }
+
+        // If we didn't find any real (scrollable) frames, then return the outermost
+        // redundant scroll frame. This is important so that we can correctly find
+        // the clips defined on the content which should be handled when drawing the
+        // picture cache tiles (by definition these clips are ancestors of the
+        // scroll root selected for the picture cache).
+        if real_scroll_root == self.root_reference_frame_index {
+            outermost_scroll_root
+        } else {
+            real_scroll_root
+        }
+    }
+
+    /// Index of the root reference frame, which is the true root of the SpatialTree.
+    pub fn root_reference_frame_index(&self) -> SpatialNodeIndex {
+        self.root_reference_frame_index
+    }
+
+    /// Registers `node` under `uid` and returns its index: an already known uid keeps
+    /// its slot, a new uid gets a fresh one. Panics if `uid` is added twice in one build.
+    fn add_spatial_node(
+        &mut self,
+        mut node: SceneSpatialNode,
+        uid: SpatialNodeUid,
+    ) -> SpatialNodeIndex {
+        let parent_snapping_transform = match node.parent {
+            Some(parent_index) => {
+                self.get_node_info(parent_index).snapping_transform
+            }
+            None => {
+                Some(ScaleOffset::identity())
+            }
+        };
+
+        node.snapping_transform = calculate_snapping_transform(
+            parent_snapping_transform,
+            &node.descriptor.node_type,
+        );
+
+        // Ensure a node with the same uid hasn't been added during this scene build
+        assert!(self.spatial_nodes_set.insert(uid), "duplicate key {:?}", uid);
+
+        let index = match self.spatial_node_map.entry(uid) {
+            Entry::Occupied(mut e) => {
+                let e = e.get_mut();
+                e.last_used = self.frame_counter;
+                let existing_node = &self.spatial_nodes[e.index];
+                // Only record an update (and overwrite the slot) when the node changed.
+                if *existing_node != node {
+                    self.updates.updates.push(SpatialTreeUpdate::Update {
+                        index: e.index,
+                        parent: node.parent,
+                        descriptor: node.descriptor.clone(),
+                    });
+                    self.spatial_nodes.set(e.index, node);
+                }
+
+                e.index
+            }
+            Entry::Vacant(e) => {
+                let descriptor = node.descriptor.clone();
+                let parent = node.parent;
+                let index = self.spatial_nodes.insert(node);
+                // Remember the slot so that this uid maps to the same index from now on.
+                e.insert(SpatialNodeEntry {
+                    index,
+                    last_used: self.frame_counter,
+                });
+
+                self.updates.updates.push(SpatialTreeUpdate::Insert {
+                    index,
+                    descriptor,
+                    parent,
+                });
+
+                index
+            }
+        };
+
+        SpatialNodeIndex(index as u32)
+    }
+
+    /// Adds a reference frame as a child of `parent_index` and returns its index.
+    ///
+    /// The frame is flagged as being in the root coordinate system only when its
+    /// parent is and the frame does not create a new static coordinate system.
+    pub fn add_reference_frame(
+        &mut self,
+        parent_index: SpatialNodeIndex,
+        transform_style: TransformStyle,
+        source_transform: PropertyBinding<LayoutTransform>,
+        kind: ReferenceFrameKind,
+        origin_in_parent_reference_frame: LayoutVector2D,
+        pipeline_id: PipelineId,
+        uid: SpatialNodeUid,
+    ) -> SpatialNodeIndex {
+        // Determine if this reference frame stays in the static coordinate system
+        // of its parent, i.e. whether its transform is known to be axis-aligned.
+        let keeps_static_coord_system = match (kind, source_transform) {
+            // Client has guaranteed this transform will only be axis-aligned
+            (ReferenceFrameKind::Transform { is_2d_scale_translation: true, .. }, _) => true,
+            // Even if client hasn't promised it's an axis-aligned transform, we can
+            // still check this so long as the transform isn't animated
+            (_, PropertyBinding::Value(m)) => m.is_2d_scale_translation(),
+            // Animated (and thus could change to anything by APZ during frame
+            // building), so assume it may introduce a complex transform
+            (_, PropertyBinding::Binding(..)) => false,
+        };
+
+        let is_root_coord_system = keeps_static_coord_system &&
+            self.spatial_nodes[parent_index.0 as usize].is_root_coord_system;
+
+        // Only uids of the internal reference frame kind denote a pipeline root.
+        let is_pipeline_root = if let SpatialNodeUidKind::InternalReferenceFrame { .. } = uid.kind {
+            true
+        } else {
+            false
+        };
+
+        let frame_node = SceneSpatialNode::new_reference_frame(
+            Some(parent_index),
+            transform_style,
+            source_transform,
+            kind,
+            origin_in_parent_reference_frame,
+            pipeline_id,
+            is_root_coord_system,
+            is_pipeline_root,
+        );
+
+        // Register the node (or refresh the slot already known for `uid`).
+        self.add_spatial_node(frame_node, uid)
+    }
+
+    /// Add a scroll frame as a child of `parent_index` and return the index of the
+    /// resulting node in the scene spatial tree.
+    ///
+    /// The node is registered under `uid` via `add_spatial_node`.
+    pub fn add_scroll_frame(
+        &mut self,
+        parent_index: SpatialNodeIndex,
+        external_id: ExternalScrollId,
+        pipeline_id: PipelineId,
+        frame_rect: &LayoutRect,
+        content_size: &LayoutSize,
+        frame_kind: ScrollFrameKind,
+        external_scroll_offset: LayoutVector2D,
+        scroll_offset_generation: APZScrollGeneration,
+        has_scroll_linked_effect: HasScrollLinkedEffect,
+        uid: SpatialNodeUid,
+    ) -> SpatialNodeIndex {
+        // A scroll frame is only a 2d translation, so it can't introduce a new static
+        // coord system: it simply inherits the flag from its parent.
+        let parent_slot = parent_index.0 as usize;
+        let is_root_coord_system = self.spatial_nodes[parent_slot].is_root_coord_system;
+
+        let scroll_frame = SceneSpatialNode::new_scroll_frame(
+            pipeline_id,
+            parent_index,
+            external_id,
+            frame_rect,
+            content_size,
+            frame_kind,
+            external_scroll_offset,
+            scroll_offset_generation,
+            has_scroll_linked_effect,
+            is_root_coord_system,
+        );
+
+        self.add_spatial_node(scroll_frame, uid)
+    }
+
+    /// Add a sticky frame as a child of `parent_index` and return the index of the
+    /// resulting node in the scene spatial tree.
+    ///
+    /// The node's uid is built from `key`, `pipeline_id` and `instance_id`.
+    pub fn add_sticky_frame(
+        &mut self,
+        parent_index: SpatialNodeIndex,
+        sticky_frame_info: StickyFrameInfo,
+        pipeline_id: PipelineId,
+        key: SpatialTreeItemKey,
+        instance_id: PipelineInstanceId,
+    ) -> SpatialNodeIndex {
+        // A sticky frame is only a 2d translation, so it can't introduce a new static
+        // coord system: it simply inherits the flag from its parent.
+        let parent_slot = parent_index.0 as usize;
+        let is_root_coord_system = self.spatial_nodes[parent_slot].is_root_coord_system;
+
+        let sticky_frame = SceneSpatialNode::new_sticky_frame(
+            parent_index,
+            sticky_frame_info,
+            pipeline_id,
+            is_root_coord_system,
+        );
+        let uid = SpatialNodeUid::external(key, pipeline_id, instance_id);
+
+        self.add_spatial_node(sticky_frame, uid)
+    }
+}
+
+/// A single change to apply to the frame `SpatialTree` (see `SpatialTree::apply_updates`).
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum SpatialTreeUpdate {
+    /// Create the node stored at `index`, attaching it to `parent` if one is given.
+    Insert {
+        index: usize,
+        parent: Option<SpatialNodeIndex>,
+        descriptor: SpatialNodeDescriptor,
+    },
+    /// Replace the descriptor of the existing node at `index`, re-parenting it when
+    /// `parent` differs from the node's current parent.
+    Update {
+        index: usize,
+        parent: Option<SpatialNodeIndex>,
+        descriptor: SpatialNodeDescriptor,
+    },
+    /// Detach the node at `index` from its parent's child list. The array slot itself
+    /// is kept and marked with a dummy pipeline id.
+    Remove {
+        index: usize,
+    },
+}
+
+/// The delta updates to apply after building a new scene to the retained frame building
+/// tree.
+// TODO(gw): During the initial scaffolding work, this is the exact same as previous
+//           behavior - that is, a complete list of new spatial nodes. In future, this
+//           will instead be a list of deltas to apply to the frame spatial tree.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct SpatialTreeUpdates {
+    /// Root reference frame index; copied into the frame tree by
+    /// `SpatialTree::apply_updates`.
+    root_reference_frame_index: SpatialNodeIndex,
+    /// The individual changes, applied in order by `SpatialTree::apply_updates`.
+    updates: Vec<SpatialTreeUpdate>,
+}
+
+impl SpatialTreeUpdates {
+    /// Create an empty set of updates with no root reference frame assigned yet.
+    fn new() -> Self {
+        Self {
+            updates: Vec::new(),
+            root_reference_frame_index: SpatialNodeIndex::INVALID,
+        }
+    }
+}
+
+/// Represents the spatial tree during frame building, which is mostly
+/// read-only, apart from the tree update at the start of the frame
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct SpatialTree {
+    /// Nodes which determine the positions (offsets and transforms) for primitives
+    /// and clips.
+    spatial_nodes: Vec<SpatialNode>,
+
+    /// A list of transforms that establish new coordinate systems.
+    /// Spatial nodes only establish a new coordinate system when
+    /// they have a transform that is not a simple 2d translation.
+    coord_systems: Vec<CoordinateSystem>,
+
+    /// Index of the root node. This is `SpatialNodeIndex::INVALID` for a freshly
+    /// created tree and is overwritten by every call to `apply_updates`.
+    root_reference_frame_index: SpatialNodeIndex,
+
+    /// Stack of current state for each parent node while traversing and updating tree
+    update_state_stack: Vec<TransformUpdateState>,
+}
+
+/// Accumulated state handed down from a parent node to its children while
+/// `SpatialTree::update_tree` walks the tree.
+#[derive(Clone)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct TransformUpdateState {
+    pub parent_reference_frame_transform: LayoutToWorldFastTransform,
+    pub parent_accumulated_scroll_offset: LayoutVector2D,
+    pub nearest_scrolling_ancestor_offset: LayoutVector2D,
+    pub nearest_scrolling_ancestor_viewport: LayoutRect,
+
+    /// An id for keeping track of the axis-aligned space of this node. This is used in
+    /// order to track what kinds of clip optimizations can be done for a particular
+    /// display list item, since optimizations can usually only be done among
+    /// coordinate systems which are relatively axis aligned.
+    pub current_coordinate_system_id: CoordinateSystemId,
+
+    /// Scale and offset from the coordinate system that started this compatible coordinate system.
+    pub coordinate_system_relative_scale_offset: ScaleOffset,
+
+    /// True if this node is transformed by an invertible transform.  If not, display items
+    /// transformed by this node will not be displayed and display items not transformed by this
+    /// node will not be clipped by clips that are transformed by this node.
+    pub invertible: bool,
+
+    /// True if this node is a part of Preserve3D hierarchy.
+    pub preserves_3d: bool,
+
+    /// True if any ancestor node (or, going by the field name, this node itself) is
+    /// currently zooming.
+    pub is_ancestor_or_self_zooming: bool,
+
+    /// Set (`Some`) if this state represents a scroll node with an external id.
+    pub external_id: Option<ExternalScrollId>,
+
+    /// The node scroll offset if this state is a scroll/sticky node. Zero if a reference frame.
+    pub scroll_offset: LayoutVector2D,
+}
+
+/// Transformation between two nodes in the spatial tree that can sometimes be
+/// encoded more efficiently than with a full matrix.
+#[derive(Debug, Clone)]
+pub enum CoordinateSpaceMapping<Src, Dst> {
+    /// No transformation at all; expands to the identity transform.
+    Local,
+    /// A mapping that can be expressed as a scale and offset.
+    ScaleOffset(ScaleOffset),
+    /// A mapping that requires a full 3d transform.
+    Transform(Transform3D<f32, Src, Dst>),
+}
+
+impl<Src, Dst> CoordinateSpaceMapping<Src, Dst> {
+    /// Consume the mapping and expand it into a full 3d transform.
+    pub fn into_transform(self) -> Transform3D<f32, Src, Dst> {
+        match self {
+            Self::Transform(transform) => transform,
+            Self::ScaleOffset(scale_offset) => scale_offset.to_transform(),
+            Self::Local => Transform3D::identity(),
+        }
+    }
+
+    /// Consume the mapping and convert it into the matching `FastTransform` form.
+    pub fn into_fast_transform(self) -> FastTransform<Src, Dst> {
+        match self {
+            Self::Transform(transform) => FastTransform::with_transform(transform),
+            Self::ScaleOffset(scale_offset) => FastTransform::with_scale_offset(scale_offset),
+            Self::Local => FastTransform::identity(),
+        }
+    }
+
+    /// True only for a full transform that has a perspective component.
+    pub fn is_perspective(&self) -> bool {
+        match self {
+            Self::Transform(transform) => transform.has_perspective_component(),
+            Self::Local | Self::ScaleOffset(_) => false,
+        }
+    }
+
+    /// True if the mapping keeps 2d axis alignment. `Local` and `ScaleOffset`
+    /// always do; a full transform is queried.
+    pub fn is_2d_axis_aligned(&self) -> bool {
+        match self {
+            Self::Transform(transform) => transform.preserves_2d_axis_alignment(),
+            Self::Local | Self::ScaleOffset(_) => true,
+        }
+    }
+
+    /// The (x, y) scale factors of the mapping. For `ScaleOffset` these are the
+    /// absolute values of the stored scale.
+    pub fn scale_factors(&self) -> (f32, f32) {
+        match self {
+            Self::Local => (1.0, 1.0),
+            Self::ScaleOffset(scale_offset) => {
+                let scale = &scale_offset.scale;
+                (scale.x.abs(), scale.y.abs())
+            }
+            Self::Transform(transform) => scale_factors(transform),
+        }
+    }
+
+    /// The inverse mapping, or `None` if a full transform can't be inverted.
+    pub fn inverse(&self) -> Option<CoordinateSpaceMapping<Dst, Src>> {
+        match self {
+            Self::Local => Some(CoordinateSpaceMapping::Local),
+            Self::ScaleOffset(scale_offset) => {
+                let inverted = scale_offset.inverse();
+                Some(CoordinateSpaceMapping::ScaleOffset(inverted))
+            }
+            Self::Transform(transform) => {
+                transform.inverse().map(CoordinateSpaceMapping::Transform)
+            }
+        }
+    }
+}
+
+/// Selects which of a node's transforms `get_world_transform_impl` uses when the
+/// node is outside the root coordinate system.
+enum TransformScroll {
+    /// Use the node's `content_transform`.
+    Scrolled,
+    /// Use the node's `viewport_transform`.
+    Unscrolled,
+}
+
+impl SpatialNodeContainer for SpatialTree {
+    /// Build a lightweight view (parent, node type, snapping transform) of the
+    /// node stored at `index`.
+    fn get_node_info(&self, index: SpatialNodeIndex) -> SpatialNodeInfo {
+        let spatial_node = &self.spatial_nodes[index.0 as usize];
+
+        SpatialNodeInfo {
+            parent: spatial_node.parent,
+            node_type: &spatial_node.node_type,
+            snapping_transform: spatial_node.snapping_transform,
+        }
+    }
+}
+
+impl SpatialTree {
+    /// Create an empty tree that has no nodes and no root reference frame.
+    pub fn new() -> Self {
+        Self {
+            root_reference_frame_index: SpatialNodeIndex::INVALID,
+            spatial_nodes: Vec::new(),
+            coord_systems: Vec::new(),
+            update_state_stack: Vec::new(),
+        }
+    }
+
+    /// Invoke `f` on the node at `index`, then recurse into each of its children.
+    fn visit_node_impl_mut<F>(
+        &mut self,
+        index: SpatialNodeIndex,
+        f: &mut F,
+    ) where F: FnMut(SpatialNodeIndex, &mut SpatialNode) {
+        // Snapshot the child list (as it is after `f` has run) so that the mutable
+        // borrow of the node ends before we recurse.
+        let children: SmallVec<[SpatialNodeIndex; 8]> = {
+            let node = self.get_spatial_node_mut(index);
+            f(index, node);
+            node.children.iter().copied().collect()
+        };
+
+        for child in children {
+            self.visit_node_impl_mut(child, f);
+        }
+    }
+
+    /// Invoke `f` on the node at `index`, then recurse into each of its children.
+    fn visit_node_impl<F>(
+        &self,
+        index: SpatialNodeIndex,
+        f: &mut F,
+    ) where F: FnMut(SpatialNodeIndex, &SpatialNode) {
+        let current = self.get_spatial_node(index);
+        f(index, current);
+
+        for &child in current.children.iter() {
+            self.visit_node_impl(child, f);
+        }
+    }
+
+    /// Visit all nodes from the root of the tree, invoking a closure on each one.
+    /// Does nothing if the tree has no root reference frame.
+    pub fn visit_nodes<F>(&self, mut f: F) where F: FnMut(SpatialNodeIndex, &SpatialNode) {
+        let root = self.root_reference_frame_index;
+
+        if root != SpatialNodeIndex::INVALID {
+            self.visit_node_impl(root, &mut f);
+        }
+    }
+
+    /// Visit all nodes from the root of the tree, invoking a closure on each one
+    /// with mutable access to the node. Does nothing if the tree has no root
+    /// reference frame.
+    pub fn visit_nodes_mut<F>(&mut self, mut f: F) where F: FnMut(SpatialNodeIndex, &mut SpatialNode) {
+        let root = self.root_reference_frame_index;
+
+        if root != SpatialNodeIndex::INVALID {
+            self.visit_node_impl_mut(root, &mut f);
+        }
+    }
+
+    /// Apply updates from a new scene to the frame spatial tree.
+    ///
+    /// The root reference frame index is taken from `updates`, every update is
+    /// applied in order, and finally the scroll / sticky offsets of each node
+    /// reachable from the root are reset.
+    ///
+    /// Panics if an update refers to an out-of-range index, or if a node is missing
+    /// from the child list of the parent it is being detached from.
+    pub fn apply_updates(
+        &mut self,
+        updates: SpatialTreeUpdates,
+    ) {
+        let SpatialTreeUpdates { root_reference_frame_index, updates: pending } = updates;
+        self.root_reference_frame_index = root_reference_frame_index;
+
+        for update in pending {
+            match update {
+                SpatialTreeUpdate::Insert { index, parent, descriptor } => {
+                    let node_index = SpatialNodeIndex(index as u32);
+
+                    if let Some(parent_index) = parent {
+                        self.spatial_nodes[parent_index.0 as usize].add_child(node_index);
+                    }
+
+                    // Everything except the descriptor data starts at a neutral default.
+                    let new_node = SpatialNode {
+                        viewport_transform: ScaleOffset::identity(),
+                        content_transform: ScaleOffset::identity(),
+                        snapping_transform: None,
+                        coordinate_system_id: CoordinateSystemId(0),
+                        transform_kind: TransformedRectKind::AxisAligned,
+                        parent,
+                        children: Vec::new(),
+                        pipeline_id: descriptor.pipeline_id,
+                        node_type: descriptor.node_type,
+                        invertible: true,
+                        is_async_zooming: false,
+                        is_ancestor_or_self_zooming: false,
+                    };
+
+                    // The node either reuses an existing slot or is appended
+                    // directly after the last one.
+                    assert!(index <= self.spatial_nodes.len());
+                    if index == self.spatial_nodes.len() {
+                        self.spatial_nodes.push(new_node);
+                    } else {
+                        self.spatial_nodes[index] = new_node;
+                    }
+                }
+                SpatialTreeUpdate::Update { index, descriptor, parent } => {
+                    let previous_parent = self.spatial_nodes[index].parent;
+
+                    if previous_parent != parent {
+                        // Unlink from the old parent (if any) ...
+                        if let Some(previous_parent) = previous_parent {
+                            let siblings = &mut self.spatial_nodes[previous_parent.0 as usize].children;
+                            let slot = siblings
+                                .iter()
+                                .position(|e| e.0 as usize == index)
+                                .expect("bug: not found!");
+                            siblings.remove(slot);
+                        }
+
+                        // ... and link to the new one.
+                        let new_parent = parent.expect("todo: is this valid?");
+                        self.spatial_nodes[new_parent.0 as usize].add_child(SpatialNodeIndex(index as u32));
+                    }
+
+                    let target = &mut self.spatial_nodes[index];
+                    target.parent = parent;
+                    target.pipeline_id = descriptor.pipeline_id;
+                    target.node_type = descriptor.node_type;
+                }
+                SpatialTreeUpdate::Remove { index, .. } => {
+                    let removed = &mut self.spatial_nodes[index];
+
+                    // Set the pipeline id to be invalid, so that even though this array
+                    // entry still exists we can easily see it's invalid when debugging.
+                    removed.pipeline_id = PipelineId::dummy();
+                    let parent = removed.parent;
+
+                    if let Some(parent) = parent {
+                        let siblings = &mut self.spatial_nodes[parent.0 as usize].children;
+                        let slot = siblings
+                            .iter()
+                            .position(|e| e.0 as usize == index)
+                            .expect("bug: not found!");
+                        siblings.remove(slot);
+                    }
+                }
+            }
+        }
+
+        // Reset the scroll / sticky offsets of every node reachable from the root.
+        self.visit_nodes_mut(|_, node| match &mut node.node_type {
+            SpatialNodeType::ScrollFrame(info) => {
+                info.offsets = vec![SampledScrollOffset{
+                    offset: -info.external_scroll_offset,
+                    generation: info.offset_generation,
+                }];
+            }
+            SpatialNodeType::StickyFrame(info) => {
+                info.current_offset = LayoutVector2D::zero();
+            }
+            SpatialNodeType::ReferenceFrame(..) => {}
+        });
+    }
+
+    /// Borrow the spatial node stored at `index`.
+    ///
+    /// Panics if `index` is out of bounds.
+    pub fn get_spatial_node(&self, index: SpatialNodeIndex) -> &SpatialNode {
+        let slot = index.0 as usize;
+        &self.spatial_nodes[slot]
+    }
+
+    /// Mutably borrow the spatial node stored at `index`.
+    ///
+    /// Panics if `index` is out of bounds.
+    pub fn get_spatial_node_mut(&mut self, index: SpatialNodeIndex) -> &mut SpatialNode {
+        let slot = index.0 as usize;
+        &mut self.spatial_nodes[slot]
+    }
+
+    /// Number of slots in the spatial node array. Slots of removed nodes are kept
+    /// by `apply_updates`, so they are counted as well.
+    pub fn spatial_node_count(&self) -> usize {
+        let nodes = &self.spatial_nodes;
+        nodes.len()
+    }
+
+    /// Find the node, reachable from the root, whose transform is bound to the
+    /// property `id`.
+    ///
+    /// If several nodes match, debug builds assert and release builds return the
+    /// one visited last.
+    pub fn find_spatial_node_by_anim_id(
+        &self,
+        id: PropertyBindingId,
+    ) -> Option<SpatialNodeIndex> {
+        let mut node_index: Option<SpatialNodeIndex> = None;
+
+        self.visit_nodes(|visited_index, visited_node| {
+            if !visited_node.is_transform_bound_to_property(id) {
+                return;
+            }
+
+            // Multiple nodes with same anim id
+            debug_assert!(node_index.is_none());
+            node_index = Some(visited_index);
+        });
+
+        node_index
+    }
+
+    /// Calculate the relative transform from `child_index` to `parent_index`,
+    /// without tracking the visible face.
+    /// This method will panic if the nodes are not connected!
+    pub fn get_relative_transform(
+        &self,
+        child_index: SpatialNodeIndex,
+        parent_index: SpatialNodeIndex,
+    ) -> CoordinateSpaceMapping<LayoutPixel, LayoutPixel> {
+        let untracked_face = None;
+        self.get_relative_transform_with_face(child_index, parent_index, untracked_face)
+    }
+
+    /// Calculate the relative transform from `child_index` to `parent_index`.
+    /// This method will panic if the nodes are not connected!
+    ///
+    /// When `visible_face` is supplied it is switched to `Back` if the back face is
+    /// seen at any stage where the combined transform is flattened, or in the final
+    /// combined transform.
+    pub fn get_relative_transform_with_face(
+        &self,
+        child_index: SpatialNodeIndex,
+        parent_index: SpatialNodeIndex,
+        mut visible_face: Option<&mut VisibleFace>,
+    ) -> CoordinateSpaceMapping<LayoutPixel, LayoutPixel> {
+        if child_index == parent_index {
+            return CoordinateSpaceMapping::Local;
+        }
+
+        let child = self.get_spatial_node(child_index);
+        let parent = self.get_spatial_node(parent_index);
+
+        // TODO(gw): We expect this never to fail, but it's possible that it might due to
+        //           either (a) a bug in WR / Gecko, or (b) some obscure real-world content
+        //           that we're unaware of. If we ever hit this, please open a bug with any
+        //           repro steps!
+        assert!(
+            child.coordinate_system_id.0 >= parent.coordinate_system_id.0,
+            "bug: this is an unexpected case - please open a bug and talk to #gfx team!",
+        );
+
+        let target_system = parent.coordinate_system_id;
+
+        // Same coordinate system: the result is expressible as a scale + offset.
+        if child.coordinate_system_id == target_system {
+            let parent_inverse = parent.content_transform.inverse();
+            return CoordinateSpaceMapping::ScaleOffset(
+                parent_inverse.accumulate(&child.content_transform),
+            );
+        }
+
+        let mut current_system = child.coordinate_system_id;
+        let mut transform = child.content_transform.to_transform();
+
+        // The visible face has to be re-evaluated in two cases:
+        // 1) when flattening happens, so that the original 3D aspects aren't lost
+        // 2) when we reach the end of iteration, so that the result is up to date
+
+        // Walk up the chain of coordinate systems until we reach the parent's one.
+        while current_system != target_system {
+            let coord_system = &self.coord_systems[current_system.0 as usize];
+
+            if coord_system.should_flatten {
+                if let Some(face) = visible_face.as_mut() {
+                    if transform.is_backface_visible() {
+                        **face = VisibleFace::Back;
+                    }
+                }
+                transform.flatten_z_output();
+            }
+
+            current_system = coord_system.parent.expect("invalid parent!");
+            transform = transform.then(&coord_system.transform);
+        }
+
+        let parent_inverse = parent.content_transform.inverse().to_transform();
+        transform = transform.then(&parent_inverse);
+
+        if let Some(face) = visible_face {
+            if transform.is_backface_visible() {
+                *face = VisibleFace::Back;
+            }
+        }
+
+        CoordinateSpaceMapping::Transform(transform)
+    }
+
+    /// Returns true if both supplied spatial nodes are in the same coordinate system
+    /// (implies the relative transform produce axis-aligned rects).
+    pub fn is_matching_coord_system(
+        &self,
+        index0: SpatialNodeIndex,
+        index1: SpatialNodeIndex,
+    ) -> bool {
+        let system0 = &self.get_spatial_node(index0).coordinate_system_id;
+        let system1 = &self.get_spatial_node(index1).coordinate_system_id;
+
+        system0 == system1
+    }
+
+    /// Shared implementation of `get_world_transform` and
+    /// `get_world_viewport_transform`.
+    ///
+    /// NOTE(review): for nodes in the root coordinate system `scroll` is not
+    /// consulted - `content_transform` is used for both variants. Confirm that this
+    /// is intended.
+    fn get_world_transform_impl(
+        &self,
+        index: SpatialNodeIndex,
+        scroll: TransformScroll,
+    ) -> CoordinateSpaceMapping<LayoutPixel, WorldPixel> {
+        let node = self.get_spatial_node(index);
+
+        if node.coordinate_system_id.0 != 0 {
+            // Combine the node's own scale / offset with the world transform of the
+            // coordinate system that it lives in.
+            let system = &self.coord_systems[node.coordinate_system_id.0 as usize];
+            let scale_offset = match scroll {
+                TransformScroll::Unscrolled => &node.viewport_transform,
+                TransformScroll::Scrolled => &node.content_transform,
+            };
+            let transform = scale_offset
+                .to_transform()
+                .then(&system.world_transform);
+
+            return CoordinateSpaceMapping::Transform(transform);
+        }
+
+        // Root coordinate system: no full matrix is needed.
+        if index == self.root_reference_frame_index {
+            CoordinateSpaceMapping::Local
+        } else {
+            CoordinateSpaceMapping::ScaleOffset(node.content_transform)
+        }
+    }
+
+    /// Calculate the relative transform from `index` to the root, using the
+    /// scrolled (content) variant of the node's transform.
+    pub fn get_world_transform(
+        &self,
+        index: SpatialNodeIndex,
+    ) -> CoordinateSpaceMapping<LayoutPixel, WorldPixel> {
+        let scroll = TransformScroll::Scrolled;
+        self.get_world_transform_impl(index, scroll)
+    }
+
+    /// Calculate the relative transform from `index` to the root.
+    /// Unlike `get_world_transform`, this variant doesn't account for the local scroll offset.
+    pub fn get_world_viewport_transform(
+        &self,
+        index: SpatialNodeIndex,
+    ) -> CoordinateSpaceMapping<LayoutPixel, WorldPixel> {
+        let scroll = TransformScroll::Unscrolled;
+        self.get_world_transform_impl(index, scroll)
+    }
+
+    /// The root reference frame, which is the true root of the SpatialTree.
+    ///
+    /// This is `SpatialNodeIndex::INVALID` for a tree created by `new` that has not
+    /// had any updates applied yet.
+    pub fn root_reference_frame_index(&self) -> SpatialNodeIndex {
+        self.root_reference_frame_index
+    }
+
+    /// Hand a copy of `offsets` to every node reachable from the root that matches
+    /// the external scroll id `id`.
+    ///
+    /// Returns true if at least one of those nodes reported a change.
+    pub fn set_scroll_offsets(
+        &mut self,
+        id: ExternalScrollId,
+        offsets: Vec<SampledScrollOffset>,
+    ) -> bool {
+        let mut did_change = false;
+
+        self.visit_nodes_mut(|_, node| {
+            if !node.matches_external_id(id) {
+                return;
+            }
+
+            let node_changed = node.set_scroll_offsets(offsets.clone());
+            did_change = did_change || node_changed;
+        });
+
+        did_change
+    }
+
+    /// Rebuild the coordinate systems and update every node reachable from the root.
+    ///
+    /// Does nothing if the tree has no root reference frame.
+    pub fn update_tree(
+        &mut self,
+        scene_properties: &SceneProperties,
+    ) {
+        if self.root_reference_frame_index == SpatialNodeIndex::INVALID {
+            return;
+        }
+
+        profile_scope!("update_tree");
+
+        // Start over with just the root coordinate system.
+        self.coord_systems.clear();
+        self.coord_systems.push(CoordinateSystem::root());
+
+        let root_node_index = self.root_reference_frame_index;
+        assert!(self.update_state_stack.is_empty());
+
+        // Initial state seen by the root node.
+        self.update_state_stack.push(TransformUpdateState {
+            parent_reference_frame_transform: LayoutVector2D::zero().into(),
+            parent_accumulated_scroll_offset: LayoutVector2D::zero(),
+            nearest_scrolling_ancestor_offset: LayoutVector2D::zero(),
+            nearest_scrolling_ancestor_viewport: LayoutRect::zero(),
+            current_coordinate_system_id: CoordinateSystemId::root(),
+            coordinate_system_relative_scale_offset: ScaleOffset::identity(),
+            invertible: true,
+            preserves_3d: false,
+            is_ancestor_or_self_zooming: false,
+            external_id: None,
+            scroll_offset: LayoutVector2D::zero(),
+        });
+
+        self.update_node(root_node_index, scene_properties);
+
+        self.update_state_stack.pop().unwrap();
+    }
+
+    /// Update the node at `node_index` from the state on top of the update stack,
+    /// then recurse into its children with the state prepared by this node.
+    fn update_node(
+        &mut self,
+        node_index: SpatialNodeIndex,
+        scene_properties: &SceneProperties,
+    ) {
+        // A node without a parent snaps relative to the identity.
+        let parent_snapping_transform = match self.spatial_nodes[node_index.0 as usize].parent {
+            None => Some(ScaleOffset::identity()),
+            Some(parent_index) => self.spatial_nodes[parent_index.0 as usize].snapping_transform,
+        };
+
+        let node = &mut self.spatial_nodes[node_index.0 as usize];
+
+        node.snapping_transform = calculate_snapping_transform(
+            parent_snapping_transform,
+            &node.node_type,
+        );
+
+        node.update(
+            &self.update_state_stack,
+            &mut self.coord_systems,
+            scene_properties,
+        );
+
+        if node.children.is_empty() {
+            return;
+        }
+
+        // Children see a copy of the current state, adjusted by this node.
+        let mut child_state = self.update_state_stack.last().unwrap().clone();
+        node.prepare_state_for_children(&mut child_state);
+
+        // Copy the child list so that the borrow of this node ends before recursing.
+        let child_indices: SmallVec<[SpatialNodeIndex; 8]> =
+            node.children.iter().copied().collect();
+
+        self.update_state_stack.push(child_state);
+
+        for child_index in child_indices {
+            self.update_node(child_index, scene_properties);
+        }
+
+        self.update_state_stack.pop().unwrap();
+    }
+
+    /// Create a transform palette sized for the number of slots in the spatial
+    /// node array.
+    pub fn build_transform_palette(&self) -> TransformPalette {
+        profile_scope!("build_transform_palette");
+        let node_count = self.spatial_nodes.len();
+        TransformPalette::new(node_count)
+    }
+
+    /// Print the node at `index`, followed by all of its descendants, to `pt`.
+    ///
+    /// Each node opens a new level named after its type, lists the type specific
+    /// details and the transforms common to all nodes, recurses into the children
+    /// and then closes the level again.
+    fn print_node<T: PrintTreePrinter>(
+        &self,
+        index: SpatialNodeIndex,
+        pt: &mut T,
+    ) {
+        let node = self.get_spatial_node(index);
+        match node.node_type {
+            SpatialNodeType::StickyFrame(ref sticky_frame_info) => {
+                pt.new_level("StickyFrame".to_string());
+                pt.add_item(format!("sticky info: {:?}", sticky_frame_info));
+            }
+            SpatialNodeType::ScrollFrame(ref scrolling_info) => {
+                pt.new_level("ScrollFrame".to_string());
+                pt.add_item(format!("viewport: {:?}", scrolling_info.viewport_rect));
+                pt.add_item(format!("scrollable_size: {:?}", scrolling_info.scrollable_size));
+                pt.add_item(format!("scroll offset: {:?}", scrolling_info.offset()));
+                pt.add_item(format!("external_scroll_offset: {:?}", scrolling_info.external_scroll_offset));
+                pt.add_item(format!("offset generation: {:?}", scrolling_info.offset_generation));
+                if scrolling_info.has_scroll_linked_effect == HasScrollLinkedEffect::Yes {
+                    pt.add_item("has scroll-linked effect".to_string());
+                }
+                pt.add_item(format!("kind: {:?}", scrolling_info.frame_kind));
+            }
+            SpatialNodeType::ReferenceFrame(ref info) => {
+                pt.new_level("ReferenceFrame".to_string());
+                pt.add_item(format!("kind: {:?}", info.kind));
+                pt.add_item(format!("transform_style: {:?}", info.transform_style));
+                pt.add_item(format!("source_transform: {:?}", info.source_transform));
+                pt.add_item(format!("origin_in_parent_reference_frame: {:?}", info.origin_in_parent_reference_frame));
+            }
+        }
+
+        // Details that are common to every node type.
+        pt.add_item(format!("index: {:?}", index));
+        pt.add_item(format!("content_transform: {:?}", node.content_transform));
+        pt.add_item(format!("viewport_transform: {:?}", node.viewport_transform));
+        pt.add_item(format!("snapping_transform: {:?}", node.snapping_transform));
+        pt.add_item(format!("coordinate_system_id: {:?}", node.coordinate_system_id));
+
+        for child_index in &node.children {
+            self.print_node(*child_index, pt);
+        }
+
+        pt.end_level();
+    }
+
+    /// Get the visible face of the transform from the specified node to its parent.
+    ///
+    /// A node without a parent always reports `VisibleFace::Front`.
+    pub fn get_local_visible_face(&self, node_index: SpatialNodeIndex) -> VisibleFace {
+        let node = self.get_spatial_node(node_index);
+        let mut face = VisibleFace::Front;
+
+        let mut parent_index = match node.parent {
+            Some(parent_index) => parent_index,
+            None => return face,
+        };
+
+        // Check if the parent is perspective. In CSS, a stacking context may
+        // have both perspective and a regular transformation. Gecko translates the
+        // perspective into a different `nsDisplayPerspective` and `nsDisplayTransform` items.
+        // On WebRender side, we end up with 2 different reference frames:
+        // one has kind of "transform", and it's parented to another of "perspective":
+        // https://searchfox.org/mozilla-central/rev/72c7cef167829b6f1e24cae216fa261934c455fc/layout/generic/nsIFrame.cpp#3716
+        let is_paired_with_perspective = matches!(
+            node.node_type,
+            SpatialNodeType::ReferenceFrame(ReferenceFrameInfo {
+                kind: ReferenceFrameKind::Transform { paired_with_perspective: true, .. },
+                ..
+            })
+        );
+
+        if is_paired_with_perspective {
+            let parent = self.get_spatial_node(parent_index);
+            match parent.node_type {
+                SpatialNodeType::ReferenceFrame(ReferenceFrameInfo {
+                    kind: ReferenceFrameKind::Perspective { .. },
+                    ..
+                }) => {
+                    // Measure against the parent of the perspective frame instead.
+                    parent_index = parent.parent.unwrap();
+                }
+                _ => {
+                    log::error!("Unexpected parent {:?} is not perspective", parent_index);
+                }
+            }
+        }
+
+        // Only the side effect on `face` is of interest here.
+        self.get_relative_transform_with_face(node_index, parent_index, Some(&mut face));
+
+        face
+    }
+
+    /// Dump the tree through the `debug!` logging macro. Does nothing if the tree
+    /// has no root reference frame.
+    #[allow(dead_code)]
+    pub fn print(&self) {
+        if self.root_reference_frame_index == SpatialNodeIndex::INVALID {
+            return;
+        }
+
+        let mut buf = Vec::<u8>::new();
+        {
+            // The printer is scoped so that its borrow of `buf` ends before we read it.
+            let mut pt = PrintTree::new_with_sink("spatial tree", &mut buf);
+            self.print_with(&mut pt);
+        }
+
+        // If running in Gecko, set RUST_LOG=webrender::spatial_tree=debug
+        // to get this logging to be emitted to stderr/logcat.
+        debug!("{}", std::str::from_utf8(&buf).unwrap_or("(Tree printer emitted non-utf8)"));
+    }
+}
+
+impl PrintableTree for SpatialTree {
+    /// Print the whole tree, starting at the root reference frame, to `pt`.
+    /// Nothing is printed if the tree has no root reference frame.
+    fn print_with<T: PrintTreePrinter>(&self, pt: &mut T) {
+        if self.root_reference_frame_index == SpatialNodeIndex::INVALID {
+            return;
+        }
+
+        self.print_node(self.root_reference_frame_index(), pt);
+    }
+}
+
+/// Calculate the accumulated external scroll offset for a given spatial node.
+///
+/// The walk starts at `node_index` and follows the parent links, adding up the
+/// external scroll offset of each scroll frame, until it reaches a reference
+/// frame or runs out of parents.
+pub fn get_external_scroll_offset<S: SpatialNodeContainer>(
+    spatial_tree: &S,
+    node_index: SpatialNodeIndex,
+) -> LayoutVector2D {
+    let mut accumulated = LayoutVector2D::zero();
+    let mut next = Some(node_index);
+
+    while let Some(index) = next {
+        let info = spatial_tree.get_node_info(index);
+
+        match info.node_type {
+            // External scroll offsets are not propagated across
+            // reference frames.
+            SpatialNodeType::ReferenceFrame(..) => break,
+            // Doesn't provide any external scroll offset
+            SpatialNodeType::StickyFrame(..) => {}
+            SpatialNodeType::ScrollFrame(scrolling) => {
+                accumulated += scrolling.external_scroll_offset;
+            }
+        }
+
+        next = info.parent;
+    }
+
+    accumulated
+}
+
+/// Combine the parent's snapping transform with the contribution of a node
+/// of the given type.
+///
+/// Returns `None` when the parent has no snapping transform, or when the node
+/// is a reference frame whose fixed transform cannot be expressed as a
+/// `ScaleOffset`. Node types other than reference frames contribute the
+/// identity.
+fn calculate_snapping_transform(
+    parent_snapping_transform: Option<ScaleOffset>,
+    node_type: &SpatialNodeType,
+) -> Option<ScaleOffset> {
+    // We need to incorporate the parent scale/offset with the child.
+    // If the parent does not have a scale/offset, then we know we are
+    // not 2d axis aligned and thus do not need to snap its children
+    // either.
+    let parent_scale_offset = parent_snapping_transform?;
+
+    let local_scale_offset = match node_type {
+        SpatialNodeType::ReferenceFrame(ref info) => {
+            let origin = info.origin_in_parent_reference_frame;
+
+            match info.source_transform {
+                PropertyBinding::Value(ref value) => {
+                    // We can only get a ScaleOffset if the transform is 2d axis
+                    // aligned; otherwise there is nothing to snap against.
+                    let from_transform = ScaleOffset::from_transform(value)?;
+                    ScaleOffset::from_offset(origin.to_untyped())
+                        .accumulate(&from_transform)
+                }
+
+                // Assume animations start at the identity transform for snapping purposes.
+                // We still want to incorporate the reference frame offset however.
+                // TODO(aosmond): Is there a better known starting point?
+                PropertyBinding::Binding(..) => ScaleOffset::from_offset(origin.to_untyped()),
+            }
+        }
+        _ => ScaleOffset::identity(),
+    };
+
+    Some(parent_scale_offset.accumulate(&local_scale_offset))
+}
+
+/// Test helper: attach a `Preserve3D` transform reference frame with a fixed
+/// `transform` under `parent` and return the index of the new node.
+///
+/// The node uses the dummy pipeline id, pipeline instance 0 and the supplied
+/// `key` for its uid.
+#[cfg(test)]
+fn add_reference_frame(
+    cst: &mut SceneSpatialTree,
+    parent: SpatialNodeIndex,
+    transform: LayoutTransform,
+    origin_in_parent_reference_frame: LayoutVector2D,
+    key: SpatialTreeItemKey,
+) -> SpatialNodeIndex {
+    let instance = PipelineInstanceId::new(0);
+    let frame_kind = ReferenceFrameKind::Transform {
+        is_2d_scale_translation: false,
+        should_snap: false,
+        paired_with_perspective: false,
+    };
+
+    cst.add_reference_frame(
+        parent,
+        TransformStyle::Preserve3D,
+        PropertyBinding::Value(transform),
+        frame_kind,
+        origin_in_parent_reference_frame,
+        PipelineId::dummy(),
+        SpatialNodeUid::external(key, PipelineId::dummy(), instance),
+    )
+}
+
+/// Test helper: map the point (`px`, `py`) through the relative transform
+/// from `child` to `parent` and assert that the result is within a small
+/// epsilon of (`expected_x`, `expected_y`).
+///
+/// Panics if the point cannot be transformed or if either coordinate differs
+/// from the expectation by more than the epsilon.
+#[cfg(test)]
+fn test_pt(
+    px: f32,
+    py: f32,
+    cst: &SpatialTree,
+    child: SpatialNodeIndex,
+    parent: SpatialNodeIndex,
+    expected_x: f32,
+    expected_y: f32,
+) {
+    use euclid::approxeq::ApproxEq;
+    const EPSILON: f32 = 0.0001;
+
+    let source = LayoutPoint::new(px, py);
+    let transform = cst.get_relative_transform(child, parent).into_transform();
+    let mapped = transform.transform_point2d(source).unwrap();
+
+    let x_matches = mapped.x.approx_eq_eps(&expected_x, &EPSILON);
+    let y_matches = mapped.y.approx_eq_eps(&expected_y, &EPSILON);
+    assert!(
+        x_matches && y_matches,
+        "p: {:?} -> {:?}\nm={:?}",
+        source, mapped, transform,
+    );
+}
+
+#[test]
+fn test_cst_simple_translation() {
+    // A chain of reference frames that only translate.
+
+    let mut scene_tree = SceneSpatialTree::new();
+    let scene_root = scene_tree.root_reference_frame_index();
+
+    let root = add_reference_frame(
+        &mut scene_tree,
+        scene_root,
+        LayoutTransform::identity(),
+        LayoutVector2D::zero(),
+        SpatialTreeItemKey::new(0, 0),
+    );
+
+    // +100 in x
+    let shift_x = add_reference_frame(
+        &mut scene_tree,
+        root,
+        LayoutTransform::translation(100.0, 0.0, 0.0),
+        LayoutVector2D::zero(),
+        SpatialTreeItemKey::new(0, 1),
+    );
+
+    // +50 in y
+    let shift_y = add_reference_frame(
+        &mut scene_tree,
+        shift_x,
+        LayoutTransform::translation(0.0, 50.0, 0.0),
+        LayoutVector2D::zero(),
+        SpatialTreeItemKey::new(0, 2),
+    );
+
+    // +200 in both x and y
+    let shift_xy = add_reference_frame(
+        &mut scene_tree,
+        shift_y,
+        LayoutTransform::translation(200.0, 200.0, 0.0),
+        LayoutVector2D::zero(),
+        SpatialTreeItemKey::new(0, 3),
+    );
+
+    let mut tree = SpatialTree::new();
+    tree.apply_updates(scene_tree.end_frame_and_get_pending_updates());
+    tree.update_tree(&SceneProperties::new());
+
+    // Translations accumulate along the chain.
+    test_pt(100.0, 100.0, &tree, shift_x, root, 200.0, 100.0);
+    test_pt(100.0, 100.0, &tree, shift_y, root, 200.0, 150.0);
+    test_pt(100.0, 100.0, &tree, shift_y, shift_x, 100.0, 150.0);
+    test_pt(100.0, 100.0, &tree, shift_xy, root, 400.0, 350.0);
+}
+
+#[test]
+fn test_cst_simple_scale() {
+    // A chain of reference frames that only scale.
+
+    let mut scene_tree = SceneSpatialTree::new();
+    let scene_root = scene_tree.root_reference_frame_index();
+
+    let root = add_reference_frame(
+        &mut scene_tree,
+        scene_root,
+        LayoutTransform::identity(),
+        LayoutVector2D::zero(),
+        SpatialTreeItemKey::new(0, 0),
+    );
+
+    // x4 horizontally
+    let scale_x = add_reference_frame(
+        &mut scene_tree,
+        root,
+        LayoutTransform::scale(4.0, 1.0, 1.0),
+        LayoutVector2D::zero(),
+        SpatialTreeItemKey::new(0, 1),
+    );
+
+    // x2 vertically
+    let scale_y = add_reference_frame(
+        &mut scene_tree,
+        scale_x,
+        LayoutTransform::scale(1.0, 2.0, 1.0),
+        LayoutVector2D::zero(),
+        SpatialTreeItemKey::new(0, 2),
+    );
+
+    // x2 on both axes
+    let scale_xy = add_reference_frame(
+        &mut scene_tree,
+        scale_y,
+        LayoutTransform::scale(2.0, 2.0, 1.0),
+        LayoutVector2D::zero(),
+        SpatialTreeItemKey::new(0, 3),
+    );
+
+    let mut tree = SpatialTree::new();
+    tree.apply_updates(scene_tree.end_frame_and_get_pending_updates());
+    tree.update_tree(&SceneProperties::new());
+
+    // Relative to the root, every scale on the path is applied.
+    test_pt(100.0, 100.0, &tree, scale_x, root, 400.0, 100.0);
+    test_pt(100.0, 100.0, &tree, scale_y, root, 400.0, 200.0);
+    test_pt(100.0, 100.0, &tree, scale_xy, root, 800.0, 400.0);
+    // Relative to `scale_x`, its own x4 scale is not applied.
+    test_pt(100.0, 100.0, &tree, scale_y, scale_x, 100.0, 200.0);
+    test_pt(100.0, 100.0, &tree, scale_xy, scale_x, 200.0, 400.0);
+}
+
+#[test]
+fn test_cst_scale_translation() {
+    // Alternating translation and scale reference frames.
+
+    let mut scene_tree = SceneSpatialTree::new();
+    let scene_root = scene_tree.root_reference_frame_index();
+
+    let root = add_reference_frame(
+        &mut scene_tree,
+        scene_root,
+        LayoutTransform::identity(),
+        LayoutVector2D::zero(),
+        SpatialTreeItemKey::new(0, 0),
+    );
+
+    let outer_shift = add_reference_frame(
+        &mut scene_tree,
+        root,
+        LayoutTransform::translation(100.0, 50.0, 0.0),
+        LayoutVector2D::zero(),
+        SpatialTreeItemKey::new(0, 1),
+    );
+
+    let outer_scale = add_reference_frame(
+        &mut scene_tree,
+        outer_shift,
+        LayoutTransform::scale(2.0, 4.0, 1.0),
+        LayoutVector2D::zero(),
+        SpatialTreeItemKey::new(0, 2),
+    );
+
+    let inner_shift = add_reference_frame(
+        &mut scene_tree,
+        outer_scale,
+        LayoutTransform::translation(200.0, -100.0, 0.0),
+        LayoutVector2D::zero(),
+        SpatialTreeItemKey::new(0, 3),
+    );
+
+    let inner_scale = add_reference_frame(
+        &mut scene_tree,
+        inner_shift,
+        LayoutTransform::scale(3.0, 2.0, 1.0),
+        LayoutVector2D::zero(),
+        SpatialTreeItemKey::new(0, 4),
+    );
+
+    let mut tree = SpatialTree::new();
+    tree.apply_updates(scene_tree.end_frame_and_get_pending_updates());
+    tree.update_tree(&SceneProperties::new());
+
+    // Mapping all the way up to the root.
+    test_pt(100.0, 100.0, &tree, outer_shift, root, 200.0, 150.0);
+    test_pt(100.0, 100.0, &tree, outer_scale, root, 300.0, 450.0);
+    test_pt(100.0, 100.0, &tree, inner_scale, root, 1100.0, 450.0);
+
+    // Mapping up to `outer_shift`, whose own translation is then excluded.
+    test_pt(0.0, 0.0, &tree, inner_scale, outer_shift, 400.0, -400.0);
+    test_pt(100.0, 100.0, &tree, inner_scale, outer_shift, 1000.0, 400.0);
+    test_pt(100.0, 100.0, &tree, outer_scale, outer_shift, 200.0, 400.0);
+
+    test_pt(100.0, 100.0, &tree, inner_shift, outer_shift, 600.0, 0.0);
+}
+
+#[test]
+fn test_cst_translation_rotate() {
+    // A single reference frame rotated by -90 degrees about the z axis.
+    use euclid::Angle;
+
+    let mut scene_tree = SceneSpatialTree::new();
+    let scene_root = scene_tree.root_reference_frame_index();
+
+    let root = add_reference_frame(
+        &mut scene_tree,
+        scene_root,
+        LayoutTransform::identity(),
+        LayoutVector2D::zero(),
+        SpatialTreeItemKey::new(0, 0),
+    );
+
+    let rotated = add_reference_frame(
+        &mut scene_tree,
+        root,
+        LayoutTransform::rotation(0.0, 0.0, 1.0, Angle::degrees(-90.0)),
+        LayoutVector2D::zero(),
+        SpatialTreeItemKey::new(0, 1),
+    );
+
+    let mut tree = SpatialTree::new();
+    tree.apply_updates(scene_tree.end_frame_and_get_pending_updates());
+    tree.update_tree(&SceneProperties::new());
+
+    // A point on the positive x axis is expected on the negative y axis.
+    test_pt(100.0, 0.0, &tree, rotated, root, 0.0, -100.0);
+}
+
+#[test]
+fn test_is_ancestor1() {
+    // Tree shape under test:
+    //
+    //   root
+    //   |- branch_a
+    //   |  `- leaf_a
+    //   `- branch_b
+    let mut tree = SceneSpatialTree::new();
+    let scene_root = tree.root_reference_frame_index();
+
+    let root = add_reference_frame(
+        &mut tree,
+        scene_root,
+        LayoutTransform::identity(),
+        LayoutVector2D::zero(),
+        SpatialTreeItemKey::new(0, 0),
+    );
+
+    let branch_a = add_reference_frame(
+        &mut tree,
+        root,
+        LayoutTransform::identity(),
+        LayoutVector2D::zero(),
+        SpatialTreeItemKey::new(0, 1),
+    );
+
+    let leaf_a = add_reference_frame(
+        &mut tree,
+        branch_a,
+        LayoutTransform::identity(),
+        LayoutVector2D::zero(),
+        SpatialTreeItemKey::new(0, 2),
+    );
+
+    let branch_b = add_reference_frame(
+        &mut tree,
+        root,
+        LayoutTransform::identity(),
+        LayoutVector2D::zero(),
+        SpatialTreeItemKey::new(0, 3),
+    );
+
+    // A node is never its own ancestor.
+    assert!(!tree.is_ancestor(root, root));
+    assert!(!tree.is_ancestor(branch_a, branch_a));
+    assert!(!tree.is_ancestor(leaf_a, leaf_a));
+    assert!(!tree.is_ancestor(branch_b, branch_b));
+
+    // Direct and transitive ancestors along the first branch.
+    assert!(tree.is_ancestor(root, branch_a));
+    assert!(tree.is_ancestor(root, leaf_a));
+    assert!(tree.is_ancestor(branch_a, leaf_a));
+
+    // The relation does not hold in the reverse direction.
+    assert!(!tree.is_ancestor(branch_a, root));
+    assert!(!tree.is_ancestor(leaf_a, root));
+    assert!(!tree.is_ancestor(leaf_a, branch_a));
+
+    // Second branch relative to the root.
+    assert!(tree.is_ancestor(root, branch_b));
+    assert!(!tree.is_ancestor(branch_b, root));
+
+    // Nodes on different branches are unrelated.
+    assert!(!tree.is_ancestor(branch_a, branch_b));
+    assert!(!tree.is_ancestor(leaf_a, branch_b));
+    assert!(!tree.is_ancestor(branch_b, branch_a));
+    assert!(!tree.is_ancestor(branch_b, leaf_a));
+}
+
+/// Checks the simple case: a single scroll frame whose content is wider than
+/// its viewport is reported as its own scroll root.
+#[test]
+fn test_find_scroll_root_simple() {
+    let mut tree = SceneSpatialTree::new();
+    let instance = PipelineInstanceId::new(0);
+    // Builds the uid for the node registered under item key (0, slot).
+    let uid = |slot| {
+        SpatialNodeUid::external(SpatialTreeItemKey::new(0, slot), PipelineId::dummy(), instance)
+    };
+    let viewport = LayoutRect::from_size(LayoutSize::new(400.0, 400.0));
+    let wide_content = LayoutSize::new(800.0, 400.0);
+
+    let scene_root = tree.root_reference_frame_index();
+    let root = tree.add_reference_frame(
+        scene_root,
+        TransformStyle::Flat,
+        PropertyBinding::Value(LayoutTransform::identity()),
+        ReferenceFrameKind::Transform {
+            is_2d_scale_translation: true,
+            should_snap: true,
+            paired_with_perspective: false,
+        },
+        LayoutVector2D::new(0.0, 0.0),
+        PipelineId::dummy(),
+        uid(0),
+    );
+
+    let scroll = tree.add_scroll_frame(
+        root,
+        ExternalScrollId(1, PipelineId::dummy()),
+        PipelineId::dummy(),
+        &viewport,
+        &wide_content,
+        ScrollFrameKind::Explicit,
+        LayoutVector2D::new(0.0, 0.0),
+        APZScrollGeneration::default(),
+        HasScrollLinkedEffect::No,
+        uid(1),
+    );
+
+    assert_eq!(tree.find_scroll_root(scroll), scroll);
+}
+
+/// Checks that when both the outer and the nested scroll frame have content
+/// wider than their viewport, the outer one is reported as the scroll root.
+#[test]
+fn test_find_scroll_root_sub_scroll_frame() {
+    let mut tree = SceneSpatialTree::new();
+    let instance = PipelineInstanceId::new(0);
+    // Builds the uid for the node registered under item key (0, slot).
+    let uid = |slot| {
+        SpatialNodeUid::external(SpatialTreeItemKey::new(0, slot), PipelineId::dummy(), instance)
+    };
+    let viewport = LayoutRect::from_size(LayoutSize::new(400.0, 400.0));
+    let wide_content = LayoutSize::new(800.0, 400.0);
+
+    let scene_root = tree.root_reference_frame_index();
+    let root = tree.add_reference_frame(
+        scene_root,
+        TransformStyle::Flat,
+        PropertyBinding::Value(LayoutTransform::identity()),
+        ReferenceFrameKind::Transform {
+            is_2d_scale_translation: true,
+            should_snap: true,
+            paired_with_perspective: false,
+        },
+        LayoutVector2D::new(0.0, 0.0),
+        PipelineId::dummy(),
+        uid(0),
+    );
+
+    let root_scroll = tree.add_scroll_frame(
+        root,
+        ExternalScrollId(1, PipelineId::dummy()),
+        PipelineId::dummy(),
+        &viewport,
+        &wide_content,
+        ScrollFrameKind::Explicit,
+        LayoutVector2D::new(0.0, 0.0),
+        APZScrollGeneration::default(),
+        HasScrollLinkedEffect::No,
+        uid(1),
+    );
+
+    let sub_scroll = tree.add_scroll_frame(
+        root_scroll,
+        ExternalScrollId(1, PipelineId::dummy()),
+        PipelineId::dummy(),
+        &viewport,
+        &wide_content,
+        ScrollFrameKind::Explicit,
+        LayoutVector2D::new(0.0, 0.0),
+        APZScrollGeneration::default(),
+        HasScrollLinkedEffect::No,
+        uid(2),
+    );
+
+    assert_eq!(tree.find_scroll_root(sub_scroll), root_scroll);
+}
+
+/// Checks that the nested scroll frame is reported as the scroll root when
+/// the outer scroll frame's content is exactly as large as its viewport
+/// (i.e. the outer frame has nothing to scroll).
+#[test]
+fn test_find_scroll_root_not_scrollable() {
+    let mut tree = SceneSpatialTree::new();
+    let instance = PipelineInstanceId::new(0);
+    // Builds the uid for the node registered under item key (0, slot).
+    let uid = |slot| {
+        SpatialNodeUid::external(SpatialTreeItemKey::new(0, slot), PipelineId::dummy(), instance)
+    };
+    let viewport = LayoutRect::from_size(LayoutSize::new(400.0, 400.0));
+    let fitting_content = LayoutSize::new(400.0, 400.0);
+    let wide_content = LayoutSize::new(800.0, 400.0);
+
+    let scene_root = tree.root_reference_frame_index();
+    let root = tree.add_reference_frame(
+        scene_root,
+        TransformStyle::Flat,
+        PropertyBinding::Value(LayoutTransform::identity()),
+        ReferenceFrameKind::Transform {
+            is_2d_scale_translation: true,
+            should_snap: true,
+            paired_with_perspective: false,
+        },
+        LayoutVector2D::new(0.0, 0.0),
+        PipelineId::dummy(),
+        uid(0),
+    );
+
+    let root_scroll = tree.add_scroll_frame(
+        root,
+        ExternalScrollId(1, PipelineId::dummy()),
+        PipelineId::dummy(),
+        &viewport,
+        &fitting_content,
+        ScrollFrameKind::Explicit,
+        LayoutVector2D::new(0.0, 0.0),
+        APZScrollGeneration::default(),
+        HasScrollLinkedEffect::No,
+        uid(1),
+    );
+
+    let sub_scroll = tree.add_scroll_frame(
+        root_scroll,
+        ExternalScrollId(1, PipelineId::dummy()),
+        PipelineId::dummy(),
+        &viewport,
+        &wide_content,
+        ScrollFrameKind::Explicit,
+        LayoutVector2D::new(0.0, 0.0),
+        APZScrollGeneration::default(),
+        HasScrollLinkedEffect::No,
+        uid(2),
+    );
+
+    assert_eq!(tree.find_scroll_root(sub_scroll), sub_scroll);
+}
+
+/// Checks that the nested scroll frame is reported as the scroll root when
+/// the outer scroll frame's viewport is only `MIN_SCROLL_ROOT_SIZE` on each
+/// side, even though the outer frame has larger content.
+#[test]
+fn test_find_scroll_root_too_small() {
+    let mut tree = SceneSpatialTree::new();
+    let instance = PipelineInstanceId::new(0);
+    // Builds the uid for the node registered under item key (0, slot).
+    let uid = |slot| {
+        SpatialNodeUid::external(SpatialTreeItemKey::new(0, slot), PipelineId::dummy(), instance)
+    };
+    let tiny_viewport =
+        LayoutRect::from_size(LayoutSize::new(MIN_SCROLL_ROOT_SIZE, MIN_SCROLL_ROOT_SIZE));
+    let large_content = LayoutSize::new(1000.0, 1000.0);
+    let viewport = LayoutRect::from_size(LayoutSize::new(400.0, 400.0));
+    let wide_content = LayoutSize::new(800.0, 400.0);
+
+    let scene_root = tree.root_reference_frame_index();
+    let root = tree.add_reference_frame(
+        scene_root,
+        TransformStyle::Flat,
+        PropertyBinding::Value(LayoutTransform::identity()),
+        ReferenceFrameKind::Transform {
+            is_2d_scale_translation: true,
+            should_snap: true,
+            paired_with_perspective: false,
+        },
+        LayoutVector2D::new(0.0, 0.0),
+        PipelineId::dummy(),
+        uid(0),
+    );
+
+    let root_scroll = tree.add_scroll_frame(
+        root,
+        ExternalScrollId(1, PipelineId::dummy()),
+        PipelineId::dummy(),
+        &tiny_viewport,
+        &large_content,
+        ScrollFrameKind::Explicit,
+        LayoutVector2D::new(0.0, 0.0),
+        APZScrollGeneration::default(),
+        HasScrollLinkedEffect::No,
+        uid(1),
+    );
+
+    let sub_scroll = tree.add_scroll_frame(
+        root_scroll,
+        ExternalScrollId(1, PipelineId::dummy()),
+        PipelineId::dummy(),
+        &viewport,
+        &wide_content,
+        ScrollFrameKind::Explicit,
+        LayoutVector2D::new(0.0, 0.0),
+        APZScrollGeneration::default(),
+        HasScrollLinkedEffect::No,
+        uid(2),
+    );
+
+    assert_eq!(tree.find_scroll_root(sub_scroll), sub_scroll);
+}
+
+/// Checks that the outer scroll frame is reported as the scroll root, even
+/// though its content fits its viewport, when a perspective reference frame
+/// sits between it and the nested scroll frame.
+#[test]
+fn test_find_scroll_root_perspective() {
+    let mut tree = SceneSpatialTree::new();
+    let instance = PipelineInstanceId::new(0);
+    // Builds the uid for the node registered under item key (0, slot).
+    let uid = |slot| {
+        SpatialNodeUid::external(SpatialTreeItemKey::new(0, slot), PipelineId::dummy(), instance)
+    };
+    let viewport = LayoutRect::from_size(LayoutSize::new(400.0, 400.0));
+    let fitting_content = LayoutSize::new(400.0, 400.0);
+    let wide_content = LayoutSize::new(800.0, 400.0);
+
+    let scene_root = tree.root_reference_frame_index();
+    let root = tree.add_reference_frame(
+        scene_root,
+        TransformStyle::Flat,
+        PropertyBinding::Value(LayoutTransform::identity()),
+        ReferenceFrameKind::Transform {
+            is_2d_scale_translation: true,
+            should_snap: true,
+            paired_with_perspective: false,
+        },
+        LayoutVector2D::new(0.0, 0.0),
+        PipelineId::dummy(),
+        uid(0),
+    );
+
+    let root_scroll = tree.add_scroll_frame(
+        root,
+        ExternalScrollId(1, PipelineId::dummy()),
+        PipelineId::dummy(),
+        &viewport,
+        &fitting_content,
+        ScrollFrameKind::Explicit,
+        LayoutVector2D::new(0.0, 0.0),
+        APZScrollGeneration::default(),
+        HasScrollLinkedEffect::No,
+        uid(1),
+    );
+
+    // Perspective frame separating the two scroll frames.
+    let perspective = tree.add_reference_frame(
+        root_scroll,
+        TransformStyle::Flat,
+        PropertyBinding::Value(LayoutTransform::identity()),
+        ReferenceFrameKind::Perspective {
+            scrolling_relative_to: None,
+        },
+        LayoutVector2D::new(0.0, 0.0),
+        PipelineId::dummy(),
+        uid(2),
+    );
+
+    let sub_scroll = tree.add_scroll_frame(
+        perspective,
+        ExternalScrollId(1, PipelineId::dummy()),
+        PipelineId::dummy(),
+        &viewport,
+        &wide_content,
+        ScrollFrameKind::Explicit,
+        LayoutVector2D::new(0.0, 0.0),
+        APZScrollGeneration::default(),
+        HasScrollLinkedEffect::No,
+        uid(3),
+    );
+
+    assert_eq!(tree.find_scroll_root(sub_scroll), root_scroll);
+}
+
+/// Checks that a reference frame flagged as a 2D scale/translation between
+/// the two scroll frames does not stop the nested scroll frame from being
+/// reported as the scroll root when the outer frame's content fits its
+/// viewport.
+#[test]
+fn test_find_scroll_root_2d_scale() {
+    let mut tree = SceneSpatialTree::new();
+    let instance = PipelineInstanceId::new(0);
+    // Builds the uid for the node registered under item key (0, slot).
+    let uid = |slot| {
+        SpatialNodeUid::external(SpatialTreeItemKey::new(0, slot), PipelineId::dummy(), instance)
+    };
+    let viewport = LayoutRect::from_size(LayoutSize::new(400.0, 400.0));
+    let fitting_content = LayoutSize::new(400.0, 400.0);
+    let wide_content = LayoutSize::new(800.0, 400.0);
+
+    let scene_root = tree.root_reference_frame_index();
+    let root = tree.add_reference_frame(
+        scene_root,
+        TransformStyle::Flat,
+        PropertyBinding::Value(LayoutTransform::identity()),
+        ReferenceFrameKind::Transform {
+            is_2d_scale_translation: true,
+            should_snap: true,
+            paired_with_perspective: false,
+        },
+        LayoutVector2D::new(0.0, 0.0),
+        PipelineId::dummy(),
+        uid(0),
+    );
+
+    let root_scroll = tree.add_scroll_frame(
+        root,
+        ExternalScrollId(1, PipelineId::dummy()),
+        PipelineId::dummy(),
+        &viewport,
+        &fitting_content,
+        ScrollFrameKind::Explicit,
+        LayoutVector2D::new(0.0, 0.0),
+        APZScrollGeneration::default(),
+        HasScrollLinkedEffect::No,
+        uid(1),
+    );
+
+    // 2D scale/translation frame separating the two scroll frames.
+    let scale = tree.add_reference_frame(
+        root_scroll,
+        TransformStyle::Flat,
+        PropertyBinding::Value(LayoutTransform::identity()),
+        ReferenceFrameKind::Transform {
+            is_2d_scale_translation: true,
+            should_snap: false,
+            paired_with_perspective: false,
+        },
+        LayoutVector2D::new(0.0, 0.0),
+        PipelineId::dummy(),
+        uid(2),
+    );
+
+    let sub_scroll = tree.add_scroll_frame(
+        scale,
+        ExternalScrollId(1, PipelineId::dummy()),
+        PipelineId::dummy(),
+        &viewport,
+        &wide_content,
+        ScrollFrameKind::Explicit,
+        LayoutVector2D::new(0.0, 0.0),
+        APZScrollGeneration::default(),
+        HasScrollLinkedEffect::No,
+        uid(3),
+    );
+
+    assert_eq!(tree.find_scroll_root(sub_scroll), sub_scroll);
+}
diff --git a/gfx/wr/webrender/src/surface.rs b/gfx/wr/webrender/src/surface.rs
new file mode 100644
index 0000000000..20c0f2fa9b
--- /dev/null
+++ b/gfx/wr/webrender/src/surface.rs
@@ -0,0 +1,665 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::units::*;
+use crate::command_buffer::{CommandBufferBuilderKind, CommandBufferList, CommandBufferBuilder, CommandBufferIndex, PrimitiveCommand};
+use crate::internal_types::FastHashMap;
+use crate::picture::{SurfaceInfo, SurfaceIndex, TileKey, SubSliceIndex};
+use crate::prim_store::{PictureIndex};
+use crate::render_task_graph::{RenderTaskId, RenderTaskGraphBuilder};
+use crate::spatial_tree::SpatialNodeIndex;
+use crate::render_target::ResolveOp;
+use crate::render_task::{RenderTask, RenderTaskKind, RenderTaskLocation};
+use crate::visibility::{VisibilityState, PrimitiveVisibility};
+
+/*
+ Contains functionality to help building the render task graph from a series of off-screen
+ surfaces that are created during the prepare pass. For now, it maintains existing behavior.
+ A future patch will add support for surface sub-graphs, while ensuring the render task
+ graph itself is built correctly with dependencies regardless of the surface kind (chained,
+ tiled, simple).
+ */
+
+/// Information about the render task(s) for a given tile
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct SurfaceTileDescriptor {
+    /// Target render task for commands added to this tile. This is changed
+    /// each time a sub-graph is encountered on this tile
+    pub current_task_id: RenderTaskId,
+    /// The compositing task for this tile, if required. This is only needed
+    /// when a tile contains one or more sub-graphs.
+    pub composite_task_id: Option<RenderTaskId>,
+}
+
+/// Details of how a surface is rendered
+pub enum SurfaceDescriptorKind {
+    /// Picture cache tiles
+    Tiled {
+        /// Render task information for each tile, keyed by tile.
+        tiles: FastHashMap<TileKey, SurfaceTileDescriptor>,
+    },
+    /// A single surface (e.g. for an opacity filter)
+    Simple {
+        /// Task handed to `CommandBufferBuilder::new_simple` when this
+        /// surface is pushed.
+        render_task_id: RenderTaskId,
+    },
+    /// A surface with 1+ intermediate tasks (e.g. blur)
+    Chained {
+        /// Task handed to `CommandBufferBuilder::new_simple` when this
+        /// surface is pushed.
+        render_task_id: RenderTaskId,
+        /// Forwarded as the optional last argument of
+        /// `CommandBufferBuilder::new_simple`.
+        /// NOTE(review): presumably the final task of the chain - confirm.
+        root_task_id: RenderTaskId,
+    },
+}
+
+/// Describes how a surface is rendered
+pub struct SurfaceDescriptor {
+    /// Which kind of surface this is, along with its render task id(s).
+    kind: SurfaceDescriptorKind,
+    /// Dirty rects for the surface; `SurfaceBuilder::push_surface` pushes
+    /// them on to the builder's dirty rect stack.
+    dirty_rects: Vec<PictureRect>,
+}
+
+impl SurfaceDescriptor {
+    /// Describe a picture cache surface made up of `tiles`, with the given
+    /// list of dirty rects.
+    pub fn new_tiled(
+        tiles: FastHashMap<TileKey, SurfaceTileDescriptor>,
+        dirty_rects: Vec<PictureRect>,
+    ) -> Self {
+        let kind = SurfaceDescriptorKind::Tiled { tiles };
+
+        Self { kind, dirty_rects }
+    }
+
+    /// Describe a chained surface (e.g. blur). `dirty_rect` becomes the
+    /// descriptor's only dirty rect.
+    pub fn new_chained(
+        render_task_id: RenderTaskId,
+        root_task_id: RenderTaskId,
+        dirty_rect: PictureRect,
+    ) -> Self {
+        let kind = SurfaceDescriptorKind::Chained { render_task_id, root_task_id };
+
+        Self {
+            kind,
+            dirty_rects: vec![dirty_rect],
+        }
+    }
+
+    /// Describe a simple surface (e.g. opacity). `dirty_rect` becomes the
+    /// descriptor's only dirty rect.
+    pub fn new_simple(
+        render_task_id: RenderTaskId,
+        dirty_rect: PictureRect,
+    ) -> Self {
+        let kind = SurfaceDescriptorKind::Simple { render_task_id };
+
+        Self {
+            kind,
+            dirty_rects: vec![dirty_rect],
+        }
+    }
+}
+
+/// Describes a list of command buffers that we are adding primitives to
+/// for a given surface. These are created from a command buffer builder
+/// as an optimization - skipping the indirection pic_task -> cmd_buffer_index
+enum CommandBufferTargets {
+    /// Picture cache targets target multiple command buffers
+    Tiled {
+        /// Command buffer to add primitives to for each tile.
+        tiles: FastHashMap<TileKey, CommandBufferIndex>,
+    },
+    /// Child surfaces target a single command buffer
+    Simple {
+        /// The one command buffer that receives every primitive.
+        cmd_buffer_index: CommandBufferIndex,
+    },
+}
+
+impl CommandBufferTargets {
+    /// Rebuild these targets from the command buffer builder `cb`,
+    /// overwriting whatever `self` held before.
+    ///
+    /// Every render task id referenced by the builder is looked up in
+    /// `rg_builder` and must be a picture task; any other task kind hits
+    /// `unreachable!`. An `Invalid` builder produces an empty tiled target.
+    fn init(
+        &mut self,
+        cb: &CommandBufferBuilder,
+        rg_builder: &RenderTaskGraphBuilder,
+    ) {
+        // Look up the command buffer index stored on a picture render task.
+        let cmd_buffer_for = |task_id: RenderTaskId| -> CommandBufferIndex {
+            match rg_builder.get_task(task_id).kind {
+                RenderTaskKind::Picture(ref info) => info.cmd_buffer_index,
+                _ => unreachable!("bug: not a picture"),
+            }
+        };
+
+        *self = match cb.kind {
+            CommandBufferBuilderKind::Tiled { ref tiles, .. } => {
+                let mut targets = FastHashMap::default();
+
+                for (tile_key, tile_desc) in tiles {
+                    targets.insert(*tile_key, cmd_buffer_for(tile_desc.current_task_id));
+                }
+
+                CommandBufferTargets::Tiled { tiles: targets }
+            }
+            CommandBufferBuilderKind::Simple { render_task_id, .. } => {
+                CommandBufferTargets::Simple {
+                    cmd_buffer_index: cmd_buffer_for(render_task_id),
+                }
+            }
+            CommandBufferBuilderKind::Invalid => {
+                CommandBufferTargets::Tiled { tiles: FastHashMap::default() }
+            }
+        };
+    }
+
+    /// Add the primitive command to every command buffer it targets.
+    ///
+    /// For tiled targets the command is added once per tile inside
+    /// `tile_rect` (for the given `sub_slice_index`) that has an entry in the
+    /// tile map; tiles without an entry are skipped. For simple targets the
+    /// command goes to the single command buffer and `tile_rect` /
+    /// `sub_slice_index` are not consulted.
+    fn push_prim(
+        &mut self,
+        prim_cmd: &PrimitiveCommand,
+        spatial_node_index: SpatialNodeIndex,
+        tile_rect: crate::picture::TileRect,
+        sub_slice_index: SubSliceIndex,
+        cmd_buffers: &mut CommandBufferList,
+    ) {
+        match *self {
+            CommandBufferTargets::Simple { cmd_buffer_index, .. } => {
+                // Only one destination, so no tile lookup is needed.
+                cmd_buffers
+                    .get_mut(cmd_buffer_index)
+                    .add_prim(prim_cmd, spatial_node_index);
+            }
+            CommandBufferTargets::Tiled { ref tiles } => {
+                // Visit each tile covered by the primitive, row by row.
+                for y in tile_rect.min.y .. tile_rect.max.y {
+                    for x in tile_rect.min.x .. tile_rect.max.x {
+                        let key = TileKey {
+                            tile_offset: crate::picture::TileOffset::new(x, y),
+                            sub_slice_index,
+                        };
+
+                        let cmd_buffer_index = match tiles.get(&key) {
+                            Some(index) => *index,
+                            None => continue,
+                        };
+
+                        cmd_buffers
+                            .get_mut(cmd_buffer_index)
+                            .add_prim(prim_cmd, spatial_node_index);
+                    }
+                }
+            }
+        }
+    }
+}
+
+/// Main helper interface to build a graph of surfaces. In future patches this
+/// will support building sub-graphs.
+pub struct SurfaceBuilder {
+    /// The currently set cmd buffer targets (updated during push/pop)
+    current_cmd_buffers: CommandBufferTargets,
+    /// Stack of surfaces that are parents to the current targets
+    builder_stack: Vec<CommandBufferBuilder>,
+    /// Dirty rect stack used to reject adding primitives
+    dirty_rect_stack: Vec<Vec<PictureRect>>,
+    /// A map of the output render tasks from any sub-graphs that haven't
+    /// been consumed by BackdropRender prims yet
+    pub sub_graph_output_map: FastHashMap<PictureIndex, RenderTaskId>,
+}
+
+impl SurfaceBuilder {
+    /// Create a builder with empty stacks, an empty sub-graph output map and
+    /// an empty tiled command buffer target.
+    pub fn new() -> Self {
+        let current_cmd_buffers = CommandBufferTargets::Tiled {
+            tiles: FastHashMap::default(),
+        };
+
+        Self {
+            current_cmd_buffers,
+            builder_stack: Vec::new(),
+            dirty_rect_stack: Vec::new(),
+            sub_graph_output_map: FastHashMap::default(),
+        }
+    }
+
+    /// Register the current surface as the source of a resolve for the task sub-graph that
+    /// is currently on the surface builder stack.
+    ///
+    /// The render task of the top-most builder is stored as the resolve
+    /// source of the nearest builder on the stack that establishes a
+    /// sub-graph.
+    ///
+    /// Panics if the stack is empty, if the top-most builder is not a simple
+    /// one, if no builder on the stack establishes a sub-graph, or if that
+    /// builder already has a resolve source.
+    pub fn register_resolve_source(
+        &mut self,
+    ) {
+        let surface_task_id = match self.builder_stack.last().unwrap().kind {
+            CommandBufferBuilderKind::Simple { render_task_id, .. } => render_task_id,
+            CommandBufferBuilderKind::Tiled { .. } | CommandBufferBuilderKind::Invalid => {
+                panic!("bug: only supported for non-tiled surfaces");
+            }
+        };
+
+        // Search from the top of the stack down for the closest sub-graph.
+        let sub_graph_builder = self
+            .builder_stack
+            .iter_mut()
+            .rev()
+            .find(|candidate| candidate.establishes_sub_graph);
+
+        match sub_graph_builder {
+            Some(builder) => {
+                assert_eq!(builder.resolve_source, None);
+                builder.resolve_source = Some(surface_task_id);
+            }
+            None => unreachable!("bug: resolve source with no sub-graph"),
+        }
+    }
+
+    /// Make the surface described by `descriptor` the current target for
+    /// primitives.
+    ///
+    /// Stores `clipping_rect` on the surface at `surface_index`, pushes the
+    /// descriptor's dirty rects, creates the matching command buffer builder,
+    /// re-initializes the current command buffer targets from it and finally
+    /// pushes the builder on to the stack. Chained surfaces use a simple
+    /// builder that additionally carries the root task id.
+    pub fn push_surface(
+        &mut self,
+        surface_index: SurfaceIndex,
+        is_sub_graph: bool,
+        clipping_rect: PictureRect,
+        descriptor: SurfaceDescriptor,
+        surfaces: &mut [SurfaceInfo],
+        rg_builder: &RenderTaskGraphBuilder,
+    ) {
+        let SurfaceDescriptor { kind, dirty_rects } = descriptor;
+
+        // Init the surface
+        surfaces[surface_index.0].clipping_rect = clipping_rect;
+
+        self.dirty_rect_stack.push(dirty_rects);
+
+        let builder = match kind {
+            SurfaceDescriptorKind::Tiled { tiles } => CommandBufferBuilder::new_tiled(tiles),
+            SurfaceDescriptorKind::Simple { render_task_id } => {
+                CommandBufferBuilder::new_simple(render_task_id, is_sub_graph, None)
+            }
+            SurfaceDescriptorKind::Chained { render_task_id, root_task_id } => {
+                CommandBufferBuilder::new_simple(render_task_id, is_sub_graph, Some(root_task_id))
+            }
+        };
+
+        // The targets must be refreshed before the builder is moved on to the stack.
+        self.current_cmd_buffers.init(&builder, rg_builder);
+        self.builder_stack.push(builder);
+    }
+
+    /// Add a child render task (e.g. a render task cache item, or a clip mask) as a
+    /// dependency of the current surface. A tiled surface gets the dependency on each of
+    /// its tiles' current tasks; nothing is added for an invalid builder.
+    pub fn add_child_render_task(
+        &mut self,
+        child_task_id: RenderTaskId,
+        rg_builder: &mut RenderTaskGraphBuilder,
+    ) {
+        match self.builder_stack.last().unwrap().kind {
+            CommandBufferBuilderKind::Invalid { .. } => {}
+            CommandBufferBuilderKind::Simple { render_task_id, .. } => {
+                rg_builder.add_dependency(
+                    render_task_id,
+                    child_task_id,
+                );
+            }
+            CommandBufferBuilderKind::Tiled { ref tiles } => {
+                // Each tile has its own current task, so each one gets the dependency.
+                for tile in tiles.values() {
+                    rg_builder.add_dependency(
+                        tile.current_task_id,
+                        child_task_id,
+                    );
+                }
+            }
+        }
+    }
+
+    /// Add a picture render task as a dependency of the current surface (the top of the
+    /// builder stack). The task id is only queued in `extra_dependencies` here; the actual
+    /// dependencies are added in `pop_surface`, as the root of the surface may change
+    /// when inside a sub-graph. It's currently only needed for drop-shadow effects.
+    pub fn add_picture_render_task(
+        &mut self,
+        child_task_id: RenderTaskId,
+    ) {
+        // Deferred: the queued ids are turned into dependencies when the surface is popped.
+        let current_builder = self.builder_stack.last_mut().unwrap();
+
+        current_builder.extra_dependencies.push(child_task_id);
+    }
+
+    /// Returns true if the given primitive is visible and also intersects the dirty
+    /// region of the current surface (the top entry of the dirty rect stack).
+    /// Panics if the visibility state of the primitive is still unset.
+    pub fn is_prim_visible_and_in_dirty_region(
+        &self,
+        vis: &PrimitiveVisibility,
+    ) -> bool {
+        match vis.state {
+            // Pass-through prims are always accepted, without a dirty rect test.
+            VisibilityState::PassThrough => true,
+            // Culled prims are never accepted.
+            VisibilityState::Culled => false,
+            VisibilityState::Visible { .. } => {
+                // Accept the prim if its picture-space coverage touches any dirty rect.
+                let coverage = &vis.clip_chain.pic_coverage_rect;
+                let dirty_rects = self.dirty_rect_stack.last().unwrap();
+
+                dirty_rects
+                    .iter()
+                    .any(|rect| rect.intersects(coverage))
+            }
+            // Reaching this point with an unset state is treated as a bug.
+            VisibilityState::Unset => {
+                panic!("bug: invalid vis state");
+            }
+        }
+    }
+
+    /// Push a primitive to the current cmd buffer target(s). Only visible prims are
+    /// recorded; culled and pass-through prims are ignored, and an unset state panics.
+    pub fn push_prim(
+        &mut self,
+        prim_cmd: &PrimitiveCommand,
+        spatial_node_index: SpatialNodeIndex,
+        vis: &PrimitiveVisibility,
+        cmd_buffers: &mut CommandBufferList,
+    ) {
+        let (tile_rect, sub_slice_index) = match vis.state {
+            VisibilityState::Visible { tile_rect, sub_slice_index, .. } => {
+                (tile_rect, sub_slice_index)
+            }
+            VisibilityState::PassThrough | VisibilityState::Culled => return,
+            VisibilityState::Unset => panic!("bug: invalid vis state"),
+        };
+        self.current_cmd_buffers.push_prim(
+            prim_cmd,
+            spatial_node_index,
+            tile_rect,
+            sub_slice_index,
+            cmd_buffers,
+        )
+    }
+
+    /// Finish adding primitives and child tasks to a surface and pop it off the stack, setting up the render task dependencies of the popped surface
+    pub fn pop_surface(
+        &mut self,
+        pic_index: PictureIndex,
+        rg_builder: &mut RenderTaskGraphBuilder,
+        cmd_buffers: &mut CommandBufferList,
+    ) {
+        self.dirty_rect_stack.pop().unwrap(); // pairs with the push in push_surface
+
+        let builder = self.builder_stack.pop().unwrap();
+
+        if builder.establishes_sub_graph {
+            // If we are popping a sub-graph off the stack the dependency setup is rather more complex...
+            match builder.kind {
+                CommandBufferBuilderKind::Tiled { .. } | CommandBufferBuilderKind::Invalid => {
+                    unreachable!("bug: sub-graphs can only be simple surfaces");
+                }
+                CommandBufferBuilderKind::Simple { render_task_id: child_render_task_id, root_task_id: child_root_task_id } => {
+                    // Get info about the resolve operation to copy from parent surface or tiles to the picture cache task
+                    if let Some(resolve_task_id) = builder.resolve_source {
+                        let mut src_task_ids = Vec::new();
+
+                        // Make the output of the sub-graph a dependency of the new replacement tile task
+                        let _old = self.sub_graph_output_map.insert(
+                            pic_index,
+                            child_root_task_id.unwrap_or(child_render_task_id),
+                        );
+                        debug_assert!(_old.is_none());
+
+                        // Set up dependencies for the sub-graph. The basic concepts below are the same, but for
+                        // tiled surfaces are a little more complex as there are multiple tasks to set up.
+                        //  (a) Set up new task(s) on parent surface that write to the same location
+                        //  (b) Set up a resolve target to copy from parent surface task(s) to the resolve target
+                        //  (c) Make the old parent surface tasks input dependencies of the resolve target
+                        //  (d) Make the sub-graph output an input dependency of the new task(s).
+
+                        match self.builder_stack.last_mut().unwrap().kind {
+                            CommandBufferBuilderKind::Tiled { ref mut tiles } => {
+                                let keys: Vec<TileKey> = tiles.keys().cloned().collect(); // snapshot the keys, since `tiles` is modified inside the loop
+
+                                // For each tile in parent surface
+                                for key in keys {
+                                    let descriptor = tiles.remove(&key).unwrap();
+                                    let parent_task_id = descriptor.current_task_id;
+                                    let parent_task = rg_builder.get_task_mut(parent_task_id);
+
+                                    match parent_task.location {
+                                        RenderTaskLocation::Unallocated { .. } | RenderTaskLocation::Existing { .. } => {
+                                            // Get info about the parent tile task location and params
+                                            let location = RenderTaskLocation::Existing {
+                                                parent_task_id,
+                                                size: parent_task.location.size(),
+                                            };
+
+                                            let pic_task = match parent_task.kind {
+                                                RenderTaskKind::Picture(ref mut pic_task) => {
+                                                    let cmd_buffer_index = cmd_buffers.create_cmd_buffer();
+                                                    let new_pic_task = pic_task.duplicate(cmd_buffer_index);
+
+                                                    // Add the resolve src to copy from tile -> picture input task
+                                                    src_task_ids.push(parent_task_id);
+
+                                                    new_pic_task
+                                                }
+                                                _ => panic!("bug: not a picture"),
+                                            };
+
+                                            // Make the existing tile an input dependency of the resolve target
+                                            rg_builder.add_dependency(
+                                                resolve_task_id,
+                                                parent_task_id,
+                                            );
+
+                                            // Create the new task to replace the tile task
+                                            let new_task_id = rg_builder.add().init(
+                                                RenderTask::new(
+                                                    location,          // draw to same place
+                                                    RenderTaskKind::Picture(pic_task),
+                                                ),
+                                            );
+
+                                            // Ensure that the parent task will get scheduled earlier during
+                                            // pass assignment since we are reusing the existing surface,
+                                            // even though it's not technically needed for rendering order.
+                                            rg_builder.add_dependency(
+                                                new_task_id,
+                                                parent_task_id,
+                                            );
+
+                                            // Update the surface builder with the now current target for future primitives
+                                            tiles.insert(
+                                                key,
+                                                SurfaceTileDescriptor {
+                                                    current_task_id: new_task_id,
+                                                    ..descriptor
+                                                },
+                                            );
+                                        }
+                                        RenderTaskLocation::Static { .. } => {
+                                            // Tiles with a static location get no replacement task or resolve source; put the descriptor back unchanged
+                                            tiles.insert(
+                                                key,
+                                                descriptor,
+                                            );
+                                        }
+                                        _ => {
+                                            panic!("bug: unexpected task location");
+                                        }
+                                    }
+                                }
+                            }
+                            CommandBufferBuilderKind::Simple { render_task_id: ref mut parent_task_id, .. } => {
+                                let parent_task = rg_builder.get_task_mut(*parent_task_id);
+
+                                // Get info about the parent tile task location and params
+                                let location = RenderTaskLocation::Existing {
+                                    parent_task_id: *parent_task_id,
+                                    size: parent_task.location.size(),
+                                };
+                                let pic_task = match parent_task.kind {
+                                    RenderTaskKind::Picture(ref mut pic_task) => {
+                                        let cmd_buffer_index = cmd_buffers.create_cmd_buffer();
+
+                                        let new_pic_task = pic_task.duplicate(cmd_buffer_index);
+
+                                        // Add the resolve src to copy from tile -> picture input task
+                                        src_task_ids.push(*parent_task_id);
+
+                                        new_pic_task
+                                    }
+                                    _ => panic!("bug: not a picture"),
+                                };
+
+                                // Make the existing surface an input dependency of the resolve target
+                                rg_builder.add_dependency(
+                                    resolve_task_id,
+                                    *parent_task_id,
+                                );
+
+                                // Create the new task to replace the parent surface task
+                                let new_task_id = rg_builder.add().init(
+                                    RenderTask::new(
+                                        location,          // draw to same place
+                                        RenderTaskKind::Picture(pic_task),
+                                    ),
+                                );
+
+                                // Ensure that the parent task will get scheduled earlier during
+                                // pass assignment since we are reusing the existing surface,
+                                // even though it's not technically needed for rendering order.
+                                rg_builder.add_dependency(
+                                    new_task_id,
+                                    *parent_task_id,
+                                );
+
+                                // Update the surface builder with the now current target for future primitives
+                                *parent_task_id = new_task_id;
+                            }
+                            CommandBufferBuilderKind::Invalid => {
+                                unreachable!();
+                            }
+                        }
+
+                        let dest_task = rg_builder.get_task_mut(resolve_task_id);
+
+                        match dest_task.kind {
+                            RenderTaskKind::Picture(ref mut dest_task_info) => {
+                                assert!(dest_task_info.resolve_op.is_none());
+                                dest_task_info.resolve_op = Some(ResolveOp {
+                                    src_task_ids,
+                                    dest_task_id: resolve_task_id,
+                                })
+                            }
+                            _ => {
+                                unreachable!("bug: not a picture");
+                            }
+                        }
+                    }
+
+                    // This can occur if there is an edge case where the resolve target is found
+                    // not visible even though the filter chain was (for example, in the case of
+                    // an extreme scale causing floating point inaccuracies). Adding a dependency
+                    // here is also a safety in case for some reason the backdrop render primitive
+                    // doesn't pick up the dependency, ensuring that it gets scheduled and freed
+                    // as early as possible.
+                    match self.builder_stack.last().unwrap().kind {
+                        CommandBufferBuilderKind::Tiled { ref tiles } => {
+                            // For a tiled render task, add as a dependency to every tile.
+                            for (_, descriptor) in tiles {
+                                rg_builder.add_dependency(
+                                    descriptor.current_task_id,
+                                    child_root_task_id.unwrap_or(child_render_task_id),
+                                );
+                            }
+                        }
+                        CommandBufferBuilderKind::Simple { render_task_id: parent_task_id, .. } => {
+                            rg_builder.add_dependency(
+                                parent_task_id,
+                                child_root_task_id.unwrap_or(child_render_task_id),
+                            );
+                        }
+                        CommandBufferBuilderKind::Invalid => {
+                            unreachable!();
+                        }
+                    }
+                }
+            }
+        } else { // the popped surface does not establish a sub-graph
+            match builder.kind {
+                CommandBufferBuilderKind::Tiled { ref tiles } => {
+                    for (_, descriptor) in tiles {
+                        if let Some(composite_task_id) = descriptor.composite_task_id {
+                            rg_builder.add_dependency(
+                                composite_task_id,
+                                descriptor.current_task_id,
+                            );
+
+                            let composite_task = rg_builder.get_task_mut(composite_task_id);
+                            match composite_task.kind {
+                                RenderTaskKind::TileComposite(ref mut info) => {
+                                    info.task_id = Some(descriptor.current_task_id);
+                                }
+                                _ => unreachable!("bug: not a tile composite"),
+                            }
+                        }
+                    }
+                }
+                CommandBufferBuilderKind::Simple { render_task_id: child_task_id, root_task_id: child_root_task_id } => {
+                    match self.builder_stack.last().unwrap().kind {
+                        CommandBufferBuilderKind::Tiled { ref tiles } => {
+                            // For a tiled render task, add as a dependency to every tile.
+                            for (_, descriptor) in tiles {
+                                rg_builder.add_dependency(
+                                    descriptor.current_task_id,
+                                    child_root_task_id.unwrap_or(child_task_id),
+                                );
+                            }
+                        }
+                        CommandBufferBuilderKind::Simple { render_task_id: parent_task_id, .. } => {
+                            rg_builder.add_dependency(
+                                parent_task_id,
+                                child_root_task_id.unwrap_or(child_task_id),
+                            );
+                        }
+                        CommandBufferBuilderKind::Invalid => {
+                            unreachable!();
+                        }
+                    }
+                }
+                CommandBufferBuilderKind::Invalid => {
+                    unreachable!();
+                }
+            }
+        }
+
+        // Step through the dependencies for this builder and add them to the finalized
+        // render task root(s) for this surface
+        match builder.kind {
+            CommandBufferBuilderKind::Tiled { ref tiles } => {
+                for (_, descriptor) in tiles {
+                    for task_id in &builder.extra_dependencies {
+                        rg_builder.add_dependency(
+                            descriptor.current_task_id,
+                            *task_id,
+                        );
+                    }
+                }
+            }
+            CommandBufferBuilderKind::Simple { render_task_id, .. } => {
+                for task_id in &builder.extra_dependencies {
+                    rg_builder.add_dependency(
+                        render_task_id,
+                        *task_id,
+                    );
+                }
+            }
+            CommandBufferBuilderKind::Invalid { .. } => {}
+        }
+
+        // Retarget the cmd-buffers at the surface that is now on top of the stack, or at an empty builder if the stack is now empty
+        self.current_cmd_buffers.init(
+            self.builder_stack.last().unwrap_or(&CommandBufferBuilder::empty()), rg_builder
+        );
+    }
+
+    pub fn finalize(self) { // Consumes the builder; panics unless every pushed surface has been popped.
+        assert!(self.builder_stack.is_empty());
+    }
+}
diff --git a/gfx/wr/webrender/src/telemetry.rs b/gfx/wr/webrender/src/telemetry.rs
new file mode 100644
index 0000000000..6a9f4068c1
--- /dev/null
+++ b/gfx/wr/webrender/src/telemetry.rs
@@ -0,0 +1,41 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use glean::TimerId;
+#[cfg(feature = "gecko")]
+use fog::metrics::wr;
+use std::time::Duration;
+
+pub struct Telemetry; // Unit type; the telemetry hooks are associated functions on it.
+
+/// Defines the interface for hooking up an external telemetry reporter to WR. Without the `gecko` feature every hook is a no-op.
+#[cfg(not(feature = "gecko"))]
+impl Telemetry {
+    pub fn record_rasterize_blobs_time(_duration: Duration) { }
+    pub fn start_framebuild_time() -> TimerId { TimerId { id: 0 } } // placeholder id; nothing is measured
+    pub fn stop_and_accumulate_framebuild_time(_id: TimerId) { }
+    pub fn record_renderer_time(_duration: Duration) { }
+    pub fn record_renderer_time_no_sc(_duration: Duration) { }
+    pub fn record_scenebuild_time(_duration: Duration) { }
+    pub fn start_sceneswap_time() -> TimerId { TimerId { id: 0 } } // placeholder id; nothing is measured
+    pub fn stop_and_accumulate_sceneswap_time(_id: TimerId) { }
+    pub fn cancel_sceneswap_time(_id: TimerId) { }
+    pub fn record_texture_cache_update_time(_duration: Duration) { }
+    pub fn record_time_to_frame_build(_duration: Duration) { }
+}
+
+#[cfg(feature = "gecko")]
+impl Telemetry { // `gecko` build: each hook forwards to the metric of the same name in `fog::metrics::wr`
+    pub fn record_rasterize_blobs_time(duration: Duration) { wr::rasterize_blobs_time.accumulate_raw_duration(duration); }
+    pub fn start_framebuild_time() -> TimerId { wr::framebuild_time.start() }
+    pub fn stop_and_accumulate_framebuild_time(id: TimerId) { wr::framebuild_time.stop_and_accumulate(id); }
+    pub fn record_renderer_time(duration: Duration) { wr::renderer_time.accumulate_raw_duration(duration); }
+    pub fn record_renderer_time_no_sc(duration: Duration) { wr::renderer_time_no_sc.accumulate_raw_duration(duration); }
+    pub fn record_scenebuild_time(duration: Duration) { wr::scenebuild_time.accumulate_raw_duration(duration); }
+    pub fn start_sceneswap_time() -> TimerId { wr::sceneswap_time.start() }
+    pub fn stop_and_accumulate_sceneswap_time(id: TimerId) { wr::sceneswap_time.stop_and_accumulate(id); }
+    pub fn cancel_sceneswap_time(id: TimerId) { wr::sceneswap_time.cancel(id); }
+    pub fn record_texture_cache_update_time(duration: Duration) { wr::texture_cache_update_time.accumulate_raw_duration(duration); }
+    pub fn record_time_to_frame_build(duration: Duration) { wr::time_to_frame_build.accumulate_raw_duration(duration); }
+}
diff --git a/gfx/wr/webrender/src/texture_cache.rs b/gfx/wr/webrender/src/texture_cache.rs
new file mode 100644
index 0000000000..8650f12ecf
--- /dev/null
+++ b/gfx/wr/webrender/src/texture_cache.rs
@@ -0,0 +1,1707 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{DirtyRect, ExternalImageType, ImageFormat, ImageBufferKind};
+use api::{DebugFlags, ImageDescriptor};
+use api::units::*;
+#[cfg(test)]
+use api::{DocumentId, IdNamespace};
+use crate::device::{TextureFilter, TextureFormatPair};
+use crate::freelist::{FreeList, FreeListHandle, WeakFreeListHandle};
+use crate::gpu_cache::{GpuCache, GpuCacheHandle};
+use crate::gpu_types::{ImageSource, UvRectKind};
+use crate::internal_types::{
+    CacheTextureId, Swizzle, SwizzleSettings, FrameStamp, FrameId,
+    TextureUpdateList, TextureUpdateSource, TextureSource,
+    TextureCacheAllocInfo, TextureCacheUpdate, TextureCacheCategory,
+};
+use crate::lru_cache::LRUCache;
+use crate::profiler::{self, TransactionProfile};
+use crate::resource_cache::{CacheItem, CachedImageData};
+use crate::texture_pack::{
+    AllocatorList, AllocId, AtlasAllocatorList, ShelfAllocator, ShelfAllocatorOptions,
+};
+use std::cell::Cell;
+use std::mem;
+use std::rc::Rc;
+use euclid::size2;
+use malloc_size_of::{MallocSizeOf, MallocSizeOfOps};
+
+/// Information about which shader will use the entry.
+///
+/// For batching purposes, it's beneficial to group some items in their
+/// own textures if we know that they are used by a specific shader.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum TargetShader {
+    Default, // no specific shader; this is what CacheEntry::new_standalone assigns
+    Text, // presumably entries that are used by the text shader - TODO confirm at the use sites
+}
+
+/// The size of each region in shared cache texture arrays (presumably the side length in device pixels - TODO confirm).
+pub const TEXTURE_REGION_DIMENSIONS: i32 = 512;
+
+/// Items in the texture cache can either be standalone textures,
+/// or a sub-rect inside the shared cache.
+#[derive(Clone, Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum EntryDetails {
+    Standalone { // the item is a standalone texture
+        /// Number of bytes this entry allocates.
+        size_in_bytes: usize,
+    },
+    Cache { // the item is a sub-rect inside the shared cache
+        /// Origin within the texture layer where this item exists.
+        origin: DeviceIntPoint,
+        /// ID of the allocation specific to its allocator.
+        alloc_id: AllocId,
+        /// The allocated size in bytes for this entry.
+        allocated_size_in_bytes: usize,
+    },
+}
+
+impl EntryDetails {
+    /// Where this entry starts within its texture: the stored origin for an entry in
+    /// the shared cache, or the zero point for a standalone texture.
+    fn describe(&self) -> DeviceIntPoint {
+        if let EntryDetails::Cache { origin, .. } = *self { return origin; }
+        DeviceIntPoint::zero()
+    }
+}
+
+#[derive(Debug, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum AutoCacheEntryMarker {} // Variant-less tag type for free-list handles of entries with automatic eviction (see TextureCacheHandle::Auto).
+
+#[derive(Debug, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum ManualCacheEntryMarker {} // Variant-less tag type for free-list handles of entries with manual eviction (see TextureCacheHandle::Manual).
+
+/// Stores information related to a single entry in the texture
+/// cache. This is stored for each item whether it's in the shared
+/// cache or a standalone texture.
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct CacheEntry {
+    /// Size of the requested item, in device pixels. Does not include any
+    /// padding for alignment that the allocator may have added to this entry's
+    /// allocation.
+    pub size: DeviceIntSize,
+    /// Details specific to standalone or shared items.
+    pub details: EntryDetails,
+    /// Arbitrary user data associated with this item.
+    pub user_data: [f32; 4],
+    /// The last frame this item was requested for rendering.
+    // TODO(gw): This stamp is only used for picture cache tiles, and some checks
+    //           in the glyph cache eviction code. We could probably remove it
+    //           entirely in future (or move to PictureCacheEntry).
+    pub last_access: FrameStamp,
+    /// Handle to the resource rect in the GPU cache.
+    pub uv_rect_handle: GpuCacheHandle,
+    /// Image format of the data that the entry expects.
+    pub input_format: ImageFormat,
+    pub filter: TextureFilter, // texture filter for this entry; new_standalone copies it from the allocation params
+    pub swizzle: Swizzle, // presumably the channel swizzle applied when this entry is sampled - TODO confirm
+    /// The actual device texture ID this is part of.
+    pub texture_id: CacheTextureId,
+    /// Optional notice when the entry is evicted from the cache.
+    pub eviction_notice: Option<EvictionNotice>,
+    /// The type of UV rect this entry specifies.
+    pub uv_rect_kind: UvRectKind,
+
+    pub shader: TargetShader, // which shader will use the entry (see TargetShader)
+}
+
+malloc_size_of::malloc_size_of_is_0!( // presumably implements MallocSizeOf as a constant 0 for these types - confirm in wr_malloc_size_of
+    CacheEntry,
+    AutoCacheEntryMarker, ManualCacheEntryMarker
+);
+
+impl CacheEntry {
+    /// Build the entry describing a standalone texture of `size_in_bytes` bytes. Size,
+    /// format, filter, user data and UV kind come from `params`; the entry starts with a
+    /// fresh GPU cache handle, no eviction notice and the default target shader.
+    fn new_standalone(
+        texture_id: CacheTextureId,
+        last_access: FrameStamp,
+        params: &CacheAllocParams,
+        swizzle: Swizzle,
+        size_in_bytes: usize,
+    ) -> Self {
+        Self {
+            texture_id,
+            last_access,
+            swizzle,
+            size: params.descriptor.size,
+            input_format: params.descriptor.format,
+            filter: params.filter,
+            user_data: params.user_data,
+            uv_rect_kind: params.uv_rect_kind,
+            details: EntryDetails::Standalone { size_in_bytes },
+            uv_rect_handle: GpuCacheHandle::new(),
+            eviction_notice: None,
+            shader: TargetShader::Default,
+        }
+    }
+
+    /// Keep the GPU cache copy of this entry (UV rect, user data, UV rect kind) up to date for
+    /// vertex shaders; writes only when `gpu_cache.request` hands back a request for the handle.
+    fn update_gpu_cache(&mut self, gpu_cache: &mut GpuCache) {
+        if let Some(mut request) = gpu_cache.request(&mut self.uv_rect_handle) {
+            let rect_start = self.details.describe();
+            let rect_end = rect_start + self.size;
+            let image_source = ImageSource {
+                p0: rect_start.to_f32(),
+                p1: rect_end.to_f32(),
+                user_data: self.user_data,
+                uv_rect_kind: self.uv_rect_kind,
+            };
+            image_source.write_gpu_blocks(&mut request);
+        }
+    }
+
+    /// Signal the attached eviction notice, if there is one.
+    fn evict(&self) {
+        self.eviction_notice.iter().for_each(EvictionNotice::notify);
+    }
+
+    /// Maps an RGBA8 input format to BGRA8 and vice versa; every other format is
+    /// returned unchanged.
+    fn alternative_input_format(&self) -> ImageFormat {
+        match self.input_format {
+            ImageFormat::BGRA8 => ImageFormat::RGBA8,
+            ImageFormat::RGBA8 => ImageFormat::BGRA8,
+            unchanged => unchanged,
+        }
+    }
+}
+
+
+/// A texture cache handle is a weak reference to a cache entry.
+///
+/// If the handle has not been inserted into the cache yet, or if the entry was
+/// previously inserted and then evicted, lookup of the handle will fail, and
+/// the owner of the handle needs to re-upload this item to the texture cache
+/// (see request() below).
+
+#[derive(MallocSizeOf,Clone,PartialEq,Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum TextureCacheHandle {
+    /// A fresh handle that does not refer to any entry (see `invalid()`).
+    Empty,
+
+    /// A weak handle for an entry with automatic eviction.
+    Auto(WeakFreeListHandle<AutoCacheEntryMarker>),
+
+    /// A weak handle for an entry with manual eviction.
+    Manual(WeakFreeListHandle<ManualCacheEntryMarker>)
+}
+
+impl TextureCacheHandle {
+    pub fn invalid() -> Self { // a fresh handle that refers to no cache entry
+        Self::Empty
+    }
+}
+
+/// Describes the eviction policy for a given entry in the texture cache. Each policy has a matching `TextureCacheHandle` variant.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum Eviction {
+    /// The entry will be evicted under the normal rules (which differ between
+    /// standalone and shared entries).
+    Auto,
+    /// The entry will not be evicted until the policy is explicitly set to a
+    /// different value.
+    Manual,
+}
+
+/// An eviction notice is a shared condition useful for detecting
+/// when a TextureCacheHandle gets evicted from the TextureCache.
+/// It is optionally installed to the TextureCache when an update()
+/// is scheduled. A single notice may be shared among any number of
+/// TextureCacheHandle updates. The notice may then be subsequently
+/// checked to see if any of the updates using it have been evicted.
+#[derive(Clone, Debug, Default)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct EvictionNotice {
+    evicted: Rc<Cell<bool>>, // flag shared by all clones of the notice: set by notify(), read and cleared by check()
+}
+
+impl EvictionNotice {
+    /// Raises the shared flag; every clone of this notice observes it.
+    fn notify(&self) {
+        self.evicted.set(true);
+    }
+
+    /// Reports whether an eviction was flagged since the previous call.
+    ///
+    /// The flag is consumed by the check: it is reset to `false` before
+    /// returning, so a second call yields `false` until `notify` runs again.
+    pub fn check(&self) -> bool {
+        self.evicted.replace(false)
+    }
+}
+
+/// The different budget types for the texture cache. Each type has its own
+/// memory budget. Once the budget is exceeded, entries with automatic eviction
+/// are evicted. Entries with manual eviction share the same budget but are not
+/// evicted once it is exceeded. Separate budgets ensure that we don't evict
+/// entries from unrelated textures if one texture gets full. The discriminant
+/// is used as an array index (`as usize`), so keep `COUNT`/`VALUES` in sync.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+#[repr(u8)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+enum BudgetType {
+    SharedColor8Linear,
+    SharedColor8Nearest,
+    SharedColor8Glyphs,
+    SharedAlpha8,
+    SharedAlpha8Glyphs,
+    SharedAlpha16,
+    Standalone,
+}
+
+impl BudgetType {
+    /// Number of budget types; the length of every per-budget array.
+    pub const COUNT: usize = 7;
+
+    /// Every budget type, in declaration order.
+    pub const VALUES: [BudgetType; BudgetType::COUNT] = [
+        Self::SharedColor8Linear, Self::SharedColor8Nearest, Self::SharedColor8Glyphs,
+        Self::SharedAlpha8, Self::SharedAlpha8Glyphs, Self::SharedAlpha16,
+        Self::Standalone,
+    ];
+
+    /// Profiler counter for each budget type's pressure, in `VALUES` order.
+    pub const PRESSURE_COUNTERS: [usize; BudgetType::COUNT] = [
+        profiler::ATLAS_COLOR8_LINEAR_PRESSURE,
+        profiler::ATLAS_COLOR8_NEAREST_PRESSURE,
+        profiler::ATLAS_COLOR8_GLYPHS_PRESSURE,
+        profiler::ATLAS_ALPHA8_PRESSURE,
+        profiler::ATLAS_ALPHA8_GLYPHS_PRESSURE,
+        profiler::ATLAS_ALPHA16_PRESSURE,
+        profiler::ATLAS_STANDALONE_PRESSURE,
+    ];
+
+    /// Iterates over every budget type by value, in `VALUES` order.
+    pub fn iter() -> impl Iterator<Item = BudgetType> {
+        Self::VALUES.iter().copied()
+    }
+}
+
+/// A set of lazily allocated, fixed size, texture arrays for each format the
+/// texture cache supports, plus the byte size of one texture per budget type.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct SharedTextures {
+    color8_nearest: AllocatorList<ShelfAllocator, TextureParameters>,
+    alpha8_linear: AllocatorList<ShelfAllocator, TextureParameters>,
+    alpha8_glyphs: AllocatorList<ShelfAllocator, TextureParameters>,
+    alpha16_linear: AllocatorList<ShelfAllocator, TextureParameters>,
+    color8_linear: AllocatorList<ShelfAllocator, TextureParameters>,
+    color8_glyphs: AllocatorList<ShelfAllocator, TextureParameters>,
+    bytes_per_texture_of_type: [i32 ; BudgetType::COUNT],
+    next_compaction_idx: usize,
+}
+
+impl SharedTextures {
+    /// Builds the six shared atlas lists and records the byte size of one texture of each kind.
+    fn new(color_formats: TextureFormatPair<ImageFormat>, config: &TextureCacheConfig) -> Self {
+        let mut bytes_per_texture_of_type = [0 ; BudgetType::COUNT];
+
+        // Used primarily for cached shadow masks. There can be lots of
+        // these on some pages like francine, but most pages don't use it
+        // much.
+        // Most content tends to fit into two 512x512 textures. We are
+        // conservatively using 1024x1024 to fit everything in a single
+        // texture and avoid breaking batches, but it's worth checking
+        // whether it would actually lead to a lot of batch breaks in
+        // practice.
+        let alpha8_linear = AllocatorList::new(
+            config.alpha8_texture_size,
+            ShelfAllocatorOptions {
+                num_columns: 1,
+                alignment: size2(8, 8),
+                .. ShelfAllocatorOptions::default()
+            },
+            TextureParameters {
+                formats: TextureFormatPair::from(ImageFormat::R8),
+                filter: TextureFilter::Linear,
+            },
+        );
+        bytes_per_texture_of_type[BudgetType::SharedAlpha8 as usize] =
+            config.alpha8_texture_size * config.alpha8_texture_size;
+
+        // The cache for alpha glyphs (kept separate to help with batching).
+        let alpha8_glyphs = AllocatorList::new(
+            config.alpha8_glyph_texture_size,
+            ShelfAllocatorOptions {
+                num_columns: if config.alpha8_glyph_texture_size >= 1024 { 2 } else { 1 },
+                alignment: size2(4, 8),
+                .. ShelfAllocatorOptions::default()
+            },
+            TextureParameters {
+                formats: TextureFormatPair::from(ImageFormat::R8),
+                filter: TextureFilter::Linear,
+            },
+        );
+        bytes_per_texture_of_type[BudgetType::SharedAlpha8Glyphs as usize] =
+            config.alpha8_glyph_texture_size * config.alpha8_glyph_texture_size;
+
+        // Used for experimental HDR YUV texture support, but not used in
+        // production Firefox.
+        let alpha16_linear = AllocatorList::new(
+            config.alpha16_texture_size,
+            ShelfAllocatorOptions {
+                num_columns: if config.alpha16_texture_size >= 1024 { 2 } else { 1 },
+                alignment: size2(8, 8),
+                .. ShelfAllocatorOptions::default()
+            },
+            TextureParameters {
+                formats: TextureFormatPair::from(ImageFormat::R16),
+                filter: TextureFilter::Linear,
+            },
+        );
+        bytes_per_texture_of_type[BudgetType::SharedAlpha16 as usize] =
+            ImageFormat::R16.bytes_per_pixel() *
+            config.alpha16_texture_size * config.alpha16_texture_size;
+
+        // The primary cache for images, etc.
+        let color8_linear = AllocatorList::new(
+            config.color8_linear_texture_size,
+            ShelfAllocatorOptions {
+                num_columns: if config.color8_linear_texture_size >= 1024 { 2 } else { 1 },
+                alignment: size2(16, 16),
+                .. ShelfAllocatorOptions::default()
+            },
+            TextureParameters {
+                formats: color_formats.clone(),
+                filter: TextureFilter::Linear,
+            },
+        );
+        bytes_per_texture_of_type[BudgetType::SharedColor8Linear as usize] =
+            color_formats.internal.bytes_per_pixel() *
+            config.color8_linear_texture_size * config.color8_linear_texture_size;
+
+        // The cache for subpixel-AA and bitmap glyphs (kept separate to help with batching).
+        let color8_glyphs = AllocatorList::new(
+            config.color8_glyph_texture_size,
+            ShelfAllocatorOptions {
+                num_columns: if config.color8_glyph_texture_size >= 1024 { 2 } else { 1 },
+                alignment: size2(4, 8),
+                .. ShelfAllocatorOptions::default()
+            },
+            TextureParameters {
+                formats: color_formats.clone(),
+                filter: TextureFilter::Linear,
+            },
+        );
+        bytes_per_texture_of_type[BudgetType::SharedColor8Glyphs as usize] =
+            color_formats.internal.bytes_per_pixel() *
+            config.color8_glyph_texture_size * config.color8_glyph_texture_size;
+
+        // Used for image-rendering: crisp. This is mostly favicons, which
+        // are small. Some other images use it too, but those tend to be
+        // larger than 512x512 and thus don't use the shared cache anyway.
+        let color8_nearest = AllocatorList::new(
+            config.color8_nearest_texture_size,
+            ShelfAllocatorOptions::default(),
+            TextureParameters {
+                formats: color_formats.clone(),
+                filter: TextureFilter::Nearest,
+            }
+        );
+        bytes_per_texture_of_type[BudgetType::SharedColor8Nearest as usize] =
+            color_formats.internal.bytes_per_pixel() *
+            config.color8_nearest_texture_size * config.color8_nearest_texture_size;
+
+        Self {
+            alpha8_linear,
+            alpha8_glyphs,
+            alpha16_linear,
+            color8_linear,
+            color8_glyphs,
+            color8_nearest,
+            bytes_per_texture_of_type,
+            next_compaction_idx: 0,
+        }
+    }
+
+    /// Clears every shared atlas list, handing each one a callback that
+    /// queues a free command in `updates` for the texture id it is given.
+    fn clear(&mut self, updates: &mut TextureUpdateList) {
+        let mut queue_free = |texture_id| { updates.push_free(texture_id); };
+
+        let mut lists = [
+            &mut self.alpha8_linear, &mut self.alpha8_glyphs, &mut self.alpha16_linear,
+            &mut self.color8_linear, &mut self.color8_nearest, &mut self.color8_glyphs,
+        ];
+        for list in lists.iter_mut() {
+            list.clear(&mut queue_free);
+        }
+    }
+
+    /// Picks the shared atlas list, and the budget it is accounted under, for
+    /// an item with the given external format, filter and target shader.
+    ///
+    /// Panics on any format other than R8, R16, RGBA8 or BGRA8, on a non-linear
+    /// filter for R8/R16, and on a filter other than linear/nearest for color.
+    fn select(
+        &mut self, external_format: ImageFormat, filter: TextureFilter, shader: TargetShader,
+    ) -> (&mut dyn AtlasAllocatorList<TextureParameters>, BudgetType) {
+        match external_format {
+            ImageFormat::R8 => {
+                assert_eq!(filter, TextureFilter::Linear);
+                match shader {
+                    // Glyphs are kept apart from the other alpha items.
+                    TargetShader::Text => (&mut self.alpha8_glyphs, BudgetType::SharedAlpha8Glyphs),
+                    _ => (&mut self.alpha8_linear, BudgetType::SharedAlpha8),
+                }
+            }
+            ImageFormat::R16 => {
+                assert_eq!(filter, TextureFilter::Linear);
+                (&mut self.alpha16_linear, BudgetType::SharedAlpha16)
+            }
+            // Color items: the filter decides first, then the shader for linear filtering.
+            ImageFormat::RGBA8 |
+            ImageFormat::BGRA8 => match filter {
+                TextureFilter::Linear => match shader {
+                    TargetShader::Text => (&mut self.color8_glyphs, BudgetType::SharedColor8Glyphs),
+                    _ => (&mut self.color8_linear, BudgetType::SharedColor8Linear),
+                },
+                TextureFilter::Nearest => {
+                    (&mut self.color8_nearest, BudgetType::SharedColor8Nearest)
+                }
+                _ => panic!("Unexpected filter {:?}", filter),
+            },
+            _ => panic!("Unexpected format {:?}", external_format),
+        }
+    }
+
+    /// Size in bytes of a single texture of the given type, for the configured texture sizes.
+    fn bytes_per_shared_texture(&self, budget_type: BudgetType) -> usize {
+        let bytes: i32 = self.bytes_per_texture_of_type[budget_type as usize];
+        bytes as usize
+    }
+
+    fn has_multiple_textures(&self, budget_type: BudgetType) -> bool {
+        match budget_type {
+            BudgetType::SharedColor8Linear => self.color8_linear.allocated_textures() > 1,
+            BudgetType::SharedColor8Nearest => self.color8_nearest.allocated_textures() > 1,
+            BudgetType::SharedColor8Glyphs => self.color8_glyphs.allocated_textures() > 1,
+            BudgetType::SharedAlpha8 => self.alpha8_linear.allocated_textures() > 1,
+            BudgetType::SharedAlpha8Glyphs => self.alpha8_glyphs.allocated_textures() > 1,
+            BudgetType::SharedAlpha16 => self.alpha16_linear.allocated_textures() > 1,
+            BudgetType::Standalone => false, // this struct holds no list for standalone entries
+        }
+    }
+}
+
+/// Bundles the parameters that `TextureCache::update` hands to `allocate` for a (re)allocation.
+struct CacheAllocParams {
+    descriptor: ImageDescriptor,
+    filter: TextureFilter,
+    user_data: [f32; 4],
+    uv_rect_kind: UvRectKind,
+    shader: TargetShader,
+}
+
+/// Startup parameters for the texture cache: the side length, in pixels, of
+/// the (square) textures backing each kind of shared atlas.
+/// Texture sizes must be at least 512.
+#[derive(Clone)]
+pub struct TextureCacheConfig {
+    pub color8_linear_texture_size: i32,
+    pub color8_nearest_texture_size: i32,
+    pub color8_glyph_texture_size: i32,
+    pub alpha8_texture_size: i32,
+    pub alpha8_glyph_texture_size: i32,
+    pub alpha16_texture_size: i32,
+}
+
+impl TextureCacheConfig {
+    pub const DEFAULT: Self = Self { // default size of each shared atlas texture
+        color8_linear_texture_size: 2048,
+        color8_nearest_texture_size: 512,
+        color8_glyph_texture_size: 2048,
+        alpha8_texture_size: 1024,
+        alpha8_glyph_texture_size: 2048,
+        alpha16_texture_size: 512,
+    };
+}
+
+/// General-purpose manager for images in GPU memory. This includes images,
+/// rasterized glyphs, rasterized blobs, cached render tasks, etc.
+///
+/// The texture cache is owned and managed by the RenderBackend thread, and
+/// produces a series of commands to manipulate the textures on the Renderer
+/// thread. These commands are executed before any rendering is performed for
+/// a given frame.
+///
+/// Entries in the texture cache are not guaranteed to live past the end of the
+/// frame in which they are requested, and may be evicted. The API supports
+/// querying whether an entry is still available.
+///
+/// The TextureCache is different from the GpuCache in that the former stores
+/// images, whereas the latter stores data and parameters for use in the shaders.
+/// This means that the texture cache can be visualized, which is a good way to
+/// understand how it works. Enabling gfx.webrender.debug.texture-cache shows a
+/// live view of its contents in Firefox.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct TextureCache {
+    /// Set of texture arrays in different formats used for the shared cache.
+    shared_textures: SharedTextures,
+
+    /// Maximum texture size supported by hardware.
+    max_texture_size: i32,
+
+    /// Maximum texture size before it is considered preferable to break the
+    /// texture into tiles.
+    tiling_threshold: i32,
+
+    /// Settings on using texture unit swizzling.
+    swizzle: Option<SwizzleSettings>,
+
+    /// The current set of debug flags.
+    debug_flags: DebugFlags,
+
+    /// The next unused virtual texture ID. Monotonically increasing.
+    pub next_id: CacheTextureId,
+
+    /// A list of allocations and updates that need to be applied to the texture
+    /// cache in the rendering thread this frame.
+    #[cfg_attr(all(feature = "serde", any(feature = "capture", feature = "replay")), serde(skip))]
+    pub pending_updates: TextureUpdateList,
+
+    /// The current `FrameStamp`. Used for cache eviction policies.
+    now: FrameStamp,
+
+    /// Cache of texture cache handles with automatic lifetime management, evicted
+    /// in a least-recently-used order.
+    lru_cache: LRUCache<CacheEntry, AutoCacheEntryMarker>,
+
+    /// Cache of texture cache entries with manual lifetime management.
+    manual_entries: FreeList<CacheEntry, ManualCacheEntryMarker>,
+
+    /// Strong handles for the manual_entries FreeList.
+    manual_handles: Vec<FreeListHandle<ManualCacheEntryMarker>>,
+
+    /// Memory usage, in bytes, of allocated entries per `BudgetType` (shared or
+    /// standalone). Includes both manually and automatically evicted entries.
+    bytes_allocated: [usize ; BudgetType::COUNT],
+}
+
+impl TextureCache {
+    /// The maximum number of items that will be evicted per frame. This limit helps avoid jank
+    /// on frames where we want to evict a large number of items. Instead, we'd prefer to drop
+    /// the items incrementally over a number of frames, even if that means the total allocated
+    /// size of the cache is above the desired threshold for a small number of frames.
+    const MAX_EVICTIONS_PER_FRAME: usize = 32;
+
+    /// Creates an empty texture cache whose texture ids start at 1.
+    ///
+    /// Panics if the internal color format is BGRA8 while `swizzle` asks for a
+    /// non-default BGRA8 sampling swizzle. The frame stamp starts out invalid.
+    pub fn new(
+        max_texture_size: i32,
+        tiling_threshold: i32,
+        color_formats: TextureFormatPair<ImageFormat>,
+        swizzle: Option<SwizzleSettings>,
+        config: &TextureCacheConfig,
+    ) -> Self {
+        // Shared texture cache controls swizzling on a per-entry basis, assuming that
+        // the texture as a whole doesn't need to be swizzled (but only some entries do).
+        // It would be possible to support this, but not needed at the moment.
+        assert!(color_formats.internal != ImageFormat::BGRA8 ||
+            swizzle.map_or(true, |s| s.bgra8_sampling_swizzle == Swizzle::default())
+        );
+
+        Self {
+            shared_textures: SharedTextures::new(color_formats, config),
+            max_texture_size,
+            tiling_threshold,
+            swizzle,
+            debug_flags: DebugFlags::empty(),
+            next_id: CacheTextureId(1),
+            pending_updates: TextureUpdateList::new(),
+            now: FrameStamp::INVALID,
+            lru_cache: LRUCache::new(BudgetType::COUNT),
+            manual_entries: FreeList::new(),
+            manual_handles: Vec::new(),
+            bytes_allocated: [0 ; BudgetType::COUNT],
+        }
+    }
+
+    /// Builds a `TextureCache` that has already begun a frame with a valid
+    /// `FrameStamp`, so that unit tests can instantiate it directly without
+    /// running into panics. The tiling threshold equals `max_texture_size`.
+    #[cfg(test)]
+    pub fn new_for_testing(
+        max_texture_size: i32,
+        image_format: ImageFormat,
+    ) -> Self {
+        let mut texture_cache = Self::new(
+            max_texture_size,
+            max_texture_size,
+            TextureFormatPair::from(image_format),
+            None,
+            &TextureCacheConfig::DEFAULT,
+        );
+        let mut stamp = FrameStamp::first(DocumentId::new(IdNamespace(1), 1));
+        stamp.advance();
+        texture_cache.begin_frame(stamp, &mut TransactionProfile::new());
+        texture_cache
+    }
+
+    /// Replaces the set of debug flags stored in the cache; `run_compaction`
+    /// consults them to decide whether to queue debug clears.
+    pub fn set_debug_flags(&mut self, flags: DebugFlags) { self.debug_flags = flags; }
+
+    /// Clear all entries in the texture cache. This is a fairly drastic
+    /// step that should only be called very rarely.
+    ///
+    /// Manual entries go first, then the automatic (LRU) entries of every
+    /// budget type, and finally the shared textures themselves are cleared.
+    pub fn clear_all(&mut self) {
+        // Taking the strong handles leaves `manual_handles` empty.
+        for strong_handle in mem::take(&mut self.manual_handles) {
+            let evicted = self.manual_entries.free(strong_handle);
+            self.evict_impl(evicted);
+        }
+
+        // Pop every automatic entry, one budget type after the other.
+        for budget in BudgetType::iter() {
+            let partition = budget as u8;
+            while let Some(evicted) = self.lru_cache.pop_oldest(partition) {
+                evicted.evict();
+                self.free(&evicted);
+            }
+        }
+
+        // Clear the shared textures and record the clear in the update list.
+        self.shared_textures.clear(&mut self.pending_updates);
+        self.pending_updates.note_clear();
+    }
+
+    /// Called at the beginning of each frame: records `stamp` and then runs cache eviction.
+    pub fn begin_frame(&mut self, stamp: FrameStamp, profile: &mut TransactionProfile) {
+        debug_assert!(!self.now.is_valid());
+        profile_scope!("begin_frame");
+        self.now = stamp;
+
+        // Texture cache eviction is done at the start of the frame. This ensures that
+        // we won't evict items that have been requested on this frame.
+        // It also frees up space in the cache for items allocated later in the frame,
+        // potentially reducing texture allocations and fragmentation.
+        self.evict_items_from_cache_if_required(profile);
+    }
+
+    /// Called at the end of each frame: releases the shared textures that are
+    /// empty, reports the atlas counters to the profiler and invalidates the
+    /// current frame stamp.
+    pub fn end_frame(&mut self, profile: &mut TransactionProfile) {
+        debug_assert!(self.now.is_valid());
+
+        let updates = &mut self.pending_updates; // To avoid referring to self in the closure.
+        let mut queue_free = |texture_id| { updates.push_free(texture_id); };
+
+        // Release of empty shared textures is done at the end of the frame. That way, if the
+        // eviction at the start of the frame frees up a texture, that is then subsequently
+        // used during the frame, we avoid doing a free/alloc for it.
+        self.shared_textures.alpha8_linear.release_empty_textures(&mut queue_free);
+        self.shared_textures.alpha8_glyphs.release_empty_textures(&mut queue_free);
+        self.shared_textures.alpha16_linear.release_empty_textures(&mut queue_free);
+        self.shared_textures.color8_linear.release_empty_textures(&mut queue_free);
+        self.shared_textures.color8_nearest.release_empty_textures(&mut queue_free);
+        self.shared_textures.color8_glyphs.release_empty_textures(&mut queue_free);
+
+        for budget_type in BudgetType::iter() {
+            let threshold = self.get_eviction_threshold(budget_type) as f32;
+            let pressure = self.bytes_allocated[budget_type as usize] as f32 / threshold;
+            profile.set(BudgetType::PRESSURE_COUNTERS[budget_type as usize], pressure);
+        }
+
+        let shared = &self.shared_textures;
+        profile.set(profiler::ATLAS_A8_PIXELS, shared.alpha8_linear.allocated_space());
+        profile.set(profiler::ATLAS_A8_TEXTURES, shared.alpha8_linear.allocated_textures());
+        profile.set(profiler::ATLAS_A8_GLYPHS_PIXELS, shared.alpha8_glyphs.allocated_space());
+        profile.set(profiler::ATLAS_A8_GLYPHS_TEXTURES, shared.alpha8_glyphs.allocated_textures());
+        profile.set(profiler::ATLAS_A16_PIXELS, shared.alpha16_linear.allocated_space());
+        profile.set(profiler::ATLAS_A16_TEXTURES, shared.alpha16_linear.allocated_textures());
+        profile.set(profiler::ATLAS_RGBA8_LINEAR_PIXELS, shared.color8_linear.allocated_space());
+        profile.set(profiler::ATLAS_RGBA8_LINEAR_TEXTURES, shared.color8_linear.allocated_textures());
+        profile.set(profiler::ATLAS_RGBA8_NEAREST_PIXELS, shared.color8_nearest.allocated_space());
+        profile.set(profiler::ATLAS_RGBA8_NEAREST_TEXTURES, shared.color8_nearest.allocated_textures());
+        profile.set(profiler::ATLAS_RGBA8_GLYPHS_PIXELS, shared.color8_glyphs.allocated_space());
+        profile.set(profiler::ATLAS_RGBA8_GLYPHS_TEXTURES, shared.color8_glyphs.allocated_textures());
+
+        // Bytes allocated across all shared budgets; standalone entries are left out.
+        let shared_bytes = BudgetType::iter()
+            .filter(|budget_type| *budget_type != BudgetType::Standalone)
+            .map(|budget_type| self.bytes_allocated[budget_type as usize])
+            .sum();
+        profile.set(profiler::ATLAS_ITEMS_MEM, profiler::bytes_to_mb(shared_bytes));
+
+        self.now = FrameStamp::INVALID;
+    }
+
+    pub fn run_compaction(&mut self, gpu_cache: &mut GpuCache) {
+        // Use the same order as BudgetType::VALUES so that `idx` below can also index
+        // self.bytes_allocated. Standalone has no allocator list and is left out.
+        let allocator_lists = [
+            &mut self.shared_textures.color8_linear,
+            &mut self.shared_textures.color8_nearest,
+            &mut self.shared_textures.color8_glyphs,
+            &mut self.shared_textures.alpha8_linear,
+            &mut self.shared_textures.alpha8_glyphs,
+            &mut self.shared_textures.alpha16_linear,
+        ];
+
+        // Pick a texture type on which to try to run the compaction logic this frame.
+        let idx = self.shared_textures.next_compaction_idx;
+
+        // Number of moved pixels after which we stop attempting to move more items for this frame.
+        // The constant is up for adjustment, the main goal is to avoid causing frame spikes on
+        // low end GPUs.
+        let area_threshold = 512*512; 
+
+        let mut changes = Vec::new();
+        allocator_lists[idx].try_compaction(area_threshold, &mut changes);
+
+        if changes.is_empty() {
+            // Nothing was moved, so we'll try the next texture type on the next call.
+            self.shared_textures.next_compaction_idx = (self.shared_textures.next_compaction_idx + 1) % allocator_lists.len();
+        }
+
+        for change in changes {
+            let bpp = allocator_lists[idx].texture_parameters().formats.internal.bytes_per_pixel();
+
+            // While the area of the image does not change, the area it occupies in the texture
+            // atlas may (in other words the number of wasted pixels can change), so we have
+            // to keep track of that.
+            let old_bytes = (change.old_rect.area() * bpp) as usize;
+            let new_bytes = (change.new_rect.area() * bpp) as usize;
+            self.bytes_allocated[idx] -= old_bytes;
+            self.bytes_allocated[idx] += new_bytes;
+
+            let entry = match change.handle {
+                TextureCacheHandle::Auto(handle) => self.lru_cache.get_opt_mut(&handle).unwrap(),
+                TextureCacheHandle::Manual(handle) => self.manual_entries.get_opt_mut(&handle).unwrap(),
+                TextureCacheHandle::Empty => { panic!("invalid handle"); }
+            };
+            entry.texture_id = change.new_tex;
+            entry.details = EntryDetails::Cache {
+                origin: change.new_rect.min,
+                alloc_id: change.new_id,
+                allocated_size_in_bytes: new_bytes,
+            };
+
+            gpu_cache.invalidate(&entry.uv_rect_handle);
+            entry.uv_rect_handle = GpuCacheHandle::new();
+
+            let src_rect = DeviceIntRect::from_origin_and_size(change.old_rect.min, entry.size);
+            let dst_rect = DeviceIntRect::from_origin_and_size(change.new_rect.min, entry.size);
+
+            self.pending_updates.push_copy(change.old_tex, &src_rect, change.new_tex, &dst_rect);
+
+            if self.debug_flags.contains(
+                DebugFlags::TEXTURE_CACHE_DBG |
+                DebugFlags::TEXTURE_CACHE_DBG_CLEAR_EVICTED)
+            {
+                self.pending_updates.push_debug_clear(
+                    change.old_tex,
+                    src_rect.min,
+                    src_rect.width(),
+                    src_rect.height(),
+                );
+            }
+        }
+    }
+
+    /// Requests an item in the texture cache. Every image that will
+    /// be used on a frame *must* have request() called on its
+    /// handle, to update the last used timestamp and ensure
+    /// that resources are not flushed from the cache too early.
+    ///
+    /// Returns true if the image needs to be uploaded to the
+    /// texture cache (either never uploaded, or has been
+    /// evicted on a previous frame).
+    pub fn request(&mut self, handle: &TextureCacheHandle, gpu_cache: &mut GpuCache) -> bool {
+        let frame_stamp = self.now;
+        let cached = match *handle {
+            // Call touch rather than get_opt_mut so that the LRU index
+            // knows that the entry has been used.
+            TextureCacheHandle::Auto(ref auto) => self.lru_cache.touch(auto),
+            TextureCacheHandle::Manual(ref manual) => self.manual_entries.get_opt_mut(manual),
+            TextureCacheHandle::Empty => None,
+        };
+        match cached {
+            Some(entry) => {
+                // Already in the cache: record the access and refresh the
+                // GPU cache data associated with this item.
+                entry.last_access = frame_stamp;
+                entry.update_gpu_cache(gpu_cache);
+                false
+            }
+            // No entry behind the handle, so the caller has to upload.
+            None => true,
+        }
+    }
+
+    fn get_entry_opt(&self, handle: &TextureCacheHandle) -> Option<&CacheEntry> {
+        match *handle {
+            TextureCacheHandle::Auto(ref auto) => self.lru_cache.get_opt(auto),
+            TextureCacheHandle::Manual(ref manual) => self.manual_entries.get_opt(manual),
+            TextureCacheHandle::Empty => None, // a fresh handle has no entry
+        }
+    }
+
+    fn get_entry_opt_mut(&mut self, handle: &TextureCacheHandle) -> Option<&mut CacheEntry> {
+        match *handle {
+            TextureCacheHandle::Auto(ref auto) => self.lru_cache.get_opt_mut(auto),
+            TextureCacheHandle::Manual(ref manual) => self.manual_entries.get_opt_mut(manual),
+            TextureCacheHandle::Empty => None, // a fresh handle has no entry
+        }
+    }
+
+    /// Returns true when `handle` has no entry in the texture cache,
+    /// i.e. the image was never uploaded or has since been evicted,
+    /// and therefore needs to be uploaded.
+    pub fn needs_upload(&self, handle: &TextureCacheHandle) -> bool {
+        self.get_entry_opt(handle).is_none()
+    }
+
+    /// Maximum texture size supported by hardware, as passed to `new` when
+    /// the cache was created.
+    pub fn max_texture_size(&self) -> i32 { self.max_texture_size }
+
+    /// Maximum texture size before it is considered preferable to break the
+    /// texture into tiles, as passed to `new` when the cache was created.
+    pub fn tiling_threshold(&self) -> i32 { self.tiling_threshold }
+
+    #[cfg(feature = "replay")]
+    pub fn color_formats(&self) -> TextureFormatPair<ImageFormat> { // color atlas formats
+        self.shared_textures.color8_linear.texture_parameters().formats.clone()
+    }
+
+    /// The texture unit swizzling settings this cache was created with, or
+    /// `None` if none were given.
+    #[cfg(feature = "replay")]
+    pub fn swizzle_settings(&self) -> Option<SwizzleSettings> { self.swizzle }
+
+    pub fn pending_updates(&mut self) -> TextureUpdateList { // takes the queued updates
+        mem::replace(&mut self.pending_updates, TextureUpdateList::new())
+    }
+
+    /// Update the data stored by a given texture cache handle, (re)allocating its entry if needed.
+    pub fn update(
+        &mut self,
+        handle: &mut TextureCacheHandle,
+        descriptor: ImageDescriptor,
+        filter: TextureFilter,
+        data: Option<CachedImageData>,
+        user_data: [f32; 4],
+        mut dirty_rect: ImageDirtyRect,
+        gpu_cache: &mut GpuCache,
+        eviction_notice: Option<&EvictionNotice>,
+        uv_rect_kind: UvRectKind,
+        eviction: Eviction,
+        shader: TargetShader,
+    ) {
+        debug_assert!(self.now.is_valid());
+        // Determine if we need to allocate texture cache memory
+        // for this item. We need to reallocate if any of the following
+        // is true:
+        // - Never been in the cache
+        // - Has been in the cache but was evicted.
+        // - Exists in the cache but dimensions / format have changed.
+        let realloc = match self.get_entry_opt(handle) {
+            Some(entry) => {
+                entry.size != descriptor.size || (entry.input_format != descriptor.format &&
+                    entry.alternative_input_format() != descriptor.format)
+            }
+            None => {
+                // Not allocated, or was previously allocated but has been evicted.
+                true
+            }
+        };
+
+        if realloc {
+            let params = CacheAllocParams { descriptor, filter, user_data, uv_rect_kind, shader };
+            self.allocate(&params, handle, eviction);
+
+            // If we reallocated, we need to upload the whole item again.
+            dirty_rect = DirtyRect::All;
+        }
+
+        let entry = self.get_entry_opt_mut(handle)
+            .expect("BUG: There must be an entry at this handle now");
+
+        // Install the new eviction notice for this update, if applicable.
+        // Passing `None` removes any notice installed by an earlier update.
+        entry.eviction_notice = eviction_notice.cloned();
+        entry.uv_rect_kind = uv_rect_kind;
+
+        // Invalidate the contents of the resource rect in the GPU cache.
+        // This ensures that the update_gpu_cache below will add
+        // the new information to the GPU cache.
+        //TODO: only invalidate if the parameters change?
+        gpu_cache.invalidate(&entry.uv_rect_handle);
+
+        // Upload the resource rect and texture array layer.
+        entry.update_gpu_cache(gpu_cache);
+
+        // Create an update command, which the render thread processes
+        // to upload the new image data into the correct location
+        // in GPU memory. No command is queued when `data` is `None`.
+        if let Some(data) = data {
+            // If the swizzling is supported, we always upload in the internal
+            // texture format (thus avoiding the conversion by the driver).
+            // Otherwise, pass the external format to the driver.
+            let origin = entry.details.describe();
+            let texture_id = entry.texture_id;
+            let size = entry.size;
+            let use_upload_format = self.swizzle.is_none();
+            let op = TextureCacheUpdate::new_update(
+                data,
+                &descriptor,
+                origin,
+                size,
+                use_upload_format,
+                &dirty_rect,
+            );
+            self.pending_updates.push_update(texture_id, op);
+        }
+    }
+
+    /// Returns true if the given texture handle currently resolves to an
+    /// entry in the texture cache (i.e. `get_entry_opt` finds one for it).
+    pub fn is_allocated(&self, handle: &TextureCacheHandle) -> bool {
+        self.get_entry_opt(handle).is_some()
+    }
+
+    /// Returns the size in bytes of the data behind the texture handle, computed from
+    /// the entry's input format and size, or `None` if the handle has no entry.
+    pub fn get_allocated_size(&self, handle: &TextureCacheHandle) -> Option<usize> {
+        let entry = self.get_entry_opt(handle)?;
+        let byte_count = entry.input_format.bytes_per_pixel() * entry.size.area();
+        Some(byte_count as usize)
+    }
+
+    /// Retrieves the details of an item in the cache. Batch creation uses
+    /// this to hand the resource rect address to the shaders and the
+    /// texture ID to the batching logic.
+    /// In debug builds this asserts if the caller asks for a handle that
+    /// was not requested this frame.
+    pub fn get(&self, handle: &TextureCacheHandle) -> CacheItem {
+        let location = self.get_cache_location(handle);
+        let (cache_texture, uv_rect, swizzle, uv_rect_handle, user_data) = location;
+        let texture_id = TextureSource::TextureCache(cache_texture, swizzle);
+
+        CacheItem {
+            uv_rect_handle,
+            texture_id,
+            uv_rect,
+            user_data,
+        }
+    }
+
+    /// A more detailed version of get(). This allows access to the actual
+    /// device rect of the cache allocation.
+    ///
+    /// Returns a tuple of the texture id, the device rect of the entry, its
+    /// swizzle, its GPU cache handle and the user data stored with the entry.
+    pub fn get_cache_location(
+        &self,
+        handle: &TextureCacheHandle,
+    ) -> (CacheTextureId, DeviceIntRect, Swizzle, GpuCacheHandle, [f32; 4]) {
+        let entry = self
+            .get_entry_opt(handle)
+            .expect("BUG: was dropped from cache or not updated!");
+        debug_assert_eq!(entry.last_access, self.now); // Debug builds only: the entry's last access must equal the current `now` stamp.
+        let origin = entry.details.describe(); // Point used as the origin of the returned rect.
+        (
+            entry.texture_id,
+            DeviceIntRect::from_origin_and_size(origin, entry.size),
+            entry.swizzle,
+            entry.uv_rect_handle,
+            entry.user_data,
+        )
+    }
+
+    /// Internal helper to evict an owned cache entry: calls `evict()` on it, then `free`s its storage.
+    fn evict_impl(
+        &mut self,
+        entry: CacheEntry,
+    ) {
+        entry.evict();
+        self.free(&entry);
+    }
+
+    /// Evict the entry behind a texture cache handle. Manual handles are looked up in
+    /// `manual_handles`, auto handles in the LRU cache; any other handle is ignored.
+    pub fn evict_handle(&mut self, handle: &TextureCacheHandle) {
+        match handle {
+            TextureCacheHandle::Manual(handle) => {
+                // Find the strong handle that matches this weak handle. If this
+                // ever shows up in profiles, we can make it a hash (but the number
+                // of manual eviction handles is typically small).
+                // Alternatively, we could make a more forgiving FreeList variant
+                // which does not differentiate between strong and weak handles.
+                let index = self.manual_handles.iter().position(|strong_handle| {
+                    strong_handle.matches(handle)
+                });
+                if let Some(index) = index { // No matching strong handle: nothing is evicted.
+                    let handle = self.manual_handles.swap_remove(index);
+                    let entry = self.manual_entries.free(handle);
+                    self.evict_impl(entry);
+                }
+            }
+            TextureCacheHandle::Auto(handle) => {
+                if let Some(entry) = self.lru_cache.remove(handle) { // Only evict if the LRU cache still holds the entry.
+                    self.evict_impl(entry);
+                }
+            }
+            _ => {} // Remaining handle kinds (e.g. `Empty`) have nothing to evict.
+        }
+    }
+
+    pub fn dump_color8_linear_as_svg(&self, output: &mut dyn std::io::Write) -> std::io::Result<()> {
+        self.shared_textures.color8_linear.dump_as_svg(output) // Delegates to the `color8_linear` shared texture list; its result is returned unchanged.
+    }
+
+    pub fn dump_color8_glyphs_as_svg(&self, output: &mut dyn std::io::Write) -> std::io::Result<()> {
+        self.shared_textures.color8_glyphs.dump_as_svg(output) // Delegates to the `color8_glyphs` shared texture list; its result is returned unchanged.
+    }
+
+    pub fn dump_alpha8_glyphs_as_svg(&self, output: &mut dyn std::io::Write) -> std::io::Result<()> {
+        self.shared_textures.alpha8_glyphs.dump_as_svg(output) // Delegates to the `alpha8_glyphs` shared texture list; its result is returned unchanged.
+    }
+
+    pub fn dump_alpha8_linear_as_svg(&self, output: &mut dyn std::io::Write) -> std::io::Result<()> {
+        self.shared_textures.alpha8_linear.dump_as_svg(output) // Delegates to the `alpha8_linear` shared texture list; its result is returned unchanged.
+    }
+
+    /// Get the eviction threshold, in bytes, for the given budget type: a fixed value for standalone textures, a fraction of one shared texture otherwise.
+    fn get_eviction_threshold(&self, budget_type: BudgetType) -> usize {
+        if budget_type == BudgetType::Standalone {
+            // For standalone textures, the only reason to evict textures is
+            // to save GPU memory. Batching / draw call concerns do not apply
+            // to standalone textures, because unused textures don't cause
+            // extra draw calls.
+            return 8 * 1024 * 1024; // 8 MiB
+        }
+
+        // For shared textures, evicting an entry only frees up GPU memory if it
+        // causes one of the shared textures to become empty, so we want to avoid
+        // getting slightly above the capacity of a texture.
+        // The other concern for shared textures is batching: The entries that
+        // are needed in the current frame should be distributed across as few
+        // shared textures as possible, to minimize the number of draw calls.
+        // Ideally we only want one texture per type under simple workloads.
+
+        let bytes_per_texture = self.shared_textures.bytes_per_shared_texture(budget_type);
+
+        // Number of allocated bytes under which we don't bother with evicting anything
+        // from the cache. Above the threshold we consider evicting the coldest items
+        // depending on how cold they are.
+        //
+        // Above all else we want to make sure that even after a heavy workload, the
+        // shared cache settles back to a single texture atlas per type over some reasonable
+        // period of time.
+        // This is achieved by the compaction logic which will try to consolidate items that
+        // are spread over multiple textures into few ones, and by evicting old items
+        // so that the compaction logic has room to do its job.
+        //
+        // The other goal is to leave enough empty space in the texture atlases
+        // so that we are not too likely to have to allocate a new texture atlas on
+        // the next frame if we switch to a new tab or load a new page. That's why
+        // the following thresholds are rather low. Note that even when above the threshold,
+        // we only evict cold items and ramp up the eviction pressure depending on the amount
+        // of allocated memory (See should_continue_evicting).
+        let ideal_utilization = match budget_type {
+            BudgetType::SharedAlpha8Glyphs | BudgetType::SharedColor8Glyphs => {
+                // Glyphs are usually small and tightly packed so they waste very little
+                // space in the cache.
+                bytes_per_texture * 2 / 3
+            }
+            _ => {
+                // Other types of images come with a variety of sizes making them more
+                // prone to wasting pixels and causing fragmentation issues so we put
+                // more pressure on them.
+                bytes_per_texture / 3
+            }
+        };
+
+        ideal_utilization // Two thirds (glyph budgets) or one third (all other shared budgets) of one shared texture.
+    }
+
+    /// Decides whether eviction should go on, and how cold an item has to be to get evicted.
+    ///
+    /// `None` means eviction should stop.
+    /// `Some(n)` means eviction may continue, provided the coldest item hasn't been used
+    /// for more than n frames.
+    fn should_continue_evicting(
+        &self,
+        budget_type: BudgetType,
+        eviction_count: usize,
+    ) -> Option<usize> {
+
+        let threshold = self.get_eviction_threshold(budget_type);
+        let bytes_allocated = self.bytes_allocated[budget_type as usize];
+
+        let spread_over_atlases = self.shared_textures.has_multiple_textures(budget_type);
+
+        // Below the threshold there is nothing to do, unless the shared atlases of this
+        // budget type are spread over more than one texture. That is uncommon, but it can
+        // happen because of fragmentation, and evicting further is the only way to get
+        // rid of that fragmentation.
+        if !spread_over_atlases && bytes_allocated < threshold {
+            return None;
+        }
+
+        // Number of frames since last use below which an item is still too recent to be
+        // evicted; the higher the cache pressure, the lower the bar.
+        let age_threshold = match bytes_allocated / threshold {
+            0 => 400,
+            1 => 200,
+            2 => 100,
+            3 => 50,
+            4 => 25,
+            5 => 10,
+            6 => 5,
+            _ => 1,
+        };
+
+        // Well above the threshold: keep evicting this frame, however many items are already gone.
+        let far_over_budget = bytes_allocated > 4 * threshold;
+
+        // Otherwise evictions are capped per frame, which spreads them over a number of
+        // frames and avoids frame spikes.
+        let under_frame_cap = eviction_count < Self::MAX_EVICTIONS_PER_FRAME;
+
+        if far_over_budget || under_frame_cap {
+            Some(age_threshold)
+        } else {
+            None
+        }
+    }
+
+
+    /// Evict old items from the LRU cache, one budget type at a time, for as long as
+    /// `should_continue_evicting` allows it. Eviction statistics are recorded in `profile`.
+    fn evict_items_from_cache_if_required(&mut self, profile: &mut TransactionProfile) {
+        let previous_frame_id = self.now.frame_id() - 1;
+        let mut eviction_count = 0; // Shared by all budget types, so the per-frame cap applies to the total.
+        let mut youngest_evicted = FrameId::first(); // Most recent last-access frame among the evicted entries.
+
+        for budget in BudgetType::iter() {
+            while let Some(age_threshold) = self.should_continue_evicting(
+                budget,
+                eviction_count,
+            ) {
+                if let Some(entry) = self.lru_cache.peek_oldest(budget as u8) {
+                    // Only evict this item if it is cold enough: its last access must be at least
+                    // `age_threshold` frames older than the previous frame. Recently used items are likely
+                    // to be needed again soon, and we don't want to keep evicting and reuploading them.
+                    if entry.last_access.frame_id() + age_threshold > previous_frame_id {
+                        // Since the LRU cache is ordered by frame access, we can break out of the loop here because
+                        // we know that all remaining items were used at least as recently as this one.
+                        break;
+                    }
+                    if entry.last_access.frame_id() > youngest_evicted {
+                        youngest_evicted = entry.last_access.frame_id();
+                    }
+                    let entry = self.lru_cache.pop_oldest(budget as u8).unwrap(); // Cannot fail: peek_oldest just returned an entry.
+                    entry.evict();
+                    self.free(&entry);
+                    eviction_count += 1;
+                } else {
+                    // The LRU cache is empty, all remaining items use manual
+                    // eviction. In this case, there's nothing we can do until
+                    // the calling code manually evicts items to reduce the
+                    // allocated cache size.
+                    break;
+                }
+            }
+        }
+
+        if eviction_count > 0 {
+            profile.set(profiler::TEXTURE_CACHE_EVICTION_COUNT, eviction_count);
+            profile.set(
+                profiler::TEXTURE_CACHE_YOUNGEST_EVICTION,
+                self.now.frame_id().as_usize() - youngest_evicted.as_usize() // Frames since the youngest evicted entry was last accessed.
+            );
+        }
+    }
+
+    /// Free the storage of a cache entry (standalone texture or shared cache allocation) and update the byte accounting.
+    fn free(&mut self, entry: &CacheEntry) {
+        match entry.details {
+            EntryDetails::Standalone { size_in_bytes, .. } => {
+                self.bytes_allocated[BudgetType::Standalone as usize] -= size_in_bytes;
+
+                // This is a standalone texture allocation. Free it directly.
+                self.pending_updates.push_free(entry.texture_id);
+            }
+            EntryDetails::Cache { origin, alloc_id, allocated_size_in_bytes } => {
+                let (allocator_list, budget_type) = self.shared_textures.select( // Same selection inputs as used at allocation time.
+                    entry.input_format,
+                    entry.filter,
+                    entry.shader,
+                );
+
+                allocator_list.deallocate(entry.texture_id, alloc_id);
+
+                self.bytes_allocated[budget_type as usize] -= allocated_size_in_bytes;
+
+                if self.debug_flags.contains( // NOTE(review): assuming bitflags semantics, this requires BOTH flags to be set - confirm intended.
+                    DebugFlags::TEXTURE_CACHE_DBG |
+                    DebugFlags::TEXTURE_CACHE_DBG_CLEAR_EVICTED)
+                {
+                    self.pending_updates.push_debug_clear( // Queues a debug clear of the area the entry occupied.
+                        entry.texture_id,
+                        origin,
+                        entry.size.width,
+                        entry.size.height,
+                    );
+                }
+            }
+        }
+    }
+
+    /// Allocate a block from the shared cache. Returns the new entry and the budget type it is accounted under.
+    fn allocate_from_shared_cache(
+        &mut self,
+        params: &CacheAllocParams,
+    ) -> (CacheEntry, BudgetType) {
+        let (allocator_list, budget_type) = self.shared_textures.select(
+            params.descriptor.format,
+            params.filter,
+            params.shader,
+        );
+
+        // To avoid referring to self in the closure.
+        let next_id = &mut self.next_id;
+        let pending_updates = &mut self.pending_updates;
+
+        let (texture_id, alloc_id, allocated_rect) = allocator_list.allocate(
+            params.descriptor.size,
+            &mut |size, parameters| { // Texture-creation callback; presumably invoked when the list needs a new atlas texture - confirm.
+                let texture_id = *next_id;
+                next_id.0 += 1;
+                pending_updates.push_alloc(
+                    texture_id,
+                    TextureCacheAllocInfo {
+                        target: ImageBufferKind::Texture2D,
+                        width: size.width,
+                        height: size.height,
+                        format: parameters.formats.internal,
+                        filter: parameters.filter,
+                        is_shared_cache: true,
+                        has_depth: false,
+                        category: TextureCacheCategory::Atlas,
+                    },
+                );
+
+                texture_id
+            },
+        );
+
+        let formats = &allocator_list.texture_parameters().formats;
+
+        let swizzle = if formats.external == params.descriptor.format { // Data already in the list's external format: no swizzle.
+            Swizzle::default()
+        } else {
+            match self.swizzle { // Formats differ: use a BGRA swizzle only when swizzle settings are present.
+                Some(_) => Swizzle::Bgra,
+                None => Swizzle::default(),
+            }
+        };
+
+        let bpp = formats.internal.bytes_per_pixel(); // Accounting uses the internal format and the allocated rect, not the requested size.
+        let allocated_size_in_bytes = (allocated_rect.area() * bpp) as usize;
+        self.bytes_allocated[budget_type as usize] += allocated_size_in_bytes;
+
+        (CacheEntry {
+            size: params.descriptor.size,
+            user_data: params.user_data,
+            last_access: self.now,
+            details: EntryDetails::Cache {
+                origin: allocated_rect.min,
+                alloc_id,
+                allocated_size_in_bytes, // Stored so that `free` can subtract exactly what was added above.
+            },
+            uv_rect_handle: GpuCacheHandle::new(),
+            input_format: params.descriptor.format,
+            filter: params.filter,
+            swizzle,
+            texture_id,
+            eviction_notice: None,
+            uv_rect_kind: params.uv_rect_kind,
+            shader: params.shader
+        }, budget_type)
+    }
+
+    /// Returns true if the given image descriptor *may* be
+    /// placed in the shared texture cache.
+    pub fn is_allowed_in_shared_cache(
+        &self,
+        filter: TextureFilter,
+        descriptor: &ImageDescriptor,
+    ) -> bool {
+        let mut allowed_in_shared_cache = true;
+
+        if matches!(descriptor.format, ImageFormat::RGBA8 | ImageFormat::BGRA8)
+            && filter == TextureFilter::Linear
+        {
+            // Allow the maximum that can fit in the linear color texture's two column layout.
+            let max = self.shared_textures.color8_linear.size() / 2;
+            allowed_in_shared_cache = descriptor.size.width.max(descriptor.size.height) <= max;
+        } else if descriptor.size.width > TEXTURE_REGION_DIMENSIONS {
+            allowed_in_shared_cache = false;
+        }
+
+        if descriptor.size.height > TEXTURE_REGION_DIMENSIONS { // NOTE(review): applies to every format, so linear color images are also height-limited here - confirm intended.
+            allowed_in_shared_cache = false;
+        }
+
+        // TODO(gw): For now, alpha formats of the texture cache can only be linearly sampled.
+        //           Nearest sampling gets a standalone texture.
+        //           This is probably rare enough that it can be fixed up later.
+        if filter == TextureFilter::Nearest &&
+           descriptor.format.bytes_per_pixel() <= 2
+        {
+            allowed_in_shared_cache = false;
+        }
+
+        allowed_in_shared_cache
+    }
+
+    /// Reserves a new texture id and queues a pending update asking the renderer to allocate a render target for it.
+    pub fn alloc_render_target(
+        &mut self,
+        size: DeviceIntSize,
+        format: ImageFormat,
+    ) -> CacheTextureId {
+        let render_target_id = self.next_id;
+        self.next_id.0 += 1;
+        // Queue the command that allocates device storage of the right size / format.
+        self.pending_updates.push_alloc(
+            render_target_id,
+            TextureCacheAllocInfo {
+                target: ImageBufferKind::Texture2D,
+                width: size.width,
+                height: size.height,
+                format,
+                filter: TextureFilter::Linear,
+                is_shared_cache: false,
+                has_depth: false,
+                category: TextureCacheCategory::RenderTarget,
+            },
+        );
+
+        render_target_id
+    }
+
+    /// Free an existing render target by queueing a free command for its texture id in the pending updates.
+    pub fn free_render_target(
+        &mut self,
+        id: CacheTextureId,
+    ) {
+        self.pending_updates.push_free(id);
+    }
+
+    /// Allocates a new standalone cache entry: a dedicated texture sized to the descriptor. Returns it with `BudgetType::Standalone`.
+    fn allocate_standalone_entry(
+        &mut self,
+        params: &CacheAllocParams,
+    ) -> (CacheEntry, BudgetType) {
+        let texture_id = self.next_id;
+        self.next_id.0 += 1;
+
+        // Push a command to allocate device storage of the right size / format.
+        let info = TextureCacheAllocInfo {
+            target: ImageBufferKind::Texture2D,
+            width: params.descriptor.size.width,
+            height: params.descriptor.size.height,
+            format: params.descriptor.format,
+            filter: params.filter,
+            is_shared_cache: false,
+            has_depth: false,
+            category: TextureCacheCategory::Standalone,
+        };
+        // Widen every factor before multiplying, so large textures / wide formats cannot overflow the narrower integer type.
+        let size_in_bytes = info.width as usize * info.height as usize * info.format.bytes_per_pixel() as usize;
+        self.bytes_allocated[BudgetType::Standalone as usize] += size_in_bytes;
+
+        self.pending_updates.push_alloc(texture_id, info);
+
+        // Special handling for BGRA8 textures that may need to be swizzled.
+        let swizzle = if params.descriptor.format == ImageFormat::BGRA8 {
+            self.swizzle.map(|s| s.bgra8_sampling_swizzle)
+        } else {
+            None
+        };
+
+        (CacheEntry::new_standalone(
+            texture_id,
+            self.now,
+            params,
+            swizzle.unwrap_or_default(), // No swizzle settings, or not BGRA8: fall back to the default swizzle.
+            size_in_bytes, // Kept with the entry so that freeing it can subtract the same amount.
+        ), BudgetType::Standalone)
+    }
+
+    /// Allocates a cache entry for the given parameters, and updates the
+    /// provided handle to point to the new entry. Panics if `eviction` does not match the handle's kind.
+    fn allocate(
+        &mut self,
+        params: &CacheAllocParams,
+        handle: &mut TextureCacheHandle,
+        eviction: Eviction,
+    ) {
+        debug_assert!(self.now.is_valid());
+        assert!(!params.descriptor.size.is_empty());
+
+        // If this image doesn't qualify to go in the shared (batching) cache,
+        // allocate a standalone entry.
+        let use_shared_cache = self.is_allowed_in_shared_cache(params.filter, &params.descriptor);
+        let (new_cache_entry, budget_type) = if use_shared_cache {
+            self.allocate_from_shared_cache(params)
+        } else {
+            self.allocate_standalone_entry(params)
+        };
+
+        let details = new_cache_entry.details.clone(); // Copied out now: the entry itself is moved into a container below.
+        let texture_id = new_cache_entry.texture_id;
+
+        // If the handle points to a valid cache entry, we want to replace the
+        // cache entry with our newly updated location. We also need to ensure
+        // that the storage (region or standalone) associated with the previous
+        // entry here gets freed.
+        //
+        // If the handle is invalid, we need to insert the data, and append the
+        // result to the corresponding vector.
+        let old_entry = match (&mut *handle, eviction) {
+            (TextureCacheHandle::Auto(handle), Eviction::Auto) => {
+                self.lru_cache.replace_or_insert(handle, budget_type as u8, new_cache_entry)
+            },
+            (TextureCacheHandle::Manual(handle), Eviction::Manual) => {
+                let entry = self.manual_entries.get_opt_mut(handle)
+                    .expect("Don't call this after evicting");
+                Some(mem::replace(entry, new_cache_entry)) // Swap in the new entry and hand back the old one so it can be freed.
+            },
+            (TextureCacheHandle::Manual(_), Eviction::Auto) |
+            (TextureCacheHandle::Auto(_), Eviction::Manual) => {
+                panic!("Can't change eviction policy after initial allocation");
+            },
+            (TextureCacheHandle::Empty, Eviction::Auto) => {
+                let new_handle = self.lru_cache.push_new(budget_type as u8, new_cache_entry);
+                *handle = TextureCacheHandle::Auto(new_handle);
+                None
+            },
+            (TextureCacheHandle::Empty, Eviction::Manual) => {
+                let manual_handle = self.manual_entries.insert(new_cache_entry);
+                let new_handle = manual_handle.weak(); // The caller gets the weak handle; the strong one stays in `manual_handles`.
+                self.manual_handles.push(manual_handle);
+                *handle = TextureCacheHandle::Manual(new_handle);
+                None
+            },
+        };
+        if let Some(old_entry) = old_entry { // A previous entry was replaced: evict it and release its storage.
+            old_entry.evict();
+            self.free(&old_entry);
+        }
+
+        if let EntryDetails::Cache { alloc_id, .. } = details { // Shared-cache entries only: record the handle with the allocator list.
+            let allocator_list = self.shared_textures.select(
+                params.descriptor.format,
+                params.filter,
+                params.shader,
+            ).0;
+
+            allocator_list.set_handle(texture_id, alloc_id, handle);
+        }
+    }
+
+    pub fn shared_alpha_expected_format(&self) -> ImageFormat {
+        self.shared_textures.alpha8_linear.texture_parameters().formats.external // External format of the `alpha8_linear` shared texture list.
+    }
+
+    pub fn shared_color_expected_format(&self) -> ImageFormat {
+        self.shared_textures.color8_linear.texture_parameters().formats.external // External format of the `color8_linear` shared texture list.
+    }
+
+    /// Test-only helper: sums the `bytes_allocated` counters over every budget type.
+    #[cfg(test)]
+    pub fn total_allocated_bytes_for_testing(&self) -> usize {
+        BudgetType::iter().map(|b| self.bytes_allocated[b as usize]).sum()
+    }
+
+    pub fn report_memory(&self, ops: &mut MallocSizeOfOps) -> usize {
+        self.lru_cache.size_of(ops) // Only the LRU cache is measured here; manual entries are not included.
+    }
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct TextureParameters { // Format pair and filter describing a set of textures.
+    pub formats: TextureFormatPair<ImageFormat>, // The texture cache reads both its `internal` and `external` members.
+    pub filter: TextureFilter, // Filter passed along when a shared texture is allocated.
+}
+
+impl TextureCacheUpdate {
+    /// Constructs a TextureCacheUpdate operation to be passed to the
+    /// rendering thread in order to do an upload to the right
+    /// location in the texture cache. Panics on blob data and on external texture handles.
+    fn new_update(
+        data: CachedImageData,
+        descriptor: &ImageDescriptor,
+        origin: DeviceIntPoint,
+        size: DeviceIntSize,
+        use_upload_format: bool,
+        dirty_rect: &ImageDirtyRect,
+    ) -> TextureCacheUpdate {
+        let source = match data {
+            CachedImageData::Blob => {
+                panic!("The vector image should have been rasterized.");
+            }
+            CachedImageData::External(ext_image) => match ext_image.image_type {
+                ExternalImageType::TextureHandle(_) => {
+                    panic!("External texture handle should not go through texture_cache.");
+                }
+                ExternalImageType::Buffer => TextureUpdateSource::External {
+                    id: ext_image.id,
+                    channel_index: ext_image.channel_index,
+                },
+            },
+            CachedImageData::Raw(bytes) => {
+                let finish = descriptor.offset + // Byte index just past the last pixel of the last row.
+                    descriptor.size.width * descriptor.format.bytes_per_pixel() +
+                    (descriptor.size.height - 1) * descriptor.compute_stride();
+                assert!(bytes.len() >= finish as usize); // The buffer must be large enough for the described image.
+
+                TextureUpdateSource::Bytes { data: bytes }
+            }
+        };
+        let format_override = if use_upload_format { // When requested, upload using the descriptor's own format.
+            Some(descriptor.format)
+        } else {
+            None
+        };
+
+        match *dirty_rect {
+            DirtyRect::Partial(dirty) => {
+                // the dirty rectangle doesn't have to be within the area but has to intersect it, at least
+                let stride = descriptor.compute_stride();
+                let offset = descriptor.offset + dirty.min.y * stride + dirty.min.x * descriptor.format.bytes_per_pixel(); // Byte offset of the first dirty pixel.
+
+                TextureCacheUpdate {
+                    rect: DeviceIntRect::from_origin_and_size(
+                        DeviceIntPoint::new(origin.x + dirty.min.x, origin.y + dirty.min.y),
+                        DeviceIntSize::new(
+                            dirty.width().min(size.width - dirty.min.x), // Clamp so the rect does not extend past the entry.
+                            dirty.height().min(size.height - dirty.min.y),
+                        ),
+                    ),
+                    source,
+                    stride: Some(stride), // Always explicit: rows of the dirty rect are a full image stride apart.
+                    offset,
+                    format_override,
+                }
+            }
+            DirtyRect::All => {
+                TextureCacheUpdate {
+                    rect: DeviceIntRect::from_origin_and_size(origin, size),
+                    source,
+                    stride: descriptor.stride,
+                    offset: descriptor.offset,
+                    format_override,
+                }
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod test_texture_cache {
+    #[test]
+    fn check_allocation_size_balance() {
+        // Allocate some glyph-sized entries, observe the total allocation size, and
+        // evict them again. Check that the total allocation size is back at the
+        // original value.
+
+        use crate::texture_cache::{TextureCache, TextureCacheHandle, Eviction, TargetShader};
+        use crate::gpu_cache::GpuCache;
+        use crate::device::TextureFilter;
+        use crate::gpu_types::UvRectKind;
+        use api::{ImageDescriptor, ImageDescriptorFlags, ImageFormat, DirtyRect};
+        use api::units::*;
+        use euclid::size2;
+        let mut texture_cache = TextureCache::new_for_testing(2048, ImageFormat::BGRA8);
+        let mut gpu_cache = GpuCache::new_for_testing();
+
+        let sizes: &[DeviceIntSize] = &[ // A mix of small, irregular sizes.
+            size2(23, 27),
+            size2(15, 22),
+            size2(11, 5),
+            size2(20, 25),
+            size2(38, 41),
+            size2(11, 19),
+            size2(13, 21),
+            size2(37, 40),
+            size2(13, 15),
+            size2(14, 16),
+            size2(10, 9),
+            size2(25, 28),
+        ];
+
+        let bytes_at_start = texture_cache.total_allocated_bytes_for_testing();
+
+        let handles: Vec<TextureCacheHandle> = sizes.iter().map(|size| {
+            let mut texture_cache_handle = TextureCacheHandle::invalid();
+            texture_cache.request(&texture_cache_handle, &mut gpu_cache);
+            texture_cache.update( // No image data is passed: only the allocation bookkeeping is exercised.
+                &mut texture_cache_handle,
+                ImageDescriptor {
+                    size: *size,
+                    stride: None,
+                    format: ImageFormat::BGRA8,
+                    flags: ImageDescriptorFlags::empty(),
+                    offset: 0,
+                },
+                TextureFilter::Linear,
+                None,
+                [0.0; 4],
+                DirtyRect::All,
+                &mut gpu_cache,
+                None,
+                UvRectKind::Rect,
+                Eviction::Manual, // Manual eviction, so the entries can be released explicitly via evict_handle below.
+                TargetShader::Text,
+            );
+            texture_cache_handle
+        }).collect();
+
+        let bytes_after_allocating = texture_cache.total_allocated_bytes_for_testing();
+        assert!(bytes_after_allocating > bytes_at_start);
+
+        for handle in handles {
+            texture_cache.evict_handle(&handle);
+        }
+
+        let bytes_at_end = texture_cache.total_allocated_bytes_for_testing();
+        assert_eq!(bytes_at_end, bytes_at_start); // Every byte added on allocation must have been subtracted on eviction.
+    }
+}
diff --git a/gfx/wr/webrender/src/texture_pack/guillotine.rs b/gfx/wr/webrender/src/texture_pack/guillotine.rs
new file mode 100644
index 0000000000..68a08caf2f
--- /dev/null
+++ b/gfx/wr/webrender/src/texture_pack/guillotine.rs
@@ -0,0 +1,284 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::units::{DeviceIntPoint, DeviceIntRect, DeviceIntSize};
+
+//TODO: gather real-world statistics on the bin usage in order to assist the decision
+// on where to place the size thresholds.
+
+const NUM_BINS: usize = 3; // Length of `MIN_RECT_AXIS_SIZES` and of `GuillotineAllocator::bins`.
+/// The minimum number of pixels required on both axes for a rect to be classified into
+/// each bin of free lists (entry `i` is the threshold of bin `i`).
+const MIN_RECT_AXIS_SIZES: [i32; NUM_BINS] = [1, 16, 32];
+
+#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)]
+struct FreeListBin(u8); // Index of a size class: a slot in `MIN_RECT_AXIS_SIZES` and in `GuillotineAllocator::bins`.
+
+#[derive(Debug, Clone, Copy)]
+struct FreeListIndex(usize); // Position of a free rect inside one bin's `rects`/`sizes` vectors.
+
+impl FreeListBin {
+    /// Returns the highest bin whose minimum axis size fits within both the width and
+    /// the height of `size`; panics if no bin qualifies.
+    fn for_size(size: &DeviceIntSize) -> Self {
+        let fits = |min_size: &i32| *min_size <= size.width && *min_size <= size.height;
+        match MIN_RECT_AXIS_SIZES.iter().rposition(fits) {
+            Some(id) => FreeListBin(id as u8),
+            None => panic!("Unable to find a bin for {:?}!", size),
+        }
+    }
+}
+
+#[derive(Debug, Clone, Copy, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct FreeRectSlice(pub u32); // Identifies the slice that a free rect or an allocation belongs to.
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct FreeRect { // An unallocated rectangle, together with the slice it lives in.
+    slice: FreeRectSlice,
+    rect: DeviceIntRect,
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct FreeRectSize { // Compact copy of a free rect's dimensions, scanned when searching a bin.
+    width: i16, // Narrowed from the rect's width with an `as` cast in `GuillotineAllocator::push`.
+    height: i16, // Narrowed from the rect's height the same way.
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct Bin {
+    // Store sizes with fewer bits per item and in a separate array to speed up
+    // the search. `sizes[i]` always describes `rects[i]`.
+    sizes: Vec<FreeRectSize>,
+    rects: Vec<FreeRect>,
+}
+
+/// A texture allocator using the guillotine algorithm.
+///
+/// See sections 2.2 and 2.2.5 in "A Thousand Ways to Pack the Bin - A Practical Approach to Two-
+/// Dimensional Rectangle Bin Packing":
+///
+///    http://clb.demon.fi/files/RectangleBinPack.pdf
+///
+/// This approach was chosen because of its simplicity and good performance.
+///
+/// Note: the allocations are spread across multiple textures (slices), and the free rects are
+/// binned by size, independently of their slice, in order to speed up the search.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct GuillotineAllocator {
+    bins: [Bin; NUM_BINS], // One free list per size class, indexed by `FreeListBin`.
+}
+
+impl GuillotineAllocator {
+    pub fn new(initial_size: Option<DeviceIntSize>) -> Self { // `Some(size)`: slice 0 starts as one free rect of that size.
+        let mut allocator = GuillotineAllocator {
+            bins: [
+                Bin { rects: Vec::new(), sizes: Vec::new() },
+                Bin { rects: Vec::new(), sizes: Vec::new() },
+                Bin { rects: Vec::new(), sizes: Vec::new() },
+            ],
+        };
+
+        if let Some(initial_size) = initial_size {
+            allocator.push(
+                FreeRectSlice(0),
+                initial_size.into(),
+            );
+        }
+
+        allocator // With `None`, no free rect exists until `extend` adds a slice.
+    }
+
+    fn push(&mut self, slice: FreeRectSlice, rect: DeviceIntRect) { // Files `rect` as free space in the bin matching its size.
+        let id = FreeListBin::for_size(&rect.size()).0 as usize;
+        self.bins[id].rects.push(FreeRect {
+            slice,
+            rect,
+        });
+        self.bins[id].sizes.push(FreeRectSize { // Kept in lockstep with `rects`: same index, same rect.
+            width: rect.width() as i16, // NOTE(review): truncating cast; assumes the dimensions fit in an i16.
+            height: rect.height() as i16,
+        });
+    }
+
+    /// Find a suitable rect in the free lists: the first fit, scanning from the request's bin upwards.
+    fn find_index_of_best_rect(
+        &self,
+        requested_dimensions: &DeviceIntSize,
+    ) -> Option<(FreeListBin, FreeListIndex)> {
+        let start_bin = FreeListBin::for_size(requested_dimensions);
+
+        // Compare in i32: narrowing the request to i16 would wrap for large values and
+        // could make an oversized request appear to fit in a small free rect.
+        let (w, h) = (requested_dimensions.width, requested_dimensions.height);
+        (start_bin.0 .. NUM_BINS as u8)
+            .find_map(|id| {
+                self.bins[id as usize].sizes
+                    .iter()
+                    .position(|c| w <= i32::from(c.width) && h <= i32::from(c.height))
+                    .map(|index| (FreeListBin(id), FreeListIndex(index)))
+            })
+    }
+
+    // Carve the request out of `chosen`'s min corner; split so as to get the single largest area (Min Area Split Rule, MINAS).
+    fn split_guillotine(&mut self, chosen: &FreeRect, requested_dimensions: &DeviceIntSize) {
+        let candidate_free_rect_to_right = DeviceIntRect::from_origin_and_size( // Strip beside the request, as tall as the request.
+            DeviceIntPoint::new(
+                chosen.rect.min.x + requested_dimensions.width,
+                chosen.rect.min.y,
+            ),
+            DeviceIntSize::new(
+                chosen.rect.width() - requested_dimensions.width,
+                requested_dimensions.height,
+            ),
+        );
+        let candidate_free_rect_to_bottom = DeviceIntRect::from_origin_and_size( // Strip under the request, as wide as the request.
+            DeviceIntPoint::new(
+                chosen.rect.min.x,
+                chosen.rect.min.y + requested_dimensions.height,
+            ),
+            DeviceIntSize::new(
+                requested_dimensions.width,
+                chosen.rect.height() - requested_dimensions.height,
+            ),
+        );
+
+        // Guillotine the rectangle: the candidate with the larger area is extended to also cover the leftover corner.
+        let new_free_rect_to_right;
+        let new_free_rect_to_bottom;
+        if candidate_free_rect_to_right.area() > candidate_free_rect_to_bottom.area() {
+            new_free_rect_to_right = DeviceIntRect::from_origin_and_size(
+                candidate_free_rect_to_right.min,
+                DeviceIntSize::new(
+                    candidate_free_rect_to_right.width(),
+                    chosen.rect.height(), // Full height of the chosen rect.
+                ),
+            );
+            new_free_rect_to_bottom = candidate_free_rect_to_bottom
+        } else {
+            new_free_rect_to_right = candidate_free_rect_to_right;
+            new_free_rect_to_bottom = DeviceIntRect::from_origin_and_size(
+                candidate_free_rect_to_bottom.min,
+                DeviceIntSize::new(
+                    chosen.rect.width(), // Full width of the chosen rect.
+                    candidate_free_rect_to_bottom.height(),
+                ),
+            )
+        }
+
+        // Add the guillotined rects back to the free list, leaving out the empty ones.
+        if !new_free_rect_to_right.is_empty() {
+            self.push(chosen.slice, new_free_rect_to_right);
+        }
+        if !new_free_rect_to_bottom.is_empty() {
+            self.push(chosen.slice, new_free_rect_to_bottom);
+        }
+    }
+
+    pub fn allocate( // Returns the slice and origin of the allocated rect, or `None` if no free rect fits.
+        &mut self, requested_dimensions: &DeviceIntSize
+    ) -> Option<(FreeRectSlice, DeviceIntPoint)> {
+        let mut requested_dimensions = *requested_dimensions;
+        // Round up the size to a multiple of 8. This reduces the fragmentation
+        // of the atlas.
+        requested_dimensions.width = (requested_dimensions.width + 7) & !7;
+        requested_dimensions.height = (requested_dimensions.height + 7) & !7;
+
+        if requested_dimensions.width == 0 || requested_dimensions.height == 0 {
+            return Some((FreeRectSlice(0), DeviceIntPoint::new(0, 0))); // Empty request: dummy location, free lists untouched.
+        }
+
+        let (bin, index) = self.find_index_of_best_rect(&requested_dimensions)?; // `None` when nothing is large enough.
+
+        // Remove the rect from the free list and decide how to guillotine it.
+        let chosen = self.bins[bin.0 as usize].rects.swap_remove(index.0);
+        self.bins[bin.0 as usize].sizes.swap_remove(index.0); // Same index in both vectors keeps them in sync.
+        self.split_guillotine(&chosen, &requested_dimensions);
+
+        // Return the slice and the origin of the chosen rect.
+        Some((chosen.slice, chosen.rect.min))
+    }
+
+    /// Add a new slice to the allocator, and immediately allocate a rect from it.
+    pub fn extend(
+        &mut self,
+        slice: FreeRectSlice,
+        total_size: DeviceIntSize,
+        requested_dimensions: DeviceIntSize,
+    ) {
+        self.split_guillotine(
+            &FreeRect { slice, rect: total_size.into() }, // The whole slice, treated as a single free rect.
+            &requested_dimensions // NOTE(review): not rounded up to a multiple of 8, unlike in `allocate`.
+        );
+    }
+}
+
+#[cfg(test)]
+fn random_fill(count: usize, texture_size: i32) -> f32 { // Test helper: makes `count` random allocations and returns the fill ratio.
+    use rand::{thread_rng, Rng};
+
+    let total_rect = DeviceIntRect::from_size(
+        DeviceIntSize::new(texture_size, texture_size),
+    );
+    let mut rng = thread_rng();
+    let mut allocator = GuillotineAllocator::new(None);
+
+    // A request with a zero dimension must succeed with a dummy location, even without any slice.
+    assert_eq!(
+        allocator.allocate(&DeviceIntSize::new(0, 12)),
+        Some((FreeRectSlice(0), DeviceIntPoint::zero())),
+    );
+
+    let mut slices: Vec<Vec<DeviceIntRect>> = Vec::new(); // Per slice: every rect known so far (allocated, later also free).
+    let mut requested_area = 0f32;
+    // Fill up the allocator with random sizes, adding a slice whenever an allocation fails.
+    for _ in 0 .. count {
+        let size = DeviceIntSize::new(
+            rng.gen_range(1, texture_size),
+            rng.gen_range(1, texture_size),
+        );
+        requested_area += size.area() as f32;
+
+        match allocator.allocate(&size) {
+            Some((slice, origin)) => {
+                let rect = DeviceIntRect::from_origin_and_size(origin, size);
+                assert_eq!(None, slices[slice.0 as usize].iter().find(|r| r.intersects(&rect)));
+                assert!(total_rect.contains_box(&rect));
+                slices[slice.0 as usize].push(rect);
+            }
+            None => {
+                allocator.extend(FreeRectSlice(slices.len() as u32), total_rect.size(), size);
+                let rect = DeviceIntRect::from_size(size); // The first rect of a new slice is taken to sit at its origin.
+                slices.push(vec![rect]);
+            }
+        }
+    }
+    // Validate the free rects: each is in the right bin, inside the texture, and overlaps nothing else.
+    for (i, bin) in allocator.bins.iter().enumerate() {
+        for fr in &bin.rects {
+            assert_eq!(FreeListBin(i as u8), FreeListBin::for_size(&fr.rect.size()));
+            assert_eq!(None, slices[fr.slice.0 as usize].iter().find(|r| r.intersects(&fr.rect)));
+            assert!(total_rect.contains_box(&fr.rect));
+            slices[fr.slice.0 as usize].push(fr.rect);
+        }
+    }
+
+    let allocated_area = slices.len() as f32 * (texture_size * texture_size) as f32;
+    requested_area / allocated_area // Requested pixels divided by the pixels of all the slices created.
+}
+
+#[test]
+fn test_small() {
+    random_fill(100, 100); // 100 random allocations in 100x100 slices; only the internal asserts matter.
+}
+
+#[test]
+fn test_large() {
+    random_fill(1000, 10000); // 1000 random allocations in 10000x10000 slices; the returned ratio is ignored.
+}
diff --git a/gfx/wr/webrender/src/texture_pack/mod.rs b/gfx/wr/webrender/src/texture_pack/mod.rs
new file mode 100644
index 0000000000..f89a82b0a1
--- /dev/null
+++ b/gfx/wr/webrender/src/texture_pack/mod.rs
@@ -0,0 +1,441 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+mod guillotine;
+use crate::texture_cache::TextureCacheHandle;
+use crate::internal_types::FastHashMap;
+pub use guillotine::*;
+
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::units::*;
+use crate::internal_types::CacheTextureId;
+use euclid::{point2, size2, default::Box2D};
+use smallvec::SmallVec;
+
+pub use etagere::AllocatorOptions as ShelfAllocatorOptions;
+pub use etagere::BucketedAtlasAllocator as BucketedShelfAllocator;
+pub use etagere::AtlasAllocator as ShelfAllocator;
+
+/// ID of an allocation within a given allocator.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct AllocId(pub u32); // For the etagere-backed allocators this holds the serialized `etagere::AllocId`.
+
+pub trait AtlasAllocator {
+    /// Specific parameters of the allocator.
+    type Parameters;
+    /// Create an allocator for an atlas of the given size, using the provided parameters.
+    fn new(size: i32, parameters: &Self::Parameters) -> Self;
+    /// Allocate a rectangle, returning its ID and location, or `None` on failure.
+    fn allocate(&mut self, size: DeviceIntSize) -> Option<(AllocId, DeviceIntRect)>;
+    /// Deallocate a rectangle.
+    fn deallocate(&mut self, id: AllocId);
+    /// Return true if there are no live allocations.
+    fn is_empty(&self) -> bool;
+    /// Allocated area in pixels.
+    fn allocated_space(&self) -> i32;
+    /// Write a debug visualization of the atlas fitting in the provided rectangle.
+    ///
+    /// This is inserted in a larger dump so it shouldn't contain the xml start/end tags.
+    fn dump_into_svg(&self, rect: &Box2D<f32>, output: &mut dyn std::io::Write) -> std::io::Result<()>;
+}
+
+pub trait AtlasAllocatorList<TextureParameters> {
+    /// Allocate a rectangle.
+    ///
+    /// If allocation fails, call the provided callback, add a new allocator to the list and try again.
+    fn allocate(
+        &mut self,
+        size: DeviceIntSize,
+        texture_alloc_cb: &mut dyn FnMut(DeviceIntSize, &TextureParameters) -> CacheTextureId,
+    ) -> (CacheTextureId, AllocId, DeviceIntRect);
+
+    fn set_handle(&mut self, texture_id: CacheTextureId, alloc_id: AllocId, handle: &TextureCacheHandle); // Associate a handle with an allocation.
+
+    fn remove_handle(&mut self, texture_id: CacheTextureId, alloc_id: AllocId); // Forget the handle of an allocation.
+
+    /// Deallocate a rectangle.
+    fn deallocate(&mut self, texture_id: CacheTextureId, alloc_id: AllocId);
+
+    fn texture_parameters(&self) -> &TextureParameters; // The parameters handed to the texture allocation callback.
+}
+
+/// A single 2D texture (single layer), with its own atlas allocator and the handles of its items.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct TextureUnit<Allocator> {
+    allocator: Allocator,
+    handles: FastHashMap<AllocId, TextureCacheHandle>, // Texture cache handle registered for each allocation.
+    texture_id: CacheTextureId,
+    // The texture might become empty during a frame where we copy items out
+    // of it, in which case we want to postpone deleting the texture to the
+    // next frame.
+    delay_deallocation: bool,
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct AllocatorList<Allocator: AtlasAllocator, TextureParameters> { // A growable list of textures, each with its own atlas allocator.
+    units: SmallVec<[TextureUnit<Allocator>; 1]>,
+    size: i32, // Side length of every texture: they are requested as `size` x `size`.
+    atlas_parameters: Allocator::Parameters, // Passed to `Allocator::new` for each new texture.
+    texture_parameters: TextureParameters, // Passed to the texture allocation callback.
+}
+
+impl<Allocator: AtlasAllocator, TextureParameters> AllocatorList<Allocator, TextureParameters> {
+    pub fn new( // Creates an empty list; no texture is allocated at this point.
+        size: i32,
+        atlas_parameters: Allocator::Parameters,
+        texture_parameters: TextureParameters,
+    ) -> Self {
+        AllocatorList {
+            units: SmallVec::new(), // The first texture is created by the first call to `allocate`.
+            size,
+            atlas_parameters,
+            texture_parameters,
+        }
+    }
+
+    pub fn allocate( // Returns the texture, the allocation ID and the rectangle of the new allocation.
+        &mut self,
+        requested_size: DeviceIntSize,
+        texture_alloc_cb: &mut dyn FnMut(DeviceIntSize, &TextureParameters) -> CacheTextureId,
+    ) -> (CacheTextureId, AllocId, DeviceIntRect) {
+        // Try to allocate from one of the existing textures, in order; the first success wins.
+        for unit in &mut self.units {
+            if let Some((alloc_id, rect)) = unit.allocator.allocate(requested_size) {
+                return (unit.texture_id, alloc_id, rect);
+            }
+        }
+
+        // Need to create a new texture to hold the allocation.
+        let texture_id = texture_alloc_cb(size2(self.size, self.size), &self.texture_parameters);
+        let unit_index = self.units.len();
+
+        self.units.push(TextureUnit {
+            allocator: Allocator::new(self.size, &self.atlas_parameters),
+            handles: FastHashMap::default(),
+            texture_id,
+            delay_deallocation: false,
+        });
+
+        let (alloc_id, rect) = self.units[unit_index]
+            .allocator
+            .allocate(requested_size)
+            .unwrap(); // Panics if the request does not even fit in a brand new atlas.
+
+        (texture_id, alloc_id, rect)
+    }
+
+    pub fn deallocate(&mut self, texture_id: CacheTextureId, alloc_id: AllocId) { // Frees an allocation and forgets its handle.
+        let unit = self.units
+            .iter_mut()
+            .find(|unit| unit.texture_id == texture_id)
+            .expect("Unable to find the associated texture array unit"); // Panics on an unknown texture ID.
+
+        unit.handles.remove(&alloc_id);
+        unit.allocator.deallocate(alloc_id); // The texture itself stays until `release_empty_textures` or `clear`.
+    }
+
+    pub fn release_empty_textures<'l>(&mut self, texture_dealloc_cb: &'l mut dyn FnMut(CacheTextureId)) { // Drops the empty units.
+        self.units.retain(|unit| {
+            if unit.allocator.is_empty() && !unit.delay_deallocation {
+                texture_dealloc_cb(unit.texture_id); // Report the texture that is about to be removed from the list.
+
+                false
+            } else{
+                unit.delay_deallocation = false; // The delay lasts for one call only: the next call may release the unit.
+                true
+            }
+        });
+    }
+
+    /// Removes every texture unit, reporting each texture ID to `texture_dealloc_cb`.
+    pub fn clear(&mut self, texture_dealloc_cb: &mut dyn FnMut(CacheTextureId)) {
+        // Units are drained in order; the emptiness of their allocators is not checked.
+        self.units.drain(..).for_each(|unit| texture_dealloc_cb(unit.texture_id));
+    }
+
+    #[allow(dead_code)] // Debug visualization helper; presumably not always called from within the crate.
+    pub fn dump_as_svg(&self, output: &mut dyn std::io::Write) -> std::io::Result<()> {
+        use svg_fmt::*;
+
+        let num_arrays = self.units.len() as f32;
+
+        let text_spacing = 15.0;
+        let unit_spacing = 30.0;
+        let texture_size = self.size as f32 / 2.0; // Each texture is drawn at half of its size.
+
+        let svg_w = unit_spacing * 2.0 + texture_size;
+        let svg_h = unit_spacing + num_arrays * (texture_size + text_spacing + unit_spacing);
+
+        writeln!(output, "{}", BeginSvg { w: svg_w, h: svg_h })?;
+
+        // Background: a dark rectangle covering the whole drawing.
+        writeln!(output,
+            "    {}",
+            rectangle(0.0, 0.0, svg_w, svg_h)
+                .inflate(1.0, 1.0)
+                .fill(rgb(50, 50, 50))
+        )?;
+
+        let mut y = unit_spacing;
+        for unit in &self.units { // One label (the texture ID) and one atlas dump per unit, stacked vertically.
+            writeln!(output, "    {}", text(unit_spacing, y, format!("{:?}", unit.texture_id)).color(rgb(230, 230, 230)))?;
+
+            let rect = Box2D {
+                min: point2(unit_spacing, y),
+                max: point2(unit_spacing + texture_size, y + texture_size),
+            };
+
+            unit.allocator.dump_into_svg(&rect, output)?;
+
+            y += unit_spacing + texture_size + text_spacing;
+        }
+
+        writeln!(output, "{}", EndSvg)
+    }
+
+    /// Total area, in pixels, currently allocated across all the texture units.
+    pub fn allocated_space(&self) -> i32 {
+        // Add up what the allocator of each unit reports.
+        self.units
+            .iter()
+            .map(|unit| unit.allocator.allocated_space())
+            .sum()
+    }
+
+    pub fn allocated_textures(&self) -> usize { // Number of textures currently in the list, empty ones included.
+        self.units.len()
+    }
+
+    pub fn size(&self) -> i32 { self.size } // The size given to `new`, used for every texture of the list.
+}
+
+impl<Allocator: AtlasAllocator, TextureParameters> AtlasAllocatorList<TextureParameters> 
+for AllocatorList<Allocator, TextureParameters> {
+    fn allocate(
+        &mut self,
+        requested_size: DeviceIntSize,
+        texture_alloc_cb: &mut dyn FnMut(DeviceIntSize, &TextureParameters) -> CacheTextureId,
+    ) -> (CacheTextureId, AllocId, DeviceIntRect) {
+        self.allocate(requested_size, texture_alloc_cb) // Resolves to the inherent method, which takes precedence over this one.
+    }
+
+    fn set_handle(&mut self, texture_id: CacheTextureId, alloc_id: AllocId, handle: &TextureCacheHandle) {
+        let unit = self.units
+            .iter_mut()
+            .find(|unit| unit.texture_id == texture_id)
+            .expect("Unable to find the associated texture array unit"); // Panics on an unknown texture ID.
+        unit.handles.insert(alloc_id, handle.clone()); // Replaces any handle previously stored for this allocation.
+    }
+
+    fn remove_handle(&mut self, texture_id: CacheTextureId, alloc_id: AllocId) {
+        let unit = self.units
+            .iter_mut()
+            .find(|unit| unit.texture_id == texture_id)
+            .expect("Unable to find the associated texture array unit"); // Panics on an unknown texture ID.
+        unit.handles.remove(&alloc_id); // The allocation itself is left in place.
+    }
+
+    fn deallocate(&mut self, texture_id: CacheTextureId, alloc_id: AllocId) {
+        self.deallocate(texture_id, alloc_id); // Inherent method again: frees the allocation and drops its handle.
+    }
+
+    fn texture_parameters(&self) -> &TextureParameters {
+        &self.texture_parameters
+    }
+}
+
+impl AtlasAllocator for BucketedShelfAllocator { // Adapter: the `self.*` calls below presumably resolve to etagere's inherent methods.
+    type Parameters = ShelfAllocatorOptions;
+
+    fn new(size: i32, options: &Self::Parameters) -> Self {
+        BucketedShelfAllocator::with_options(size2(size, size), options) // Square atlas of `size` x `size`.
+    }
+
+    fn allocate(&mut self, size: DeviceIntSize) -> Option<(AllocId, DeviceIntRect)> {
+        self.allocate(size.to_untyped()).map(|alloc| {
+            (AllocId(alloc.id.serialize()), alloc.rectangle.cast_unit()) // Convert etagere's ID and rectangle to our types.
+        })
+    }
+
+    fn deallocate(&mut self, id: AllocId) {
+        self.deallocate(etagere::AllocId::deserialize(id.0)); // Inverse of the `serialize` call made in `allocate`.
+    }
+
+    fn is_empty(&self) -> bool {
+        self.is_empty()
+    }
+
+    fn allocated_space(&self) -> i32 {
+        self.allocated_space()
+    }
+
+    fn dump_into_svg(&self, rect: &Box2D<f32>, output: &mut dyn std::io::Write) -> std::io::Result<()> {
+        self.dump_into_svg(Some(&rect.to_i32().cast_unit()), output) // etagere takes an optional integer rectangle.
+    }
+}
+
+impl AtlasAllocator for ShelfAllocator { // Adapter: the `self.*` calls below presumably resolve to etagere's inherent methods.
+    type Parameters = ShelfAllocatorOptions;
+
+    fn new(size: i32, options: &Self::Parameters) -> Self {
+        ShelfAllocator::with_options(size2(size, size), options) // Square atlas of `size` x `size`.
+    }
+
+    fn allocate(&mut self, size: DeviceIntSize) -> Option<(AllocId, DeviceIntRect)> {
+        self.allocate(size.to_untyped()).map(|alloc| {
+            (AllocId(alloc.id.serialize()), alloc.rectangle.cast_unit()) // Convert etagere's ID and rectangle to our types.
+        })
+    }
+
+    fn deallocate(&mut self, id: AllocId) {
+        self.deallocate(etagere::AllocId::deserialize(id.0)); // Inverse of the `serialize` call made in `allocate`.
+    }
+
+    fn is_empty(&self) -> bool {
+        self.is_empty()
+    }
+
+    fn allocated_space(&self) -> i32 {
+        self.allocated_space()
+    }
+
+    fn dump_into_svg(&self, rect: &Box2D<f32>, output: &mut dyn std::io::Write) -> std::io::Result<()> {
+        self.dump_into_svg(Some(&rect.to_i32().cast_unit()), output) // etagere takes an optional integer rectangle.
+    }
+}
+
+pub struct CompactionChange { // One allocation moved by `try_compaction`, with its old and new locations.
+    pub handle: TextureCacheHandle, // Handle that was registered for the moved allocation.
+    pub old_id: AllocId,
+    pub old_tex: CacheTextureId,
+    pub old_rect: DeviceIntRect,
+    pub new_id: AllocId,
+    pub new_tex: CacheTextureId,
+    pub new_rect: DeviceIntRect,
+}
+
+impl<P> AllocatorList<ShelfAllocator, P> {
+    /// Attempt to move some allocations from the last texture to the first one to reduce the number of textures.
+    pub fn try_compaction(
+        &mut self,
+        max_pixels: i32, // Stop once the area moved by this call exceeds this many pixels.
+        changes: &mut Vec<CompactionChange>, // Receives one entry per moved allocation.
+    ) {
+        // The goal here is to consolidate items in the first texture by moving them from the last.
+
+        if self.units.len() < 2 {
+            // Nothing to do, we are already "compact".
+            return;
+        }
+
+        let last_unit = self.units.len() - 1;
+        let mut pixels = 0;
+        while let Some(alloc) = self.units[last_unit].allocator.iter().next() { // Always the current first item; it is removed below.
+            // For each allocation in the last texture, try to allocate it in the first one.
+            let new_alloc = match self.units[0].allocator.allocate(alloc.rectangle.size()) {
+                Some(new_alloc) => new_alloc,
+                None => {
+                    // Stop when we fail to fit an item into the first texture.
+                    // We could potentially fit another smaller item in there but we take it as
+                    // an indication that the texture is more or less full, and we'll eventually
+                    // manage to move the items later if they still exist as other items expire,
+                    // which is what matters.
+                    break;
+                }
+            };
+
+            // The item was successfully reallocated in the first texture, we can proceed
+            // with removing it from the last.
+
+            // We keep track of the texture cache handle for each allocation, make sure
+            // the new allocation has the proper handle.
+            let alloc_id = AllocId(alloc.id.serialize());
+            let new_alloc_id = AllocId(new_alloc.id.serialize());
+            let handle = self.units[last_unit].handles.get(&alloc_id).unwrap().clone(); // Panics if no handle was registered.
+            self.units[0].handles.insert(new_alloc_id, handle.clone());
+
+            // Remove the allocation from the last texture.
+            self.units[last_unit].handles.remove(&alloc_id);
+            self.units[last_unit].allocator.deallocate(alloc.id);
+
+            // Prevent the texture from being deleted on the same frame.
+            self.units[last_unit].delay_deallocation = true;
+
+            // Record the change so that the texture cache can do additional bookkeeping.
+            changes.push(CompactionChange {
+                handle,
+                old_id: AllocId(alloc.id.serialize()),
+                old_tex: self.units[last_unit].texture_id,
+                old_rect: alloc.rectangle.cast_unit(),
+                new_id: AllocId(new_alloc.id.serialize()),
+                new_tex: self.units[0].texture_id,
+                new_rect: new_alloc.rectangle.cast_unit(),
+            });
+
+            // We are not in a hurry to move all allocations we can in one go, as long as we
+            // eventually have a chance to move them all within a reasonable amount of time.
+            // It's best to spread the load over multiple frames to avoid sudden spikes, so we
+            // stop after we have passed a certain threshold.
+            pixels += alloc.rectangle.area();
+            if pixels > max_pixels {
+                break;
+            }
+        }
+    }
+
+}
+
+#[test]
+fn bug_1680769() {
+    let mut allocators: AllocatorList<ShelfAllocator, ()> = AllocatorList::new(
+        1024,
+        ShelfAllocatorOptions::default(),
+        (),
+    );
+
+    let mut allocations = Vec::new();
+    let mut next_id = CacheTextureId(0);
+    let alloc_cb = &mut |_: DeviceIntSize, _: &()| { // Fake texture allocation: hands out consecutive IDs.
+        let texture_id = next_id;
+        next_id.0 += 1;
+
+        texture_id
+    };
+
+    // Make some allocations, forcing the creation of multiple textures.
+    for _ in 0..50 {
+        let alloc = allocators.allocate(size2(256, 256), alloc_cb);
+        allocators.set_handle(alloc.0, alloc.1, &TextureCacheHandle::Empty);
+        allocations.push(alloc);
+    }
+
+    // Deallocate everything.
+    // It should empty all atlases and we still have textures allocated because
+    // we haven't called release_empty_textures yet.
+    for alloc in allocations.drain(..) {
+        allocators.deallocate(alloc.0, alloc.1);
+    }
+
+    // Allocate something else.
+    // Bug 1680769 was causing this allocation to be duplicated and leaked in
+    // all textures.
+    allocations.push(allocators.allocate(size2(8, 8), alloc_cb));
+
+    // Deallocate all known allocations.
+    for alloc in allocations.drain(..) {
+        allocators.deallocate(alloc.0, alloc.1);
+    }
+
+    // If we have leaked items, this won't manage to remove all textures.
+    allocators.release_empty_textures(&mut |_| {});
+
+    assert_eq!(allocators.allocated_textures(), 0); // Every texture was empty, so all of them must be gone.
+}
diff --git a/gfx/wr/webrender/src/tile_cache.rs b/gfx/wr/webrender/src/tile_cache.rs
new file mode 100644
index 0000000000..3b2600d0c0
--- /dev/null
+++ b/gfx/wr/webrender/src/tile_cache.rs
@@ -0,0 +1,745 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{ColorF, PrimitiveFlags, QualitySettings, RasterSpace, ClipId};
+use api::units::*;
+use crate::clip::{ClipNodeKind, ClipLeafId, ClipNodeId, ClipTreeBuilder};
+use crate::frame_builder::FrameBuilderConfig;
+use crate::internal_types::{FastHashMap};
+use crate::picture::{PrimitiveList, PictureCompositeMode, PicturePrimitive, SliceId};
+use crate::picture::{Picture3DContext, TileCacheParams, TileOffset, PictureFlags};
+use crate::prim_store::{PrimitiveInstance, PrimitiveStore, PictureIndex};
+use crate::scene_building::SliceFlags;
+use crate::scene_builder_thread::Interners;
+use crate::spatial_tree::{SpatialNodeIndex, SceneSpatialTree};
+use crate::util::VecHelper;
+use std::mem;
+
+/*
+ Types and functionality related to picture caching. In future, we'll
+ move more and more of the existing functionality out of picture.rs
+ and into here.
+ */
+
+// If the page would create too many slices (an arbitrary definition where
+// it's assumed the GPU memory + compositing overhead would be too high)
+// then create a single picture cache for the remaining content. This at
+// least means that we can cache small content changes efficiently when
+// scrolling isn't occurring. Scrolling regions will be handled reasonably
+// efficiently by the dirty rect tracking (since it's likely that if the
+// page has so many slices there isn't a single major scroll region).
+const MAX_CACHE_SLICES: usize = 12; // checked per primary slice in `PrimarySlice::has_too_many_slices`
+
+struct SliceDescriptor { // One secondary slice: a primitive list plus its scroll root and shared clip root
+    prim_list: PrimitiveList, // primitives that end up in this slice's tile cache picture
+    scroll_root: SpatialNodeIndex, // spatial node used as the scroll root of the tile cache
+    shared_clip_node_id: ClipNodeId, // clip node handed to `build_for_tile_cache` for this slice
+}
+
+enum SliceKind { // How the primitives of a primary slice are organized
+    Default { // may be cut into several secondary slices as prims are added
+        secondary_slices: Vec<SliceDescriptor>, // `add_prim` appends to the last entry
+    },
+    Atomic { // everything is kept in one primitive list
+        prim_list: PrimitiveList, // `add_prim` appends every prim here
+    },
+}
+
+impl SliceKind {
+    // A fresh, non-atomic slice kind that holds no secondary slices yet.
+    fn default() -> SliceKind {
+        let secondary_slices = Vec::new();
+        SliceKind::Default { secondary_slices }
+    }
+}
+
+struct PrimarySlice { // A run of content between tile cache barriers (see `add_tile_cache_barrier`)
+    /// Whether this slice is atomic or has secondary slice(s)
+    kind: SliceKind,
+    /// Optional background color; `create_tile_cache` only keeps it for slice index 0
+    background_color: Option<ColorF>,
+    /// Optional root clip for the iframe, added to the shared clips of each tile cache
+    iframe_clip: Option<ClipId>,
+    /// Information about how to draw and composite this slice
+    slice_flags: SliceFlags,
+}
+
+impl PrimarySlice {
+    /// Create an empty, non-atomic primary slice with the given properties.
+    fn new(
+        slice_flags: SliceFlags,
+        iframe_clip: Option<ClipId>,
+        background_color: Option<ColorF>,
+    ) -> Self {
+        let kind = SliceKind::default();
+        PrimarySlice { kind, background_color, iframe_clip, slice_flags }
+    }
+
+    /// True if this is a non-atomic slice holding more than `MAX_CACHE_SLICES`
+    /// secondary slices. Atomic slices never report too many.
+    fn has_too_many_slices(&self) -> bool {
+        if let SliceKind::Default { ref secondary_slices } = self.kind {
+            return secondary_slices.len() > MAX_CACHE_SLICES;
+        }
+
+        false
+    }
+
+    /// Flag this slice as atomic and collapse all of its secondary slices into a
+    /// single primitive list. A slice that is already atomic keeps its list.
+    fn merge(&mut self) {
+        self.slice_flags |= SliceFlags::IS_ATOMIC;
+
+        // Move the current kind out, leaving a cheap placeholder behind until
+        // the merged result is written back below.
+        let previous = mem::replace(&mut self.kind, SliceKind::default());
+
+        self.kind = match previous {
+            SliceKind::Atomic { prim_list } => SliceKind::Atomic { prim_list },
+            SliceKind::Default { secondary_slices } => {
+                let mut merged = PrimitiveList::empty();
+
+                // Append each secondary slice's primitives in slice order.
+                for SliceDescriptor { prim_list, .. } in secondary_slices {
+                    merged.merge(prim_list);
+                }
+
+                SliceKind::Atomic { prim_list: merged }
+            }
+        };
+    }
+}
+
+/// Used during scene building to construct the list of pending tile caches.
+pub struct TileCacheBuilder {
+    /// List of primary slices that have been created so far (last in the list is currently active).
+    primary_slices: Vec<PrimarySlice>,
+    /// Most recent `(spatial node, scroll root)` lookup, cached since they are often the same.
+    prev_scroll_root_cache: (SpatialNodeIndex, SpatialNodeIndex),
+    /// Handle to the root reference frame
+    root_spatial_node_index: SpatialNodeIndex,
+}
+
+/// The output of a tile cache builder, containing all details needed to construct the
+/// tile cache(s) for the next scene, and retain tiles from the previous frame when
+/// sent to the frame builder.
+pub struct TileCacheConfig {
+    /// Mapping of slice id to the parameters needed to construct this tile cache.
+    pub tile_caches: FastHashMap<SliceId, TileCacheParams>,
+    /// Number of picture cache slices that were created (for profiler)
+    pub picture_cache_slice_count: usize,
+}
+
+impl TileCacheConfig {
+    /// Build a config with no tile caches yet, recording the given slice count.
+    pub fn new(picture_cache_slice_count: usize) -> Self {
+        let tile_caches = FastHashMap::default();
+
+        TileCacheConfig { tile_caches, picture_cache_slice_count }
+    }
+}
+
+impl TileCacheBuilder {
+    /// Create a builder that starts with one empty primary slice carrying `background_color`.
+    pub fn new(
+        root_spatial_node_index: SpatialNodeIndex,
+        background_color: Option<ColorF>,
+    ) -> Self {
+        let primary_slices = vec![PrimarySlice::new(SliceFlags::empty(), None, background_color)];
+        // Nothing has been looked up yet, so the scroll root cache starts out invalid.
+        let prev_scroll_root_cache = (SpatialNodeIndex::INVALID, SpatialNodeIndex::INVALID);
+
+        TileCacheBuilder { primary_slices, prev_scroll_root_cache, root_spatial_node_index }
+    }
+
+    /// Merge the active (last) primary slice into a single atomic slice.
+    pub fn make_current_slice_atomic(&mut self) {
+        // `unwrap` panics if no primary slice exists.
+        let current = self.primary_slices.last_mut().unwrap();
+        current.merge();
+    }
+
+    /// Returns true if the current slice has no primitives added yet.
+    ///
+    /// The current slice is the last primary slice; if there is none, this
+    /// also returns true.
+    pub fn is_current_slice_empty(&self) -> bool {
+        let slice = match self.primary_slices.last() {
+            Some(slice) => slice,
+            None => return true,
+        };
+
+        match slice.kind {
+            // `add_prim` pushes a secondary slice right before adding the first
+            // prim, so an empty list means nothing has been added.
+            SliceKind::Default { ref secondary_slices } => secondary_slices.is_empty(),
+            // An atomic slice keeps all of its prims in one list.
+            SliceKind::Atomic { ref prim_list } => prim_list.is_empty(),
+        }
+    }
+
+    /// Start a new primary slice, so that prims added from now on end up in a
+    /// different tile cache than the ones added so far.
+    ///
+    /// The new slice uses the supplied flags and iframe clip, and has no
+    /// background color.
+    pub fn add_tile_cache_barrier(
+        &mut self,
+        slice_flags: SliceFlags,
+        iframe_clip: Option<ClipId>,
+    ) {
+        // The pushed slice becomes the active one, since `add_prim` always
+        // targets the last primary slice.
+        self.primary_slices.push(PrimarySlice::new(slice_flags, iframe_clip, None));
+    }
+
+    /// Build the slice descriptor (scroll root + shared clip root) for an existing prim_list, or `None` if it is empty
+    fn build_tile_cache(
+        &mut self,
+        prim_list: PrimitiveList,
+        spatial_tree: &SceneSpatialTree,
+        prim_instances: &[PrimitiveInstance],
+        clip_tree_builder: &ClipTreeBuilder,
+    ) -> Option<SliceDescriptor> {
+        if prim_list.is_empty() {
+            return None;
+        }
+
+        // Iterate the clusters and determine which is the most commonly occurring
+        // scroll root. This is a reasonable heuristic to decide which spatial node
+        // should be considered the scroll root of this tile cache, in order to
+        // minimize the invalidations that occur due to scrolling. It's often the
+        // case that a blend container will have only a single scroll root.
+        let mut scroll_root_occurrences = FastHashMap::default();
+
+        for cluster in &prim_list.clusters {
+            // If we encounter a cluster which has an unknown spatial node,
+            // we don't include that in the set of spatial nodes that we
+            // are trying to find scroll roots for. Later on, in finalize_picture,
+            // the cluster spatial node will be updated to the selected scroll root.
+            if cluster.spatial_node_index == SpatialNodeIndex::UNKNOWN {
+                continue;
+            }
+
+            let scroll_root = find_scroll_root(
+                cluster.spatial_node_index,
+                &mut self.prev_scroll_root_cache,
+                spatial_tree,
+            );
+
+            *scroll_root_occurrences.entry(scroll_root).or_insert(0) += 1;
+        }
+
+        // We can't just select the most commonly occurring scroll root in this
+        // primitive list. If that is a nested scroll root, there may be
+        // primitives in the list that are outside that scroll root, which
+        // can cause panics when calculating relative transforms. To ensure
+        // this doesn't happen, only retain scroll root candidates that are
+        // also ancestors of every other scroll root candidate.
+        let scroll_roots: Vec<SpatialNodeIndex> = scroll_root_occurrences
+            .keys()
+            .cloned()
+            .collect();
+
+        scroll_root_occurrences.retain(|parent_spatial_node_index, _| {
+            scroll_roots.iter().all(|child_spatial_node_index| {
+                parent_spatial_node_index == child_spatial_node_index ||
+                spatial_tree.is_ancestor(
+                    *parent_spatial_node_index,
+                    *child_spatial_node_index,
+                )
+            })
+        });
+
+        // Select the scroll root by finding the most commonly occurring remaining candidate
+        let scroll_root = scroll_root_occurrences
+            .iter()
+            .max_by_key(|entry | entry.1)
+            .map(|(spatial_node_index, _)| *spatial_node_index)
+            .unwrap_or(self.root_spatial_node_index); // no candidate left: use the root reference frame
+
+        // Work out which clips are shared by all prim instances and can thus be applied
+        // at the tile cache level. In future, we aim to remove this limitation by knowing
+        // during initial scene build which are the relevant compositor clips, but for now
+        // this is unlikely to be a significant cost.
+        let mut shared_clip_node_id = None;
+
+        for cluster in &prim_list.clusters {
+            for prim_instance in &prim_instances[cluster.prim_range()] {
+                let leaf = clip_tree_builder.get_leaf(prim_instance.clip_leaf_id);
+
+                // TODO(gw): Need to cache last clip-node id here?
+                shared_clip_node_id = match shared_clip_node_id {
+                    Some(current) => {
+                        Some(clip_tree_builder.find_lowest_common_ancestor(current, leaf.node_id))
+                    }
+                    None => {
+                        Some(leaf.node_id)
+                    }
+                }
+            }
+        }
+
+        let shared_clip_node_id = shared_clip_node_id.expect("bug: no shared clip root"); // `None` only if no prim instance was visited above
+
+        Some(SliceDescriptor {
+            scroll_root,
+            shared_clip_node_id,
+            prim_list,
+        })
+    }
+
+    /// Add a primitive to the active primary slice: an atomic slice takes it directly, otherwise it goes to the last secondary slice or a newly cut one.
+    pub fn add_prim(
+        &mut self,
+        prim_instance: PrimitiveInstance,
+        prim_rect: LayoutRect,
+        spatial_node_index: SpatialNodeIndex,
+        prim_flags: PrimitiveFlags,
+        spatial_tree: &SceneSpatialTree,
+        interners: &Interners,
+        quality_settings: &QualitySettings,
+        prim_instances: &mut Vec<PrimitiveInstance>,
+        clip_tree_builder: &ClipTreeBuilder,
+    ) {
+        let primary_slice = self.primary_slices.last_mut().unwrap(); // the last primary slice is the active one
+
+        match primary_slice.kind {
+            SliceKind::Atomic { ref mut prim_list } => {
+                prim_list.add_prim(
+                    prim_instance,
+                    prim_rect,
+                    spatial_node_index,
+                    prim_flags,
+                    prim_instances,
+                    clip_tree_builder,
+                );
+            }
+            SliceKind::Default { ref mut secondary_slices } => {
+                assert_ne!(spatial_node_index, SpatialNodeIndex::UNKNOWN);
+
+                // Check if we want to create a new slice based on the current / next scroll root
+                let scroll_root = find_scroll_root(
+                    spatial_node_index,
+                    &mut self.prev_scroll_root_cache,
+                    spatial_tree,
+                );
+
+                let current_scroll_root = secondary_slices
+                    .last()
+                    .map(|p| p.scroll_root);
+
+                let mut want_new_tile_cache = secondary_slices.is_empty(); // the very first prim always needs a slice
+
+                if let Some(current_scroll_root) = current_scroll_root {
+                    want_new_tile_cache |= match (current_scroll_root, scroll_root) {
+                        (_, _) if current_scroll_root == self.root_spatial_node_index && scroll_root == self.root_spatial_node_index => {
+                            // Both current slice and this cluster are fixed position, no need to cut
+                            false
+                        }
+                        (_, _) if current_scroll_root == self.root_spatial_node_index => {
+                            // A real scroll root is being established, so create a cache slice
+                            true
+                        }
+                        (_, _) if scroll_root == self.root_spatial_node_index => {
+                            // If quality settings force subpixel AA over performance, skip creating
+                            // a slice for the fixed position element(s) here.
+                            if quality_settings.force_subpixel_aa_where_possible {
+                                false
+                            } else {
+                                // A fixed position slice is encountered within a scroll root. Only create
+                                // a slice in this case if all the clips referenced by this cluster are also
+                                // fixed position. There's no real point in creating slices for these cases,
+                                // since we'll have to rasterize them as the scrolling clip moves anyway. It
+                                // also allows us to retain subpixel AA in these cases. For these types of
+                                // slices, the intra-slice dirty rect handling typically works quite well
+                                // (a common case is parallax scrolling effects).
+                                let mut create_slice = true;
+
+                                let leaf = clip_tree_builder.get_leaf(prim_instance.clip_leaf_id);
+                                let mut current_node_id = leaf.node_id;
+
+                                while current_node_id != ClipNodeId::NONE {
+                                    let node = clip_tree_builder.get_node(current_node_id);
+
+                                    let clip_node_data = &interners.clip[node.handle];
+
+                                    let spatial_root = find_scroll_root(
+                                        clip_node_data.key.spatial_node_index,
+                                        &mut self.prev_scroll_root_cache,
+                                        spatial_tree,
+                                    );
+
+                                    if spatial_root != self.root_spatial_node_index {
+                                        create_slice = false;
+                                        break;
+                                    }
+
+                                    current_node_id = node.parent;
+                                }
+
+                                create_slice
+                            }
+                        }
+                        (curr_scroll_root, scroll_root) => {
+                            // Two scrolling roots - only need a new slice if they differ
+                            curr_scroll_root != scroll_root
+                        }
+                    };
+
+                    // Update the list of clips that apply to this primitive instance, to track which are the
+                    // shared clips for this tile cache that can be applied during compositing.
+
+                    let shared_clip_node_id = find_shared_clip_root(
+                        current_scroll_root,
+                        prim_instance.clip_leaf_id,
+                        spatial_tree,
+                        clip_tree_builder,
+                        interners,
+                    );
+
+                    let current_shared_clip_node_id = secondary_slices.last().unwrap().shared_clip_node_id;
+
+                    // If the shared clips are not compatible, create a new slice.
+                    want_new_tile_cache |= shared_clip_node_id != current_shared_clip_node_id;
+                }
+
+                if want_new_tile_cache {
+
+                    let shared_clip_node_id = find_shared_clip_root(
+                        scroll_root,
+                        prim_instance.clip_leaf_id,
+                        spatial_tree,
+                        clip_tree_builder,
+                        interners,
+                    );
+
+                    secondary_slices.push(SliceDescriptor {
+                        prim_list: PrimitiveList::empty(),
+                        scroll_root,
+                        shared_clip_node_id,
+                    });
+                }
+
+                secondary_slices
+                    .last_mut()
+                    .unwrap()
+                    .prim_list
+                    .add_prim(
+                        prim_instance,
+                        prim_rect,
+                        spatial_node_index,
+                        prim_flags,
+                        prim_instances,
+                        clip_tree_builder,
+                    );
+            }
+        }
+    }
+
+    /// Consume this object and build the list of tile cache primitives, along with
+    /// the config describing each tile cache that was created.
+    pub fn build(
+        mut self,
+        config: &FrameBuilderConfig,
+        prim_store: &mut PrimitiveStore,
+        spatial_tree: &SceneSpatialTree,
+        prim_instances: &[PrimitiveInstance],
+        clip_tree_builder: &mut ClipTreeBuilder,
+    ) -> (TileCacheConfig, Vec<PictureIndex>) {
+        // Take the slices out of `self`, so that `self` can still be borrowed
+        // mutably by `build_tile_cache` while we walk them.
+        let primary_slices = mem::take(&mut self.primary_slices);
+
+        // The reported slice count is the number of primary slices.
+        let mut result = TileCacheConfig::new(primary_slices.len());
+        let mut tile_cache_pictures = Vec::new();
+
+        for mut primary_slice in primary_slices {
+            // Collapse slices that were split into too many pieces into a
+            // single atomic slice.
+            if primary_slice.has_too_many_slices() {
+                primary_slice.merge();
+            }
+
+            // Work out the descriptors to create tile caches for. An atomic
+            // slice has to select its scroll root and shared clip root
+            // first, and yields nothing at all if it contains no primitives.
+            // The secondary slices of a default slice are used as they are.
+            let descriptors: Vec<SliceDescriptor> = match primary_slice.kind {
+                SliceKind::Atomic { prim_list } => {
+                    self.build_tile_cache(
+                        prim_list,
+                        spatial_tree,
+                        prim_instances,
+                        clip_tree_builder,
+                    )
+                    .into_iter()
+                    .collect()
+                }
+                SliceKind::Default { secondary_slices } => secondary_slices,
+            };
+
+            // Every tile cache built from this primary slice shares its flags,
+            // iframe clip and background color.
+            for descriptor in descriptors {
+                create_tile_cache(
+                    primary_slice.slice_flags,
+                    descriptor.scroll_root,
+                    primary_slice.iframe_clip,
+                    descriptor.prim_list,
+                    primary_slice.background_color,
+                    descriptor.shared_clip_node_id,
+                    prim_store,
+                    config,
+                    &mut result.tile_caches,
+                    &mut tile_cache_pictures,
+                    clip_tree_builder,
+                );
+            }
+        }
+
+        (result, tile_cache_pictures)
+    }
+}
+
+/// Look up the scroll root of a spatial node, reusing the previous lookup if it matches.
+fn find_scroll_root(
+    spatial_node_index: SpatialNodeIndex,
+    prev_scroll_root_cache: &mut (SpatialNodeIndex, SpatialNodeIndex),
+    spatial_tree: &SceneSpatialTree,
+) -> SpatialNodeIndex {
+    let (cached_node, cached_root) = *prev_scroll_root_cache;
+    if cached_node == spatial_node_index {
+        return cached_root;
+    }
+    // Cache miss: ask the spatial tree and remember the answer.
+    let found = spatial_tree.find_scroll_root(spatial_node_index);
+    *prev_scroll_root_cache = (spatial_node_index, found);
+    found
+}
+
+/// Walk up the clip tree from the given leaf and return the first node that
+/// can act as the shared clip root for a tile cache with this scroll root.
+///
+/// A node qualifies if it is a rectangle clip whose spatial node satisfies
+/// `spatial_tree.is_ancestor(clip_spatial_node, scroll_root)`, and
+/// `clip_node_has_complex_clips` reports false for it. If no node qualifies,
+/// `ClipNodeId::NONE` is returned.
+fn find_shared_clip_root(
+    scroll_root: SpatialNodeIndex,
+    clip_leaf_id: ClipLeafId,
+    spatial_tree: &SceneSpatialTree,
+    clip_tree_builder: &ClipTreeBuilder,
+    interners: &Interners,
+) -> ClipNodeId {
+    let mut candidate = clip_tree_builder.get_leaf(clip_leaf_id).node_id;
+
+    loop {
+        if candidate == ClipNodeId::NONE {
+            return candidate;
+        }
+
+        let node = clip_tree_builder.get_node(candidate);
+        let clip_key = &interners.clip[node.handle].key;
+
+        if let ClipNodeKind::Rectangle = clip_key.kind.node_kind() {
+            let is_ancestor = spatial_tree.is_ancestor(clip_key.spatial_node_index, scroll_root);
+            let is_complex = clip_tree_builder.clip_node_has_complex_clips(candidate, interners);
+
+            if is_ancestor && !is_complex {
+                return candidate;
+            }
+        }
+
+        candidate = node.parent;
+    }
+}
+
+/// Given a PrimitiveList and scroll root, construct a tile cache primitive instance
+/// that wraps the primitive list. The parameters of the new tile cache are recorded in
+/// `tile_caches`, and the new picture's index is appended to `tile_cache_pictures`.
+fn create_tile_cache(
+    slice_flags: SliceFlags,
+    scroll_root: SpatialNodeIndex,
+    iframe_clip: Option<ClipId>,
+    prim_list: PrimitiveList,
+    background_color: Option<ColorF>,
+    shared_clip_node_id: ClipNodeId,
+    prim_store: &mut PrimitiveStore,
+    frame_builder_config: &FrameBuilderConfig,
+    tile_caches: &mut FastHashMap<SliceId, TileCacheParams>,
+    tile_cache_pictures: &mut Vec<PictureIndex>,
+    clip_tree_builder: &mut ClipTreeBuilder,
+) {
+    // The slice index is the number of tile cache pictures created before this one.
+    let slice = tile_cache_pictures.len();
+    let slice_id = SliceId::new(slice);
+
+    // Only the first slice keeps the background color.
+    let background_color = match slice {
+        0 => background_color,
+        _ => None,
+    };
+
+    // Accumulate any clip instances from the iframe_clip into the shared clips
+    // that will be applied by this tile cache during compositing.
+    let additional_clips: Vec<ClipId> = iframe_clip.into_iter().collect();
+
+    // Build a clip-chain for the tile cache, that contains any of the shared clips
+    // we will apply when drawing the tiles. In all cases provided by Gecko, these
+    // are rectangle clips with a scale/offset transform only, and get handled as
+    // a simple local clip rect in the vertex shader. However, this should in theory
+    // also work with any complex clips, such as rounded rects and image masks, by
+    // producing a clip mask that is applied to the picture cache tiles.
+    let shared_clip_leaf_id = clip_tree_builder.build_for_tile_cache(
+        shared_clip_node_id,
+        &additional_clips,
+    );
+
+    // Store some information about the picture cache slice. This is used when we swap the
+    // new scene into the frame builder to either reuse existing slices, or create new ones.
+    let params = TileCacheParams {
+        slice,
+        slice_flags,
+        spatial_node_index: scroll_root,
+        background_color,
+        shared_clip_node_id,
+        shared_clip_leaf_id: Some(shared_clip_leaf_id),
+        virtual_surface_size: frame_builder_config.compositor_kind.get_virtual_surface_size(),
+        compositor_surface_count: prim_list.compositor_surface_count,
+    };
+
+    tile_caches.insert(slice_id, params);
+
+    // Wrap the primitive list in a tile cache picture. The slot is allocated before
+    // the picture is constructed; the value returned by `init` is kept as its index.
+    let pic_index = prim_store.pictures.alloc().init(PicturePrimitive::new_image(
+        Some(PictureCompositeMode::TileCache { slice_id }),
+        Picture3DContext::Out,
+        PrimitiveFlags::IS_BACKFACE_VISIBLE,
+        prim_list,
+        scroll_root,
+        RasterSpace::Screen,
+        PictureFlags::empty(),
+    ));
+
+    tile_cache_pictures.push(PictureIndex(pic_index));
+}
+
+/// Debug information about a set of picture cache slices, exposed via RenderResults
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct PictureCacheDebugInfo {
+    pub slices: FastHashMap<usize, SliceDebugInfo>, // per-slice info; key is presumably the slice index — TODO confirm
+}
+
+impl PictureCacheDebugInfo {
+    /// Create debug info that contains no slices.
+    pub fn new() -> Self {
+        let slices = FastHashMap::default();
+        PictureCacheDebugInfo { slices }
+    }
+
+    /// Look up the debug info of the given slice. Panics (on purpose) when
+    /// no entry exists for that slice.
+    pub fn slice(&self, slice: usize) -> &SliceDebugInfo {
+        &self.slices[&slice]
+    }
+}
+
+impl Default for PictureCacheDebugInfo {
+    fn default() -> Self {
+        Self::new() // same as the explicit constructor: no slices
+    }
+}
+
+/// Debug information about a set of picture cache tiles, exposed via RenderResults
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct SliceDebugInfo {
+    pub tiles: FastHashMap<TileOffset, TileDebugInfo>, // per-tile state, keyed by tile offset (see `tile(x, y)`)
+}
+
+impl SliceDebugInfo {
+    /// Create debug info that contains no tiles.
+    pub fn new() -> Self {
+        SliceDebugInfo { tiles: FastHashMap::default() }
+    }
+
+    /// Look up the debug info of the tile at offset `(x, y)`. Panics (on
+    /// purpose) when no entry exists for that tile.
+    pub fn tile(&self, x: i32, y: i32) -> &TileDebugInfo {
+        let offset = TileOffset::new(x, y);
+        &self.tiles[&offset]
+    }
+}
+
+/// Debug information about a tile that was dirty and was rasterized
+#[derive(Debug, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct DirtyTileDebugInfo {
+    pub local_valid_rect: PictureRect, // NOTE(review): presumably the tile's valid region in picture space — confirm
+    pub local_dirty_rect: PictureRect, // NOTE(review): presumably the region that was re-rasterized — confirm
+}
+
+/// Debug information about the state of a tile
+#[derive(Debug, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum TileDebugInfo {
+    /// Tile was occluded by a tile in front of it
+    Occluded,
+    /// Tile was culled (not visible in current display port)
+    Culled,
+    /// Tile was valid (no rasterization was done) and visible
+    Valid,
+    /// Tile was dirty, and was updated
+    Dirty(DirtyTileDebugInfo), // carries the valid / dirty rects; see `as_dirty`
+}
+
+impl TileDebugInfo {
+    /// True only for `Occluded` tiles.
+    pub fn is_occluded(&self) -> bool {
+        match *self {
+            TileDebugInfo::Occluded => true,
+            // Every other state is spelled out to keep the match exhaustive.
+            TileDebugInfo::Culled | TileDebugInfo::Valid | TileDebugInfo::Dirty(..) => false,
+        }
+    }
+
+    /// True only for `Valid` tiles.
+    pub fn is_valid(&self) -> bool {
+        match *self {
+            TileDebugInfo::Valid => true,
+            // Every other state is spelled out to keep the match exhaustive.
+            TileDebugInfo::Culled | TileDebugInfo::Occluded | TileDebugInfo::Dirty(..) => false,
+        }
+    }
+
+    /// True only for `Culled` tiles.
+    pub fn is_culled(&self) -> bool {
+        match *self {
+            TileDebugInfo::Culled => true,
+            // Every other state is spelled out to keep the match exhaustive.
+            TileDebugInfo::Valid | TileDebugInfo::Occluded | TileDebugInfo::Dirty(..) => false,
+        }
+    }
+
+    /// Access the rects recorded for a `Dirty` tile.
+    ///
+    /// Panics if the tile is in any other state.
+    pub fn as_dirty(&self) -> &DirtyTileDebugInfo {
+        match *self {
+            // Only a dirty tile carries any payload.
+            TileDebugInfo::Dirty(ref info) => info,
+            TileDebugInfo::Occluded | TileDebugInfo::Culled | TileDebugInfo::Valid => {
+                panic!("not a dirty tile!")
+            }
+        }
+    }
+}
diff --git a/gfx/wr/webrender/src/util.rs b/gfx/wr/webrender/src/util.rs
new file mode 100644
index 0000000000..b9ad78ae9f
--- /dev/null
+++ b/gfx/wr/webrender/src/util.rs
@@ -0,0 +1,1630 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::BorderRadius;
+use api::units::*;
+use euclid::{Point2D, Rect, Box2D, Size2D, Vector2D, point2, point3};
+use euclid::{default, Transform2D, Transform3D, Scale};
+use malloc_size_of::{MallocShallowSizeOf, MallocSizeOf, MallocSizeOfOps};
+use plane_split::{Clipper, Polygon};
+use std::{i32, f32, fmt, ptr};
+use std::borrow::Cow;
+use std::num::NonZeroUsize;
+use std::os::raw::c_void;
+use std::sync::Arc;
+use std::mem::replace;
+
+
+// Threshold for treating a value as zero; matches SK_ScalarNearlyZero in Skia.
+const NEARLY_ZERO: f32 = 1.0 / 4096.0;
+
+/// A typesafe helper that separates new value construction from
+/// vector growing, allowing LLVM to ideally construct the element in place.
+pub struct Allocation<'a, T: 'a> {
+    vec: &'a mut Vec<T>, // vector that receives the value in `init`
+    index: usize, // slot to write; `VecHelper::alloc` sets it to the vector's length
+}
+
+impl<'a, T> Allocation<'a, T> {
+    // Moves `value` into the reserved slot and returns the slot's index.
+    // SAFETY: `alloc()` reserved the capacity and the `&mut` borrow guards it.
+    #[inline(always)]
+    pub fn init(self, value: T) -> usize {
+        let Allocation { vec, index } = self;
+        unsafe {
+            ptr::write(vec.as_mut_ptr().add(index), value);
+            vec.set_len(index + 1);
+        }
+        index
+    }
+}
+
+/// An entry into a vector, similar to `std::collections::hash_map::Entry`.
+pub enum VecEntry<'a, T: 'a> {
+    Vacant(Allocation<'a, T>), // no element yet; holds the reserved slot to initialize
+    Occupied(&'a mut T), // element already exists; holds a mutable borrow of it
+}
+
+impl<'a, T> VecEntry<'a, T> {
+    #[inline(always)]
+    pub fn set(self, value: T) { // stores `value` in the slot this entry refers to
+        match self {
+            VecEntry::Occupied(existing) => *existing = value, // overwrite in place
+            VecEntry::Vacant(reserved) => { reserved.init(value); } // fill the reserved slot
+        }
+    }
+}
+
+pub trait VecHelper<T> {
+    /// Grows the vector by a single entry, returning the allocation.
+    fn alloc(&mut self) -> Allocation<T>;
+    /// Either returns an existing element, or grows the vector by one.
+    /// Doesn't expect indices to be higher than the current length.
+    fn entry(&mut self, index: usize) -> VecEntry<T>;
+
+    /// Equivalent to `mem::replace(&mut vec, Vec::new())`.
+    fn take(&mut self) -> Self;
+
+    /// Clears the vector and returns it (useful for chaining with calls that move the vector).
+    fn cleared(self) -> Self;
+
+    /// Functionally equivalent to `mem::replace(&mut vec, Vec::new())` but tries
+    /// to keep the allocation in the caller if it is empty or replace it with a
+    /// pre-allocated vector.
+    fn take_and_preallocate(&mut self) -> Self;
+}
+
+impl<T> VecHelper<T> for Vec<T> {
+    fn alloc(&mut self) -> Allocation<T> {
+        // Reserve room for one more element only when the buffer is full.
+        let next_slot = self.len();
+        if next_slot == self.capacity() {
+            self.reserve(1);
+        }
+        Allocation { vec: self, index: next_slot }
+    }
+
+    fn entry(&mut self, index: usize) -> VecEntry<T> {
+        let current_len = self.len();
+        if index >= current_len {
+            // One past the end is the only vacant index that is accepted.
+            assert_eq!(index, current_len);
+            VecEntry::Vacant(self.alloc())
+        } else {
+            // SAFETY: this branch is only reached when `index < current_len`.
+            VecEntry::Occupied(unsafe { self.get_unchecked_mut(index) })
+        }
+    }
+
+    fn take(&mut self) -> Self {
+        replace(self, Vec::new())
+    }
+
+    fn cleared(mut self) -> Self {
+        self.clear();
+        self
+    }
+
+    fn take_and_preallocate(&mut self) -> Self {
+        // Empty: leave `self` alone. Otherwise swap in a buffer sized `len + 8`.
+        match self.len() {
+            0 => {
+                self.clear();
+                Vec::new()
+            }
+            len => replace(self, Vec::with_capacity(len + 8)),
+        }
+    }
+}
+
+
+// Represents an optimized transform where there is only
+// a scale and translation (which are guaranteed to maintain
+// an axis-aligned rectangle under transformation). The
+// scaling is applied first, followed by the translation.
+// TODO(gw): We should try and incorporate F <-> T units here,
+//           but it's a bit tricky to do that now with the
+//           way the current spatial tree works.
+#[derive(Debug, Clone, Copy, MallocSizeOf, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ScaleOffset {
+    pub scale: default::Vector2D<f32>, // per-axis scale factors, applied first
+    pub offset: default::Vector2D<f32>, // per-axis translation, applied after scaling
+}
+
+impl ScaleOffset {
+    pub fn new(sx: f32, sy: f32, tx: f32, ty: f32) -> Self { // (sx, sy) scale, (tx, ty) offset
+        ScaleOffset {
+            scale: Vector2D::new(sx, sy),
+            offset: Vector2D::new(tx, ty),
+        }
+    }
+
+    pub fn identity() -> Self { // unit scale, zero offset
+        ScaleOffset {
+            scale: Vector2D::new(1.0, 1.0),
+            offset: Vector2D::zero(),
+        }
+    }
+
+    /// Construct a ScaleOffset from a transform. Returns
+    /// None if the matrix is not a pure scale / translation.
+    pub fn from_transform<F, T>(
+        m: &Transform3D<f32, F, T>,
+    ) -> Option<ScaleOffset> {
+
+        // To check that we have a pure scale / translation:
+        // Every field must match an identity matrix (within NEARLY_ZERO), except:
+        //  - Any value present in tx,ty (m41, m42)
+        //  - Any value present in sx,sy (m11, m22)
+
+        if m.m12.abs() > NEARLY_ZERO ||
+           m.m13.abs() > NEARLY_ZERO ||
+           m.m14.abs() > NEARLY_ZERO ||
+           m.m21.abs() > NEARLY_ZERO ||
+           m.m23.abs() > NEARLY_ZERO ||
+           m.m24.abs() > NEARLY_ZERO ||
+           m.m31.abs() > NEARLY_ZERO ||
+           m.m32.abs() > NEARLY_ZERO ||
+           (m.m33 - 1.0).abs() > NEARLY_ZERO ||
+           m.m34.abs() > NEARLY_ZERO ||
+           m.m43.abs() > NEARLY_ZERO ||
+           (m.m44 - 1.0).abs() > NEARLY_ZERO {
+            return None;
+        }
+
+        Some(ScaleOffset {
+            scale: Vector2D::new(m.m11, m.m22),
+            offset: Vector2D::new(m.m41, m.m42),
+        })
+    }
+
+    pub fn from_offset(offset: default::Vector2D<f32>) -> Self { // unit scale + given offset
+        ScaleOffset {
+            scale: Vector2D::new(1.0, 1.0),
+            offset,
+        }
+    }
+
+    pub fn from_scale(scale: default::Vector2D<f32>) -> Self { // given scale + zero offset
+        ScaleOffset {
+            scale,
+            offset: Vector2D::new(0.0, 0.0),
+        }
+    }
+
+    pub fn inverse(&self) -> Self { // no guard against a zero scale component
+        ScaleOffset {
+            scale: Vector2D::new(
+                1.0 / self.scale.x,
+                1.0 / self.scale.y,
+            ),
+            offset: Vector2D::new(
+                -self.offset.x / self.scale.x,
+                -self.offset.y / self.scale.y,
+            ),
+        }
+    }
+
+    pub fn offset(&self, offset: default::Vector2D<f32>) -> Self { // accumulate a pure offset
+        self.accumulate(
+            &ScaleOffset {
+                scale: Vector2D::new(1.0, 1.0),
+                offset,
+            }
+        )
+    }
+
+    pub fn scale(&self, scale: f32) -> Self { // accumulate a uniform scale
+        self.accumulate(
+            &ScaleOffset {
+                scale: Vector2D::new(scale, scale),
+                offset: Vector2D::zero(),
+            }
+        )
+    }
+
+    /// Produce a ScaleOffset that includes both self and other.
+    /// The 'self' ScaleOffset is applied after other.
+    /// This is equivalent to `Transform3D::pre_transform`.
+    pub fn accumulate(&self, other: &ScaleOffset) -> Self {
+        ScaleOffset {
+            scale: Vector2D::new(
+                self.scale.x * other.scale.x,
+                self.scale.y * other.scale.y,
+            ),
+            offset: Vector2D::new(
+                self.offset.x + self.scale.x * other.offset.x,
+                self.offset.y + self.scale.y * other.offset.y,
+            ),
+        }
+    }
+
+    pub fn map_rect<F, T>(&self, rect: &Box2D<f32, F>) -> Box2D<f32, T> {
+        // TODO(gw): The logic below can return an unexpected result if the supplied
+        //           rect is invalid (has size < 0). Since Gecko currently supplies
+        //           invalid rects in some cases, adding a max(0) here ensures that
+        //           mapping an invalid rect retains the property that rect.is_empty()
+        //           will return true (the mapped rect output will have size 0 instead
+        //           of a negative size). In future we could catch / assert / fix
+        //           these invalid rects earlier, and assert here instead.
+
+        let w = rect.width().max(0.0);
+        let h = rect.height().max(0.0);
+
+        let mut x0 = rect.min.x * self.scale.x + self.offset.x;
+        let mut y0 = rect.min.y * self.scale.y + self.offset.y;
+
+        let mut sx = w * self.scale.x;
+        let mut sy = h * self.scale.y;
+        // Handle negative scale. Previously, branchless float math was used to find the
+        // min / max vertices and size. However, that sequence of operations was producing
+        // additional floating point accuracy on android emulator builds, causing one test
+        // to fail an assert. Instead, we retain the same math as previously, and adjust
+        // the origin / size if required.
+
+        if self.scale.x < 0.0 {
+            x0 += sx;
+            sx = -sx;
+        }
+        if self.scale.y < 0.0 {
+            y0 += sy;
+            sy = -sy;
+        }
+
+        Box2D::from_origin_and_size(
+            Point2D::new(x0, y0),
+            Size2D::new(sx, sy),
+        )
+    }
+
+    pub fn unmap_rect<F, T>(&self, rect: &Box2D<f32, F>) -> Box2D<f32, T> {
+        // TODO(gw): The logic below can return an unexpected result if the supplied
+        //           rect is invalid (has size < 0). Since Gecko currently supplies
+        //           invalid rects in some cases, adding a max(0) here ensures that
+        //           mapping an invalid rect retains the property that rect.is_empty()
+        //           will return true (the mapped rect output will have size 0 instead
+        //           of a negative size). In future we could catch / assert / fix
+        //           these invalid rects earlier, and assert here instead.
+
+        let w = rect.width().max(0.0);
+        let h = rect.height().max(0.0);
+
+        let mut x0 = (rect.min.x - self.offset.x) / self.scale.x;
+        let mut y0 = (rect.min.y - self.offset.y) / self.scale.y;
+
+        let mut sx = w / self.scale.x;
+        let mut sy = h / self.scale.y;
+
+        // Handle negative scale. Previously, branchless float math was used to find the
+        // min / max vertices and size. However, that sequence of operations was producing
+        // additional floating point accuracy on android emulator builds, causing one test
+        // to fail an assert. Instead, we retain the same math as previously, and adjust
+        // the origin / size if required.
+
+        if self.scale.x < 0.0 {
+            x0 += sx;
+            sx = -sx;
+        }
+        if self.scale.y < 0.0 {
+            y0 += sy;
+            sy = -sy;
+        }
+
+        Box2D::from_origin_and_size(
+            Point2D::new(x0, y0),
+            Size2D::new(sx, sy),
+        )
+    }
+
+    pub fn map_vector<F, T>(&self, vector: &Vector2D<f32, F>) -> Vector2D<f32, T> {
+        Vector2D::new( // vectors are only scaled; the offset is not applied
+            vector.x * self.scale.x,
+            vector.y * self.scale.y,
+        )
+    }
+
+    pub fn unmap_vector<F, T>(&self, vector: &Vector2D<f32, F>) -> Vector2D<f32, T> {
+        Vector2D::new( // divides by the scale; the offset is not involved
+            vector.x / self.scale.x,
+            vector.y / self.scale.y,
+        )
+    }
+
+    pub fn map_point<F, T>(&self, point: &Point2D<f32, F>) -> Point2D<f32, T> {
+        Point2D::new( // scale first, then translate
+            point.x * self.scale.x + self.offset.x,
+            point.y * self.scale.y + self.offset.y,
+        )
+    }
+
+    pub fn unmap_point<F, T>(&self, point: &Point2D<f32, F>) -> Point2D<f32, T> {
+        Point2D::new( // remove the translation first, then divide by the scale
+            (point.x - self.offset.x) / self.scale.x,
+            (point.y - self.offset.y) / self.scale.y,
+        )
+    }
+
+    pub fn to_transform<F, T>(&self) -> Transform3D<f32, F, T> {
+        Transform3D::new( // identity except m11/m22 (scale) and m41/m42 (offset)
+            self.scale.x,
+            0.0,
+            0.0,
+            0.0,
+
+            0.0,
+            self.scale.y,
+            0.0,
+            0.0,
+
+            0.0,
+            0.0,
+            1.0,
+            0.0,
+
+            self.offset.x,
+            self.offset.y,
+            0.0,
+            1.0,
+        )
+    }
+}
+
+// TODO: Implement these in euclid!
+pub trait MatrixHelpers<Src, Dst> {
+    /// A port of the preserves2dAxisAlignment function in Skia.
+    /// Defined in the SkMatrix44 class.
+    fn preserves_2d_axis_alignment(&self) -> bool;
+    fn has_perspective_component(&self) -> bool; // any perspective-related entry off identity
+    fn has_2d_inverse(&self) -> bool; // true when the 2D determinant is non-zero
+    /// Check if the matrix post-scaling on either the X or Y axes could cause geometry
+    /// transformed by this matrix to have scaling exceeding the supplied limit.
+    fn exceeds_2d_scale(&self, limit: f64) -> bool;
+    fn inverse_project(&self, target: &Point2D<f32, Dst>) -> Option<Point2D<f32, Src>>;
+    fn inverse_rect_footprint(&self, rect: &Box2D<f32, Dst>) -> Option<Box2D<f32, Src>>;
+    fn transform_kind(&self) -> TransformedRectKind; // axis-aligned vs. complex
+    fn is_simple_translation(&self) -> bool; // identity apart from the translation entries
+    fn is_simple_2d_translation(&self) -> bool; // simple translation with no Z translation
+    fn is_2d_scale_translation(&self) -> bool; // only X/Y scale and X/Y translation
+    /// Return the determinant of the 2D part of the matrix.
+    fn determinant_2d(&self) -> f32;
+    /// This function returns a point in the `Src` space that projects into zero XY.
+    /// It ignores the Z coordinate and is usable for "flattened" transformations,
+    /// since they are not generally invertible.
+    fn inverse_project_2d_origin(&self) -> Option<Point2D<f32, Src>>;
+    /// Turn Z transformation into identity. This is useful when crossing "flat"
+    /// transform styled stacking contexts upon traversing the coordinate systems.
+    fn flatten_z_output(&mut self);
+
+    fn cast_unit<NewSrc, NewDst>(&self) -> Transform3D<f32, NewSrc, NewDst>; // same entries, new units
+}
+
+impl<Src, Dst> MatrixHelpers<Src, Dst> for Transform3D<f32, Src, Dst> {
+    fn preserves_2d_axis_alignment(&self) -> bool {
+        if self.m14 != 0.0 || self.m24 != 0.0 { // exact (not tolerance-based) check
+            return false;
+        }
+
+        let mut col0 = 0; // counts of non-negligible entries in the upper-left 2x2 part
+        let mut col1 = 0;
+        let mut row0 = 0;
+        let mut row1 = 0;
+
+        if self.m11.abs() > NEARLY_ZERO {
+            col0 += 1;
+            row0 += 1;
+        }
+        if self.m12.abs() > NEARLY_ZERO {
+            col1 += 1;
+            row0 += 1;
+        }
+        if self.m21.abs() > NEARLY_ZERO {
+            col0 += 1;
+            row1 += 1;
+        }
+        if self.m22.abs() > NEARLY_ZERO {
+            col1 += 1;
+            row1 += 1;
+        }
+
+        col0 < 2 && col1 < 2 && row0 < 2 && row1 < 2 // at most one entry per row and column
+    }
+
+    fn has_perspective_component(&self) -> bool {
+         self.m14.abs() > NEARLY_ZERO ||
+         self.m24.abs() > NEARLY_ZERO ||
+         self.m34.abs() > NEARLY_ZERO ||
+         (self.m44 - 1.0).abs() > NEARLY_ZERO
+    }
+
+    fn has_2d_inverse(&self) -> bool {
+        self.determinant_2d() != 0.0
+    }
+
+    fn exceeds_2d_scale(&self, limit: f64) -> bool {
+        let limit2 = (limit * limit) as f32; // compare squared magnitudes against limit^2
+        self.m11 * self.m11 + self.m12 * self.m12 > limit2 ||
+        self.m21 * self.m21 + self.m22 * self.m22 > limit2
+    }
+
+    /// Find out a point in `Src` that would be projected into the `target`.
+    fn inverse_project(&self, target: &Point2D<f32, Dst>) -> Option<Point2D<f32, Src>> {
+        // form the linear equation for the hyperplane intersection
+        let m = Transform2D::<f32, Src, Dst>::new(
+            self.m11 - target.x * self.m14, self.m12 - target.y * self.m14,
+            self.m21 - target.x * self.m24, self.m22 - target.y * self.m24,
+            self.m41 - target.x * self.m44, self.m42 - target.y * self.m44,
+        );
+        let inv = m.inverse()?; // None when the 2D system is not invertible
+        // we found the point, now check if it maps to the positive hemisphere
+        if inv.m31 * self.m14 + inv.m32 * self.m24 + self.m44 > 0.0 {
+            Some(Point2D::new(inv.m31, inv.m32))
+        } else {
+            None
+        }
+    }
+
+    fn inverse_rect_footprint(&self, rect: &Box2D<f32, Dst>) -> Option<Box2D<f32, Src>> {
+        Some(Box2D::from_points(&[ // None as soon as any corner fails to inverse-project
+            self.inverse_project(&rect.top_left())?,
+            self.inverse_project(&rect.top_right())?,
+            self.inverse_project(&rect.bottom_left())?,
+            self.inverse_project(&rect.bottom_right())?,
+        ]))
+    }
+
+    fn transform_kind(&self) -> TransformedRectKind {
+        if self.preserves_2d_axis_alignment() {
+            TransformedRectKind::AxisAligned
+        } else {
+            TransformedRectKind::Complex
+        }
+    }
+
+    fn is_simple_translation(&self) -> bool {
+        if (self.m11 - 1.0).abs() > NEARLY_ZERO ||
+            (self.m22 - 1.0).abs() > NEARLY_ZERO ||
+            (self.m33 - 1.0).abs() > NEARLY_ZERO ||
+            (self.m44 - 1.0).abs() > NEARLY_ZERO {
+            return false;
+        }
+
+        self.m12.abs() < NEARLY_ZERO && self.m13.abs() < NEARLY_ZERO &&
+            self.m14.abs() < NEARLY_ZERO && self.m21.abs() < NEARLY_ZERO &&
+            self.m23.abs() < NEARLY_ZERO && self.m24.abs() < NEARLY_ZERO &&
+            self.m31.abs() < NEARLY_ZERO && self.m32.abs() < NEARLY_ZERO &&
+            self.m34.abs() < NEARLY_ZERO
+    }
+
+    fn is_simple_2d_translation(&self) -> bool {
+        if !self.is_simple_translation() {
+            return false;
+        }
+
+        self.m43.abs() < NEARLY_ZERO // additionally requires no translation along Z
+    }
+
+    /*  is this...
+     *  X  0  0  0
+     *  0  Y  0  0
+     *  0  0  1  0
+     *  a  b  0  1
+     */
+    fn is_2d_scale_translation(&self) -> bool {
+        (self.m33 - 1.0).abs() < NEARLY_ZERO &&
+            (self.m44 - 1.0).abs() < NEARLY_ZERO &&
+            self.m12.abs() < NEARLY_ZERO && self.m13.abs() < NEARLY_ZERO && self.m14.abs() < NEARLY_ZERO &&
+            self.m21.abs() < NEARLY_ZERO && self.m23.abs() < NEARLY_ZERO && self.m24.abs() < NEARLY_ZERO &&
+            self.m31.abs() < NEARLY_ZERO && self.m32.abs() < NEARLY_ZERO && self.m34.abs() < NEARLY_ZERO &&
+            self.m43.abs() < NEARLY_ZERO
+    }
+
+    fn determinant_2d(&self) -> f32 {
+        self.m11 * self.m22 - self.m12 * self.m21
+    }
+
+    fn inverse_project_2d_origin(&self) -> Option<Point2D<f32, Src>> {
+        let det = self.determinant_2d();
+        if det != 0.0 { // exact zero test; a singular 2D part yields None
+            let x = (self.m21 * self.m42 - self.m41 * self.m22) / det;
+            let y = (self.m12 * self.m41 - self.m11 * self.m42) / det;
+            Some(Point2D::new(x, y))
+        } else {
+            None
+        }
+    }
+
+    fn flatten_z_output(&mut self) {
+        self.m13 = 0.0;
+        self.m23 = 0.0;
+        self.m33 = 1.0;
+        self.m43 = 0.0;
+        // Note: we used to zero out m3? as well, see "reftests/flatten-all-flat.yaml" test
+    }
+
+    fn cast_unit<NewSrc, NewDst>(&self) -> Transform3D<f32, NewSrc, NewDst> {
+        Transform3D::new( // copies all sixteen entries unchanged
+            self.m11, self.m12, self.m13, self.m14,
+            self.m21, self.m22, self.m23, self.m24,
+            self.m31, self.m32, self.m33, self.m34,
+            self.m41, self.m42, self.m43, self.m44,
+        )
+    }
+}
+
+pub trait PointHelpers<U>
+where
+    Self: Sized,
+{
+    fn snap(&self) -> Self; // returns a copy moved onto whole-number coordinates
+}
+
+impl<U> PointHelpers<U> for Point2D<f32, U> {
+    fn snap(&self) -> Self {
+        // Round half up: shift by half a unit, then take the floor.
+        let snapped_x = (self.x + 0.5).floor();
+        let snapped_y = (self.y + 0.5).floor();
+        Point2D::new(snapped_x, snapped_y)
+    }
+}
+
+pub trait RectHelpers<U>
+where
+    Self: Sized,
+{
+    fn from_floats(x0: f32, y0: f32, x1: f32, y1: f32) -> Self; // from corners (x0, y0), (x1, y1)
+    fn snap(&self) -> Self; // returns a copy with edges on whole-number coordinates
+}
+
+impl<U> RectHelpers<U> for Rect<f32, U> {
+    fn from_floats(x0: f32, y0: f32, x1: f32, y1: f32) -> Self {
+        // The second corner is converted into a size relative to the first.
+        let origin = Point2D::new(x0, y0);
+        let size = Size2D::new(x1 - x0, y1 - y0);
+        Rect::new(origin, size)
+    }
+
+    fn snap(&self) -> Self {
+        // Snap both corners independently (round half up), then rebuild
+        // the size from the snapped corners.
+        let left = (self.origin.x + 0.5).floor();
+        let top = (self.origin.y + 0.5).floor();
+        let right = (self.origin.x + self.size.width + 0.5).floor();
+        let bottom = (self.origin.y + self.size.height + 0.5).floor();
+
+        Rect::new(
+            Point2D::new(left, top),
+            Size2D::new(right - left, bottom - top),
+        )
+    }
+}
+
+impl<U> RectHelpers<U> for Box2D<f32, U> {
+    fn from_floats(x0: f32, y0: f32, x1: f32, y1: f32) -> Self {
+        // The two corners are stored directly as `min` and `max`.
+        let min = Point2D::new(x0, y0);
+        let max = Point2D::new(x1, y1);
+        Box2D { min, max }
+    }
+
+    fn snap(&self) -> Self {
+        self.round()
+    }
+}
+
+pub trait VectorHelpers<U>
+where
+    Self: Sized,
+{
+    fn snap(&self) -> Self; // returns a copy with whole-number components
+}
+
+impl<U> VectorHelpers<U> for Vector2D<f32, U> {
+    fn snap(&self) -> Self {
+        // Round half up: shift by half a unit, then take the floor.
+        let snapped_x = (self.x + 0.5).floor();
+        let snapped_y = (self.y + 0.5).floor();
+        Vector2D::new(snapped_x, snapped_y)
+    }
+}
+
+pub fn lerp(a: f32, b: f32, t: f32) -> f32 {
+    a + t * (b - a) // linear interpolation from `a` (t = 0) toward `b` (t = 1)
+}
+
+#[repr(u32)]
+#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum TransformedRectKind {
+    AxisAligned = 0, // reported when the transform preserves 2D axis alignment
+    Complex = 1, // reported for every other transform
+}
+
+#[inline(always)]
+pub fn pack_as_float(value: u32) -> f32 {
+    0.5 + value as f32 // converts to f32, then adds a half-unit offset
+}
+
+#[inline]
+fn extract_inner_rect_impl<U>(
+    rect: &Box2D<f32, U>,
+    radii: &BorderRadius,
+    k: f32,
+) -> Option<Box2D<f32, U>> {
+    // `k` scales how much of each corner radius is carved off the rect.
+    // Insets are pushed onto whole-pixel boundaries: leading edges are
+    // rounded up (`ceil`), trailing edges are rounded down (`floor`).
+    let widest_left = radii.top_left.width.max(radii.bottom_left.width);
+    let widest_right = radii.top_right.width.max(radii.bottom_right.width);
+    let tallest_top = radii.top_left.height.max(radii.top_right.height);
+    let tallest_bottom = radii.bottom_left.height.max(radii.bottom_right.height);
+    let left = (k * widest_left).ceil();
+    let right = (rect.width() - k * widest_right).floor();
+    let top = (k * tallest_top).ceil();
+    let bottom = (rect.height() - k * tallest_bottom).floor();
+
+    let fits = left <= right && top <= bottom; // false if any edge is NaN
+    if !fits {
+        return None;
+    }
+    let origin = Point2D::new(rect.min.x + left, rect.min.y + top);
+    Some(Box2D::from_origin_and_size(origin, Size2D::new(right - left, bottom - top)))
+}
+
+/// Computes an axis-aligned rectangle lying inside the clip region that stays
+/// clear of the bounding rectangles of all rounded corners, if one exists.
+pub fn extract_inner_rect_safe<U>(
+    rect: &Box2D<f32, U>,
+    radii: &BorderRadius,
+) -> Option<Box2D<f32, U>> {
+    // `k == 1.0` matches the corner rectangles (`SEGMENT_CORNER_*` in `clip_shared.glsl`).
+    let full_corner_factor = 1.0;
+    extract_inner_rect_impl(rect, radii, full_corner_factor)
+}
+
+#[cfg(test)]
+use euclid::vec3;
+
+#[cfg(test)]
+pub mod test {
+    use super::*;
+    use euclid::default::{Point2D, Size2D, Transform3D};
+    use euclid::{Angle, approxeq::ApproxEq};
+    use std::f32::consts::PI;
+    use crate::clip::{is_left_of_line, polygon_contains_point};
+    use crate::prim_store::PolygonKey;
+    use api::FillRule;
+
+    #[test]
+    fn inverse_project() {
+        let m0 = Transform3D::identity();
+        let p0 = Point2D::new(1.0, 2.0);
+        // an identity transform doesn't need any inverse projection
+        assert_eq!(m0.inverse_project(&p0), Some(p0));
+        let m1 = Transform3D::rotation(0.0, 1.0, 0.0, Angle::radians(-PI / 3.0));
+        // rotation by 60 degrees would imply scaling of X component by a factor of 2
+        assert_eq!(m1.inverse_project(&p0), Some(Point2D::new(2.0, 2.0)));
+    }
+
+    #[test]
+    fn inverse_project_footprint() {
+        let m = Transform3D::new(
+            0.477499992, 0.135000005, -1.0, 0.000624999986,
+            -0.642787635, 0.766044438, 0.0, 0.0,
+            0.766044438, 0.642787635, 0.0, 0.0,
+            1137.10986, 113.71286, 402.0, 0.748749971,
+        );
+        let r = Box2D::from_size(Size2D::new(804.0, 804.0));
+        {
+            let points = &[
+                r.top_left(),
+                r.top_right(),
+                r.bottom_left(),
+                r.bottom_right(),
+            ];
+            let mi = m.inverse().unwrap();
+            // In this section, we do the forward and backward transformation
+            // to confirm that it's bijective.
+            // We also do the inverse projection path, and confirm it functions the same way.
+            info!("Points:");
+            for p in points {
+                let pp = m.transform_point2d_homogeneous(*p);
+                let p3 = pp.to_point3d().unwrap();
+                let pi = mi.transform_point3d_homogeneous(p3);
+                let px = pi.to_point2d().unwrap();
+                let py = m.inverse_project(&pp.to_point2d().unwrap()).unwrap();
+                info!("\t{:?} -> {:?} -> {:?} -> ({:?} -> {:?}, {:?})", p, pp, p3, pi, px, py);
+                assert!(px.approx_eq_eps(p, &Point2D::new(0.001, 0.001)));
+                assert!(py.approx_eq_eps(p, &Point2D::new(0.001, 0.001)));
+            }
+        }
+        // project
+        let rp = project_rect(&m, &r, &Box2D::from_size(Size2D::new(1000.0, 1000.0))).unwrap();
+        info!("Projected {:?}", rp);
+        // one of the points ends up in the negative hemisphere
+        assert_eq!(m.inverse_project(&rp.min), None);
+        // inverse
+        if let Some(ri) = m.inverse_rect_footprint(&rp) {
+            // inverse footprint should be larger, since it doesn't know the original Z
+            assert!(ri.contains_box(&r), "Inverse {:?}", ri);
+        }
+    }
+
+    fn validate_convert(xref: &LayoutTransform) { // transform -> ScaleOffset -> transform round trip
+        let so = ScaleOffset::from_transform(xref).unwrap();
+        let xf = so.to_transform();
+        assert!(xref.approx_eq(&xf));
+    }
+
+    #[test]
+    fn negative_scale_map_unmap() {
+        let xref = LayoutTransform::scale(1.0, -1.0, 1.0)
+                        .pre_translate(LayoutVector3D::new(124.0, 38.0, 0.0));
+        let so = ScaleOffset::from_transform(&xref).unwrap();
+        let local_rect = Box2D {
+            min: LayoutPoint::new(50.0, -100.0),
+            max: LayoutPoint::new(250.0, 300.0),
+        };
+
+        let mapped_rect = so.map_rect::<LayoutPixel, DevicePixel>(&local_rect);
+        let xf_rect = project_rect(
+            &xref,
+            &local_rect,
+            &LayoutRect::max_rect(),
+        ).unwrap();
+
+        assert!(mapped_rect.min.x.approx_eq(&xf_rect.min.x));
+        assert!(mapped_rect.min.y.approx_eq(&xf_rect.min.y));
+        assert!(mapped_rect.max.x.approx_eq(&xf_rect.max.x));
+        assert!(mapped_rect.max.y.approx_eq(&xf_rect.max.y));
+
+        let unmapped_rect = so.unmap_rect::<DevicePixel, LayoutPixel>(&mapped_rect);
+        assert!(unmapped_rect.min.x.approx_eq(&local_rect.min.x));
+        assert!(unmapped_rect.min.y.approx_eq(&local_rect.min.y));
+        assert!(unmapped_rect.max.x.approx_eq(&local_rect.max.x));
+        assert!(unmapped_rect.max.y.approx_eq(&local_rect.max.y));
+    }
+
+    #[test]
+    fn scale_offset_convert() {
+        let xref = LayoutTransform::translation(130.0, 200.0, 0.0);
+        validate_convert(&xref);
+
+        let xref = LayoutTransform::scale(13.0, 8.0, 1.0);
+        validate_convert(&xref);
+
+        let xref = LayoutTransform::scale(0.5, 0.5, 1.0)
+                        .pre_translate(LayoutVector3D::new(124.0, 38.0, 0.0));
+        validate_convert(&xref);
+
+        let xref = LayoutTransform::scale(30.0, 11.0, 1.0)
+            .then_translate(vec3(50.0, 240.0, 0.0));
+        validate_convert(&xref);
+    }
+
+    fn validate_inverse(xref: &LayoutTransform) { // inverse accumulated with itself is ~identity
+        let s0 = ScaleOffset::from_transform(xref).unwrap();
+        let s1 = s0.inverse().accumulate(&s0);
+        assert!((s1.scale.x - 1.0).abs() < NEARLY_ZERO &&
+                (s1.scale.y - 1.0).abs() < NEARLY_ZERO &&
+                s1.offset.x.abs() < NEARLY_ZERO &&
+                s1.offset.y.abs() < NEARLY_ZERO,
+                "{:?}",
+                s1);
+    }
+
+    #[test]
+    fn scale_offset_inverse() {
+        let xref = LayoutTransform::translation(130.0, 200.0, 0.0);
+        validate_inverse(&xref);
+
+        let xref = LayoutTransform::scale(13.0, 8.0, 1.0);
+        validate_inverse(&xref);
+
+        let xref = LayoutTransform::translation(124.0, 38.0, 0.0).
+            then_scale(0.5, 0.5, 1.0);
+
+        validate_inverse(&xref);
+
+        let xref = LayoutTransform::scale(30.0, 11.0, 1.0)
+            .then_translate(vec3(50.0, 240.0, 0.0));
+        validate_inverse(&xref);
+    }
+
+    fn validate_accumulate(x0: &LayoutTransform, x1: &LayoutTransform) { // accumulate vs. `then`
+        let x = x1.then(&x0);
+
+        let s0 = ScaleOffset::from_transform(x0).unwrap();
+        let s1 = ScaleOffset::from_transform(x1).unwrap();
+
+        let s = s0.accumulate(&s1).to_transform();
+
+        assert!(x.approx_eq(&s), "{:?}\n{:?}", x, s);
+    }
+
+    #[test]
+    fn scale_offset_accumulate() {
+        let x0 = LayoutTransform::translation(130.0, 200.0, 0.0);
+        let x1 = LayoutTransform::scale(7.0, 3.0, 1.0);
+
+        validate_accumulate(&x0, &x1);
+    }
+
+    #[test]
+    fn inverse_project_2d_origin() {
+        let mut m = Transform3D::identity();
+        assert_eq!(m.inverse_project_2d_origin(), Some(Point2D::zero()));
+        m.m11 = 0.0;
+        assert_eq!(m.inverse_project_2d_origin(), None);
+        m.m21 = -2.0;
+        m.m22 = 0.0;
+        m.m12 = -0.5;
+        m.m41 = 1.0;
+        m.m42 = 0.5;
+        let origin = m.inverse_project_2d_origin().unwrap();
+        assert_eq!(origin, Point2D::new(1.0, 0.5));
+        assert_eq!(m.transform_point2d(origin), Some(Point2D::zero()));
+    }
+
+    #[test]
+    fn polygon_clip_is_left_of_point() {
+        // Define points of a line through (1, -3) and (-2, 6) to test against.
+        // If the triplet consisting of these two points and the test point
+        // form a counter-clockwise triangle, then the test point is on the
+        // left. The easiest way to visualize this is with an "ascending"
+        // line from low-Y to high-Y.
+        let p0_x = 1.0;
+        let p0_y = -3.0;
+        let p1_x = -2.0;
+        let p1_y = 6.0;
+
+        // Test some points to the left of the line.
+        assert!(is_left_of_line(-9.0, 0.0, p0_x, p0_y, p1_x, p1_y) > 0.0);
+        assert!(is_left_of_line(-1.0, 1.0, p0_x, p0_y, p1_x, p1_y) > 0.0);
+        assert!(is_left_of_line(1.0, -4.0, p0_x, p0_y, p1_x, p1_y) > 0.0);
+
+        // Test some points on the line.
+        assert!(is_left_of_line(-3.0, 9.0, p0_x, p0_y, p1_x, p1_y) == 0.0);
+        assert!(is_left_of_line(0.0, 0.0, p0_x, p0_y, p1_x, p1_y) == 0.0);
+        assert!(is_left_of_line(100.0, -300.0, p0_x, p0_y, p1_x, p1_y) == 0.0);
+
+        // Test some points to the right of the line.
+        assert!(is_left_of_line(0.0, 1.0, p0_x, p0_y, p1_x, p1_y) < 0.0);
+        assert!(is_left_of_line(-4.0, 13.0, p0_x, p0_y, p1_x, p1_y) < 0.0);
+        assert!(is_left_of_line(5.0, -12.0, p0_x, p0_y, p1_x, p1_y) < 0.0);
+    }
+
+    #[test]
+    fn polygon_clip_contains_point() {
+        // We define the points of a self-overlapping polygon, which we will
+        // use to create polygons with different windings and fill rules.
+        let p0 = LayoutPoint::new(4.0, 4.0);
+        let p1 = LayoutPoint::new(6.0, 4.0);
+        let p2 = LayoutPoint::new(4.0, 7.0);
+        let p3 = LayoutPoint::new(2.0, 1.0);
+        let p4 = LayoutPoint::new(8.0, 1.0);
+        let p5 = LayoutPoint::new(6.0, 7.0);
+
+        let poly_clockwise_nonzero = PolygonKey::new(
+            &[p5, p4, p3, p2, p1, p0].to_vec(), FillRule::Nonzero
+        );
+        let poly_clockwise_evenodd = PolygonKey::new(
+            &[p5, p4, p3, p2, p1, p0].to_vec(), FillRule::Evenodd
+        );
+        let poly_counter_clockwise_nonzero = PolygonKey::new(
+            &[p0, p1, p2, p3, p4, p5].to_vec(), FillRule::Nonzero
+        );
+        let poly_counter_clockwise_evenodd = PolygonKey::new(
+            &[p0, p1, p2, p3, p4, p5].to_vec(), FillRule::Evenodd
+        );
+
+        // We define a rect that provides a bounding clip area of
+        // the polygon.
+        let rect = LayoutRect::from_size(LayoutSize::new(10.0, 10.0));
+
+        // And we'll test three points of interest.
+        let p_inside_once = LayoutPoint::new(5.0, 3.0);
+        let p_inside_twice = LayoutPoint::new(5.0, 5.0);
+        let p_outside = LayoutPoint::new(9.0, 9.0);
+
+        // We should get the same results for both clockwise and
+        // counter-clockwise polygons.
+        // For nonzero polygons, the inside twice point is considered inside.
+        for poly_nonzero in vec![poly_clockwise_nonzero, poly_counter_clockwise_nonzero].iter() {
+            assert_eq!(polygon_contains_point(&p_inside_once, &rect, &poly_nonzero), true);
+            assert_eq!(polygon_contains_point(&p_inside_twice, &rect, &poly_nonzero), true);
+            assert_eq!(polygon_contains_point(&p_outside, &rect, &poly_nonzero), false);
+        }
+        // For evenodd polygons, the inside twice point is considered outside.
+        for poly_evenodd in vec![poly_clockwise_evenodd, poly_counter_clockwise_evenodd].iter() {
+            assert_eq!(polygon_contains_point(&p_inside_once, &rect, &poly_evenodd), true);
+            assert_eq!(polygon_contains_point(&p_inside_twice, &rect, &poly_evenodd), false);
+            assert_eq!(polygon_contains_point(&p_outside, &rect, &poly_evenodd), false);
+        }
+    }
+}
+
+/// Implemented by rectangle types that can produce a value standing in for
+/// an "unlimited" bounding box.
+pub trait MaxRect {
+    /// Returns the largest rectangle the implementing type uses to mean
+    /// "no bounds".
+    fn max_rect() -> Self;
+}
+
+impl MaxRect for DeviceIntRect {
+    /// Builds a rect whose origin sits at `i32::MIN / 2` on both axes and
+    /// whose size is `i32::MAX` on both axes.
+    fn max_rect() -> Self {
+        let min_coord = i32::MIN / 2;
+        let extent = i32::MAX;
+
+        let origin = DeviceIntPoint::new(min_coord, min_coord);
+        let size = DeviceIntSize::new(extent, extent);
+        DeviceIntRect::from_origin_and_size(origin, size)
+    }
+}
+
+impl<U> MaxRect for Rect<f32, U> {
+    /// Builds a square rect centred on the origin that spans
+    /// `-MAX_COORD..MAX_COORD` on both axes.
+    fn max_rect() -> Self {
+        // A truly unlimited bounding box works until something casts it to
+        // `i32`, at which point every value at or above 2^31 comes out as
+        // `-2147483648`.
+        //
+        // Clamping to i32::MIN and i32::MAX instead does not help
+        // (presumably because `i32::MAX` is not exactly representable as an
+        // `f32` and rounds up to 2^31 - confirm before relying on this).
+        const MAX_COORD: f32 = 1.0e9;
+
+        let origin = Point2D::new(-MAX_COORD, -MAX_COORD);
+        let size = Size2D::new(2.0 * MAX_COORD, 2.0 * MAX_COORD);
+        Rect::new(origin, size)
+    }
+}
+
+impl<U> MaxRect for Box2D<f32, U> {
+    /// Builds a square box centred on the origin whose corners are at
+    /// `(-MAX_COORD, -MAX_COORD)` and `(MAX_COORD, MAX_COORD)`.
+    fn max_rect() -> Self {
+        // A truly unlimited bounding box works until something casts it to
+        // `i32`, at which point every value at or above 2^31 comes out as
+        // `-2147483648`.
+        //
+        // Clamping to i32::MIN and i32::MAX instead does not help
+        // (presumably because `i32::MAX` is not exactly representable as an
+        // `f32` and rounds up to 2^31 - confirm before relying on this).
+        const MAX_COORD: f32 = 1.0e9;
+
+        let min_corner = Point2D::new(-MAX_COORD, -MAX_COORD);
+        let max_corner = Point2D::new(MAX_COORD, MAX_COORD);
+        Box2D::new(min_corner, max_corner)
+    }
+}
+
+/// An enum that tries to avoid expensive transformation matrix calculations
+/// when possible when dealing with non-perspective axis-aligned transformations.
+///
+/// `Src` and `Dst` are the unit tags of the space being mapped from and the
+/// space being mapped to.
+#[derive(Debug, MallocSizeOf)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum FastTransform<Src, Dst> {
+    /// A simple offset, which can be used without doing any matrix math.
+    Offset(Vector2D<f32, Src>),
+
+    /// A full transformation matrix, stored together with a cached inverse
+    /// (when one is available).
+    Transform {
+        /// The forward matrix, mapping `Src` to `Dst`.
+        transform: Transform3D<f32, Src, Dst>,
+        /// The matrix mapping `Dst` back to `Src`, or `None` when no inverse
+        /// could be obtained for `transform`.
+        inverse: Option<Transform3D<f32, Dst, Src>>,
+        /// Cached flag recording whether the transform is 2D.
+        is_2d: bool,
+    },
+}
+
+// `Clone` and `Copy` are written by hand instead of derived: a derive would
+// add `Clone`/`Copy` bounds on the `Src` and `Dst` unit tags, whereas these
+// impls place no bounds on them at all.
+impl<Src, Dst> Clone for FastTransform<Src, Dst> {
+    fn clone(&self) -> Self {
+        // The type is `Copy`, so cloning is a plain bitwise copy.
+        *self
+    }
+}
+
+impl<Src, Dst> Copy for FastTransform<Src, Dst> { }
+
+impl<Src, Dst> FastTransform<Src, Dst> {
+    /// Returns the identity transform, represented as a zero offset.
+    pub fn identity() -> Self {
+        Self::with_vector(Vector2D::zero())
+    }
+
+    /// Wraps a plain translation.
+    pub fn with_vector(offset: Vector2D<f32, Src>) -> Self {
+        Self::Offset(offset)
+    }
+
+    /// Builds a transform from a `ScaleOffset`. A scale of exactly `(1, 1)`
+    /// collapses to the cheap `Offset` variant; anything else stores the
+    /// matrix together with the matrix of the inverted scale-offset.
+    pub fn with_scale_offset(scale_offset: ScaleOffset) -> Self {
+        let has_unit_scale = scale_offset.scale == Vector2D::new(1.0, 1.0);
+        if has_unit_scale {
+            return FastTransform::Offset(Vector2D::from_untyped(scale_offset.offset));
+        }
+
+        let transform = scale_offset.to_transform();
+        let inverse = Some(scale_offset.inverse().to_transform());
+        FastTransform::Transform { transform, inverse, is_2d: true }
+    }
+
+    /// Builds a transform from a matrix. Simple 2D translations collapse to
+    /// the `Offset` variant; otherwise the inverse and the 2D flag are
+    /// computed once here and cached alongside the matrix.
+    #[inline(always)]
+    pub fn with_transform(transform: Transform3D<f32, Src, Dst>) -> Self {
+        if transform.is_simple_2d_translation() {
+            FastTransform::Offset(Vector2D::new(transform.m41, transform.m42))
+        } else {
+            FastTransform::Transform {
+                inverse: transform.inverse(),
+                is_2d: transform.is_2d(),
+                transform,
+            }
+        }
+    }
+
+    /// Returns the equivalent matrix: borrowed when one is already stored,
+    /// freshly built for the `Offset` variant.
+    pub fn to_transform(&self) -> Cow<Transform3D<f32, Src, Dst>> {
+        match self {
+            FastTransform::Offset(offset) => {
+                let translation = Transform3D::translation(offset.x, offset.y, 0.0);
+                Cow::Owned(translation)
+            }
+            FastTransform::Transform { transform, .. } => Cow::Borrowed(transform),
+        }
+    }
+
+    /// Return true if this is an identity transform
+    #[allow(unused)]
+    pub fn is_identity(&self)-> bool {
+        match self {
+            FastTransform::Offset(offset) => *offset == Vector2D::zero(),
+            FastTransform::Transform { transform, .. } => {
+                *transform == Transform3D::identity()
+            }
+        }
+    }
+
+    /// Composes `self` (mapping `Src` to `Dst`) with `other` (mapping `Dst`
+    /// to `NewDst`), yielding a transform from `Src` to `NewDst`.
+    pub fn then<NewDst>(&self, other: &FastTransform<Dst, NewDst>) -> FastTransform<Src, NewDst> {
+        match (self, other) {
+            // Two offsets simply add up. The unit scale only re-tags the
+            // second offset so both vectors carry the same unit.
+            (FastTransform::Offset(offset), FastTransform::Offset(other_offset)) => {
+                FastTransform::Offset(*offset + *other_offset * Scale::<_, _, Src>::new(1.0))
+            }
+            (
+                FastTransform::Offset(offset),
+                FastTransform::Transform { transform: other_transform, .. },
+            ) => {
+                let combined = other_transform
+                    .with_source::<Src>()
+                    .pre_translate(offset.to_3d());
+                FastTransform::with_transform(combined)
+            }
+            (FastTransform::Transform { transform, .. }, FastTransform::Offset(other_offset)) => {
+                let combined = transform
+                    .then_translate(other_offset.to_3d())
+                    .with_destination::<NewDst>();
+                FastTransform::with_transform(combined)
+            }
+            (
+                FastTransform::Transform { transform, inverse, is_2d },
+                FastTransform::Transform {
+                    transform: other_transform,
+                    inverse: other_inverse,
+                    is_2d: other_is_2d,
+                },
+            ) => {
+                let combined = transform.then(other_transform);
+                // The combined inverse only exists when both halves have one.
+                let combined_inverse = match (inverse, other_inverse) {
+                    (Some(self_inv), Some(other_inv)) => Some(other_inv.then(self_inv)),
+                    _ => None,
+                };
+                FastTransform::Transform {
+                    transform: combined,
+                    inverse: combined_inverse,
+                    is_2d: *is_2d & *other_is_2d,
+                }
+            }
+        }
+    }
+
+    /// Composes the other way around: `other` maps `NewSrc` to `Src` and is
+    /// followed by `self`.
+    pub fn pre_transform<NewSrc>(
+        &self,
+        other: &FastTransform<NewSrc, Src>
+    ) -> FastTransform<NewSrc, Dst> {
+        other.then(self)
+    }
+
+    /// Combines this transform with a translation expressed in `Src` units.
+    pub fn pre_translate(&self, other_offset: Vector2D<f32, Src>) -> Self {
+        match self {
+            FastTransform::Offset(offset) => FastTransform::Offset(*offset + other_offset),
+            FastTransform::Transform { transform, .. } => {
+                let translated = transform.pre_translate(other_offset.to_3d());
+                FastTransform::with_transform(translated)
+            }
+        }
+    }
+
+    /// Combines this transform with a translation expressed in `Dst` units.
+    pub fn then_translate(&self, other_offset: Vector2D<f32, Dst>) -> Self {
+        match self {
+            FastTransform::Offset(offset) => {
+                // Re-tag the `Dst` vector so it can be added to the stored one.
+                let retagged = other_offset * Scale::<_, _, Src>::new(1.0);
+                FastTransform::Offset(*offset + retagged)
+            }
+            FastTransform::Transform { transform, .. } => {
+                FastTransform::with_transform(transform.then_translate(other_offset.to_3d()))
+            }
+        }
+    }
+
+    /// Returns true when the cached inverse has a negative `m33`. Offsets
+    /// and transforms without an inverse always report false.
+    #[inline(always)]
+    pub fn is_backface_visible(&self) -> bool {
+        match self {
+            //TODO: fix this properly by taking "det|M33| * det|M34| > 0"
+            // see https://www.w3.org/Bugs/Public/show_bug.cgi?id=23014
+            FastTransform::Transform { inverse: Some(inverse), .. } => inverse.m33 < 0.0,
+            FastTransform::Offset(..) | FastTransform::Transform { inverse: None, .. } => false,
+        }
+    }
+
+    /// Maps a point from `Src` to `Dst`. For the `Transform` variant the
+    /// result is whatever the matrix's own `transform_point2d` returns.
+    #[inline(always)]
+    pub fn transform_point2d(&self, point: Point2D<f32, Src>) -> Option<Point2D<f32, Dst>> {
+        match self {
+            FastTransform::Offset(offset) => {
+                let moved = point + *offset;
+                Some(Point2D::from_untyped(moved.to_untyped()))
+            }
+            FastTransform::Transform { transform, .. } => transform.transform_point2d(point),
+        }
+    }
+
+    /// Maps a point after first choosing the source `z` whose transformed
+    /// `z` is zero.
+    #[inline(always)]
+    pub fn project_point2d(&self, point: Point2D<f32, Src>) -> Option<Point2D<f32, Dst>> {
+        match self {
+            FastTransform::Offset(..) => self.transform_point2d(point),
+            FastTransform::Transform { transform, .. } => {
+                // The transformed z is
+                //   z' = point.x * m13 + point.y * m23 + z * m33 + m43
+                // so requiring z' = 0 and solving for z gives:
+                let numerator =
+                    point.x * transform.m13 + point.y * transform.m23 + transform.m43;
+                let z = -numerator / transform.m33;
+
+                transform
+                    .transform_point3d(point3(point.x, point.y, z))
+                    .map(|projected| point2(projected.x, projected.y))
+            }
+        }
+    }
+
+    /// Returns the transform mapping `Dst` back to `Src`, or `None` when a
+    /// matrix has no cached inverse.
+    #[inline(always)]
+    pub fn inverse(&self) -> Option<FastTransform<Dst, Src>> {
+        match *self {
+            FastTransform::Offset(offset) => {
+                let negated = Vector2D::new(-offset.x, -offset.y);
+                Some(FastTransform::Offset(negated))
+            }
+            // Swapping the matrix with its cached inverse is all that is
+            // needed; nothing has to be recomputed.
+            FastTransform::Transform { transform, inverse, is_2d } => {
+                inverse.map(|inverted| FastTransform::Transform {
+                    transform: inverted,
+                    inverse: Some(transform),
+                    is_2d,
+                })
+            }
+        }
+    }
+}
+
+impl<Src, Dst> From<Transform3D<f32, Src, Dst>> for FastTransform<Src, Dst> {
+    /// Converts a matrix by delegating to `FastTransform::with_transform`.
+    fn from(transform: Transform3D<f32, Src, Dst>) -> Self {
+        Self::with_transform(transform)
+    }
+}
+
+impl<Src, Dst> From<Vector2D<f32, Src>> for FastTransform<Src, Dst> {
+    /// Converts a translation by delegating to `FastTransform::with_vector`.
+    fn from(vector: Vector2D<f32, Src>) -> Self {
+        Self::with_vector(vector)
+    }
+}
+
+pub type LayoutFastTransform = FastTransform<LayoutPixel, LayoutPixel>;
+pub type LayoutToWorldFastTransform = FastTransform<LayoutPixel, WorldPixel>;
+
+/// Transforms `rect` by `transform` and returns a box built from the
+/// transformed points.
+///
+/// When all four corners transform to a positive homogeneous `w`, the result
+/// is built directly from those corners. Otherwise the rect is first clipped
+/// against the frustum planes derived from `transform` and `bounds`, and the
+/// result is built from the clipped polygons' points.
+///
+/// Returns `None` when the frustum planes cannot be computed or when
+/// clipping leaves nothing behind.
+pub fn project_rect<F, T>(
+    transform: &Transform3D<f32, F, T>,
+    rect: &Box2D<f32, F>,
+    bounds: &Box2D<f32, T>,
+) -> Option<Box2D<f32, T>>
+ where F: fmt::Debug
+{
+    // Transform the four corners, keeping the homogeneous `w` so we can tell
+    // which of them end up on or behind the camera plane.
+    let homogens = [
+        transform.transform_point2d_homogeneous(rect.top_left()),
+        transform.transform_point2d_homogeneous(rect.top_right()),
+        transform.transform_point2d_homogeneous(rect.bottom_left()),
+        transform.transform_point2d_homogeneous(rect.bottom_right()),
+    ];
+
+    // Note: we only do the full frustum collision when the polygon approaches the camera plane.
+    // Otherwise, it will be clamped to the screen bounds anyway.
+    if homogens.iter().any(|h| h.w <= 0.0 || h.w.is_nan()) {
+        let mut clipper = Clipper::new();
+        let polygon = Polygon::from_rect(rect.to_rect().cast().cast_unit(), 1);
+
+        // `bounds` is only consulted on this slow path, as an input to the
+        // frustum plane computation.
+        let planes = match Clipper::<usize>::frustum_planes(
+            &transform.cast_unit().cast(),
+            Some(bounds.to_rect().cast_unit().to_f64()),
+        ) {
+            Ok(planes) => planes,
+            Err(..) => return None,
+        };
+
+        for plane in planes {
+            clipper.add(plane);
+        }
+
+        let results = clipper.clip(polygon);
+        if results.is_empty() {
+            return None
+        }
+
+        Some(Box2D::from_points(results
+            .into_iter()
+            // filter out parts behind the view plane
+            // NOTE(review): nothing is filtered at this step; it only
+            // flattens every clipped polygon into its individual points.
+            .flat_map(|poly| &poly.points)
+            .map(|p| {
+                let mut homo = transform.transform_point2d_homogeneous(p.to_2d().to_f32().cast_unit());
+                homo.w = homo.w.max(0.00000001); // avoid infinite values
+                // `w` was just clamped to a small positive value, so the
+                // conversion is expected to succeed.
+                homo.to_point2d().unwrap()
+            })
+        ))
+    } else {
+        // we just checked for all the points to be in positive hemisphere, so `unwrap` is valid
+        Some(Box2D::from_points(&[
+            homogens[0].to_point2d().unwrap(),
+            homogens[1].to_point2d().unwrap(),
+            homogens[2].to_point2d().unwrap(),
+            homogens[3].to_point2d().unwrap(),
+        ]))
+    }
+}
+
+/// Walks `vec` front to back, calling `filter` on each element. Every
+/// element for which `filter` returns true is removed from the vector and
+/// handed, by value, to `action`; the remaining elements keep their relative
+/// order.
+///
+/// This is a simple stand-in until `Vec::drain_filter` is stable. Once it
+/// is, a call such as:
+///
+/// ```ignore
+/// let filter = |op| {
+///     match *op {
+///         Enum::Foo | Enum::Bar => true,
+///         Enum::Baz => false,
+///     }
+/// };
+/// drain_filter(
+///     &mut ops,
+///     filter,
+///     |op| {
+///         match op {
+///             Enum::Foo => { foo(); }
+///             Enum::Bar => { bar(); }
+///             Enum::Baz => { unreachable!(); }
+///         }
+///     },
+/// );
+/// ```
+///
+/// can be rewritten as:
+///
+/// ```ignore
+/// let filter = |op| {
+///     match *op {
+///         Enum::Foo | Enum::Bar => true,
+///         Enum::Baz => false,
+///     }
+/// };
+/// for op in ops.drain_filter(filter) {
+///     match op {
+///         Enum::Foo => { foo(); }
+///         Enum::Bar => { bar(); }
+///         Enum::Baz => { unreachable!(); }
+///     }
+/// }
+/// ```
+///
+/// See https://doc.rust-lang.org/std/vec/struct.Vec.html#method.drain_filter
+pub fn drain_filter<T, Filter, Action>(
+    vec: &mut Vec<T>,
+    mut filter: Filter,
+    mut action: Action,
+)
+where
+    Filter: FnMut(&mut T) -> bool,
+    Action: FnMut(T)
+{
+    let mut cursor = 0;
+    while cursor < vec.len() {
+        let should_remove = filter(&mut vec[cursor]);
+        if !should_remove {
+            // Keep this element and move on to the next one.
+            cursor += 1;
+            continue;
+        }
+
+        // Removing shifts the tail down by one, so the cursor already points
+        // at the next candidate and must not advance.
+        let removed = vec.remove(cursor);
+        action(removed);
+    }
+}
+
+
+/// Helper for re-using vector allocations, which also counts how often it
+/// had to replace an oversized buffer.
+#[derive(Debug)]
+pub struct Recycler {
+    /// Number of times `recycle_vec` replaced a vector's storage with a
+    /// fresh, smaller allocation.
+    pub num_allocations: usize,
+}
+
+impl Recycler {
+    /// Maximum extra capacity that a recycled vector is allowed to have. If the actual capacity
+    /// is larger, we re-allocate the vector storage with lower capacity.
+    const MAX_EXTRA_CAPACITY_PERCENT: usize = 200;
+    /// Minimum extra capacity to keep when re-allocating the vector storage.
+    const MIN_EXTRA_CAPACITY_PERCENT: usize = 20;
+    /// Minimum sensible vector length to consider for re-allocation.
+    const MIN_VECTOR_LENGTH: usize = 16;
+
+    /// Creates a recycler with its allocation counter at zero.
+    pub fn new() -> Self {
+        Recycler {
+            num_allocations: 0,
+        }
+    }
+
+    /// Clear a vector for re-use, while retaining the backing memory buffer. May shrink the buffer
+    /// if it's currently much larger than was actually used.
+    ///
+    /// The vector is always empty on return. `num_allocations` is bumped
+    /// only when the storage was replaced.
+    pub fn recycle_vec<T>(&mut self, vec: &mut Vec<T>) {
+        let len = vec.len();
+        let unused = vec.capacity() - len;
+
+        // Unused capacity as a percentage of the used length (with the
+        // length floored at MIN_VECTOR_LENGTH). The multiplication
+        // saturates: vectors of zero-sized elements report a capacity of
+        // `usize::MAX`, which would otherwise overflow here.
+        let extra_capacity = unused.saturating_mul(100) / len.max(Self::MIN_VECTOR_LENGTH);
+
+        if extra_capacity > Self::MAX_EXTRA_CAPACITY_PERCENT {
+            // Reduce capacity of the buffer if it is a lot larger than it needs to be. This prevents
+            // a frame with exceptionally large allocations to cause subsequent frames to retain
+            // more memory than they need.
+            //TODO: use `shrink_to` when it's stable
+            *vec = Vec::with_capacity(len + len * Self::MIN_EXTRA_CAPACITY_PERCENT / 100);
+            self.num_allocations += 1;
+        } else {
+            vec.clear();
+        }
+    }
+}
+
+/// Remembers how large a data structure got so that a similar amount can be
+/// preallocated on the next frame, avoiding repeated growth.
+#[derive(Copy, Clone, Debug)]
+pub struct Preallocator {
+    size: usize,
+}
+
+impl Preallocator {
+    /// Creates a preallocator seeded with `initial_size`.
+    pub fn new(initial_size: usize) -> Self {
+        Preallocator { size: initial_size }
+    }
+
+    /// Record the size of a vector to preallocate it the next frame.
+    ///
+    /// A longer vector raises the recorded size immediately; a shorter (or
+    /// equal) one moves it halfway towards the new length.
+    pub fn record_vec<T>(&mut self, vec: &Vec<T>) {
+        let observed = vec.len();
+        self.size = if observed > self.size {
+            observed
+        } else {
+            (self.size + observed) / 2
+        };
+    }
+
+    /// The size that we'll preallocate the vector with.
+    pub fn preallocation_size(&self) -> usize {
+        // Rounding up to a multiple of 16 keeps tiny fluctuations in the
+        // recorded size from triggering reallocations.
+        (self.size + 15) / 16 * 16
+    }
+
+    /// Preallocate vector storage.
+    ///
+    /// The preallocated amount depends on the length recorded in the last
+    /// record_vec call.
+    pub fn preallocate_vec<T>(&self, vec: &mut Vec<T>) {
+        let target = self.preallocation_size();
+        let additional = target.saturating_sub(vec.len());
+        if additional > 0 {
+            vec.reserve(additional);
+        }
+    }
+}
+
+impl Default for Preallocator {
+    /// Starts with no recorded size.
+    fn default() -> Self {
+        Preallocator::new(0)
+    }
+}
+
+/// Arc wrapper to support measurement via MallocSizeOf.
+///
+/// Memory reporting for Arcs is tricky because of the risk of double-counting.
+/// One way to measure them is to keep a table of pointers that have already been
+/// traversed. The other way is to use knowledge of the program structure to
+/// identify which Arc instances should be measured and which should be skipped to
+/// avoid double-counting.
+///
+/// This struct implements the second approach. It identifies the "main" pointer
+/// to the Arc-ed resource, and measures the buffer as if it were an owned pointer.
+/// The programmer should ensure that there is at most one PrimaryArc for a given
+/// underlying ArcInner.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Clone, Debug, Hash, PartialEq, Eq)]
+pub struct PrimaryArc<T>(pub Arc<T>);
+
+/// Dereferences to the wrapped `Arc<T>` (not directly to `T`).
+impl<T> ::std::ops::Deref for PrimaryArc<T> {
+    type Target = Arc<T>;
+
+    #[inline]
+    fn deref(&self) -> &Arc<T> {
+        &self.0
+    }
+}
+
+/// Measures only the allocation the wrapped `Arc` points at, by handing its
+/// base pointer to `ops.size_of_op`.
+impl<T> MallocShallowSizeOf for PrimaryArc<T> {
+    fn shallow_size_of(&self, ops: &mut MallocSizeOfOps) -> usize {
+        // SAFETY (assumption to confirm): this reads the first pointer-sized
+        // word of the `Arc<T>` and treats it as the address of the heap
+        // allocation. That relies on `Arc<T>` being represented as a single
+        // pointer to its allocation, which std does not document as a
+        // guarantee.
+        unsafe {
+            // This is a bit sketchy, but std::sync::Arc doesn't expose the
+            // base pointer.
+            let raw_arc_ptr: *const Arc<T> = &self.0;
+            let raw_ptr_ptr: *const *const c_void = raw_arc_ptr as _;
+            let raw_ptr = *raw_ptr_ptr;
+            (ops.size_of_op)(raw_ptr)
+        }
+    }
+}
+
+/// Full measurement: the shallow size of the allocation plus whatever
+/// `size_of` reports for the wrapped value.
+impl<T: MallocSizeOf> MallocSizeOf for PrimaryArc<T> {
+    fn size_of(&self, ops: &mut MallocSizeOfOps) -> usize {
+        // `**self` goes through `Deref` to the inner `Arc<T>`; the `size_of`
+        // call on it presumably resolves to `T`'s implementation via
+        // auto-deref - confirm against the MallocSizeOf impls in scope.
+        self.shallow_size_of(ops) + (**self).size_of(ops)
+    }
+}
+
+/// Computes the scale factors of this matrix, i.e. the amounts by which its
+/// basis vectors are scaled, returned as `(major, minor)`.
+///
+/// Only the 2D component (`m11`, `m12`, `m21`, `m22`) is considered. A zero
+/// determinant yields `(0.0, 0.0)`.
+///
+/// This code comes from gecko gfx/2d/Matrix.h with the following
+/// modifications:
+///
+/// * Removed `xMajor` parameter.
+/// * All arithmetic is done with double precision.
+pub fn scale_factors<Src, Dst>(
+    mat: &Transform3D<f32, Src, Dst>
+) -> (f32, f32) {
+    // Widen the 2x2 part to f64 before doing any arithmetic on it.
+    let (a, b) = (mat.m11 as f64, mat.m12 as f64);
+    let (c, d) = (mat.m21 as f64, mat.m22 as f64);
+
+    // Determinant of just the 2D component.
+    let det = a * d - b * c;
+    if det == 0.0 {
+        return (0.0, 0.0);
+    }
+
+    // Mirroring only flips the sign, which is irrelevant here.
+    let area = det.abs();
+
+    let major = (a * a + b * b).sqrt();
+    let minor = if major == 0.0 { 0.0 } else { area / major };
+
+    (major as f32, minor as f32)
+}
+
+#[test]
+fn scale_factors_large() {
+    // Regression test for
+    // https://bugzilla.mozilla.org/show_bug.cgi?id=1748499
+    let huge = Transform3D::<f32, (), ()>::new(
+        1.6534229920333123e27, 3.673100922561787e27, 0.0, 0.0,
+        -3.673100922561787e27, 1.6534229920333123e27, 0.0, 0.0,
+        0.0, 0.0, 1.0, 0.0,
+        -828140552192.0, -1771307401216.0, 0.0, 1.0,
+    );
+
+    // Both factors must be normal numbers.
+    let factors = scale_factors(&huge);
+    assert!(factors.0.is_normal());
+    assert!(factors.1.is_normal());
+}
+
+/// Clamp scaling factor to a power of two.
+///
+/// The sign of `val` is ignored. `round_down` selects whether the result is
+/// the power of two at or below the magnitude of `val` (true) or at or
+/// above it (false).
+///
+/// This code comes from gecko gfx/thebes/gfxUtils.cpp with the following
+/// modification:
+///
+/// * logs are taken in base 2 instead of base e.
+pub fn clamp_to_scale_factor(val: f32, round_down: bool) -> f32 {
+    // Arbitrary scale factor limitation. Raising it gives better scaling
+    // performance at the cost of worse quality.
+    const SCALE_RESOLUTION: f32 = 2.0;
+
+    // A negative scale is just a flip, which has no bearing on the
+    // resolution calculation.
+    let magnitude = val.abs();
+
+    // Work with values >= 1 only; scales below 1 are inverted here and
+    // inverted back at the end.
+    let inverse = magnitude < 1.0;
+    let magnitude = if inverse { 1.0 / magnitude } else { magnitude };
+
+    let exact_power = magnitude.log2() / SCALE_RESOLUTION.log2();
+    let nearest = exact_power.round();
+
+    let snapped_power = if (exact_power - nearest).abs() < 1e-5 {
+        // Within 1e-5 of an integer: snap to it so floating point noise
+        // cannot push us to the wrong side.
+        nearest
+    } else if inverse == round_down {
+        // Not inverted and rounding up, or inverted and rounding down.
+        exact_power.ceil()
+    } else {
+        // Exactly one of "inverted" and "rounding down" holds.
+        exact_power.floor()
+    };
+
+    let scale = SCALE_RESOLUTION.powf(snapped_power);
+
+    match inverse {
+        true => 1.0 / scale,
+        false => scale,
+    }
+}
+
+/// Rounds `val` up to the nearest multiple of `mul`. Values that already
+/// are a multiple are returned unchanged.
+pub fn round_up_to_multiple(val: usize, mul: NonZeroUsize) -> usize {
+    let step = mul.get();
+    let remainder = val % step;
+    if remainder == 0 {
+        val
+    } else {
+        val - remainder + step
+    }
+}
+
+
+/// Expands to a `&CStr` for the given string literal.
+///
+/// A trailing NUL byte is appended with `concat!`, so `$lit` must be
+/// something `concat!` accepts (in practice a literal).
+///
+/// NOTE(review): `CStr::from_ptr` looks for the first NUL byte, so a literal
+/// containing an interior `\0` would yield a shorter string than written.
+#[macro_export]
+macro_rules! c_str {
+    ($lit:expr) => {
+        // SAFETY: the pointer comes from a string that `concat!` has just
+        // terminated with "\0", so a NUL byte is always present.
+        unsafe {
+            std::ffi::CStr::from_ptr(concat!($lit, "\0").as_ptr()
+                                     as *const std::os::raw::c_char)
+        }
+    }
+}
+
+/// This is inspired by the `weak-table` crate.
+/// It holds a Vec of weak pointers; entries whose target has been dropped
+/// are garbage collected whenever the Vec runs out of spare capacity.
+pub struct WeakTable {
+    inner: Vec<std::sync::Weak<Vec<u8>>>
+}
+
+impl WeakTable {
+    /// Creates an empty table.
+    pub fn new() -> WeakTable {
+        WeakTable { inner: Vec::new() }
+    }
+
+    /// Adds a weak pointer to the table. When the backing Vec is full,
+    /// expired entries are dropped first and the capacity is then adjusted.
+    pub fn insert(&mut self, x: std::sync::Weak<Vec<u8>>) {
+        let is_full = self.inner.len() == self.inner.capacity();
+        if is_full {
+            self.remove_expired();
+
+            // The capacity must change even when remove_expired() freed some
+            // slots; otherwise we would keep landing back in this branch.
+            let live = self.inner.len();
+            if live * 3 < self.inner.capacity() {
+                // Shrinking uses a different multiple than growing so that
+                // the two cannot accidentally oscillate.
+                self.inner.shrink_to_fit();
+            } else {
+                // Otherwise double our size.
+                self.inner.reserve(live)
+            }
+        }
+        self.inner.push(x);
+    }
+
+    /// Drops every entry that no longer has a strong reference.
+    fn remove_expired(&mut self) {
+        self.inner.retain(|weak| weak.strong_count() != 0)
+    }
+
+    /// Iterates over the entries that can still be upgraded, yielding a
+    /// strong reference for each.
+    pub fn iter(&self) -> impl Iterator<Item = Arc<Vec<u8>>> + '_ {
+        self.inner.iter().filter_map(std::sync::Weak::upgrade)
+    }
+}
+
+#[test]
+fn weak_table() {
+    const TARGET_COUNT: usize = 50;
+
+    let mut table = WeakTable::new();
+
+    // Fill the table with weak handles to allocations that are still alive.
+    let live: Vec<_> = (0..TARGET_COUNT).map(|_| Arc::new(vec![4])).collect();
+    for strong in live.iter() {
+        table.insert(Arc::downgrade(strong))
+    }
+    assert_eq!(table.inner.len(), TARGET_COUNT);
+
+    // With the strong references gone, nothing can be upgraded any more.
+    drop(live);
+    assert_eq!(table.iter().count(), 0);
+
+    // Flooding the table with dead items must make it shrink rather than
+    // keep growing.
+    for _ in 0..TARGET_COUNT * 2 {
+        table.insert(Arc::downgrade(&Arc::new(vec![5])))
+    }
+    assert!(table.inner.capacity() <= 4);
+}
diff --git a/gfx/wr/webrender/src/visibility.rs b/gfx/wr/webrender/src/visibility.rs
new file mode 100644
index 0000000000..fb1fee2944
--- /dev/null
+++ b/gfx/wr/webrender/src/visibility.rs
@@ -0,0 +1,385 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! # Visibility pass
+//!
+//! TODO: document what this pass does!
+//!
+
+use api::{DebugFlags};
+use api::units::*;
+use std::{usize};
+use crate::clip::ClipStore;
+use crate::composite::CompositeState;
+use crate::spatial_tree::{SpatialTree, SpatialNodeIndex};
+use crate::clip::{ClipChainInstance, ClipTree};
+use crate::frame_builder::FrameBuilderConfig;
+use crate::gpu_cache::GpuCache;
+use crate::picture::{PictureCompositeMode, ClusterFlags, SurfaceInfo, TileCacheInstance};
+use crate::picture::{SurfaceIndex, RasterConfig, TileRect, SubSliceIndex};
+use crate::prim_store::{ClipTaskIndex, PictureIndex, PrimitiveInstanceKind};
+use crate::prim_store::{PrimitiveStore, PrimitiveInstance};
+use crate::render_backend::{DataStores, ScratchBuffer};
+use crate::resource_cache::ResourceCache;
+use crate::scene::SceneProperties;
+use crate::space::SpaceMapper;
+use crate::util::{MaxRect};
+
+/// Inputs to the visibility pass that are only read, never modified: shared
+/// references to the spatial tree and scene properties, plus plain values.
+pub struct FrameVisibilityContext<'a> {
+    pub spatial_tree: &'a SpatialTree,
+    pub global_screen_world_rect: WorldRect,
+    pub global_device_pixel_scale: DevicePixelScale,
+    pub debug_flags: DebugFlags,
+    pub scene_properties: &'a SceneProperties,
+    pub config: FrameBuilderConfig,
+    pub root_spatial_node_index: SpatialNodeIndex,
+}
+
+/// Mutable state carried through the visibility pass: exclusive references
+/// to the stores and caches it updates, plus the stack of surfaces that are
+/// currently being traversed.
+pub struct FrameVisibilityState<'a> {
+    pub clip_store: &'a mut ClipStore,
+    pub resource_cache: &'a mut ResourceCache,
+    pub gpu_cache: &'a mut GpuCache,
+    pub scratch: &'a mut ScratchBuffer,
+    pub data_stores: &'a mut DataStores,
+    pub clip_tree: &'a mut ClipTree,
+    pub composite_state: &'a mut CompositeState,
+    /// The off-screen surfaces that are active at the current point of the
+    /// visibility frame traversal, innermost last.
+    pub surface_stack: Vec<(PictureIndex, SurfaceIndex)>,
+}
+
+impl<'a> FrameVisibilityState<'a> {
+    /// Pushes a picture/surface pair onto the active surface stack.
+    pub fn push_surface(
+        &mut self,
+        pic_index: PictureIndex,
+        surface_index: SurfaceIndex,
+    ) {
+        let entry = (pic_index, surface_index);
+        self.surface_stack.push(entry);
+    }
+
+    /// Pops the most recently pushed surface. Panics if the stack is empty.
+    pub fn pop_surface(&mut self) {
+        let _popped = self.surface_stack.pop().unwrap();
+    }
+}
+
+bitflags! {
+    /// A set of bitflags that can be set in the visibility information
+    /// for a primitive instance. This can be used to control how primitives
+    /// are treated during batching.
+    ///
+    /// The flags are stored in a `u8`.
+    // TODO(gw): We should also move `is_compositor_surface` to be part of
+    //           this flags struct.
+    #[cfg_attr(feature = "capture", derive(Serialize))]
+    pub struct PrimitiveVisibilityFlags: u8 {
+        /// Implies that this primitive covers the entire picture cache slice,
+        /// and can thus be dropped during batching and drawn with clear color.
+        /// Occupies the lowest bit.
+        const IS_BACKDROP = 1;
+    }
+}
+
+/// Contains the current state of the primitive's visibility.
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+pub enum VisibilityState {
+    /// Uninitialized - this should never be encountered after prim reset
+    Unset,
+    /// Culled for being off-screen, or not possible to render (e.g. missing image resource).
+    /// This is also the state that `PrimitiveVisibility::reset` assigns.
+    Culled,
+    /// A picture that doesn't have a surface - primitives are composed into the
+    /// parent picture with a surface.
+    PassThrough,
+    /// A primitive that has been found to be visible
+    Visible {
+        /// A set of flags that define how this primitive should be handled
+        /// during batching of visible primitives.
+        vis_flags: PrimitiveVisibilityFlags,
+
+        /// Tiles that this primitive intersects with
+        tile_rect: TileRect,
+
+        /// Sub-slice within the picture cache that this prim exists on
+        sub_slice_index: SubSliceIndex,
+    },
+}
+
+/// Per-primitive visibility record: the clip chain built for the primitive,
+/// its current visibility state, and the location of its clip task(s).
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+pub struct PrimitiveVisibility {
+    /// The clip chain instance that was built for this primitive.
+    pub clip_chain: ClipChainInstance,
+
+    /// Current visibility state of the primitive.
+    // TODO(gw): Move more of the fields from this struct into
+    //           the state enum.
+    pub state: VisibilityState,
+
+    /// Index into the primitive store's clip task instances array.
+    /// `ClipTaskIndex::INVALID` means the primitive has no clip mask.
+    /// Otherwise this may be the offset of the primitive's global clip mask
+    /// task, or of the first entry in a list of clip task ids (one per
+    /// segment).
+    pub clip_task_index: ClipTaskIndex,
+}
+
+impl PrimitiveVisibility {
+    /// Creates a record in the `Unset` state with an empty clip chain and no
+    /// clip task.
+    pub fn new() -> Self {
+        Self {
+            clip_chain: ClipChainInstance::empty(),
+            state: VisibilityState::Unset,
+            clip_task_index: ClipTaskIndex::INVALID,
+        }
+    }
+
+    /// Marks the primitive as culled and forgets its clip task. The clip
+    /// chain is left untouched.
+    pub fn reset(&mut self) {
+        self.clip_task_index = ClipTaskIndex::INVALID;
+        self.state = VisibilityState::Culled;
+    }
+}
+
+/// Update primitive visibility for picture `pic_index`, recursing into child pictures.
+///
+/// All instances are reset first. Instances in clusters flagged `IS_VISIBLE` then
+/// have a clip chain built against the picture's surface (the parent's surface for a
+/// pass-through picture); if one is produced they are passed on to
+/// `tile_cache.update_prim_dependencies`. Pass-through child pictures are instead
+/// marked `VisibilityState::PassThrough`.
+///
+/// Panics if the picture is pass-through and `parent_surface_index` is `None`.
+pub fn update_prim_visibility(
+    pic_index: PictureIndex,
+    parent_surface_index: Option<SurfaceIndex>,
+    world_culling_rect: &WorldRect,
+    store: &PrimitiveStore,
+    prim_instances: &mut [PrimitiveInstance],
+    surfaces: &mut [SurfaceInfo],
+    is_root_tile_cache: bool,
+    frame_context: &FrameVisibilityContext,
+    frame_state: &mut FrameVisibilityState,
+    tile_cache: &mut TileCacheInstance,
+) {
+    let pic = &store.pictures[pic_index.0];
+
+    let (surface_index, pop_surface) = match pic.raster_config {
+        Some(RasterConfig { surface_index, composite_mode: PictureCompositeMode::TileCache { .. }, .. }) => {
+            (surface_index, false)
+        }
+        Some(ref raster_config) => {
+            frame_state.push_surface(
+                pic_index,
+                raster_config.surface_index,
+            );
+
+            let surface_local_rect = surfaces[raster_config.surface_index.0]
+                .unclipped_local_rect
+                .cast_unit();
+
+            // Let the picture cache know that we are pushing an off-screen
+            // surface, so it can treat dependencies of surface atomically.
+            tile_cache.push_surface(
+                surface_local_rect,
+                pic.spatial_node_index,
+                frame_context.spatial_tree,
+            );
+
+            (raster_config.surface_index, true)
+        }
+        None => {
+            (parent_surface_index.expect("bug: pass-through with no parent"), false)
+        }
+    };
+
+    let surface = &surfaces[surface_index.0];
+    let device_pixel_scale = surface.device_pixel_scale;
+    let mut map_local_to_surface = surface.map_local_to_surface.clone();
+    let map_surface_to_world = SpaceMapper::new_with_target(
+        frame_context.root_spatial_node_index,
+        surface.surface_spatial_node_index,
+        frame_context.global_screen_world_rect,
+        frame_context.spatial_tree,
+    );
+
+    for cluster in &pic.prim_list.clusters {
+        profile_scope!("cluster");
+
+        // Each prim instance must have reset called each frame, to clear
+        // indices into various scratch buffers. If this doesn't occur,
+        // the primitive may incorrectly be considered visible, which can
+        // cause unexpected conditions to occur later during the frame.
+        // Primitive instances are normally reset in the main loop below,
+        // but we must also reset them in the rare case that the cluster
+        // visibility has changed (due to an invalid transform and/or
+        // backface visibility changing for this cluster).
+        // TODO(gw): This is difficult to test for in CI - as a follow up,
+        //           we should add a debug flag that validates the prim
+        //           instance is always reset every frame to catch similar
+        //           issues in future.
+        for prim_instance in &mut prim_instances[cluster.prim_range()] {
+            prim_instance.reset();
+        }
+
+        // Get the cluster and see if it is visible
+        if !cluster.flags.contains(ClusterFlags::IS_VISIBLE) {
+            continue;
+        }
+
+        map_local_to_surface.set_target_spatial_node(
+            cluster.spatial_node_index,
+            frame_context.spatial_tree,
+        );
+
+        for prim_instance_index in cluster.prim_range() {
+            if let PrimitiveInstanceKind::Picture { pic_index, .. } = prim_instances[prim_instance_index].kind {
+                if !store.pictures[pic_index.0].is_visible(frame_context.spatial_tree) {
+                    continue;
+                }
+
+                // A picture with no raster config is pass-through: it uses this surface.
+                let is_passthrough = store.pictures[pic_index.0].raster_config.is_none();
+
+                if !is_passthrough {
+                    frame_state.clip_tree.push_clip_root_leaf(
+                        prim_instances[prim_instance_index].clip_leaf_id,
+                    );
+                }
+
+                update_prim_visibility(
+                    pic_index,
+                    Some(surface_index),
+                    world_culling_rect,
+                    store,
+                    prim_instances,
+                    surfaces,
+                    false,
+                    frame_context,
+                    frame_state,
+                    tile_cache,
+                );
+
+                if is_passthrough {
+                    // Pass through pictures are always considered visible in all dirty tiles.
+                    prim_instances[prim_instance_index].vis.state = VisibilityState::PassThrough;
+
+                    continue;
+                } else {
+                    frame_state.clip_tree.pop_clip_root();
+                }
+            }
+
+            let prim_instance = &mut prim_instances[prim_instance_index];
+
+            let local_coverage_rect = frame_state.data_stores.get_local_prim_coverage_rect(
+                prim_instance,
+                &store.pictures,
+                surfaces,
+            );
+
+            frame_state.clip_store.set_active_clips(
+                cluster.spatial_node_index,
+                map_local_to_surface.ref_spatial_node_index,
+                prim_instance.clip_leaf_id,
+                &frame_context.spatial_tree,
+                &frame_state.data_stores.clip,
+                frame_state.clip_tree,
+            );
+
+            let clip_chain = frame_state
+                .clip_store
+                .build_clip_chain_instance(
+                    local_coverage_rect,
+                    &map_local_to_surface,
+                    &map_surface_to_world,
+                    &frame_context.spatial_tree,
+                    frame_state.gpu_cache,
+                    frame_state.resource_cache,
+                    device_pixel_scale,
+                    &world_culling_rect,
+                    &mut frame_state.data_stores.clip,
+                    true,
+                );
+
+            prim_instance.vis.clip_chain = match clip_chain {
+                Some(clip_chain) => clip_chain,
+                None => {
+                    continue;
+                }
+            };
+
+            tile_cache.update_prim_dependencies(
+                prim_instance,
+                cluster.spatial_node_index,
+                // It's OK to pass the local_coverage_rect here as it's only used by primitives
+                // (for compositor surfaces) that don't have inflation anyway.
+                local_coverage_rect,
+                frame_context,
+                frame_state.data_stores,
+                frame_state.clip_store,
+                &store.pictures,
+                frame_state.resource_cache,
+                &store.color_bindings,
+                &frame_state.surface_stack,
+                &mut frame_state.composite_state,
+                &mut frame_state.gpu_cache,
+                &mut frame_state.scratch.primitive,
+                is_root_tile_cache,
+                surfaces,
+            );
+        }
+    }
+
+    if pop_surface {
+        // `pop_surface` is only set when an off-screen surface was pushed above, onto
+        // both the frame state's surface stack and the picture cache stack.
+        frame_state.pop_surface();
+        tile_cache.pop_surface();
+    }
+}
+
+/// Compute a conservative local-space rect covering the visible part of a primitive.
+///
+/// `world_culling_rect` is unmapped into the clip chain's picture space, intersected
+/// with `clip_chain.pic_coverage_rect`, then unmapped into the local space of
+/// `prim_spatial_node_index`.
+///
+/// An empty intersection yields `LayoutRect::zero()`; a failed unmapping falls back
+/// to `clip_chain.local_clip_rect`.
+pub fn compute_conservative_visible_rect(
+    clip_chain: &ClipChainInstance,
+    world_culling_rect: WorldRect,
+    prim_spatial_node_index: SpatialNodeIndex,
+    spatial_tree: &SpatialTree,
+) -> LayoutRect {
+    // Picture space -> world space.
+    let pic_to_world: SpaceMapper<PicturePixel, WorldPixel> = SpaceMapper::new_with_target(
+        spatial_tree.root_reference_frame_index(),
+        clip_chain.pic_spatial_node_index,
+        world_culling_rect,
+        spatial_tree,
+    );
+
+    // Local (primitive) space -> picture space.
+    let local_to_pic: SpaceMapper<LayoutPixel, PicturePixel> = SpaceMapper::new_with_target(
+        clip_chain.pic_spatial_node_index,
+        prim_spatial_node_index,
+        PictureRect::max_rect(),
+        spatial_tree,
+    );
+
+    // Best-effort result if an unmapping fails due to matrix weirdness.
+    let fallback_rect = clip_chain.local_clip_rect;
+
+    match pic_to_world.unmap(&world_culling_rect) {
+        None => fallback_rect,
+        Some(culling_rect_in_pic) => {
+            // The clip chain already accounts for the primitive's local_rect and
+            // local_clip_rect, so an empty intersection means nothing is visible.
+            match culling_rect_in_pic.intersection(&clip_chain.pic_coverage_rect) {
+                None => LayoutRect::zero(),
+                Some(visible_in_pic) => {
+                    local_to_pic.unmap(&visible_in_pic).unwrap_or(fallback_rect)
+                }
+            }
+        }
+    }
+}
diff --git a/gfx/wr/webrender/tests/angle_shader_validation.rs b/gfx/wr/webrender/tests/angle_shader_validation.rs
new file mode 100644
index 0000000000..d6fe618de0
--- /dev/null
+++ b/gfx/wr/webrender/tests/angle_shader_validation.rs
@@ -0,0 +1,75 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+extern crate mozangle;
+extern crate webrender;
+extern crate webrender_build;
+
+use mozangle::shaders::{BuiltInResources, Output, ShaderSpec, ShaderValidator};
+use webrender_build::shader::{ShaderFeatureFlags, ShaderVersion, build_shader_strings, get_shader_features};
+
+// Shader stage identifiers handed to `ShaderValidator::new` (values from glslang).
+const FRAGMENT_SHADER: u32 = 0x8B30;
+const VERTEX_SHADER: u32 = 0x8B31;
+
+/// Validates every GLES shader configuration with the Angle shader validator.
+#[test]
+fn validate_shaders() {
+    mozangle::shaders::initialize().unwrap();
+
+    // One validator per shader stage, both using ShaderSpec::Gles3 and Output::Essl.
+    let resources = BuiltInResources::default();
+    let make_validator = |stage: u32| {
+        ShaderValidator::new(stage, ShaderSpec::Gles3, Output::Essl, &resources).unwrap()
+    };
+    let vs_validator = make_validator(VERTEX_SHADER);
+    let fs_validator = make_validator(FRAGMENT_SHADER);
+
+    for (shader_name, feature_sets) in get_shader_features(ShaderFeatureFlags::GLES) {
+        for feature_set in feature_sets {
+            let features: Vec<_> = feature_set.split(",").filter(|f| !f.is_empty()).collect();
+            let (vs_source, fs_source) = build_shader_strings(
+                ShaderVersion::Gles,
+                &features,
+                shader_name,
+                &|f| webrender::get_unoptimized_shader_source(f, None)
+            );
+            let label = format!("{} {}", shader_name, feature_set);
+            validate(&vs_validator, &label, vs_source);
+            validate(&fs_validator, &label, fs_source);
+        }
+    }
+}
+
+/// Checks a single translated shader stage, panicking (and so failing the test) if:
+/// - the number of `switch` and `default:` occurrences in `source` differ,
+/// - the Angle validator fails to compile `source`, or
+/// - the shader uses more than 16 unpacked varying vectors.
+fn validate(validator: &ShaderValidator, name: &str, source: String) {
+    // Check for each `switch` to have a `default`, see
+    // https://github.com/servo/webrender/wiki/Driver-issues#lack-of-default-case-in-a-switch
+    let switch_count = source.matches("switch").count();
+    let default_count = source.matches("default:").count();
+    assert_eq!(switch_count, default_count,
+        "Shader '{}' doesn't have all `switch` covered with `default` cases", name);
+
+    // Run Angle validator
+    if validator.compile_and_translate(&[&source]).is_err() {
+        panic!("Shader compilation failed: {}\n{}", name, validator.info_log());
+    }
+
+    // Ensure that the shader uses at most 16 varying vectors. This counts the number of
+    // vectors assuming that the driver does not perform additional packing. The spec states
+    // that the driver should pack varyings, however, on some Adreno 3xx devices we have
+    // observed that this is not the case. See bug 1695912.
+    let varying_vectors = validator.get_num_unpacked_varying_vectors();
+    let max_varying_vectors = 16;
+    assert!(
+        varying_vectors <= max_varying_vectors,
+        "Shader {} uses {} varying vectors. Max allowed {}",
+        name, varying_vectors, max_varying_vectors
+    );
+
+    println!("Shader translated succesfully: {}", name);
+}