Diffstat (limited to 'gfx/wr/webrender/src/gpu_cache.rs')
-rw-r--r--  gfx/wr/webrender/src/gpu_cache.rs  937
1 file changed, 937 insertions, 0 deletions
diff --git a/gfx/wr/webrender/src/gpu_cache.rs b/gfx/wr/webrender/src/gpu_cache.rs
new file mode 100644
index 0000000000..2f158aff2c
--- /dev/null
+++ b/gfx/wr/webrender/src/gpu_cache.rs
@@ -0,0 +1,937 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! Overview of the GPU cache.
+//!
+//! The main goal of the GPU cache is to allow on-demand
+//! allocation and construction of GPU resources for the
+//! vertex shaders to consume.
+//!
+//! Every item that wants to be stored in the GPU cache
+//! should create a GpuCacheHandle that is used to refer
+//! to a cached GPU resource. Creating a handle is a
+//! cheap operation that does *not* allocate room in the
+//! cache.
+//!
+//! On any frame when that data is required, the caller
+//! must request that handle via `request`. If the data
+//! is not already in the cache, `request` returns a
+//! `GpuDataRequest` that the caller uses to push the
+//! blocks that build the data.
+//!
+//! After `end_frame` has occurred, callers can
+//! use the `get_address` API to get the allocated
+//! address in the GPU cache of a given resource slot
+//! for this frame.
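+//!
+//! A minimal usage sketch (illustrative only; `gpu_cache` and `handle` are
+//! assumed to be a `GpuCache` and a `GpuCacheHandle` owned by the caller):
+//!
+//! ```ignore
+//! // During frame building, rebuild the data only if it is not
+//! // already resident in the cache.
+//! if let Some(mut request) = gpu_cache.request(&mut handle) {
+//!     // Each pushed value becomes one RGBAF32 block (16 bytes).
+//!     request.push([0.0, 0.0, 1.0, 1.0]);
+//!     request.push(GpuBlockData::EMPTY);
+//! } // The blocks are committed to the texture when `request` drops.
+//!
+//! // After `end_frame`, the resolved address can be fetched.
+//! let address = gpu_cache.get_address(&handle);
+//! ```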
+
+use api::{DebugFlags, DocumentId, PremultipliedColorF};
+#[cfg(test)]
+use api::IdNamespace;
+use api::units::*;
+use euclid::{HomogeneousVector, Box2D};
+use crate::internal_types::{FastHashMap, FastHashSet, FrameStamp, FrameId};
+use crate::profiler::{self, TransactionProfile};
+use crate::prim_store::VECS_PER_SEGMENT;
+use crate::renderer::MAX_VERTEX_TEXTURE_WIDTH;
+use crate::util::VecHelper;
+use std::{u16, u32};
+use std::num::NonZeroU32;
+use std::ops::Add;
+use std::time::{Duration, Instant};
+
+
+/// At the time of this writing, Firefox uses about 15 GPU cache rows on
+/// startup, and then gradually works its way up to the mid-30s with normal
+/// browsing.
+pub const GPU_CACHE_INITIAL_HEIGHT: i32 = 20;
+const NEW_ROWS_PER_RESIZE: i32 = 10;
+
+/// The number of frames an entry can go unused before being evicted.
+const FRAMES_BEFORE_EVICTION: usize = 10;
+
+/// The ratio of utilized blocks to total blocks for which we start the clock
+/// on reclaiming memory.
+const RECLAIM_THRESHOLD: f32 = 0.2;
+
+/// The amount of time utilization must be below the above threshold before we
+/// blow away the cache and rebuild it.
+const RECLAIM_DELAY_S: u64 = 5;
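+// (See `end_frame`, `should_reclaim_memory` and `prepare_for_frames`: once the
+// texture has grown past its initial height and utilization stays below
+// RECLAIM_THRESHOLD for RECLAIM_DELAY_S seconds, the whole cache is cleared
+// and rebuilt.)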
+
+#[derive(Debug, Copy, Clone, Eq, MallocSizeOf, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct Epoch(u32);
+
+impl Epoch {
+ fn next(&mut self) {
+ *self = Epoch(self.0.wrapping_add(1));
+ }
+}
+
+#[derive(Debug, Copy, Clone, MallocSizeOf)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct CacheLocation {
+ block_index: BlockIndex,
+ epoch: Epoch,
+}
+
+/// A single texel in RGBAF32 texture - 16 bytes.
+#[derive(Copy, Clone, Debug, MallocSizeOf)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct GpuBlockData {
+ data: [f32; 4],
+}
+
+impl GpuBlockData {
+ pub const EMPTY: Self = GpuBlockData { data: [0.0; 4] };
+}
+
+/// Conversion helpers for GpuBlockData
+impl From<PremultipliedColorF> for GpuBlockData {
+ fn from(c: PremultipliedColorF) -> Self {
+ GpuBlockData {
+ data: [c.r, c.g, c.b, c.a],
+ }
+ }
+}
+
+impl From<[f32; 4]> for GpuBlockData {
+ fn from(data: [f32; 4]) -> Self {
+ GpuBlockData { data }
+ }
+}
+
+impl<P> From<Box2D<f32, P>> for GpuBlockData {
+ fn from(r: Box2D<f32, P>) -> Self {
+ GpuBlockData {
+ data: [
+ r.min.x,
+ r.min.y,
+ r.max.x,
+ r.max.y,
+ ],
+ }
+ }
+}
+
+impl<P> From<HomogeneousVector<f32, P>> for GpuBlockData {
+ fn from(v: HomogeneousVector<f32, P>) -> Self {
+ GpuBlockData {
+ data: [
+ v.x,
+ v.y,
+ v.z,
+ v.w,
+ ],
+ }
+ }
+}
+
+impl From<TexelRect> for GpuBlockData {
+ fn from(tr: TexelRect) -> Self {
+ GpuBlockData {
+ data: [tr.uv0.x, tr.uv0.y, tr.uv1.x, tr.uv1.y],
+ }
+ }
+}
+
+
+// A handle to a GPU resource.
+#[derive(Debug, Copy, Clone, MallocSizeOf)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct GpuCacheHandle {
+ location: Option<CacheLocation>,
+}
+
+impl GpuCacheHandle {
+ pub fn new() -> Self {
+ GpuCacheHandle { location: None }
+ }
+
+ pub fn as_int(self, gpu_cache: &GpuCache) -> i32 {
+ gpu_cache.get_address(&self).as_int()
+ }
+}
+
+// A unique address in the GPU cache. These are uploaded
+// as part of the primitive instances, to allow the vertex
+// shader to fetch the specific data.
+#[derive(Copy, Debug, Clone, MallocSizeOf, Eq, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct GpuCacheAddress {
+ pub u: u16,
+ pub v: u16,
+}
+
+impl GpuCacheAddress {
+ fn new(u: usize, v: usize) -> Self {
+ GpuCacheAddress {
+ u: u as u16,
+ v: v as u16,
+ }
+ }
+
+ pub const INVALID: GpuCacheAddress = GpuCacheAddress {
+ u: u16::MAX,
+ v: u16::MAX,
+ };
+
+ pub fn as_int(self) -> i32 {
+ // TODO(gw): Temporarily encode GPU Cache addresses as a single int.
+ // In the future, we can change the PrimitiveInstanceData struct
+ // to use 2x u16 for the vertex attribute instead of an i32.
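+ //
+ // For example, with MAX_VERTEX_TEXTURE_WIDTH == 1024, the address
+ // (u: 4, v: 2) encodes as 2 * 1024 + 4 = 2052.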
+ self.v as i32 * MAX_VERTEX_TEXTURE_WIDTH as i32 + self.u as i32
+ }
+}
+
+impl Add<usize> for GpuCacheAddress {
+ type Output = GpuCacheAddress;
+
+ fn add(self, other: usize) -> GpuCacheAddress {
+ GpuCacheAddress {
+ u: self.u + other as u16,
+ v: self.v,
+ }
+ }
+}
+
+// An entry in a free-list of blocks in the GPU cache.
+#[derive(Debug, MallocSizeOf)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct Block {
+ // The location in the cache of this block.
+ address: GpuCacheAddress,
+ // The current epoch (generation) of this block.
+ epoch: Epoch,
+ // Index of the next free block in the list it
+ // belongs to (either a free-list or the
+ // occupied list).
+ next: Option<BlockIndex>,
+ // The last frame this block was referenced.
+ last_access_time: FrameId,
+}
+
+impl Block {
+ fn new(
+ address: GpuCacheAddress,
+ next: Option<BlockIndex>,
+ frame_id: FrameId,
+ epoch: Epoch,
+ ) -> Self {
+ Block {
+ address,
+ next,
+ last_access_time: frame_id,
+ epoch,
+ }
+ }
+
+ fn advance_epoch(&mut self, max_epoch: &mut Epoch) {
+ self.epoch.next();
+ if max_epoch.0 < self.epoch.0 {
+ max_epoch.0 = self.epoch.0;
+ }
+ }
+
+ /// Creates an invalid dummy block ID.
+ pub const INVALID: Block = Block {
+ address: GpuCacheAddress { u: 0, v: 0 },
+ epoch: Epoch(0),
+ next: None,
+ last_access_time: FrameId::INVALID,
+ };
+}
+
+/// Represents the index of a Block in the block array. We only create such
+/// structs for blocks that represent the start of a chunk.
+///
+/// Because we use Option<BlockIndex> in a lot of places, we use a NonZeroU32
+/// here and avoid ever using the index zero.
+#[derive(Debug, Copy, Clone, MallocSizeOf)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct BlockIndex(NonZeroU32);
+
+impl BlockIndex {
+ fn new(idx: usize) -> Self {
+ debug_assert!(idx <= u32::MAX as usize);
+ BlockIndex(NonZeroU32::new(idx as u32).expect("Index zero forbidden"))
+ }
+
+ fn get(&self) -> usize {
+ self.0.get() as usize
+ }
+}
+
+// A row in the cache texture.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+struct Row {
+ // The fixed size of blocks that this row supports.
+ // Each row becomes a slab allocator for a fixed block size.
+ // This means no dealing with fragmentation within a cache
+ // row as items are allocated and freed.
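+ // For example, a row whose items are 4 blocks each holds
+ // MAX_VERTEX_TEXTURE_WIDTH / 4 = 256 items.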
+ block_count_per_item: usize,
+}
+
+impl Row {
+ fn new(block_count_per_item: usize) -> Self {
+ Row {
+ block_count_per_item,
+ }
+ }
+}
+
+// A list of update operations that can be applied on the cache
+// this frame. The list of updates is created by the render backend
+// during frame construction. It's passed to the render thread
+// where GL commands can be applied.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+pub enum GpuCacheUpdate {
+ Copy {
+ block_index: usize,
+ block_count: usize,
+ address: GpuCacheAddress,
+ },
+}
+
+/// Command to inform the debug display in the renderer when chunks are allocated
+/// or freed.
+#[derive(MallocSizeOf)]
+pub enum GpuCacheDebugCmd {
+ /// Describes an allocated chunk.
+ Alloc(GpuCacheDebugChunk),
+ /// Describes a freed chunk.
+ Free(GpuCacheAddress),
+}
+
+#[derive(Clone, MallocSizeOf)]
+pub struct GpuCacheDebugChunk {
+ pub address: GpuCacheAddress,
+ pub size: usize,
+}
+
+#[must_use]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+pub struct GpuCacheUpdateList {
+ /// The frame the current update list was generated from.
+ pub frame_id: FrameId,
+ /// Whether the texture should be cleared before updates
+ /// are applied.
+ pub clear: bool,
+ /// The current height of the texture. The render thread
+ /// should resize the texture if required.
+ pub height: i32,
+ /// List of updates to apply.
+ pub updates: Vec<GpuCacheUpdate>,
+ /// A flat list of GPU blocks that are pending upload
+ /// to GPU memory.
+ pub blocks: Vec<GpuBlockData>,
+ /// Debug commands describing allocated and freed GPU blocks, for the debug display.
+ #[cfg_attr(feature = "serde", serde(skip))]
+ pub debug_commands: Vec<GpuCacheDebugCmd>,
+}
+
+// Holds the free lists of fixed size blocks. Mostly
+// just serves to work around the borrow checker.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+struct FreeBlockLists {
+ free_list_1: Option<BlockIndex>,
+ free_list_2: Option<BlockIndex>,
+ free_list_4: Option<BlockIndex>,
+ free_list_8: Option<BlockIndex>,
+ free_list_16: Option<BlockIndex>,
+ free_list_32: Option<BlockIndex>,
+ free_list_64: Option<BlockIndex>,
+ free_list_128: Option<BlockIndex>,
+ free_list_256: Option<BlockIndex>,
+ free_list_341: Option<BlockIndex>,
+ free_list_512: Option<BlockIndex>,
+ free_list_1024: Option<BlockIndex>,
+}
+
+impl FreeBlockLists {
+ fn new() -> Self {
+ FreeBlockLists {
+ free_list_1: None,
+ free_list_2: None,
+ free_list_4: None,
+ free_list_8: None,
+ free_list_16: None,
+ free_list_32: None,
+ free_list_64: None,
+ free_list_128: None,
+ free_list_256: None,
+ free_list_341: None,
+ free_list_512: None,
+ free_list_1024: None,
+ }
+ }
+
+ fn get_actual_block_count_and_free_list(
+ &mut self,
+ block_count: usize,
+ ) -> (usize, &mut Option<BlockIndex>) {
+ // Find the appropriate free list to use based on the block size.
+ //
+ // Note that we cheat a bit with the 341 bucket, since it's not quite
+ // a divisor of 1024, because purecss-francine allocates many 260-block
+ // chunks, and there's no reason we shouldn't pack these three to a row.
+ // This means the allocation statistics will under-report by one block
+ // for each row using 341-block buckets, which is fine.
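+ //
+ // (Three 341-block items occupy 3 * 341 = 1023 of a row's 1024 blocks,
+ // which is where the one-block under-report per row comes from.)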
+ debug_assert_eq!(MAX_VERTEX_TEXTURE_WIDTH, 1024, "Need to update bucketing");
+ match block_count {
+ 0 => panic!("Can't allocate zero sized blocks!"),
+ 1 => (1, &mut self.free_list_1),
+ 2 => (2, &mut self.free_list_2),
+ 3..=4 => (4, &mut self.free_list_4),
+ 5..=8 => (8, &mut self.free_list_8),
+ 9..=16 => (16, &mut self.free_list_16),
+ 17..=32 => (32, &mut self.free_list_32),
+ 33..=64 => (64, &mut self.free_list_64),
+ 65..=128 => (128, &mut self.free_list_128),
+ 129..=256 => (256, &mut self.free_list_256),
+ 257..=341 => (341, &mut self.free_list_341),
+ 342..=512 => (512, &mut self.free_list_512),
+ 513..=1024 => (1024, &mut self.free_list_1024),
+ _ => panic!("Can't allocate > MAX_VERTEX_TEXTURE_WIDTH per resource!"),
+ }
+ }
+}
+
+// CPU-side representation of the GPU resource cache texture.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+struct Texture {
+ // Current texture height
+ height: i32,
+ // All blocks that have been created for this texture
+ blocks: Vec<Block>,
+ // Metadata about each allocated row.
+ rows: Vec<Row>,
+ // The base Epoch for this texture.
+ base_epoch: Epoch,
+ // The maximum epoch reached. We track this along with the above so
+ // that we can rebuild the Texture and avoid collisions with handles
+ // allocated for the old texture.
+ max_epoch: Epoch,
+ // Free lists of available blocks for each supported
+ // block size in the texture. These are intrusive
+ // linked lists.
+ free_lists: FreeBlockLists,
+ // Linked list of currently occupied blocks. This
+ // makes it faster to iterate blocks looking for
+ // candidates to be evicted from the cache.
+ occupied_list_heads: FastHashMap<DocumentId, BlockIndex>,
+ // Pending blocks that have been written this frame
+ // and will need to be sent to the GPU.
+ pending_blocks: Vec<GpuBlockData>,
+ // Pending update commands.
+ updates: Vec<GpuCacheUpdate>,
+ // Profile stats
+ allocated_block_count: usize,
+ // The stamp at which we first reached our threshold for reclaiming `GpuCache`
+ // memory, or `None` if the threshold hasn't been reached.
+ #[cfg_attr(feature = "serde", serde(skip))]
+ reached_reclaim_threshold: Option<Instant>,
+ // List of debug commands to be sent to the renderer when the GPU cache
+ // debug display is enabled.
+ #[cfg_attr(feature = "serde", serde(skip))]
+ debug_commands: Vec<GpuCacheDebugCmd>,
+ // The current debug flags for the system.
+ debug_flags: DebugFlags,
+}
+
+impl Texture {
+ fn new(base_epoch: Epoch, debug_flags: DebugFlags) -> Self {
+ // Pre-fill the block array with one invalid block so that we never use
+ // 0 for a BlockIndex. This lets us use NonZeroU32 for BlockIndex, which
+ // saves memory.
+ let blocks = vec![Block::INVALID];
+
+ Texture {
+ height: GPU_CACHE_INITIAL_HEIGHT,
+ blocks,
+ rows: Vec::new(),
+ base_epoch,
+ max_epoch: base_epoch,
+ free_lists: FreeBlockLists::new(),
+ pending_blocks: Vec::new(),
+ updates: Vec::new(),
+ occupied_list_heads: FastHashMap::default(),
+ allocated_block_count: 0,
+ reached_reclaim_threshold: None,
+ debug_commands: Vec::new(),
+ debug_flags,
+ }
+ }
+
+ // Push new data into the cache. The `pending_block_index` parameter gives
+ // the position where the data was pushed into the texture's `pending_blocks`
+ // array. Returns the allocated address for this data.
+ fn push_data(
+ &mut self,
+ pending_block_index: Option<usize>,
+ block_count: usize,
+ frame_stamp: FrameStamp
+ ) -> CacheLocation {
+ debug_assert!(frame_stamp.is_valid());
+ // Find the appropriate free list to use based on the block size.
+ let (alloc_size, free_list) = self.free_lists
+ .get_actual_block_count_and_free_list(block_count);
+
+ // See if we need a new row (if free-list has nothing available)
+ if free_list.is_none() {
+ if self.rows.len() as i32 == self.height {
+ self.height += NEW_ROWS_PER_RESIZE;
+ }
+
+ // Create a new row.
+ let items_per_row = MAX_VERTEX_TEXTURE_WIDTH / alloc_size;
+ let row_index = self.rows.len();
+ self.rows.push(Row::new(alloc_size));
+
+ // Create a `Block` for each possible allocation address
+ // in this row, and link it in to the free-list for this
+ // block size.
+ let mut prev_block_index = None;
+ for i in 0 .. items_per_row {
+ let address = GpuCacheAddress::new(i * alloc_size, row_index);
+ let block_index = BlockIndex::new(self.blocks.len());
+ let block = Block::new(address, prev_block_index, frame_stamp.frame_id(), self.base_epoch);
+ self.blocks.push(block);
+ prev_block_index = Some(block_index);
+ }
+
+ *free_list = prev_block_index;
+ }
+
+ // Given the code above, it's now guaranteed that there is a block
+ // available in the appropriate free-list. Pull a block from the
+ // head of the list.
+ let free_block_index = free_list.take().unwrap();
+ let block = &mut self.blocks[free_block_index.get()];
+ *free_list = block.next;
+
+ // Add the block to the occupied linked list.
+ block.next = self.occupied_list_heads.get(&frame_stamp.document_id()).cloned();
+ block.last_access_time = frame_stamp.frame_id();
+ self.occupied_list_heads.insert(frame_stamp.document_id(), free_block_index);
+ self.allocated_block_count += alloc_size;
+
+ if let Some(pending_block_index) = pending_block_index {
+ // Add this update to the pending list of blocks that need
+ // to be updated on the GPU.
+ self.updates.push(GpuCacheUpdate::Copy {
+ block_index: pending_block_index,
+ block_count,
+ address: block.address,
+ });
+ }
+
+ // If we're using the debug display, communicate the allocation to the
+ // renderer thread. Note that we do this regardless of whether or not
+ // pending_block_index is None (if it is, the renderer thread will fill
+ // in the data via a deferred resolve, but the block is still considered
+ // allocated).
+ if self.debug_flags.contains(DebugFlags::GPU_CACHE_DBG) {
+ self.debug_commands.push(GpuCacheDebugCmd::Alloc(GpuCacheDebugChunk {
+ address: block.address,
+ size: block_count,
+ }));
+ }
+
+ CacheLocation {
+ block_index: free_block_index,
+ epoch: block.epoch,
+ }
+ }
+
+ // Run through the list of occupied cache blocks and evict
+ // any old blocks that haven't been referenced for a while.
+ fn evict_old_blocks(&mut self, frame_stamp: FrameStamp) {
+ debug_assert!(frame_stamp.is_valid());
+ // Prune any old items from the list to make room.
+ // Traverse the occupied linked list and see
+ // which items have not been used for a long time.
+ let mut current_block = self.occupied_list_heads.get(&frame_stamp.document_id()).map(|x| *x);
+ let mut prev_block: Option<BlockIndex> = None;
+
+ while let Some(index) = current_block {
+ let (next_block, should_unlink) = {
+ let block = &mut self.blocks[index.get()];
+
+ let next_block = block.next;
+ let mut should_unlink = false;
+
+ // If this resource has not been used in the last
+ // few frames, free it from the texture and mark
+ // as empty.
+ if block.last_access_time + FRAMES_BEFORE_EVICTION < frame_stamp.frame_id() {
+ should_unlink = true;
+
+ // Get the row metadata from the address.
+ let row = &mut self.rows[block.address.v as usize];
+
+ // Use the row metadata to determine which free-list
+ // this block belongs to.
+ let (_, free_list) = self.free_lists
+ .get_actual_block_count_and_free_list(row.block_count_per_item);
+
+ block.advance_epoch(&mut self.max_epoch);
+ block.next = *free_list;
+ *free_list = Some(index);
+
+ self.allocated_block_count -= row.block_count_per_item;
+
+ if self.debug_flags.contains(DebugFlags::GPU_CACHE_DBG) {
+ let cmd = GpuCacheDebugCmd::Free(block.address);
+ self.debug_commands.push(cmd);
+ }
+ };
+
+ (next_block, should_unlink)
+ };
+
+ // If the block was released, we will need to remove it
+ // from the occupied linked list.
+ if should_unlink {
+ match prev_block {
+ Some(prev_block) => {
+ self.blocks[prev_block.get()].next = next_block;
+ }
+ None => {
+ match next_block {
+ Some(next_block) => {
+ self.occupied_list_heads.insert(frame_stamp.document_id(), next_block);
+ }
+ None => {
+ self.occupied_list_heads.remove(&frame_stamp.document_id());
+ }
+ }
+ }
+ }
+ } else {
+ prev_block = current_block;
+ }
+
+ current_block = next_block;
+ }
+ }
+
+ /// Returns the ratio of utilized blocks.
+ fn utilization(&self) -> f32 {
+ let total_blocks = self.rows.len() * MAX_VERTEX_TEXTURE_WIDTH;
+ debug_assert!(total_blocks > 0);
+ let ratio = self.allocated_block_count as f32 / total_blocks as f32;
+ debug_assert!(0.0 <= ratio && ratio <= 1.0, "Bad ratio: {}", ratio);
+ ratio
+ }
+}
+
+
+/// A wrapper object for GPU data requests;
+/// it works as a container that can only grow.
+#[must_use]
+pub struct GpuDataRequest<'a> {
+ //TODO: remove this, see
+ // https://bugzilla.mozilla.org/show_bug.cgi?id=1690546
+ #[allow(dead_code)]
+ handle: &'a mut GpuCacheHandle,
+ frame_stamp: FrameStamp,
+ start_index: usize,
+ max_block_count: usize,
+ texture: &'a mut Texture,
+}
+
+impl<'a> GpuDataRequest<'a> {
+ pub fn push<B>(&mut self, block: B)
+ where
+ B: Into<GpuBlockData>,
+ {
+ self.texture.pending_blocks.push(block.into());
+ }
+
+ // Write the GPU cache data for an individual segment.
+ pub fn write_segment(
+ &mut self,
+ local_rect: LayoutRect,
+ extra_data: [f32; 4],
+ ) {
+ let _ = VECS_PER_SEGMENT;
+ self.push(local_rect);
+ self.push(extra_data);
+ }
+
+ pub fn current_used_block_num(&self) -> usize {
+ self.texture.pending_blocks.len() - self.start_index
+ }
+}
+
+impl<'a> Drop for GpuDataRequest<'a> {
+ fn drop(&mut self) {
+ // Push the data to the texture pending updates list.
+ let block_count = self.current_used_block_num();
+ debug_assert!(block_count <= self.max_block_count);
+
+ let location = self.texture
+ .push_data(Some(self.start_index), block_count, self.frame_stamp);
+ self.handle.location = Some(location);
+ }
+}
+
+
+/// The main LRU cache interface.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+pub struct GpuCache {
+ /// Current FrameStamp.
+ now: FrameStamp,
+ /// CPU-side texture allocator.
+ texture: Texture,
+ /// Number of blocks requested this frame that don't
+ /// need to be re-uploaded.
+ saved_block_count: usize,
+ /// The current debug flags for the system.
+ debug_flags: DebugFlags,
+ /// Whether there is a pending clear to send with the
+ /// next update.
+ pending_clear: bool,
+ /// Indicates that prepare_for_frames has been called for this group of frames.
+ /// Used for sanity checks.
+ prepared_for_frames: bool,
+ /// This indicates that we performed a cleanup operation which requires all
+ /// documents to build a frame.
+ requires_frame_build: bool,
+ /// The set of documents which have had frames built in this update. Used for
+ /// sanity checks.
+ document_frames_to_build: FastHashSet<DocumentId>,
+}
+
+impl GpuCache {
+ pub fn new() -> Self {
+ let debug_flags = DebugFlags::empty();
+ GpuCache {
+ now: FrameStamp::INVALID,
+ texture: Texture::new(Epoch(0), debug_flags),
+ saved_block_count: 0,
+ debug_flags,
+ pending_clear: false,
+ prepared_for_frames: false,
+ requires_frame_build: false,
+ document_frames_to_build: FastHashSet::default(),
+ }
+ }
+
+ /// Creates a GpuCache and sets it up with a valid `FrameStamp`, which
+ /// is useful for avoiding panics when instantiating the `GpuCache`
+ /// directly from unit test code.
+ #[cfg(test)]
+ pub fn new_for_testing() -> Self {
+ let mut cache = Self::new();
+ let mut now = FrameStamp::first(DocumentId::new(IdNamespace(1), 1));
+ now.advance();
+ cache.prepared_for_frames = true;
+ cache.begin_frame(now);
+ cache
+ }
+
+ /// Drops everything in the GPU cache. Must not be called once gpu cache entries
+ /// for the next frame have already been requested.
+ pub fn clear(&mut self) {
+ assert!(self.texture.updates.is_empty(), "Clearing with pending updates");
+ let mut next_base_epoch = self.texture.max_epoch;
+ next_base_epoch.next();
+ self.texture = Texture::new(next_base_epoch, self.debug_flags);
+ self.saved_block_count = 0;
+ self.pending_clear = true;
+ self.requires_frame_build = true;
+ }
+
+ pub fn requires_frame_build(&self) -> bool {
+ self.requires_frame_build
+ }
+
+ pub fn prepare_for_frames(&mut self) {
+ self.prepared_for_frames = true;
+ if self.should_reclaim_memory() {
+ self.clear();
+ debug_assert!(self.document_frames_to_build.is_empty());
+ for &document_id in self.texture.occupied_list_heads.keys() {
+ self.document_frames_to_build.insert(document_id);
+ }
+ }
+ }
+
+ pub fn bookkeep_after_frames(&mut self) {
+ assert!(self.document_frames_to_build.is_empty());
+ assert!(self.prepared_for_frames);
+ self.requires_frame_build = false;
+ self.prepared_for_frames = false;
+ }
+
+ /// Begin a new frame.
+ pub fn begin_frame(&mut self, stamp: FrameStamp) {
+ debug_assert!(self.texture.pending_blocks.is_empty());
+ assert!(self.prepared_for_frames);
+ profile_scope!("begin_frame");
+ self.now = stamp;
+ self.texture.evict_old_blocks(self.now);
+ self.saved_block_count = 0;
+ }
+
+ // Invalidate a (possibly) existing block in the cache.
+ // This means the next call to request() for this location
+ // will rebuild the data and upload it to the GPU.
+ pub fn invalidate(&mut self, handle: &GpuCacheHandle) {
+ if let Some(ref location) = handle.location {
+ // don't invalidate blocks that are already re-assigned
+ if let Some(block) = self.texture.blocks.get_mut(location.block_index.get()) {
+ if block.epoch == location.epoch {
+ block.advance_epoch(&mut self.texture.max_epoch);
+ }
+ }
+ }
+ }
+
+ /// Request a resource be added to the cache. If the resource
+ /// is already in the cache, `None` will be returned.
+ pub fn request<'a>(&'a mut self, handle: &'a mut GpuCacheHandle) -> Option<GpuDataRequest<'a>> {
+ let mut max_block_count = MAX_VERTEX_TEXTURE_WIDTH;
+ // Check if the allocation for this handle is still valid.
+ if let Some(ref location) = handle.location {
+ if let Some(block) = self.texture.blocks.get_mut(location.block_index.get()) {
+ if block.epoch == location.epoch {
+ max_block_count = self.texture.rows[block.address.v as usize].block_count_per_item;
+ if block.last_access_time != self.now.frame_id() {
+ // Mark last access time to avoid evicting this block.
+ block.last_access_time = self.now.frame_id();
+ self.saved_block_count += max_block_count;
+ }
+ return None;
+ }
+ }
+ }
+
+ debug_assert!(self.now.is_valid());
+ Some(GpuDataRequest {
+ handle,
+ frame_stamp: self.now,
+ start_index: self.texture.pending_blocks.len(),
+ texture: &mut self.texture,
+ max_block_count,
+ })
+ }
+
+ // Push an array of data blocks to be uploaded to the GPU
+ // unconditionally for this frame. The cache handle will
+ // assert if the caller tries to retrieve the address
+ // of this handle on a subsequent frame. This is typically
+ // used for uploading data that changes every frame, and
+ // therefore makes no sense to try and cache.
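+ //
+ // Illustrative call (the handle and block values are placeholders):
+ //
+ //     let handle = gpu_cache.push_per_frame_blocks(&[GpuBlockData::EMPTY; 2]);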
+ pub fn push_per_frame_blocks(&mut self, blocks: &[GpuBlockData]) -> GpuCacheHandle {
+ let start_index = self.texture.pending_blocks.len();
+ self.texture.pending_blocks.extend_from_slice(blocks);
+ let location = self.texture
+ .push_data(Some(start_index), blocks.len(), self.now);
+ GpuCacheHandle {
+ location: Some(location),
+ }
+ }
+
+ // Reserve space in the cache for per-frame blocks that
+ // will be resolved by the render thread via the
+ // external image callback.
+ pub fn push_deferred_per_frame_blocks(&mut self, block_count: usize) -> GpuCacheHandle {
+ let location = self.texture.push_data(None, block_count, self.now);
+ GpuCacheHandle {
+ location: Some(location),
+ }
+ }
+
+ /// End the frame. Return the list of updates to apply to the
+ /// device specific cache texture.
+ pub fn end_frame(
+ &mut self,
+ profile: &mut TransactionProfile,
+ ) -> FrameStamp {
+ profile_scope!("end_frame");
+ profile.set(profiler::GPU_CACHE_ROWS_TOTAL, self.texture.rows.len());
+ profile.set(profiler::GPU_CACHE_BLOCKS_TOTAL, self.texture.allocated_block_count);
+ profile.set(profiler::GPU_CACHE_BLOCKS_SAVED, self.saved_block_count);
+
+ let reached_threshold =
+ self.texture.rows.len() > (GPU_CACHE_INITIAL_HEIGHT as usize) &&
+ self.texture.utilization() < RECLAIM_THRESHOLD;
+ if reached_threshold {
+ self.texture.reached_reclaim_threshold.get_or_insert_with(Instant::now);
+ } else {
+ self.texture.reached_reclaim_threshold = None;
+ }
+
+ self.document_frames_to_build.remove(&self.now.document_id());
+ self.now
+ }
+
+ /// Returns true if utilization has been low enough for long enough that we
+ /// should blow the cache away and rebuild it.
+ pub fn should_reclaim_memory(&self) -> bool {
+ self.texture.reached_reclaim_threshold
+ .map_or(false, |t| t.elapsed() > Duration::from_secs(RECLAIM_DELAY_S))
+ }
+
+ /// Extract the pending updates from the cache.
+ pub fn extract_updates(&mut self) -> GpuCacheUpdateList {
+ let clear = self.pending_clear;
+ self.pending_clear = false;
+ GpuCacheUpdateList {
+ frame_id: self.now.frame_id(),
+ clear,
+ height: self.texture.height,
+ debug_commands: self.texture.debug_commands.take_and_preallocate(),
+ updates: self.texture.updates.take_and_preallocate(),
+ blocks: self.texture.pending_blocks.take_and_preallocate(),
+ }
+ }
+
+ /// Sets the current debug flags for the system.
+ pub fn set_debug_flags(&mut self, flags: DebugFlags) {
+ self.debug_flags = flags;
+ self.texture.debug_flags = flags;
+ }
+
+ /// Get the actual GPU address in the texture for a given slot ID.
+ /// It's assumed at this point that the given slot has been requested
+ /// and built for this frame. Attempting to get the address for a
+ /// freed or pending slot will panic!
+ pub fn get_address(&self, id: &GpuCacheHandle) -> GpuCacheAddress {
+ let location = id.location.expect("handle not requested or allocated!");
+ let block = &self.texture.blocks[location.block_index.get()];
+ debug_assert_eq!(block.epoch, location.epoch);
+ debug_assert_eq!(block.last_access_time, self.now.frame_id());
+ block.address
+ }
+}
+
+#[test]
+#[cfg(target_pointer_width = "64")]
+fn test_struct_sizes() {
+ use std::mem;
+ // We can end up with a lot of blocks stored in the global vec, and keeping
+ // them small helps reduce memory overhead.
+ assert_eq!(mem::size_of::<Block>(), 24, "Block size changed");
+}