938 lines
32 KiB
Rust
938 lines
32 KiB
Rust
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
//! Overview of the GPU cache.
|
|
//!
|
|
//! The main goal of the GPU cache is to allow on-demand
|
|
//! allocation and construction of GPU resources for the
|
|
//! vertex shaders to consume.
|
|
//!
|
|
//! Every item that wants to be stored in the GPU cache
|
|
//! should create a GpuCacheHandle that is used to refer
|
|
//! to a cached GPU resource. Creating a handle is a
|
|
//! cheap operation, that does *not* allocate room in the
|
|
//! cache.
|
|
//!
|
|
//! On any frame when that data is required, the caller
|
|
//! must request that handle, via ```request```. If the
|
|
//! data is not in the cache, the user provided closure
|
|
//! will be invoked to build the data.
|
|
//!
|
|
//! After ```end_frame``` has occurred, callers can
|
|
//! use the ```get_address``` API to get the allocated
|
|
//! address in the GPU cache of a given resource slot
|
|
//! for this frame.
|
|
|
|
use api::{DebugFlags, DocumentId, PremultipliedColorF};
|
|
#[cfg(test)]
|
|
use api::IdNamespace;
|
|
use api::units::*;
|
|
use euclid::{HomogeneousVector, Box2D};
|
|
use crate::internal_types::{FastHashMap, FastHashSet, FrameStamp, FrameId};
|
|
use crate::profiler::{self, TransactionProfile};
|
|
use crate::prim_store::VECS_PER_SEGMENT;
|
|
use crate::renderer::MAX_VERTEX_TEXTURE_WIDTH;
|
|
use crate::util::VecHelper;
|
|
use std::{u16, u32};
|
|
use std::num::NonZeroU32;
|
|
use std::ops::Add;
|
|
use std::time::{Duration, Instant};
|
|
|
|
|
|
/// At the time of this writing, Firefox uses about 15 GPU cache rows on
|
|
/// startup, and then gradually works its way up to the mid-30s with normal
|
|
/// browsing.
|
|
pub const GPU_CACHE_INITIAL_HEIGHT: i32 = 20;
|
|
const NEW_ROWS_PER_RESIZE: i32 = 10;
|
|
|
|
/// The number of frames an entry can go unused before being evicted.
|
|
const FRAMES_BEFORE_EVICTION: u64 = 10;
|
|
|
|
/// The ratio of utilized blocks to total blocks for which we start the clock
|
|
/// on reclaiming memory.
|
|
const RECLAIM_THRESHOLD: f32 = 0.2;
|
|
|
|
/// The amount of time utilization must be below the above threshold before we
|
|
/// blow away the cache and rebuild it.
|
|
const RECLAIM_DELAY_S: u64 = 5;
|
|
|
|
#[derive(Debug, Copy, Clone, Eq, MallocSizeOf, PartialEq)]
|
|
#[cfg_attr(feature = "capture", derive(Serialize))]
|
|
#[cfg_attr(feature = "replay", derive(Deserialize))]
|
|
struct Epoch(u32);
|
|
|
|
impl Epoch {
|
|
fn next(&mut self) {
|
|
*self = Epoch(self.0.wrapping_add(1));
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, Copy, Clone, MallocSizeOf)]
|
|
#[cfg_attr(feature = "capture", derive(Serialize))]
|
|
#[cfg_attr(feature = "replay", derive(Deserialize))]
|
|
struct CacheLocation {
|
|
block_index: BlockIndex,
|
|
epoch: Epoch,
|
|
}
|
|
|
|
/// A single texel in RGBAF32 texture - 16 bytes.
|
|
#[derive(Copy, Clone, Debug, MallocSizeOf)]
|
|
#[cfg_attr(feature = "capture", derive(Serialize))]
|
|
#[cfg_attr(feature = "replay", derive(Deserialize))]
|
|
pub struct GpuBlockData {
|
|
data: [f32; 4],
|
|
}
|
|
|
|
impl GpuBlockData {
|
|
pub const EMPTY: Self = GpuBlockData { data: [0.0; 4] };
|
|
}
|
|
|
|
/// Conversion helpers for GpuBlockData
|
|
impl From<PremultipliedColorF> for GpuBlockData {
|
|
fn from(c: PremultipliedColorF) -> Self {
|
|
GpuBlockData {
|
|
data: [c.r, c.g, c.b, c.a],
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<[f32; 4]> for GpuBlockData {
|
|
fn from(data: [f32; 4]) -> Self {
|
|
GpuBlockData { data }
|
|
}
|
|
}
|
|
|
|
impl<P> From<Box2D<f32, P>> for GpuBlockData {
|
|
fn from(r: Box2D<f32, P>) -> Self {
|
|
GpuBlockData {
|
|
data: [
|
|
r.min.x,
|
|
r.min.y,
|
|
r.max.x,
|
|
r.max.y,
|
|
],
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<P> From<HomogeneousVector<f32, P>> for GpuBlockData {
|
|
fn from(v: HomogeneousVector<f32, P>) -> Self {
|
|
GpuBlockData {
|
|
data: [
|
|
v.x,
|
|
v.y,
|
|
v.z,
|
|
v.w,
|
|
],
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<TexelRect> for GpuBlockData {
|
|
fn from(tr: TexelRect) -> Self {
|
|
GpuBlockData {
|
|
data: [tr.uv0.x, tr.uv0.y, tr.uv1.x, tr.uv1.y],
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
// A handle to a GPU resource.
|
|
#[derive(Debug, Copy, Clone, MallocSizeOf)]
|
|
#[cfg_attr(feature = "capture", derive(Serialize))]
|
|
#[cfg_attr(feature = "replay", derive(Deserialize))]
|
|
pub struct GpuCacheHandle {
|
|
location: Option<CacheLocation>,
|
|
}
|
|
|
|
impl GpuCacheHandle {
|
|
pub fn new() -> Self {
|
|
GpuCacheHandle { location: None }
|
|
}
|
|
|
|
pub fn as_int(self, gpu_cache: &GpuCache) -> i32 {
|
|
gpu_cache.get_address(&self).as_int()
|
|
}
|
|
}
|
|
|
|
// A unique address in the GPU cache. These are uploaded
|
|
// as part of the primitive instances, to allow the vertex
|
|
// shader to fetch the specific data.
|
|
#[repr(C)]
|
|
#[derive(Copy, Debug, Clone, MallocSizeOf, Eq, PartialEq)]
|
|
#[cfg_attr(feature = "capture", derive(Serialize))]
|
|
#[cfg_attr(feature = "replay", derive(Deserialize))]
|
|
pub struct GpuCacheAddress {
|
|
pub u: u16,
|
|
pub v: u16,
|
|
}
|
|
|
|
impl GpuCacheAddress {
|
|
fn new(u: usize, v: usize) -> Self {
|
|
GpuCacheAddress {
|
|
u: u as u16,
|
|
v: v as u16,
|
|
}
|
|
}
|
|
|
|
pub const INVALID: GpuCacheAddress = GpuCacheAddress {
|
|
u: u16::MAX,
|
|
v: u16::MAX,
|
|
};
|
|
|
|
pub fn as_int(self) -> i32 {
|
|
// TODO(gw): Temporarily encode GPU Cache addresses as a single int.
|
|
// In the future, we can change the PrimitiveInstanceData struct
|
|
// to use 2x u16 for the vertex attribute instead of an i32.
|
|
self.v as i32 * MAX_VERTEX_TEXTURE_WIDTH as i32 + self.u as i32
|
|
}
|
|
}
|
|
|
|
impl Add<usize> for GpuCacheAddress {
|
|
type Output = GpuCacheAddress;
|
|
|
|
fn add(self, other: usize) -> GpuCacheAddress {
|
|
GpuCacheAddress {
|
|
u: self.u + other as u16,
|
|
v: self.v,
|
|
}
|
|
}
|
|
}
|
|
|
|
// An entry in a free-list of blocks in the GPU cache.
|
|
#[derive(Debug, MallocSizeOf)]
|
|
#[cfg_attr(feature = "capture", derive(Serialize))]
|
|
#[cfg_attr(feature = "replay", derive(Deserialize))]
|
|
struct Block {
|
|
// The location in the cache of this block.
|
|
address: GpuCacheAddress,
|
|
// The current epoch (generation) of this block.
|
|
epoch: Epoch,
|
|
// Index of the next free block in the list it
|
|
// belongs to (either a free-list or the
|
|
// occupied list).
|
|
next: Option<BlockIndex>,
|
|
// The last frame this block was referenced.
|
|
last_access_time: FrameId,
|
|
}
|
|
|
|
impl Block {
|
|
fn new(
|
|
address: GpuCacheAddress,
|
|
next: Option<BlockIndex>,
|
|
frame_id: FrameId,
|
|
epoch: Epoch,
|
|
) -> Self {
|
|
Block {
|
|
address,
|
|
next,
|
|
last_access_time: frame_id,
|
|
epoch,
|
|
}
|
|
}
|
|
|
|
fn advance_epoch(&mut self, max_epoch: &mut Epoch) {
|
|
self.epoch.next();
|
|
if max_epoch.0 < self.epoch.0 {
|
|
max_epoch.0 = self.epoch.0;
|
|
}
|
|
}
|
|
|
|
/// Creates an invalid dummy block ID.
|
|
pub const INVALID: Block = Block {
|
|
address: GpuCacheAddress { u: 0, v: 0 },
|
|
epoch: Epoch(0),
|
|
next: None,
|
|
last_access_time: FrameId::INVALID,
|
|
};
|
|
}
|
|
|
|
/// Represents the index of a Block in the block array. We only create such
|
|
/// structs for blocks that represent the start of a chunk.
|
|
///
|
|
/// Because we use Option<BlockIndex> in a lot of places, we use a NonZeroU32
|
|
/// here and avoid ever using the index zero.
|
|
#[derive(Debug, Copy, Clone, MallocSizeOf)]
|
|
#[cfg_attr(feature = "capture", derive(Serialize))]
|
|
#[cfg_attr(feature = "replay", derive(Deserialize))]
|
|
struct BlockIndex(NonZeroU32);
|
|
|
|
impl BlockIndex {
|
|
fn new(idx: usize) -> Self {
|
|
debug_assert!(idx <= u32::MAX as usize);
|
|
BlockIndex(NonZeroU32::new(idx as u32).expect("Index zero forbidden"))
|
|
}
|
|
|
|
fn get(&self) -> usize {
|
|
self.0.get() as usize
|
|
}
|
|
}
|
|
|
|
// A row in the cache texture.
|
|
#[cfg_attr(feature = "capture", derive(Serialize))]
|
|
#[cfg_attr(feature = "replay", derive(Deserialize))]
|
|
#[derive(MallocSizeOf)]
|
|
struct Row {
|
|
// The fixed size of blocks that this row supports.
|
|
// Each row becomes a slab allocator for a fixed block size.
|
|
// This means no dealing with fragmentation within a cache
|
|
// row as items are allocated and freed.
|
|
block_count_per_item: usize,
|
|
}
|
|
|
|
impl Row {
|
|
fn new(block_count_per_item: usize) -> Self {
|
|
Row {
|
|
block_count_per_item,
|
|
}
|
|
}
|
|
}
|
|
|
|
// A list of update operations that can be applied on the cache
|
|
// this frame. The list of updates is created by the render backend
|
|
// during frame construction. It's passed to the render thread
|
|
// where GL commands can be applied.
|
|
#[cfg_attr(feature = "capture", derive(Serialize))]
|
|
#[cfg_attr(feature = "replay", derive(Deserialize))]
|
|
#[derive(MallocSizeOf)]
|
|
pub enum GpuCacheUpdate {
|
|
Copy {
|
|
block_index: usize,
|
|
block_count: usize,
|
|
address: GpuCacheAddress,
|
|
},
|
|
}
|
|
|
|
/// Command to inform the debug display in the renderer when chunks are allocated
|
|
/// or freed.
|
|
#[derive(MallocSizeOf)]
|
|
pub enum GpuCacheDebugCmd {
|
|
/// Describes an allocated chunk.
|
|
Alloc(GpuCacheDebugChunk),
|
|
/// Describes a freed chunk.
|
|
Free(GpuCacheAddress),
|
|
}
|
|
|
|
#[derive(Clone, MallocSizeOf)]
|
|
pub struct GpuCacheDebugChunk {
|
|
pub address: GpuCacheAddress,
|
|
pub size: usize,
|
|
}
|
|
|
|
#[must_use]
|
|
#[cfg_attr(feature = "capture", derive(Serialize))]
|
|
#[cfg_attr(feature = "replay", derive(Deserialize))]
|
|
#[derive(MallocSizeOf)]
|
|
pub struct GpuCacheUpdateList {
|
|
/// The frame current update list was generated from.
|
|
pub frame_id: FrameId,
|
|
/// Whether the texture should be cleared before updates
|
|
/// are applied.
|
|
pub clear: bool,
|
|
/// The current height of the texture. The render thread
|
|
/// should resize the texture if required.
|
|
pub height: i32,
|
|
/// List of updates to apply.
|
|
pub updates: Vec<GpuCacheUpdate>,
|
|
/// A flat list of GPU blocks that are pending upload
|
|
/// to GPU memory.
|
|
pub blocks: Vec<GpuBlockData>,
|
|
/// Whole state GPU block metadata for debugging.
|
|
#[cfg_attr(feature = "serde", serde(skip))]
|
|
pub debug_commands: Vec<GpuCacheDebugCmd>,
|
|
}
|
|
|
|
// Holds the free lists of fixed size blocks. Mostly
|
|
// just serves to work around the borrow checker.
|
|
#[cfg_attr(feature = "capture", derive(Serialize))]
|
|
#[cfg_attr(feature = "replay", derive(Deserialize))]
|
|
#[derive(MallocSizeOf)]
|
|
struct FreeBlockLists {
|
|
free_list_1: Option<BlockIndex>,
|
|
free_list_2: Option<BlockIndex>,
|
|
free_list_4: Option<BlockIndex>,
|
|
free_list_8: Option<BlockIndex>,
|
|
free_list_16: Option<BlockIndex>,
|
|
free_list_32: Option<BlockIndex>,
|
|
free_list_64: Option<BlockIndex>,
|
|
free_list_128: Option<BlockIndex>,
|
|
free_list_256: Option<BlockIndex>,
|
|
free_list_341: Option<BlockIndex>,
|
|
free_list_512: Option<BlockIndex>,
|
|
free_list_1024: Option<BlockIndex>,
|
|
}
|
|
|
|
impl FreeBlockLists {
|
|
fn new() -> Self {
|
|
FreeBlockLists {
|
|
free_list_1: None,
|
|
free_list_2: None,
|
|
free_list_4: None,
|
|
free_list_8: None,
|
|
free_list_16: None,
|
|
free_list_32: None,
|
|
free_list_64: None,
|
|
free_list_128: None,
|
|
free_list_256: None,
|
|
free_list_341: None,
|
|
free_list_512: None,
|
|
free_list_1024: None,
|
|
}
|
|
}
|
|
|
|
fn get_actual_block_count_and_free_list(
|
|
&mut self,
|
|
block_count: usize,
|
|
) -> (usize, &mut Option<BlockIndex>) {
|
|
// Find the appropriate free list to use based on the block size.
|
|
//
|
|
// Note that we cheat a bit with the 341 bucket, since it's not quite
|
|
// a divisor of 1024, because purecss-francine allocates many 260-block
|
|
// chunks, and there's no reason we shouldn't pack these three to a row.
|
|
// This means the allocation statistics will under-report by one block
|
|
// for each row using 341-block buckets, which is fine.
|
|
debug_assert_eq!(MAX_VERTEX_TEXTURE_WIDTH, 1024, "Need to update bucketing");
|
|
match block_count {
|
|
0 => panic!("Can't allocate zero sized blocks!"),
|
|
1 => (1, &mut self.free_list_1),
|
|
2 => (2, &mut self.free_list_2),
|
|
3..=4 => (4, &mut self.free_list_4),
|
|
5..=8 => (8, &mut self.free_list_8),
|
|
9..=16 => (16, &mut self.free_list_16),
|
|
17..=32 => (32, &mut self.free_list_32),
|
|
33..=64 => (64, &mut self.free_list_64),
|
|
65..=128 => (128, &mut self.free_list_128),
|
|
129..=256 => (256, &mut self.free_list_256),
|
|
257..=341 => (341, &mut self.free_list_341),
|
|
342..=512 => (512, &mut self.free_list_512),
|
|
513..=1024 => (1024, &mut self.free_list_1024),
|
|
_ => panic!("Can't allocate > MAX_VERTEX_TEXTURE_WIDTH per resource!"),
|
|
}
|
|
}
|
|
}
|
|
|
|
// CPU-side representation of the GPU resource cache texture.
|
|
#[cfg_attr(feature = "capture", derive(Serialize))]
|
|
#[cfg_attr(feature = "replay", derive(Deserialize))]
|
|
#[derive(MallocSizeOf)]
|
|
struct Texture {
|
|
// Current texture height
|
|
height: i32,
|
|
// All blocks that have been created for this texture
|
|
blocks: Vec<Block>,
|
|
// Metadata about each allocated row.
|
|
rows: Vec<Row>,
|
|
// The base Epoch for this texture.
|
|
base_epoch: Epoch,
|
|
// The maximum epoch reached. We track this along with the above so
|
|
// that we can rebuild the Texture and avoid collisions with handles
|
|
// allocated for the old texture.
|
|
max_epoch: Epoch,
|
|
// Free lists of available blocks for each supported
|
|
// block size in the texture. These are intrusive
|
|
// linked lists.
|
|
free_lists: FreeBlockLists,
|
|
// Linked list of currently occupied blocks. This
|
|
// makes it faster to iterate blocks looking for
|
|
// candidates to be evicted from the cache.
|
|
occupied_list_heads: FastHashMap<DocumentId, BlockIndex>,
|
|
// Pending blocks that have been written this frame
|
|
// and will need to be sent to the GPU.
|
|
pending_blocks: Vec<GpuBlockData>,
|
|
// Pending update commands.
|
|
updates: Vec<GpuCacheUpdate>,
|
|
// Profile stats
|
|
allocated_block_count: usize,
|
|
// The stamp at which we first reached our threshold for reclaiming `GpuCache`
|
|
// memory, or `None` if the threshold hasn't been reached.
|
|
#[cfg_attr(feature = "serde", serde(skip))]
|
|
reached_reclaim_threshold: Option<Instant>,
|
|
// List of debug commands to be sent to the renderer when the GPU cache
|
|
// debug display is enabled.
|
|
#[cfg_attr(feature = "serde", serde(skip))]
|
|
debug_commands: Vec<GpuCacheDebugCmd>,
|
|
// The current debug flags for the system.
|
|
debug_flags: DebugFlags,
|
|
}
|
|
|
|
impl Texture {
|
|
fn new(base_epoch: Epoch, debug_flags: DebugFlags) -> Self {
|
|
// Pre-fill the block array with one invalid block so that we never use
|
|
// 0 for a BlockIndex. This lets us use NonZeroU32 for BlockIndex, which
|
|
// saves memory.
|
|
let blocks = vec![Block::INVALID];
|
|
|
|
Texture {
|
|
height: GPU_CACHE_INITIAL_HEIGHT,
|
|
blocks,
|
|
rows: Vec::new(),
|
|
base_epoch,
|
|
max_epoch: base_epoch,
|
|
free_lists: FreeBlockLists::new(),
|
|
pending_blocks: Vec::new(),
|
|
updates: Vec::new(),
|
|
occupied_list_heads: FastHashMap::default(),
|
|
allocated_block_count: 0,
|
|
reached_reclaim_threshold: None,
|
|
debug_commands: Vec::new(),
|
|
debug_flags,
|
|
}
|
|
}
|
|
|
|
// Push new data into the cache. The ```pending_block_index``` field represents
|
|
// where the data was pushed into the texture ```pending_blocks``` array.
|
|
// Return the allocated address for this data.
|
|
fn push_data(
|
|
&mut self,
|
|
pending_block_index: Option<usize>,
|
|
block_count: usize,
|
|
frame_stamp: FrameStamp
|
|
) -> CacheLocation {
|
|
debug_assert!(frame_stamp.is_valid());
|
|
// Find the appropriate free list to use based on the block size.
|
|
let (alloc_size, free_list) = self.free_lists
|
|
.get_actual_block_count_and_free_list(block_count);
|
|
|
|
// See if we need a new row (if free-list has nothing available)
|
|
if free_list.is_none() {
|
|
if self.rows.len() as i32 == self.height {
|
|
self.height += NEW_ROWS_PER_RESIZE;
|
|
}
|
|
|
|
// Create a new row.
|
|
let items_per_row = MAX_VERTEX_TEXTURE_WIDTH / alloc_size;
|
|
let row_index = self.rows.len();
|
|
self.rows.push(Row::new(alloc_size));
|
|
|
|
// Create a ```Block``` for each possible allocation address
|
|
// in this row, and link it in to the free-list for this
|
|
// block size.
|
|
let mut prev_block_index = None;
|
|
for i in 0 .. items_per_row {
|
|
let address = GpuCacheAddress::new(i * alloc_size, row_index);
|
|
let block_index = BlockIndex::new(self.blocks.len());
|
|
let block = Block::new(address, prev_block_index, frame_stamp.frame_id(), self.base_epoch);
|
|
self.blocks.push(block);
|
|
prev_block_index = Some(block_index);
|
|
}
|
|
|
|
*free_list = prev_block_index;
|
|
}
|
|
|
|
// Given the code above, it's now guaranteed that there is a block
|
|
// available in the appropriate free-list. Pull a block from the
|
|
// head of the list.
|
|
let free_block_index = free_list.take().unwrap();
|
|
let block = &mut self.blocks[free_block_index.get()];
|
|
*free_list = block.next;
|
|
|
|
// Add the block to the occupied linked list.
|
|
block.next = self.occupied_list_heads.get(&frame_stamp.document_id()).cloned();
|
|
block.last_access_time = frame_stamp.frame_id();
|
|
self.occupied_list_heads.insert(frame_stamp.document_id(), free_block_index);
|
|
self.allocated_block_count += alloc_size;
|
|
|
|
if let Some(pending_block_index) = pending_block_index {
|
|
// Add this update to the pending list of blocks that need
|
|
// to be updated on the GPU.
|
|
self.updates.push(GpuCacheUpdate::Copy {
|
|
block_index: pending_block_index,
|
|
block_count,
|
|
address: block.address,
|
|
});
|
|
}
|
|
|
|
// If we're using the debug display, communicate the allocation to the
|
|
// renderer thread. Note that we do this regardless of whether or not
|
|
// pending_block_index is None (if it is, the renderer thread will fill
|
|
// in the data via a deferred resolve, but the block is still considered
|
|
// allocated).
|
|
if self.debug_flags.contains(DebugFlags::GPU_CACHE_DBG) {
|
|
self.debug_commands.push(GpuCacheDebugCmd::Alloc(GpuCacheDebugChunk {
|
|
address: block.address,
|
|
size: block_count,
|
|
}));
|
|
}
|
|
|
|
CacheLocation {
|
|
block_index: free_block_index,
|
|
epoch: block.epoch,
|
|
}
|
|
}
|
|
|
|
// Run through the list of occupied cache blocks and evict
|
|
// any old blocks that haven't been referenced for a while.
|
|
fn evict_old_blocks(&mut self, frame_stamp: FrameStamp) {
|
|
debug_assert!(frame_stamp.is_valid());
|
|
// Prune any old items from the list to make room.
|
|
// Traverse the occupied linked list and see
|
|
// which items have not been used for a long time.
|
|
let mut current_block = self.occupied_list_heads.get(&frame_stamp.document_id()).map(|x| *x);
|
|
let mut prev_block: Option<BlockIndex> = None;
|
|
|
|
while let Some(index) = current_block {
|
|
let (next_block, should_unlink) = {
|
|
let block = &mut self.blocks[index.get()];
|
|
|
|
let next_block = block.next;
|
|
let mut should_unlink = false;
|
|
|
|
// If this resource has not been used in the last
|
|
// few frames, free it from the texture and mark
|
|
// as empty.
|
|
if block.last_access_time + FRAMES_BEFORE_EVICTION < frame_stamp.frame_id() {
|
|
should_unlink = true;
|
|
|
|
// Get the row metadata from the address.
|
|
let row = &mut self.rows[block.address.v as usize];
|
|
|
|
// Use the row metadata to determine which free-list
|
|
// this block belongs to.
|
|
let (_, free_list) = self.free_lists
|
|
.get_actual_block_count_and_free_list(row.block_count_per_item);
|
|
|
|
block.advance_epoch(&mut self.max_epoch);
|
|
block.next = *free_list;
|
|
*free_list = Some(index);
|
|
|
|
self.allocated_block_count -= row.block_count_per_item;
|
|
|
|
if self.debug_flags.contains(DebugFlags::GPU_CACHE_DBG) {
|
|
let cmd = GpuCacheDebugCmd::Free(block.address);
|
|
self.debug_commands.push(cmd);
|
|
}
|
|
};
|
|
|
|
(next_block, should_unlink)
|
|
};
|
|
|
|
// If the block was released, we will need to remove it
|
|
// from the occupied linked list.
|
|
if should_unlink {
|
|
match prev_block {
|
|
Some(prev_block) => {
|
|
self.blocks[prev_block.get()].next = next_block;
|
|
}
|
|
None => {
|
|
match next_block {
|
|
Some(next_block) => {
|
|
self.occupied_list_heads.insert(frame_stamp.document_id(), next_block);
|
|
}
|
|
None => {
|
|
self.occupied_list_heads.remove(&frame_stamp.document_id());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
prev_block = current_block;
|
|
}
|
|
|
|
current_block = next_block;
|
|
}
|
|
}
|
|
|
|
/// Returns the ratio of utilized blocks.
|
|
fn utilization(&self) -> f32 {
|
|
let total_blocks = self.rows.len() * MAX_VERTEX_TEXTURE_WIDTH;
|
|
debug_assert!(total_blocks > 0);
|
|
let ratio = self.allocated_block_count as f32 / total_blocks as f32;
|
|
debug_assert!(0.0 <= ratio && ratio <= 1.0, "Bad ratio: {}", ratio);
|
|
ratio
|
|
}
|
|
}
|
|
|
|
|
|
/// A wrapper object for GPU data requests,
|
|
/// works as a container that can only grow.
|
|
#[must_use]
|
|
pub struct GpuDataRequest<'a> {
|
|
//TODO: remove this, see
|
|
// https://bugzilla.mozilla.org/show_bug.cgi?id=1690546
|
|
#[allow(dead_code)]
|
|
handle: &'a mut GpuCacheHandle,
|
|
frame_stamp: FrameStamp,
|
|
start_index: usize,
|
|
max_block_count: usize,
|
|
texture: &'a mut Texture,
|
|
}
|
|
|
|
impl<'a> GpuDataRequest<'a> {
|
|
pub fn push<B>(&mut self, block: B)
|
|
where
|
|
B: Into<GpuBlockData>,
|
|
{
|
|
self.texture.pending_blocks.push(block.into());
|
|
}
|
|
|
|
// Write the GPU cache data for an individual segment.
|
|
pub fn write_segment(
|
|
&mut self,
|
|
local_rect: LayoutRect,
|
|
extra_data: [f32; 4],
|
|
) {
|
|
let _ = VECS_PER_SEGMENT;
|
|
self.push(local_rect);
|
|
self.push(extra_data);
|
|
}
|
|
|
|
pub fn current_used_block_num(&self) -> usize {
|
|
self.texture.pending_blocks.len() - self.start_index
|
|
}
|
|
}
|
|
|
|
impl<'a> Drop for GpuDataRequest<'a> {
|
|
fn drop(&mut self) {
|
|
// Push the data to the texture pending updates list.
|
|
let block_count = self.current_used_block_num();
|
|
debug_assert!(block_count <= self.max_block_count);
|
|
|
|
let location = self.texture
|
|
.push_data(Some(self.start_index), block_count, self.frame_stamp);
|
|
self.handle.location = Some(location);
|
|
}
|
|
}
|
|
|
|
|
|
/// The main LRU cache interface.
|
|
#[cfg_attr(feature = "capture", derive(Serialize))]
|
|
#[cfg_attr(feature = "replay", derive(Deserialize))]
|
|
#[derive(MallocSizeOf)]
|
|
pub struct GpuCache {
|
|
/// Current FrameId.
|
|
now: FrameStamp,
|
|
/// CPU-side texture allocator.
|
|
texture: Texture,
|
|
/// Number of blocks requested this frame that don't
|
|
/// need to be re-uploaded.
|
|
saved_block_count: usize,
|
|
/// The current debug flags for the system.
|
|
debug_flags: DebugFlags,
|
|
/// Whether there is a pending clear to send with the
|
|
/// next update.
|
|
pending_clear: bool,
|
|
/// Indicates that prepare_for_frames has been called for this group of frames.
|
|
/// Used for sanity checks.
|
|
prepared_for_frames: bool,
|
|
/// This indicates that we performed a cleanup operation which requires all
|
|
/// documents to build a frame.
|
|
requires_frame_build: bool,
|
|
/// The set of documents which have had frames built in this update. Used for
|
|
/// sanity checks.
|
|
document_frames_to_build: FastHashSet<DocumentId>,
|
|
}
|
|
|
|
impl GpuCache {
|
|
pub fn new() -> Self {
|
|
let debug_flags = DebugFlags::empty();
|
|
GpuCache {
|
|
now: FrameStamp::INVALID,
|
|
texture: Texture::new(Epoch(0), debug_flags),
|
|
saved_block_count: 0,
|
|
debug_flags,
|
|
pending_clear: false,
|
|
prepared_for_frames: false,
|
|
requires_frame_build: false,
|
|
document_frames_to_build: FastHashSet::default(),
|
|
}
|
|
}
|
|
|
|
/// Creates a GpuCache and sets it up with a valid `FrameStamp`, which
|
|
/// is useful for avoiding panics when instantiating the `GpuCache`
|
|
/// directly from unit test code.
|
|
#[cfg(test)]
|
|
pub fn new_for_testing() -> Self {
|
|
let mut cache = Self::new();
|
|
let mut now = FrameStamp::first(DocumentId::new(IdNamespace(1), 1));
|
|
now.advance();
|
|
cache.prepared_for_frames = true;
|
|
cache.begin_frame(now);
|
|
cache
|
|
}
|
|
|
|
/// Drops everything in the GPU cache. Must not be called once gpu cache entries
|
|
/// for the next frame have already been requested.
|
|
pub fn clear(&mut self) {
|
|
assert!(self.texture.updates.is_empty(), "Clearing with pending updates");
|
|
let mut next_base_epoch = self.texture.max_epoch;
|
|
next_base_epoch.next();
|
|
self.texture = Texture::new(next_base_epoch, self.debug_flags);
|
|
self.saved_block_count = 0;
|
|
self.pending_clear = true;
|
|
self.requires_frame_build = true;
|
|
}
|
|
|
|
pub fn requires_frame_build(&self) -> bool {
|
|
self.requires_frame_build
|
|
}
|
|
|
|
pub fn prepare_for_frames(&mut self) {
|
|
self.prepared_for_frames = true;
|
|
if self.should_reclaim_memory() {
|
|
self.clear();
|
|
debug_assert!(self.document_frames_to_build.is_empty());
|
|
for &document_id in self.texture.occupied_list_heads.keys() {
|
|
self.document_frames_to_build.insert(document_id);
|
|
}
|
|
}
|
|
}
|
|
|
|
pub fn bookkeep_after_frames(&mut self) {
|
|
assert!(self.document_frames_to_build.is_empty());
|
|
assert!(self.prepared_for_frames);
|
|
self.requires_frame_build = false;
|
|
self.prepared_for_frames = false;
|
|
}
|
|
|
|
/// Begin a new frame.
|
|
pub fn begin_frame(&mut self, stamp: FrameStamp) {
|
|
debug_assert!(self.texture.pending_blocks.is_empty());
|
|
assert!(self.prepared_for_frames);
|
|
profile_scope!("begin_frame");
|
|
self.now = stamp;
|
|
self.texture.evict_old_blocks(self.now);
|
|
self.saved_block_count = 0;
|
|
}
|
|
|
|
// Invalidate a (possibly) existing block in the cache.
|
|
// This means the next call to request() for this location
|
|
// will rebuild the data and upload it to the GPU.
|
|
pub fn invalidate(&mut self, handle: &GpuCacheHandle) {
|
|
if let Some(ref location) = handle.location {
|
|
// don't invalidate blocks that are already re-assigned
|
|
if let Some(block) = self.texture.blocks.get_mut(location.block_index.get()) {
|
|
if block.epoch == location.epoch {
|
|
block.advance_epoch(&mut self.texture.max_epoch);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Request a resource be added to the cache. If the resource
|
|
/// is already in the cache, `None` will be returned.
|
|
pub fn request<'a>(&'a mut self, handle: &'a mut GpuCacheHandle) -> Option<GpuDataRequest<'a>> {
|
|
let mut max_block_count = MAX_VERTEX_TEXTURE_WIDTH;
|
|
// Check if the allocation for this handle is still valid.
|
|
if let Some(ref location) = handle.location {
|
|
if let Some(block) = self.texture.blocks.get_mut(location.block_index.get()) {
|
|
if block.epoch == location.epoch {
|
|
max_block_count = self.texture.rows[block.address.v as usize].block_count_per_item;
|
|
if block.last_access_time != self.now.frame_id() {
|
|
// Mark last access time to avoid evicting this block.
|
|
block.last_access_time = self.now.frame_id();
|
|
self.saved_block_count += max_block_count;
|
|
}
|
|
return None;
|
|
}
|
|
}
|
|
}
|
|
|
|
debug_assert!(self.now.is_valid());
|
|
Some(GpuDataRequest {
|
|
handle,
|
|
frame_stamp: self.now,
|
|
start_index: self.texture.pending_blocks.len(),
|
|
texture: &mut self.texture,
|
|
max_block_count,
|
|
})
|
|
}
|
|
|
|
// Push an array of data blocks to be uploaded to the GPU
|
|
// unconditionally for this frame. The cache handle will
|
|
// assert if the caller tries to retrieve the address
|
|
// of this handle on a subsequent frame. This is typically
|
|
// used for uploading data that changes every frame, and
|
|
// therefore makes no sense to try and cache.
|
|
pub fn push_per_frame_blocks(&mut self, blocks: &[GpuBlockData]) -> GpuCacheHandle {
|
|
let start_index = self.texture.pending_blocks.len();
|
|
self.texture.pending_blocks.extend_from_slice(blocks);
|
|
let location = self.texture
|
|
.push_data(Some(start_index), blocks.len(), self.now);
|
|
GpuCacheHandle {
|
|
location: Some(location),
|
|
}
|
|
}
|
|
|
|
// Reserve space in the cache for per-frame blocks that
|
|
// will be resolved by the render thread via the
|
|
// external image callback.
|
|
pub fn push_deferred_per_frame_blocks(&mut self, block_count: usize) -> GpuCacheHandle {
|
|
let location = self.texture.push_data(None, block_count, self.now);
|
|
GpuCacheHandle {
|
|
location: Some(location),
|
|
}
|
|
}
|
|
|
|
/// End the frame. Return the list of updates to apply to the
|
|
/// device specific cache texture.
|
|
pub fn end_frame(
|
|
&mut self,
|
|
profile: &mut TransactionProfile,
|
|
) -> FrameStamp {
|
|
profile_scope!("end_frame");
|
|
profile.set(profiler::GPU_CACHE_ROWS_TOTAL, self.texture.rows.len());
|
|
profile.set(profiler::GPU_CACHE_BLOCKS_TOTAL, self.texture.allocated_block_count);
|
|
profile.set(profiler::GPU_CACHE_BLOCKS_SAVED, self.saved_block_count);
|
|
|
|
let reached_threshold =
|
|
self.texture.rows.len() > (GPU_CACHE_INITIAL_HEIGHT as usize) &&
|
|
self.texture.utilization() < RECLAIM_THRESHOLD;
|
|
if reached_threshold {
|
|
self.texture.reached_reclaim_threshold.get_or_insert_with(Instant::now);
|
|
} else {
|
|
self.texture.reached_reclaim_threshold = None;
|
|
}
|
|
|
|
self.document_frames_to_build.remove(&self.now.document_id());
|
|
self.now
|
|
}
|
|
|
|
/// Returns true if utilization has been low enough for long enough that we
|
|
/// should blow the cache away and rebuild it.
|
|
pub fn should_reclaim_memory(&self) -> bool {
|
|
self.texture.reached_reclaim_threshold
|
|
.map_or(false, |t| t.elapsed() > Duration::from_secs(RECLAIM_DELAY_S))
|
|
}
|
|
|
|
/// Extract the pending updates from the cache.
|
|
pub fn extract_updates(&mut self) -> GpuCacheUpdateList {
|
|
let clear = self.pending_clear;
|
|
self.pending_clear = false;
|
|
GpuCacheUpdateList {
|
|
frame_id: self.now.frame_id(),
|
|
clear,
|
|
height: self.texture.height,
|
|
debug_commands: self.texture.debug_commands.take_and_preallocate(),
|
|
updates: self.texture.updates.take_and_preallocate(),
|
|
blocks: self.texture.pending_blocks.take_and_preallocate(),
|
|
}
|
|
}
|
|
|
|
/// Sets the current debug flags for the system.
|
|
pub fn set_debug_flags(&mut self, flags: DebugFlags) {
|
|
self.debug_flags = flags;
|
|
self.texture.debug_flags = flags;
|
|
}
|
|
|
|
/// Get the actual GPU address in the texture for a given slot ID.
|
|
/// It's assumed at this point that the given slot has been requested
|
|
/// and built for this frame. Attempting to get the address for a
|
|
/// freed or pending slot will panic!
|
|
pub fn get_address(&self, id: &GpuCacheHandle) -> GpuCacheAddress {
|
|
let location = id.location.expect("handle not requested or allocated!");
|
|
let block = &self.texture.blocks[location.block_index.get()];
|
|
debug_assert_eq!(block.epoch, location.epoch);
|
|
debug_assert_eq!(block.last_access_time, self.now.frame_id());
|
|
block.address
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
#[cfg(target_pointer_width = "64")]
|
|
fn test_struct_sizes() {
|
|
use std::mem;
|
|
// We can end up with a lot of blocks stored in the global vec, and keeping
|
|
// them small helps reduce memory overhead.
|
|
assert_eq!(mem::size_of::<Block>(), 24, "Block size changed");
|
|
}
|