summaryrefslogtreecommitdiffstats
path: root/gfx/wr/webrender/src/renderer/gpu_cache.rs
diff options
context:
space:
mode:
Diffstat (limited to 'gfx/wr/webrender/src/renderer/gpu_cache.rs')
-rw-r--r--gfx/wr/webrender/src/renderer/gpu_cache.rs507
1 files changed, 507 insertions, 0 deletions
diff --git a/gfx/wr/webrender/src/renderer/gpu_cache.rs b/gfx/wr/webrender/src/renderer/gpu_cache.rs
new file mode 100644
index 0000000000..f9975675ca
--- /dev/null
+++ b/gfx/wr/webrender/src/renderer/gpu_cache.rs
@@ -0,0 +1,507 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use std::{cmp, mem};
+use api::units::*;
+use malloc_size_of::MallocSizeOfOps;
+use crate::{
+ device::{CustomVAO, Device, DrawTarget, Program, ReadTarget, Texture, TextureFilter, UploadPBOPool, VBO},
+ gpu_cache::{GpuBlockData, GpuCacheUpdate, GpuCacheUpdateList},
+ internal_types::{RenderTargetInfo, Swizzle},
+ prim_store::DeferredResolve,
+ profiler,
+ render_api::MemoryReport,
+ render_backend::FrameId,
+};
+
+/// Enabling this toggle would force the GPU cache scattered texture to
+/// be resized every frame, which enables GPU debuggers to see if this
+/// is performed correctly.
+const GPU_CACHE_RESIZE_TEST: bool = false;
+
+/// Tracks the state of each row in the GPU cache texture.
+struct CacheRow {
+ /// Mirrored block data on CPU for this row. We store a copy of
+ /// the data on the CPU side to improve upload batching.
+ cpu_blocks: Box<[GpuBlockData; super::MAX_VERTEX_TEXTURE_WIDTH]>,
+ /// The first offset in this row that is dirty.
+ min_dirty: u16,
+ /// The last offset in this row that is dirty.
+ max_dirty: u16,
+}
+
+impl CacheRow {
+ fn new() -> Self {
+ CacheRow {
+ cpu_blocks: Box::new([GpuBlockData::EMPTY; super::MAX_VERTEX_TEXTURE_WIDTH]),
+ min_dirty: super::MAX_VERTEX_TEXTURE_WIDTH as _,
+ max_dirty: 0,
+ }
+ }
+
+ fn is_dirty(&self) -> bool {
+ return self.min_dirty < self.max_dirty;
+ }
+
+ fn clear_dirty(&mut self) {
+ self.min_dirty = super::MAX_VERTEX_TEXTURE_WIDTH as _;
+ self.max_dirty = 0;
+ }
+
+ fn add_dirty(&mut self, block_offset: usize, block_count: usize) {
+ self.min_dirty = self.min_dirty.min(block_offset as _);
+ self.max_dirty = self.max_dirty.max((block_offset + block_count) as _);
+ }
+
+ fn dirty_blocks(&self) -> &[GpuBlockData] {
+ return &self.cpu_blocks[self.min_dirty as usize .. self.max_dirty as usize];
+ }
+}
+
+/// The bus over which CPU and GPU versions of the GPU cache
+/// get synchronized.
+enum GpuCacheBus {
+ /// PBO-based updates, currently operate on a row granularity.
+ /// Therefore, are subject to fragmentation issues.
+ PixelBuffer {
+ /// Per-row data.
+ rows: Vec<CacheRow>,
+ },
+ /// Shader-based scattering updates. Currently rendered by a set
+ /// of points into the GPU texture, each carrying a `GpuBlockData`.
+ Scatter {
+ /// Special program to run the scattered update.
+ program: Program,
+ /// VAO containing the source vertex buffers.
+ vao: CustomVAO,
+ /// VBO for positional data, supplied as normalized `u16`.
+ buf_position: VBO<[u16; 2]>,
+ /// VBO for gpu block data.
+ buf_value: VBO<GpuBlockData>,
+ /// Currently stored block count.
+ count: usize,
+ },
+}
+
+/// The device-specific representation of the cache texture in gpu_cache.rs
+pub struct GpuCacheTexture {
+ texture: Option<Texture>,
+ bus: GpuCacheBus,
+}
+
+impl GpuCacheTexture {
+ /// Ensures that we have an appropriately-sized texture. Returns true if a
+ /// new texture was created.
+ fn ensure_texture(&mut self, device: &mut Device, height: i32) {
+ // If we already have a texture that works, we're done.
+ if self.texture.as_ref().map_or(false, |t| t.get_dimensions().height >= height) {
+ if GPU_CACHE_RESIZE_TEST {
+ // Special debug mode - resize the texture even though it's fine.
+ } else {
+ return;
+ }
+ }
+
+ // Take the old texture, if any.
+ let blit_source = self.texture.take();
+
+ // Create the new texture.
+ assert!(height >= 2, "Height is too small for ANGLE");
+ let new_size = DeviceIntSize::new(super::MAX_VERTEX_TEXTURE_WIDTH as _, height);
+ // If glCopyImageSubData is supported, this texture doesn't need
+ // to be a render target. This prevents GL errors due to framebuffer
+ // incompleteness on devices that don't support RGBAF32 render targets.
+ // TODO(gw): We still need a proper solution for the subset of devices
+ // that don't support glCopyImageSubData *OR* rendering to a
+ // RGBAF32 render target. These devices will currently fail
+ // to resize the GPU cache texture.
+ let supports_copy_image_sub_data = device.get_capabilities().supports_copy_image_sub_data;
+ let rt_info = match self.bus {
+ GpuCacheBus::PixelBuffer { .. } if supports_copy_image_sub_data => None,
+ _ => Some(RenderTargetInfo { has_depth: false }),
+ };
+ let mut texture = device.create_texture(
+ api::ImageBufferKind::Texture2D,
+ api::ImageFormat::RGBAF32,
+ new_size.width,
+ new_size.height,
+ TextureFilter::Nearest,
+ rt_info,
+ 1,
+ );
+
+ // Copy the contents of the previous texture, if applicable.
+ if let Some(blit_source) = blit_source {
+ device.copy_entire_texture(&mut texture, &blit_source);
+ device.delete_texture(blit_source);
+ }
+
+ self.texture = Some(texture);
+ }
+
+ pub fn new(device: &mut Device, use_scatter: bool) -> Result<Self, super::RendererError> {
+ use super::desc::GPU_CACHE_UPDATE;
+
+ let bus = if use_scatter {
+ let program = device.create_program_linked(
+ "gpu_cache_update",
+ &[],
+ &GPU_CACHE_UPDATE,
+ )?;
+ let buf_position = device.create_vbo();
+ let buf_value = device.create_vbo();
+ //Note: the vertex attributes have to be supplied in the same order
+ // as for program creation, but each assigned to a different stream.
+ let vao = device.create_custom_vao(&[
+ buf_position.stream_with(&GPU_CACHE_UPDATE.vertex_attributes[0..1]),
+ buf_value .stream_with(&GPU_CACHE_UPDATE.vertex_attributes[1..2]),
+ ]);
+ GpuCacheBus::Scatter {
+ program,
+ vao,
+ buf_position,
+ buf_value,
+ count: 0,
+ }
+ } else {
+ GpuCacheBus::PixelBuffer {
+ rows: Vec::new(),
+ }
+ };
+
+ Ok(GpuCacheTexture {
+ texture: None,
+ bus,
+ })
+ }
+
+ pub fn deinit(mut self, device: &mut Device) {
+ if let Some(t) = self.texture.take() {
+ device.delete_texture(t);
+ }
+ if let GpuCacheBus::Scatter { program, vao, buf_position, buf_value, .. } = self.bus {
+ device.delete_program(program);
+ device.delete_custom_vao(vao);
+ device.delete_vbo(buf_position);
+ device.delete_vbo(buf_value);
+ }
+ }
+
+ pub fn get_height(&self) -> i32 {
+ self.texture.as_ref().map_or(0, |t| t.get_dimensions().height)
+ }
+
+ #[cfg(feature = "capture")]
+ pub fn get_texture(&self) -> &Texture {
+ self.texture.as_ref().unwrap()
+ }
+
+ fn prepare_for_updates(
+ &mut self,
+ device: &mut Device,
+ total_block_count: usize,
+ max_height: i32,
+ ) {
+ self.ensure_texture(device, max_height);
+ match self.bus {
+ GpuCacheBus::PixelBuffer { .. } => {},
+ GpuCacheBus::Scatter {
+ ref mut buf_position,
+ ref mut buf_value,
+ ref mut count,
+ ..
+ } => {
+ *count = 0;
+ if total_block_count > buf_value.allocated_count() {
+ device.allocate_vbo(buf_position, total_block_count, super::ONE_TIME_USAGE_HINT);
+ device.allocate_vbo(buf_value, total_block_count, super::ONE_TIME_USAGE_HINT);
+ }
+ }
+ }
+ }
+
+ pub fn invalidate(&mut self) {
+ match self.bus {
+ GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
+ info!("Invalidating GPU caches");
+ for row in rows {
+ row.add_dirty(0, super::MAX_VERTEX_TEXTURE_WIDTH);
+ }
+ }
+ GpuCacheBus::Scatter { .. } => {
+ warn!("Unable to invalidate scattered GPU cache");
+ }
+ }
+ }
+
+ fn update(&mut self, device: &mut Device, updates: &GpuCacheUpdateList) {
+ match self.bus {
+ GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
+ for update in &updates.updates {
+ match *update {
+ GpuCacheUpdate::Copy {
+ block_index,
+ block_count,
+ address,
+ } => {
+ let row = address.v as usize;
+
+ // Ensure that the CPU-side shadow copy of the GPU cache data has enough
+ // rows to apply this patch.
+ while rows.len() <= row {
+ // Add a new row.
+ rows.push(CacheRow::new());
+ }
+
+ // Copy the blocks from the patch array in the shadow CPU copy.
+ let block_offset = address.u as usize;
+ let data = &mut rows[row].cpu_blocks;
+ for i in 0 .. block_count {
+ data[block_offset + i] = updates.blocks[block_index + i];
+ }
+
+ // This row is dirty (needs to be updated in GPU texture).
+ rows[row].add_dirty(block_offset, block_count);
+ }
+ }
+ }
+ }
+ GpuCacheBus::Scatter {
+ ref buf_position,
+ ref buf_value,
+ ref mut count,
+ ..
+ } => {
+ //TODO: re-use this heap allocation
+ // Unused positions will be left as 0xFFFF, which translates to
+ // (1.0, 1.0) in the vertex output position and gets culled out
+ let mut position_data = vec![[!0u16; 2]; updates.blocks.len()];
+ let size = self.texture.as_ref().unwrap().get_dimensions().to_usize();
+
+ for update in &updates.updates {
+ match *update {
+ GpuCacheUpdate::Copy {
+ block_index,
+ block_count,
+ address,
+ } => {
+ // Convert the absolute texel position into normalized
+ let y = ((2*address.v as usize + 1) << 15) / size.height;
+ for i in 0 .. block_count {
+ let x = ((2*address.u as usize + 2*i + 1) << 15) / size.width;
+ position_data[block_index + i] = [x as _, y as _];
+ }
+ }
+ }
+ }
+
+ device.fill_vbo(buf_value, &updates.blocks, *count);
+ device.fill_vbo(buf_position, &position_data, *count);
+ *count += position_data.len();
+ }
+ }
+ }
+
+ fn flush(&mut self, device: &mut Device, pbo_pool: &mut UploadPBOPool) -> usize {
+ let texture = self.texture.as_ref().unwrap();
+ match self.bus {
+ GpuCacheBus::PixelBuffer { ref mut rows } => {
+ let rows_dirty = rows
+ .iter()
+ .filter(|row| row.is_dirty())
+ .count();
+ if rows_dirty == 0 {
+ return 0
+ }
+
+ let mut uploader = device.upload_texture(pbo_pool);
+
+ for (row_index, row) in rows.iter_mut().enumerate() {
+ if !row.is_dirty() {
+ continue;
+ }
+
+ let blocks = row.dirty_blocks();
+ let rect = DeviceIntRect::new(
+ DeviceIntPoint::new(row.min_dirty as i32, row_index as i32),
+ DeviceIntSize::new(blocks.len() as i32, 1),
+ );
+
+ uploader.upload(device, texture, rect, 0, None, None, blocks.as_ptr(), blocks.len());
+
+ row.clear_dirty();
+ }
+
+ uploader.flush(device);
+
+ rows_dirty
+ }
+ GpuCacheBus::Scatter { ref program, ref vao, count, .. } => {
+ device.disable_depth();
+ device.set_blend(false);
+ device.bind_program(program);
+ device.bind_custom_vao(vao);
+ device.bind_draw_target(
+ DrawTarget::from_texture(
+ texture,
+ 0,
+ false,
+ ),
+ );
+ device.draw_nonindexed_points(0, count as _);
+ 0
+ }
+ }
+ }
+
+ #[cfg(feature = "replay")]
+ pub fn remove_texture(&mut self, device: &mut Device) {
+ if let Some(t) = self.texture.take() {
+ device.delete_texture(t);
+ }
+ }
+
+ #[cfg(feature = "replay")]
+ pub fn load_from_data(&mut self, texture: Texture, data: Vec<u8>) {
+ assert!(self.texture.is_none());
+ match self.bus {
+ GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
+ let dim = texture.get_dimensions();
+ let blocks = unsafe {
+ std::slice::from_raw_parts(
+ data.as_ptr() as *const GpuBlockData,
+ data.len() / mem::size_of::<GpuBlockData>(),
+ )
+ };
+ // fill up the CPU cache from the contents we just loaded
+ rows.clear();
+ rows.extend((0 .. dim.height).map(|_| CacheRow::new()));
+ let chunks = blocks.chunks(super::MAX_VERTEX_TEXTURE_WIDTH);
+ debug_assert_eq!(chunks.len(), rows.len());
+ for (row, chunk) in rows.iter_mut().zip(chunks) {
+ row.cpu_blocks.copy_from_slice(chunk);
+ }
+ }
+ GpuCacheBus::Scatter { .. } => {}
+ }
+ self.texture = Some(texture);
+ }
+
+ pub fn report_memory_to(&self, report: &mut MemoryReport, size_op_funs: &MallocSizeOfOps) {
+ if let GpuCacheBus::PixelBuffer{ref rows, ..} = self.bus {
+ for row in rows.iter() {
+ report.gpu_cache_cpu_mirror += unsafe { (size_op_funs.size_of_op)(row.cpu_blocks.as_ptr() as *const _) };
+ }
+ }
+
+ // GPU cache GPU memory.
+ report.gpu_cache_textures +=
+ self.texture.as_ref().map_or(0, |t| t.size_in_bytes());
+ }
+}
+
+impl super::Renderer {
+ pub fn update_gpu_cache(&mut self) {
+ let _gm = self.gpu_profiler.start_marker("gpu cache update");
+
+ // For an artificial stress test of GPU cache resizing,
+ // always pass an extra update list with at least one block in it.
+ let gpu_cache_height = self.gpu_cache_texture.get_height();
+ if gpu_cache_height != 0 && GPU_CACHE_RESIZE_TEST {
+ self.pending_gpu_cache_updates.push(GpuCacheUpdateList {
+ frame_id: FrameId::INVALID,
+ clear: false,
+ height: gpu_cache_height,
+ blocks: vec![[1f32; 4].into()],
+ updates: Vec::new(),
+ debug_commands: Vec::new(),
+ });
+ }
+
+ let (updated_blocks, max_requested_height) = self
+ .pending_gpu_cache_updates
+ .iter()
+ .fold((0, gpu_cache_height), |(count, height), list| {
+ (count + list.blocks.len(), cmp::max(height, list.height))
+ });
+
+ if max_requested_height > self.get_max_texture_size() && !self.gpu_cache_overflow {
+ self.gpu_cache_overflow = true;
+ self.renderer_errors.push(super::RendererError::MaxTextureSize);
+ }
+
+ // Note: if we decide to switch to scatter-style GPU cache update
+ // permanently, we can have this code nicer with `BufferUploader` kind
+ // of helper, similarly to how `TextureUploader` API is used.
+ self.gpu_cache_texture.prepare_for_updates(
+ &mut self.device,
+ updated_blocks,
+ max_requested_height,
+ );
+
+ for update_list in self.pending_gpu_cache_updates.drain(..) {
+ assert!(update_list.height <= max_requested_height);
+ if update_list.frame_id > self.gpu_cache_frame_id {
+ self.gpu_cache_frame_id = update_list.frame_id
+ }
+ self.gpu_cache_texture
+ .update(&mut self.device, &update_list);
+ }
+
+ self.profile.start_time(profiler::GPU_CACHE_UPLOAD_TIME);
+ let updated_rows = self.gpu_cache_texture.flush(
+ &mut self.device,
+ &mut self.texture_upload_pbo_pool
+ );
+ self.gpu_cache_upload_time += self.profile.end_time(profiler::GPU_CACHE_UPLOAD_TIME);
+
+ self.profile.set(profiler::GPU_CACHE_ROWS_UPDATED, updated_rows);
+ self.profile.set(profiler::GPU_CACHE_BLOCKS_UPDATED, updated_blocks);
+ }
+
+ pub fn prepare_gpu_cache(
+ &mut self,
+ deferred_resolves: &[DeferredResolve],
+ ) -> Result<(), super::RendererError> {
+ if self.pending_gpu_cache_clear {
+ let use_scatter =
+ matches!(self.gpu_cache_texture.bus, GpuCacheBus::Scatter { .. });
+ let new_cache = GpuCacheTexture::new(&mut self.device, use_scatter)?;
+ let old_cache = mem::replace(&mut self.gpu_cache_texture, new_cache);
+ old_cache.deinit(&mut self.device);
+ self.pending_gpu_cache_clear = false;
+ }
+
+ let deferred_update_list = self.update_deferred_resolves(deferred_resolves);
+ self.pending_gpu_cache_updates.extend(deferred_update_list);
+
+ self.update_gpu_cache();
+
+ // Note: the texture might have changed during the `update`,
+ // so we need to bind it here.
+ self.device.bind_texture(
+ super::TextureSampler::GpuCache,
+ self.gpu_cache_texture.texture.as_ref().unwrap(),
+ Swizzle::default(),
+ );
+
+ Ok(())
+ }
+
+ pub fn read_gpu_cache(&mut self) -> (DeviceIntSize, Vec<u8>) {
+ let texture = self.gpu_cache_texture.texture.as_ref().unwrap();
+ let size = device_size_as_framebuffer_size(texture.get_dimensions());
+ let mut texels = vec![0; (size.width * size.height * 16) as usize];
+ self.device.begin_frame();
+ self.device.bind_read_target(ReadTarget::from_texture(texture, 0));
+ self.device.read_pixels_into(
+ size.into(),
+ api::ImageFormat::RGBAF32,
+ &mut texels,
+ );
+ self.device.reset_read_target();
+ self.device.end_frame();
+ (texture.get_dimensions(), texels)
+ }
+}