diff options
Diffstat (limited to 'gfx/wr/webrender/src/renderer')
-rw-r--r-- | gfx/wr/webrender/src/renderer/debug.rs | 415 | ||||
-rw-r--r-- | gfx/wr/webrender/src/renderer/gpu_buffer.rs | 328 | ||||
-rw-r--r-- | gfx/wr/webrender/src/renderer/gpu_cache.rs | 525 | ||||
-rw-r--r-- | gfx/wr/webrender/src/renderer/init.rs | 788 | ||||
-rw-r--r-- | gfx/wr/webrender/src/renderer/mod.rs | 5994 | ||||
-rw-r--r-- | gfx/wr/webrender/src/renderer/shade.rs | 1507 | ||||
-rw-r--r-- | gfx/wr/webrender/src/renderer/upload.rs | 847 | ||||
-rw-r--r-- | gfx/wr/webrender/src/renderer/vertex.rs | 1154 |
8 files changed, 11558 insertions, 0 deletions
diff --git a/gfx/wr/webrender/src/renderer/debug.rs b/gfx/wr/webrender/src/renderer/debug.rs new file mode 100644 index 0000000000..7e16d15d76 --- /dev/null +++ b/gfx/wr/webrender/src/renderer/debug.rs @@ -0,0 +1,415 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +use api::{ColorU, ImageFormat, ImageBufferKind}; +use api::units::*; +use crate::debug_font_data; +use crate::device::{Device, Program, Texture, TextureSlot, VertexDescriptor, ShaderError, VAO}; +use crate::device::{TextureFilter, VertexAttribute, VertexAttributeKind, VertexUsageHint}; +use euclid::{Point2D, Rect, Size2D, Transform3D, default}; +use crate::internal_types::Swizzle; +use std::f32; + +#[derive(Debug, Copy, Clone)] +enum DebugSampler { + Font, +} + +impl Into<TextureSlot> for DebugSampler { + fn into(self) -> TextureSlot { + match self { + DebugSampler::Font => TextureSlot(0), + } + } +} + +const DESC_FONT: VertexDescriptor = VertexDescriptor { + vertex_attributes: &[ + VertexAttribute { + name: "aPosition", + count: 2, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aColor", + count: 4, + kind: VertexAttributeKind::U8Norm, + }, + VertexAttribute { + name: "aColorTexCoord", + count: 2, + kind: VertexAttributeKind::F32, + }, + ], + instance_attributes: &[], +}; + +const DESC_COLOR: VertexDescriptor = VertexDescriptor { + vertex_attributes: &[ + VertexAttribute { + name: "aPosition", + count: 2, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aColor", + count: 4, + kind: VertexAttributeKind::U8Norm, + }, + ], + instance_attributes: &[], +}; + +#[repr(C)] +pub struct DebugFontVertex { + pub x: f32, + pub y: f32, + pub color: ColorU, + pub u: f32, + pub v: f32, +} + +impl DebugFontVertex { + pub fn new(x: f32, y: f32, u: f32, v: f32, color: ColorU) -> DebugFontVertex { + DebugFontVertex { x, y, 
color, u, v } + } +} + +#[repr(C)] +pub struct DebugColorVertex { + pub x: f32, + pub y: f32, + pub color: ColorU, +} + +impl DebugColorVertex { + pub fn new(x: f32, y: f32, color: ColorU) -> DebugColorVertex { + DebugColorVertex { x, y, color } + } +} + +pub struct DebugRenderer { + font_vertices: Vec<DebugFontVertex>, + font_indices: Vec<u32>, + font_program: Program, + font_vao: VAO, + font_texture: Texture, + + tri_vertices: Vec<DebugColorVertex>, + tri_indices: Vec<u32>, + tri_vao: VAO, + line_vertices: Vec<DebugColorVertex>, + line_vao: VAO, + color_program: Program, +} + +impl DebugRenderer { + pub fn new(device: &mut Device) -> Result<Self, ShaderError> { + let font_program = device.create_program_linked( + "debug_font", + &[], + &DESC_FONT, + )?; + device.bind_program(&font_program); + device.bind_shader_samplers(&font_program, &[("sColor0", DebugSampler::Font)]); + + let color_program = device.create_program_linked( + "debug_color", + &[], + &DESC_COLOR, + )?; + + let font_vao = device.create_vao(&DESC_FONT, 1); + let line_vao = device.create_vao(&DESC_COLOR, 1); + let tri_vao = device.create_vao(&DESC_COLOR, 1); + + let font_texture = device.create_texture( + ImageBufferKind::Texture2D, + ImageFormat::R8, + debug_font_data::BMP_WIDTH, + debug_font_data::BMP_HEIGHT, + TextureFilter::Linear, + None, + ); + device.upload_texture_immediate( + &font_texture, + &debug_font_data::FONT_BITMAP + ); + + Ok(DebugRenderer { + font_vertices: Vec::new(), + font_indices: Vec::new(), + line_vertices: Vec::new(), + tri_vao, + tri_vertices: Vec::new(), + tri_indices: Vec::new(), + font_program, + color_program, + font_vao, + line_vao, + font_texture, + }) + } + + pub fn deinit(self, device: &mut Device) { + device.delete_texture(self.font_texture); + device.delete_program(self.font_program); + device.delete_program(self.color_program); + device.delete_vao(self.tri_vao); + device.delete_vao(self.line_vao); + device.delete_vao(self.font_vao); + } + + pub fn 
line_height(&self) -> f32 { + debug_font_data::FONT_SIZE as f32 * 1.1 + } + + /// Draws a line of text at the provided starting coordinates. + /// + /// If |bounds| is specified, glyphs outside the bounds are discarded. + /// + /// Y-coordinates is relative to screen top, along with everything else in + /// this file. + pub fn add_text( + &mut self, + x: f32, + y: f32, + text: &str, + color: ColorU, + bounds: Option<DeviceRect>, + ) -> default::Rect<f32> { + let mut x_start = x; + let ipw = 1.0 / debug_font_data::BMP_WIDTH as f32; + let iph = 1.0 / debug_font_data::BMP_HEIGHT as f32; + + let mut min_x = f32::MAX; + let mut max_x = -f32::MAX; + let mut min_y = f32::MAX; + let mut max_y = -f32::MAX; + + for c in text.chars() { + let c = c as usize - debug_font_data::FIRST_GLYPH_INDEX as usize; + if c < debug_font_data::GLYPHS.len() { + let glyph = &debug_font_data::GLYPHS[c]; + + let x0 = (x_start + glyph.xo + 0.5).floor(); + let y0 = (y + glyph.yo + 0.5).floor(); + + let x1 = x0 + glyph.x1 as f32 - glyph.x0 as f32; + let y1 = y0 + glyph.y1 as f32 - glyph.y0 as f32; + + // If either corner of the glyph will end up out of bounds, drop it. 
+ if let Some(b) = bounds { + let rect = DeviceRect { + min: DevicePoint::new(x0, y0), + max: DevicePoint::new(x1, y1), + }; + if !b.contains_box(&rect) { + continue; + } + } + + let s0 = glyph.x0 as f32 * ipw; + let t0 = glyph.y0 as f32 * iph; + let s1 = glyph.x1 as f32 * ipw; + let t1 = glyph.y1 as f32 * iph; + + x_start += glyph.xa; + + let vertex_count = self.font_vertices.len() as u32; + + self.font_vertices + .push(DebugFontVertex::new(x0, y0, s0, t0, color)); + self.font_vertices + .push(DebugFontVertex::new(x1, y0, s1, t0, color)); + self.font_vertices + .push(DebugFontVertex::new(x0, y1, s0, t1, color)); + self.font_vertices + .push(DebugFontVertex::new(x1, y1, s1, t1, color)); + + self.font_indices.push(vertex_count + 0); + self.font_indices.push(vertex_count + 1); + self.font_indices.push(vertex_count + 2); + self.font_indices.push(vertex_count + 2); + self.font_indices.push(vertex_count + 1); + self.font_indices.push(vertex_count + 3); + + min_x = min_x.min(x0); + max_x = max_x.max(x1); + min_y = min_y.min(y0); + max_y = max_y.max(y1); + } + } + + Rect::new( + Point2D::new(min_x, min_y), + Size2D::new(max_x - min_x, max_y - min_y), + ) + } + + pub fn add_quad( + &mut self, + x0: f32, + y0: f32, + x1: f32, + y1: f32, + color_top: ColorU, + color_bottom: ColorU, + ) { + let vertex_count = self.tri_vertices.len() as u32; + + self.tri_vertices + .push(DebugColorVertex::new(x0, y0, color_top)); + self.tri_vertices + .push(DebugColorVertex::new(x1, y0, color_top)); + self.tri_vertices + .push(DebugColorVertex::new(x0, y1, color_bottom)); + self.tri_vertices + .push(DebugColorVertex::new(x1, y1, color_bottom)); + + self.tri_indices.push(vertex_count + 0); + self.tri_indices.push(vertex_count + 1); + self.tri_indices.push(vertex_count + 2); + self.tri_indices.push(vertex_count + 2); + self.tri_indices.push(vertex_count + 1); + self.tri_indices.push(vertex_count + 3); + } + + #[allow(dead_code)] + pub fn add_line(&mut self, x0: i32, y0: i32, color0: ColorU, x1: 
i32, y1: i32, color1: ColorU) { + self.line_vertices + .push(DebugColorVertex::new(x0 as f32, y0 as f32, color0)); + self.line_vertices + .push(DebugColorVertex::new(x1 as f32, y1 as f32, color1)); + } + + + pub fn add_rect(&mut self, rect: &DeviceIntRect, color: ColorU) { + let p0 = rect.min; + let p1 = rect.max; + self.add_line(p0.x, p0.y, color, p1.x, p0.y, color); + self.add_line(p1.x, p0.y, color, p1.x, p1.y, color); + self.add_line(p1.x, p1.y, color, p0.x, p1.y, color); + self.add_line(p0.x, p1.y, color, p0.x, p0.y, color); + } + + pub fn render( + &mut self, + device: &mut Device, + viewport_size: Option<DeviceIntSize>, + scale: f32, + surface_origin_is_top_left: bool, + ) { + if let Some(viewport_size) = viewport_size { + device.disable_depth(); + device.set_blend(true); + device.set_blend_mode_premultiplied_alpha(); + + let (bottom, top) = if surface_origin_is_top_left { + (0.0, viewport_size.height as f32 * scale) + } else { + (viewport_size.height as f32 * scale, 0.0) + }; + + let projection = Transform3D::ortho( + 0.0, + viewport_size.width as f32 * scale, + bottom, + top, + device.ortho_near_plane(), + device.ortho_far_plane(), + ); + + // Triangles + if !self.tri_vertices.is_empty() { + device.bind_program(&self.color_program); + device.set_uniforms(&self.color_program, &projection); + device.bind_vao(&self.tri_vao); + device.update_vao_indices(&self.tri_vao, &self.tri_indices, VertexUsageHint::Dynamic); + device.update_vao_main_vertices( + &self.tri_vao, + &self.tri_vertices, + VertexUsageHint::Dynamic, + ); + device.draw_triangles_u32(0, self.tri_indices.len() as i32); + } + + // Lines + if !self.line_vertices.is_empty() { + device.bind_program(&self.color_program); + device.set_uniforms(&self.color_program, &projection); + device.bind_vao(&self.line_vao); + device.update_vao_main_vertices( + &self.line_vao, + &self.line_vertices, + VertexUsageHint::Dynamic, + ); + device.draw_nonindexed_lines(0, self.line_vertices.len() as i32); + } + + // Glyph + 
if !self.font_indices.is_empty() { + device.bind_program(&self.font_program); + device.set_uniforms(&self.font_program, &projection); + device.bind_texture(DebugSampler::Font, &self.font_texture, Swizzle::default()); + device.bind_vao(&self.font_vao); + device.update_vao_indices(&self.font_vao, &self.font_indices, VertexUsageHint::Dynamic); + device.update_vao_main_vertices( + &self.font_vao, + &self.font_vertices, + VertexUsageHint::Dynamic, + ); + device.draw_triangles_u32(0, self.font_indices.len() as i32); + } + } + + self.font_indices.clear(); + self.font_vertices.clear(); + self.line_vertices.clear(); + self.tri_vertices.clear(); + self.tri_indices.clear(); + } +} + +pub struct LazyInitializedDebugRenderer { + debug_renderer: Option<DebugRenderer>, + failed: bool, +} + +impl LazyInitializedDebugRenderer { + pub fn new() -> Self { + Self { + debug_renderer: None, + failed: false, + } + } + + pub fn get_mut<'a>(&'a mut self, device: &mut Device) -> Option<&'a mut DebugRenderer> { + if self.failed { + return None; + } + if self.debug_renderer.is_none() { + match DebugRenderer::new(device) { + Ok(renderer) => { self.debug_renderer = Some(renderer); } + Err(_) => { + // The shader compilation code already logs errors. + self.failed = true; + } + } + } + + self.debug_renderer.as_mut() + } + + /// Returns mut ref to `debug::DebugRenderer` if one already exists, otherwise returns `None`. + pub fn try_get_mut<'a>(&'a mut self) -> Option<&'a mut DebugRenderer> { + self.debug_renderer.as_mut() + } + + pub fn deinit(self, device: &mut Device) { + if let Some(debug_renderer) = self.debug_renderer { + debug_renderer.deinit(device); + } + } +} diff --git a/gfx/wr/webrender/src/renderer/gpu_buffer.rs b/gfx/wr/webrender/src/renderer/gpu_buffer.rs new file mode 100644 index 0000000000..58a8aa6cbd --- /dev/null +++ b/gfx/wr/webrender/src/renderer/gpu_buffer.rs @@ -0,0 +1,328 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + + TODO: + Recycle GpuBuffers in a pool (support return from render thread) + Efficiently allow writing to buffer (better push interface) + Support other texel types (e.g. i32) + + */ + +use crate::renderer::MAX_VERTEX_TEXTURE_WIDTH; +use api::units::{DeviceIntRect, DeviceIntSize, LayoutRect, PictureRect, DeviceRect}; +use api::{PremultipliedColorF}; +use crate::device::Texel; +use crate::render_task_graph::{RenderTaskGraph, RenderTaskId}; + + +unsafe impl Texel for GpuBufferBlock {} + +/// A single texel in RGBAF32 texture - 16 bytes. +#[derive(Copy, Clone, Debug, MallocSizeOf)] +#[cfg_attr(feature = "capture", derive(Serialize))] +#[cfg_attr(feature = "replay", derive(Deserialize))] +pub struct GpuBufferBlock { + data: [f32; 4], +} + +#[derive(Copy, Debug, Clone, MallocSizeOf, Eq, PartialEq)] +#[cfg_attr(feature = "capture", derive(Serialize))] +#[cfg_attr(feature = "replay", derive(Deserialize))] +pub struct GpuBufferAddress { + pub u: u16, + pub v: u16, +} + +impl GpuBufferAddress { + #[allow(dead_code)] + pub fn as_int(self) -> i32 { + // TODO(gw): Temporarily encode GPU Cache addresses as a single int. + // In the future, we can change the PrimitiveInstanceData struct + // to use 2x u16 for the vertex attribute instead of an i32. 
+ self.v as i32 * MAX_VERTEX_TEXTURE_WIDTH as i32 + self.u as i32 + } + + pub const INVALID: GpuBufferAddress = GpuBufferAddress { u: !0, v: !0 }; +} + +impl GpuBufferBlock { + pub const EMPTY: Self = GpuBufferBlock { data: [0.0; 4] }; +} + +impl Into<GpuBufferBlock> for LayoutRect { + fn into(self) -> GpuBufferBlock { + GpuBufferBlock { + data: [ + self.min.x, + self.min.y, + self.max.x, + self.max.y, + ], + } + } +} + +impl Into<GpuBufferBlock> for PictureRect { + fn into(self) -> GpuBufferBlock { + GpuBufferBlock { + data: [ + self.min.x, + self.min.y, + self.max.x, + self.max.y, + ], + } + } +} + +impl Into<GpuBufferBlock> for DeviceRect { + fn into(self) -> GpuBufferBlock { + GpuBufferBlock { + data: [ + self.min.x, + self.min.y, + self.max.x, + self.max.y, + ], + } + } +} + +impl Into<GpuBufferBlock> for PremultipliedColorF { + fn into(self) -> GpuBufferBlock { + GpuBufferBlock { + data: [ + self.r, + self.g, + self.b, + self.a, + ], + } + } +} + +impl Into<GpuBufferBlock> for DeviceIntRect { + fn into(self) -> GpuBufferBlock { + GpuBufferBlock { + data: [ + self.min.x as f32, + self.min.y as f32, + self.max.x as f32, + self.max.y as f32, + ], + } + } +} + +impl Into<GpuBufferBlock> for [f32; 4] { + fn into(self) -> GpuBufferBlock { + GpuBufferBlock { + data: self, + } + } +} + +/// Record a patch to the GPU buffer for a render task +struct DeferredBlock { + task_id: RenderTaskId, + index: usize, +} + +/// Interface to allow writing multiple GPU blocks, possibly of different types +pub struct GpuBufferWriter<'a> { + buffer: &'a mut Vec<GpuBufferBlock>, + deferred: &'a mut Vec<DeferredBlock>, + index: usize, + block_count: usize, +} + +impl<'a> GpuBufferWriter<'a> { + fn new( + buffer: &'a mut Vec<GpuBufferBlock>, + deferred: &'a mut Vec<DeferredBlock>, + index: usize, + block_count: usize, + ) -> Self { + GpuBufferWriter { + buffer, + deferred, + index, + block_count, + } + } + + /// Push one (16 byte) block of data in to the writer + pub fn push_one<B>(&mut 
self, block: B) where B: Into<GpuBufferBlock> { + self.buffer.push(block.into()); + } + + /// Push a reference to a render task in to the writer. Once the render + /// task graph is resolved, this will be patched with the UV rect of the task + pub fn push_render_task(&mut self, task_id: RenderTaskId) { + self.deferred.push(DeferredBlock { + task_id, + index: self.buffer.len(), + }); + self.buffer.push(GpuBufferBlock::EMPTY); + } + + /// Close this writer, returning the GPU address of this set of block(s). + pub fn finish(self) -> GpuBufferAddress { + assert_eq!(self.buffer.len(), self.index + self.block_count); + + GpuBufferAddress { + u: (self.index % MAX_VERTEX_TEXTURE_WIDTH) as u16, + v: (self.index / MAX_VERTEX_TEXTURE_WIDTH) as u16, + } + } +} + +impl<'a> Drop for GpuBufferWriter<'a> { + fn drop(&mut self) { + assert_eq!(self.buffer.len(), self.index + self.block_count, "Claimed block_count was not written"); + } +} + +pub struct GpuBufferBuilder { + data: Vec<GpuBufferBlock>, + deferred: Vec<DeferredBlock>, +} + +impl GpuBufferBuilder { + pub fn new() -> Self { + GpuBufferBuilder { + data: Vec::new(), + deferred: Vec::new(), + } + } + + #[allow(dead_code)] + pub fn push( + &mut self, + blocks: &[GpuBufferBlock], + ) -> GpuBufferAddress { + assert!(blocks.len() <= MAX_VERTEX_TEXTURE_WIDTH); + + if (self.data.len() % MAX_VERTEX_TEXTURE_WIDTH) + blocks.len() > MAX_VERTEX_TEXTURE_WIDTH { + while self.data.len() % MAX_VERTEX_TEXTURE_WIDTH != 0 { + self.data.push(GpuBufferBlock::EMPTY); + } + } + + let index = self.data.len(); + + self.data.extend_from_slice(blocks); + + GpuBufferAddress { + u: (index % MAX_VERTEX_TEXTURE_WIDTH) as u16, + v: (index / MAX_VERTEX_TEXTURE_WIDTH) as u16, + } + } + + /// Begin writing a specific number of blocks + pub fn write_blocks( + &mut self, + block_count: usize, + ) -> GpuBufferWriter { + assert!(block_count <= MAX_VERTEX_TEXTURE_WIDTH); + + if (self.data.len() % MAX_VERTEX_TEXTURE_WIDTH) + block_count > MAX_VERTEX_TEXTURE_WIDTH 
{ + while self.data.len() % MAX_VERTEX_TEXTURE_WIDTH != 0 { + self.data.push(GpuBufferBlock::EMPTY); + } + } + + let index = self.data.len(); + + GpuBufferWriter::new( + &mut self.data, + &mut self.deferred, + index, + block_count, + ) + } + + pub fn finalize( + mut self, + render_tasks: &RenderTaskGraph, + ) -> GpuBuffer { + let required_len = (self.data.len() + MAX_VERTEX_TEXTURE_WIDTH-1) & !(MAX_VERTEX_TEXTURE_WIDTH-1); + + for _ in 0 .. required_len - self.data.len() { + self.data.push(GpuBufferBlock::EMPTY); + } + + let len = self.data.len(); + assert!(len % MAX_VERTEX_TEXTURE_WIDTH == 0); + + // At this point, we know that the render task graph has been built, and we can + // query the location of any dynamic (render target) or static (texture cache) + // task. This allows us to patch the UV rects in to the GPU buffer before upload + // to the GPU. + for block in self.deferred.drain(..) { + let render_task = &render_tasks[block.task_id]; + let target_rect = render_task.get_target_rect(); + self.data[block.index] = target_rect.into(); + } + + GpuBuffer { + data: self.data, + size: DeviceIntSize::new(MAX_VERTEX_TEXTURE_WIDTH as i32, (len / MAX_VERTEX_TEXTURE_WIDTH) as i32), + } + } +} + +#[cfg_attr(feature = "capture", derive(Serialize))] +#[cfg_attr(feature = "replay", derive(Deserialize))] +pub struct GpuBuffer { + pub data: Vec<GpuBufferBlock>, + pub size: DeviceIntSize, +} + +impl GpuBuffer { + pub fn is_empty(&self) -> bool { + self.data.is_empty() + } +} + + +#[test] +fn test_gpu_buffer_sizing_push() { + let render_task_graph = RenderTaskGraph::new_for_testing(); + let mut builder = GpuBufferBuilder::new(); + + let row = vec![GpuBufferBlock::EMPTY; MAX_VERTEX_TEXTURE_WIDTH]; + builder.push(&row); + + builder.push(&[GpuBufferBlock::EMPTY]); + builder.push(&[GpuBufferBlock::EMPTY]); + + let buffer = builder.finalize(&render_task_graph); + assert_eq!(buffer.data.len(), MAX_VERTEX_TEXTURE_WIDTH * 2); +} + +#[test] +fn test_gpu_buffer_sizing_writer() { + let 
render_task_graph = RenderTaskGraph::new_for_testing(); + let mut builder = GpuBufferBuilder::new(); + + let mut writer = builder.write_blocks(MAX_VERTEX_TEXTURE_WIDTH); + for _ in 0 .. MAX_VERTEX_TEXTURE_WIDTH { + writer.push_one(GpuBufferBlock::EMPTY); + } + writer.finish(); + + let mut writer = builder.write_blocks(1); + writer.push_one(GpuBufferBlock::EMPTY); + writer.finish(); + + let mut writer = builder.write_blocks(1); + writer.push_one(GpuBufferBlock::EMPTY); + writer.finish(); + + let buffer = builder.finalize(&render_task_graph); + assert_eq!(buffer.data.len(), MAX_VERTEX_TEXTURE_WIDTH * 2); +} diff --git a/gfx/wr/webrender/src/renderer/gpu_cache.rs b/gfx/wr/webrender/src/renderer/gpu_cache.rs new file mode 100644 index 0000000000..fde649cb08 --- /dev/null +++ b/gfx/wr/webrender/src/renderer/gpu_cache.rs @@ -0,0 +1,525 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +use std::{cmp, mem}; +use api::units::*; +use malloc_size_of::MallocSizeOfOps; +use crate::{ + device::{CustomVAO, Device, DrawTarget, Program, ReadTarget, Texture, TextureFilter, UploadPBOPool, VBO}, + gpu_cache::{GpuBlockData, GpuCacheUpdate, GpuCacheUpdateList}, + internal_types::{RenderTargetInfo, Swizzle}, + prim_store::DeferredResolve, + profiler, + render_api::MemoryReport, + internal_types::FrameId, +}; + +/// Enabling this toggle would force the GPU cache scattered texture to +/// be resized every frame, which enables GPU debuggers to see if this +/// is performed correctly. +const GPU_CACHE_RESIZE_TEST: bool = false; + +/// Tracks the state of each row in the GPU cache texture. +struct CacheRow { + /// Mirrored block data on CPU for this row. We store a copy of + /// the data on the CPU side to improve upload batching. 
+ cpu_blocks: Box<[GpuBlockData; super::MAX_VERTEX_TEXTURE_WIDTH]>, + /// The first offset in this row that is dirty. + min_dirty: u16, + /// The last offset in this row that is dirty. + max_dirty: u16, +} + +impl CacheRow { + fn new() -> Self { + CacheRow { + cpu_blocks: Box::new([GpuBlockData::EMPTY; super::MAX_VERTEX_TEXTURE_WIDTH]), + min_dirty: super::MAX_VERTEX_TEXTURE_WIDTH as _, + max_dirty: 0, + } + } + + fn is_dirty(&self) -> bool { + return self.min_dirty < self.max_dirty; + } + + fn clear_dirty(&mut self) { + self.min_dirty = super::MAX_VERTEX_TEXTURE_WIDTH as _; + self.max_dirty = 0; + } + + fn add_dirty(&mut self, block_offset: usize, block_count: usize) { + self.min_dirty = self.min_dirty.min(block_offset as _); + self.max_dirty = self.max_dirty.max((block_offset + block_count) as _); + } + + fn dirty_blocks(&self) -> &[GpuBlockData] { + return &self.cpu_blocks[self.min_dirty as usize .. self.max_dirty as usize]; + } +} + +/// The bus over which CPU and GPU versions of the GPU cache +/// get synchronized. +enum GpuCacheBus { + /// PBO-based updates, currently operate on a row granularity. + /// Therefore, are subject to fragmentation issues. + PixelBuffer { + /// Per-row data. + rows: Vec<CacheRow>, + }, + /// Shader-based scattering updates. Currently rendered by a set + /// of points into the GPU texture, each carrying a `GpuBlockData`. + Scatter { + /// Special program to run the scattered update. + program: Program, + /// VAO containing the source vertex buffers. + vao: CustomVAO, + /// VBO for positional data, supplied as normalized `u16`. + buf_position: VBO<[u16; 2]>, + /// VBO for gpu block data. + buf_value: VBO<GpuBlockData>, + /// Currently stored block count. + count: usize, + }, +} + +/// The device-specific representation of the cache texture in gpu_cache.rs +pub struct GpuCacheTexture { + texture: Option<Texture>, + bus: GpuCacheBus, +} + +impl GpuCacheTexture { + /// Ensures that we have an appropriately-sized texture. 
+ fn ensure_texture(&mut self, device: &mut Device, height: i32) { + // If we already have a texture that works, we're done. + if self.texture.as_ref().map_or(false, |t| t.get_dimensions().height >= height) { + if GPU_CACHE_RESIZE_TEST { + // Special debug mode - resize the texture even though it's fine. + } else { + return; + } + } + + // Take the old texture, if any. + let blit_source = self.texture.take(); + + // Create the new texture. + assert!(height >= 2, "Height is too small for ANGLE"); + let new_size = DeviceIntSize::new(super::MAX_VERTEX_TEXTURE_WIDTH as _, height); + // GpuCacheBus::Scatter always requires the texture to be a render target. For + // GpuCacheBus::PixelBuffer, we only create the texture with a render target if + // RGBAF32 render targets are actually supported, and only if glCopyImageSubData + // is not. glCopyImageSubData does not require a render target to copy the texture + // data, and if neither RGBAF32 render targets nor glCopyImageSubData is supported, + // we simply re-upload the entire contents rather than copying upon resize. + let supports_copy_image_sub_data = device.get_capabilities().supports_copy_image_sub_data; + let supports_color_buffer_float = device.get_capabilities().supports_color_buffer_float; + let rt_info = if matches!(self.bus, GpuCacheBus::PixelBuffer { .. }) + && (supports_copy_image_sub_data || !supports_color_buffer_float) + { + None + } else { + Some(RenderTargetInfo { has_depth: false }) + }; + let mut texture = device.create_texture( + api::ImageBufferKind::Texture2D, + api::ImageFormat::RGBAF32, + new_size.width, + new_size.height, + TextureFilter::Nearest, + rt_info, + ); + + // Copy the contents of the previous texture, if applicable. + if let Some(blit_source) = blit_source { + if !supports_copy_image_sub_data && !supports_color_buffer_float { + // Cannot copy texture, so must re-upload everything. 
+ match self.bus { + GpuCacheBus::PixelBuffer { ref mut rows } => { + for row in rows { + row.add_dirty(0, super::MAX_VERTEX_TEXTURE_WIDTH); + } + } + GpuCacheBus::Scatter { .. } => { + panic!("Texture must be copyable to use scatter GPU cache bus method"); + } + } + } else { + device.copy_entire_texture(&mut texture, &blit_source); + } + device.delete_texture(blit_source); + } + + self.texture = Some(texture); + } + + pub fn new(device: &mut Device, use_scatter: bool) -> Result<Self, super::RendererError> { + use super::desc::GPU_CACHE_UPDATE; + + let bus = if use_scatter { + assert!( + device.get_capabilities().supports_color_buffer_float, + "GpuCache scatter method requires EXT_color_buffer_float", + ); + let program = device.create_program_linked( + "gpu_cache_update", + &[], + &GPU_CACHE_UPDATE, + )?; + let buf_position = device.create_vbo(); + let buf_value = device.create_vbo(); + //Note: the vertex attributes have to be supplied in the same order + // as for program creation, but each assigned to a different stream. + let vao = device.create_custom_vao(&[ + buf_position.stream_with(&GPU_CACHE_UPDATE.vertex_attributes[0..1]), + buf_value .stream_with(&GPU_CACHE_UPDATE.vertex_attributes[1..2]), + ]); + GpuCacheBus::Scatter { + program, + vao, + buf_position, + buf_value, + count: 0, + } + } else { + GpuCacheBus::PixelBuffer { + rows: Vec::new(), + } + }; + + Ok(GpuCacheTexture { + texture: None, + bus, + }) + } + + pub fn deinit(mut self, device: &mut Device) { + if let Some(t) = self.texture.take() { + device.delete_texture(t); + } + if let GpuCacheBus::Scatter { program, vao, buf_position, buf_value, .. 
} = self.bus { + device.delete_program(program); + device.delete_custom_vao(vao); + device.delete_vbo(buf_position); + device.delete_vbo(buf_value); + } + } + + pub fn get_height(&self) -> i32 { + self.texture.as_ref().map_or(0, |t| t.get_dimensions().height) + } + + #[cfg(feature = "capture")] + pub fn get_texture(&self) -> &Texture { + self.texture.as_ref().unwrap() + } + + fn prepare_for_updates( + &mut self, + device: &mut Device, + total_block_count: usize, + max_height: i32, + ) { + self.ensure_texture(device, max_height); + match self.bus { + GpuCacheBus::PixelBuffer { .. } => {}, + GpuCacheBus::Scatter { + ref mut buf_position, + ref mut buf_value, + ref mut count, + .. + } => { + *count = 0; + if total_block_count > buf_value.allocated_count() { + device.allocate_vbo(buf_position, total_block_count, super::ONE_TIME_USAGE_HINT); + device.allocate_vbo(buf_value, total_block_count, super::ONE_TIME_USAGE_HINT); + } + } + } + } + + pub fn invalidate(&mut self) { + match self.bus { + GpuCacheBus::PixelBuffer { ref mut rows, .. } => { + info!("Invalidating GPU caches"); + for row in rows { + row.add_dirty(0, super::MAX_VERTEX_TEXTURE_WIDTH); + } + } + GpuCacheBus::Scatter { .. } => { + warn!("Unable to invalidate scattered GPU cache"); + } + } + } + + fn update(&mut self, device: &mut Device, updates: &GpuCacheUpdateList) { + match self.bus { + GpuCacheBus::PixelBuffer { ref mut rows, .. } => { + for update in &updates.updates { + match *update { + GpuCacheUpdate::Copy { + block_index, + block_count, + address, + } => { + let row = address.v as usize; + + // Ensure that the CPU-side shadow copy of the GPU cache data has enough + // rows to apply this patch. + while rows.len() <= row { + // Add a new row. + rows.push(CacheRow::new()); + } + + // Copy the blocks from the patch array in the shadow CPU copy. + let block_offset = address.u as usize; + let data = &mut rows[row].cpu_blocks; + for i in 0 .. 
block_count { + data[block_offset + i] = updates.blocks[block_index + i]; + } + + // This row is dirty (needs to be updated in GPU texture). + rows[row].add_dirty(block_offset, block_count); + } + } + } + } + GpuCacheBus::Scatter { + ref buf_position, + ref buf_value, + ref mut count, + .. + } => { + //TODO: re-use this heap allocation + // Unused positions will be left as 0xFFFF, which translates to + // (1.0, 1.0) in the vertex output position and gets culled out + let mut position_data = vec![[!0u16; 2]; updates.blocks.len()]; + let size = self.texture.as_ref().unwrap().get_dimensions().to_usize(); + + for update in &updates.updates { + match *update { + GpuCacheUpdate::Copy { + block_index, + block_count, + address, + } => { + // Convert the absolute texel position into normalized + let y = ((2*address.v as usize + 1) << 15) / size.height; + for i in 0 .. block_count { + let x = ((2*address.u as usize + 2*i + 1) << 15) / size.width; + position_data[block_index + i] = [x as _, y as _]; + } + } + } + } + + device.fill_vbo(buf_value, &updates.blocks, *count); + device.fill_vbo(buf_position, &position_data, *count); + *count += position_data.len(); + } + } + } + + fn flush(&mut self, device: &mut Device, pbo_pool: &mut UploadPBOPool) -> usize { + let texture = self.texture.as_ref().unwrap(); + match self.bus { + GpuCacheBus::PixelBuffer { ref mut rows } => { + let rows_dirty = rows + .iter() + .filter(|row| row.is_dirty()) + .count(); + if rows_dirty == 0 { + return 0 + } + + let mut uploader = device.upload_texture(pbo_pool); + + for (row_index, row) in rows.iter_mut().enumerate() { + if !row.is_dirty() { + continue; + } + + let blocks = row.dirty_blocks(); + let rect = DeviceIntRect::from_origin_and_size( + DeviceIntPoint::new(row.min_dirty as i32, row_index as i32), + DeviceIntSize::new(blocks.len() as i32, 1), + ); + + uploader.upload(device, texture, rect, None, None, blocks.as_ptr(), blocks.len()); + + row.clear_dirty(); + } + + uploader.flush(device); + + 
rows_dirty + } + GpuCacheBus::Scatter { ref program, ref vao, count, .. } => { + device.disable_depth(); + device.set_blend(false); + device.bind_program(program); + device.bind_custom_vao(vao); + device.bind_draw_target( + DrawTarget::from_texture( + texture, + false, + ), + ); + device.draw_nonindexed_points(0, count as _); + 0 + } + } + } + + #[cfg(feature = "replay")] + pub fn remove_texture(&mut self, device: &mut Device) { + if let Some(t) = self.texture.take() { + device.delete_texture(t); + } + } + + #[cfg(feature = "replay")] + pub fn load_from_data(&mut self, texture: Texture, data: Vec<u8>) { + assert!(self.texture.is_none()); + match self.bus { + GpuCacheBus::PixelBuffer { ref mut rows, .. } => { + let dim = texture.get_dimensions(); + let blocks = unsafe { + std::slice::from_raw_parts( + data.as_ptr() as *const GpuBlockData, + data.len() / mem::size_of::<GpuBlockData>(), + ) + }; + // fill up the CPU cache from the contents we just loaded + rows.clear(); + rows.extend((0 .. dim.height).map(|_| CacheRow::new())); + let chunks = blocks.chunks(super::MAX_VERTEX_TEXTURE_WIDTH); + debug_assert_eq!(chunks.len(), rows.len()); + for (row, chunk) in rows.iter_mut().zip(chunks) { + row.cpu_blocks.copy_from_slice(chunk); + } + } + GpuCacheBus::Scatter { .. } => {} + } + self.texture = Some(texture); + } + + pub fn report_memory_to(&self, report: &mut MemoryReport, size_op_funs: &MallocSizeOfOps) { + if let GpuCacheBus::PixelBuffer{ref rows, ..} = self.bus { + for row in rows.iter() { + report.gpu_cache_cpu_mirror += unsafe { (size_op_funs.size_of_op)(row.cpu_blocks.as_ptr() as *const _) }; + } + } + + // GPU cache GPU memory. 
+ report.gpu_cache_textures += + self.texture.as_ref().map_or(0, |t| t.size_in_bytes()); + } +} + +impl super::Renderer { + pub fn update_gpu_cache(&mut self) { + let _gm = self.gpu_profiler.start_marker("gpu cache update"); + + // For an artificial stress test of GPU cache resizing, + // always pass an extra update list with at least one block in it. + let gpu_cache_height = self.gpu_cache_texture.get_height(); + if gpu_cache_height != 0 && GPU_CACHE_RESIZE_TEST { + self.pending_gpu_cache_updates.push(GpuCacheUpdateList { + frame_id: FrameId::INVALID, + clear: false, + height: gpu_cache_height, + blocks: vec![[1f32; 4].into()], + updates: Vec::new(), + debug_commands: Vec::new(), + }); + } + + let (updated_blocks, max_requested_height) = self + .pending_gpu_cache_updates + .iter() + .fold((0, gpu_cache_height), |(count, height), list| { + (count + list.blocks.len(), cmp::max(height, list.height)) + }); + + if max_requested_height > self.get_max_texture_size() && !self.gpu_cache_overflow { + self.gpu_cache_overflow = true; + self.renderer_errors.push(super::RendererError::MaxTextureSize); + } + + // Note: if we decide to switch to scatter-style GPU cache update + // permanently, we can have this code nicer with `BufferUploader` kind + // of helper, similarly to how `TextureUploader` API is used. + self.gpu_cache_texture.prepare_for_updates( + &mut self.device, + updated_blocks, + max_requested_height, + ); + + for update_list in self.pending_gpu_cache_updates.drain(..) 
{ + assert!(update_list.height <= max_requested_height); + if update_list.frame_id > self.gpu_cache_frame_id { + self.gpu_cache_frame_id = update_list.frame_id + } + self.gpu_cache_texture + .update(&mut self.device, &update_list); + } + + self.profile.start_time(profiler::GPU_CACHE_UPLOAD_TIME); + let updated_rows = self.gpu_cache_texture.flush( + &mut self.device, + &mut self.texture_upload_pbo_pool + ); + self.gpu_cache_upload_time += self.profile.end_time(profiler::GPU_CACHE_UPLOAD_TIME); + + self.profile.set(profiler::GPU_CACHE_ROWS_UPDATED, updated_rows); + self.profile.set(profiler::GPU_CACHE_BLOCKS_UPDATED, updated_blocks); + } + + pub fn prepare_gpu_cache( + &mut self, + deferred_resolves: &[DeferredResolve], + ) -> Result<(), super::RendererError> { + if self.pending_gpu_cache_clear { + let use_scatter = + matches!(self.gpu_cache_texture.bus, GpuCacheBus::Scatter { .. }); + let new_cache = GpuCacheTexture::new(&mut self.device, use_scatter)?; + let old_cache = mem::replace(&mut self.gpu_cache_texture, new_cache); + old_cache.deinit(&mut self.device); + self.pending_gpu_cache_clear = false; + } + + let deferred_update_list = self.update_deferred_resolves(deferred_resolves); + self.pending_gpu_cache_updates.extend(deferred_update_list); + + self.update_gpu_cache(); + + // Note: the texture might have changed during the `update`, + // so we need to bind it here. 
+ self.device.bind_texture( + super::TextureSampler::GpuCache, + self.gpu_cache_texture.texture.as_ref().unwrap(), + Swizzle::default(), + ); + + Ok(()) + } + + pub fn read_gpu_cache(&mut self) -> (DeviceIntSize, Vec<u8>) { + let texture = self.gpu_cache_texture.texture.as_ref().unwrap(); + let size = device_size_as_framebuffer_size(texture.get_dimensions()); + let mut texels = vec![0; (size.width * size.height * 16) as usize]; + self.device.begin_frame(); + self.device.bind_read_target(ReadTarget::from_texture(texture)); + self.device.read_pixels_into( + size.into(), + api::ImageFormat::RGBAF32, + &mut texels, + ); + self.device.reset_read_target(); + self.device.end_frame(); + (texture.get_dimensions(), texels) + } +} diff --git a/gfx/wr/webrender/src/renderer/init.rs b/gfx/wr/webrender/src/renderer/init.rs new file mode 100644 index 0000000000..ac7498cc9d --- /dev/null +++ b/gfx/wr/webrender/src/renderer/init.rs @@ -0,0 +1,788 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +use api::{BlobImageHandler, ColorF, IdNamespace, DocumentId, CrashAnnotator}; +use api::{VoidPtrToSizeFn, FontRenderMode, ImageFormat}; +use api::{RenderNotifier, ImageBufferKind}; +use api::units::*; +use api::channel::unbounded_channel; +pub use api::DebugFlags; + +use crate::render_api::{RenderApiSender, FrameMsg}; +use crate::composite::{CompositorKind, CompositorConfig}; +use crate::device::{ + UploadMethod, UploadPBOPool, VertexUsageHint, Device, ProgramCache, TextureFilter +}; +use crate::frame_builder::FrameBuilderConfig; +use crate::glyph_cache::GlyphCache; +use glyph_rasterizer::{GlyphRasterizer, SharedFontResources}; +use crate::gpu_types::PrimitiveInstanceData; +use crate::internal_types::{FastHashMap, FastHashSet, FrameId}; +use crate::picture; +use crate::profiler::{self, Profiler, TransactionProfile}; +use crate::device::query::{GpuProfiler, GpuDebugMethod}; +use crate::render_backend::RenderBackend; +use crate::resource_cache::ResourceCache; +use crate::scene_builder_thread::{SceneBuilderThread, SceneBuilderThreadChannels, LowPrioritySceneBuilderThread}; +use crate::texture_cache::{TextureCache, TextureCacheConfig}; +use crate::picture_textures::PictureTextures; +use crate::renderer::{ + debug, gpu_cache, vertex, gl, + Renderer, DebugOverlayState, BufferDamageTracker, PipelineInfo, TextureResolver, + RendererError, ShaderPrecacheFlags, VERTEX_DATA_TEXTURE_COUNT, + upload::UploadTexturePool, + shade::{Shaders, SharedShaders}, +}; + +use std::{ + mem, + thread, + cell::RefCell, + collections::VecDeque, + rc::Rc, + sync::{Arc, atomic::{AtomicBool, Ordering}}, + num::NonZeroUsize, + path::PathBuf, +}; + +use tracy_rs::register_thread_with_profiler; +use rayon::{ThreadPool, ThreadPoolBuilder}; +use malloc_size_of::MallocSizeOfOps; + +/// Use this hint for all vertex data re-initialization. This allows +/// the driver to better re-use RBOs internally. 
+pub const ONE_TIME_USAGE_HINT: VertexUsageHint = VertexUsageHint::Stream; + +/// Is only false if no WR instances have ever been created. +static HAS_BEEN_INITIALIZED: AtomicBool = AtomicBool::new(false); + +/// Returns true if a WR instance has ever been initialized in this process. +pub fn wr_has_been_initialized() -> bool { + HAS_BEEN_INITIALIZED.load(Ordering::SeqCst) +} + +/// Allows callers to hook in at certain points of the async scene build. These +/// functions are all called from the scene builder thread. +pub trait SceneBuilderHooks { + /// This is called exactly once, when the scene builder thread is started + /// and before it processes anything. + fn register(&self); + /// This is called before each scene build starts. + fn pre_scene_build(&self); + /// This is called before each scene swap occurs. + fn pre_scene_swap(&self); + /// This is called after each scene swap occurs. The PipelineInfo contains + /// the updated epochs and pipelines removed in the new scene compared to + /// the old scene. + fn post_scene_swap(&self, document_id: &Vec<DocumentId>, info: PipelineInfo); + /// This is called after a resource update operation on the scene builder + /// thread, in the case where resource updates were applied without a scene + /// build. + fn post_resource_update(&self, document_ids: &Vec<DocumentId>); + /// This is called after a scene build completes without any changes being + /// made. We guarantee that each pre_scene_build call will be matched with + /// exactly one of post_scene_swap, post_resource_update or + /// post_empty_scene_build. + fn post_empty_scene_build(&self); + /// This is a generic callback which provides an opportunity to run code + /// on the scene builder thread. This is called as part of the main message + /// loop of the scene builder thread, but outside of any specific message + /// handler. + fn poke(&self); + /// This is called exactly once, when the scene builder thread is about to + /// terminate. 
+ fn deregister(&self); +} + +/// Allows callers to hook into the main render_backend loop and provide +/// additional frame ops for generate_frame transactions. These functions +/// are all called from the render backend thread. +pub trait AsyncPropertySampler { + /// This is called exactly once, when the render backend thread is started + /// and before it processes anything. + fn register(&self); + /// This is called for each transaction with the generate_frame flag set + /// (i.e. that will trigger a render). The list of frame messages returned + /// are processed as though they were part of the original transaction. + fn sample(&self, document_id: DocumentId, generated_frame_id: Option<u64>) -> Vec<FrameMsg>; + /// This is called exactly once, when the render backend thread is about to + /// terminate. + fn deregister(&self); +} + +pub struct WebRenderOptions { + pub resource_override_path: Option<PathBuf>, + /// Whether to use shaders that have been optimized at build time. + pub use_optimized_shaders: bool, + pub enable_aa: bool, + pub enable_dithering: bool, + pub max_recorded_profiles: usize, + pub precache_flags: ShaderPrecacheFlags, + /// Enable sub-pixel anti-aliasing if a fast implementation is available. + pub enable_subpixel_aa: bool, + pub clear_color: ColorF, + pub enable_clear_scissor: Option<bool>, + pub max_internal_texture_size: Option<i32>, + pub image_tiling_threshold: i32, + pub upload_method: UploadMethod, + /// The default size in bytes for PBOs used to upload texture data. 
+ pub upload_pbo_default_size: usize, + pub batched_upload_threshold: i32, + pub workers: Option<Arc<ThreadPool>>, + pub enable_multithreading: bool, + pub blob_image_handler: Option<Box<dyn BlobImageHandler>>, + pub crash_annotator: Option<Box<dyn CrashAnnotator>>, + pub size_of_op: Option<VoidPtrToSizeFn>, + pub enclosing_size_of_op: Option<VoidPtrToSizeFn>, + pub cached_programs: Option<Rc<ProgramCache>>, + pub debug_flags: DebugFlags, + pub renderer_id: Option<u64>, + pub scene_builder_hooks: Option<Box<dyn SceneBuilderHooks + Send>>, + pub sampler: Option<Box<dyn AsyncPropertySampler + Send>>, + pub support_low_priority_transactions: bool, + pub namespace_alloc_by_client: bool, + /// If namespaces are allocated by the client, then the namespace for fonts + /// must also be allocated by the client to avoid namespace collisions with + /// the backend. + pub shared_font_namespace: Option<IdNamespace>, + pub testing: bool, + /// Set to true if this GPU supports hardware fast clears as a performance + /// optimization. Likely requires benchmarking on various GPUs to see if + /// it is a performance win. The default is false, which tends to be best + /// performance on lower end / integrated GPUs. + pub gpu_supports_fast_clears: bool, + pub allow_dual_source_blending: bool, + pub allow_advanced_blend_equation: bool, + /// If true, allow textures to be initialized with glTexStorage. + /// This affects VRAM consumption and data upload paths. + pub allow_texture_storage_support: bool, + /// If true, we allow the data uploaded in a different format from the + /// one expected by the driver, pretending the format is matching, and + /// swizzling the components on all the shader sampling. + pub allow_texture_swizzling: bool, + /// Use `ps_clear` shader with batched quad rendering to clear the rects + /// in texture cache and picture cache tasks. + /// This helps to work around some Intel drivers + /// that incorrectly synchronize clears to following draws. 
+ pub clear_caches_with_quads: bool, + /// Output the source of the shader with the given name. + pub dump_shader_source: Option<String>, + pub surface_origin_is_top_left: bool, + /// The configuration options defining how WR composites the final scene. + pub compositor_config: CompositorConfig, + pub enable_gpu_markers: bool, + /// If true, panic whenever a GL error occurs. This has a significant + /// performance impact, so only use when debugging specific problems! + pub panic_on_gl_error: bool, + pub picture_tile_size: Option<DeviceIntSize>, + pub texture_cache_config: TextureCacheConfig, + /// If true, we'll use instanced vertex attributes. Each instace is a quad. + /// If false, we'll duplicate the instance attributes per vertex and issue + /// regular indexed draws instead. + pub enable_instancing: bool, + /// If true, we'll reject contexts backed by a software rasterizer, except + /// Software WebRender. + pub reject_software_rasterizer: bool, + /// If enabled, pinch-zoom will apply the zoom factor during compositing + /// of picture cache tiles. This is higher performance (tiles are not + /// re-rasterized during zoom) but lower quality result. For most display + /// items, if the zoom factor is relatively small, bilinear filtering should + /// make the result look quite close to the high-quality zoom, except for glyphs. + pub low_quality_pinch_zoom: bool, + pub max_shared_surface_size: i32, +} + +impl WebRenderOptions { + /// Number of batches to look back in history for adding the current + /// transparent instance into. + const BATCH_LOOKBACK_COUNT: usize = 10; + + /// Since we are re-initializing the instance buffers on every draw call, + /// the driver has to internally manage PBOs in flight. + /// It's typically done by bucketing up to a specific limit, and then + /// just individually managing the largest buffers. + /// Having a limit here allows the drivers to more easily manage + /// the PBOs for us. 
+ const MAX_INSTANCE_BUFFER_SIZE: usize = 0x20000; // actual threshold in macOS GL drivers +} + +impl Default for WebRenderOptions { + fn default() -> Self { + WebRenderOptions { + resource_override_path: None, + use_optimized_shaders: false, + enable_aa: true, + enable_dithering: false, + debug_flags: DebugFlags::empty(), + max_recorded_profiles: 0, + precache_flags: ShaderPrecacheFlags::empty(), + enable_subpixel_aa: false, + clear_color: ColorF::new(1.0, 1.0, 1.0, 1.0), + enable_clear_scissor: None, + max_internal_texture_size: None, + image_tiling_threshold: 4096, + // This is best as `Immediate` on Angle, or `Pixelbuffer(Dynamic)` on GL, + // but we are unable to make this decision here, so picking the reasonable medium. + upload_method: UploadMethod::PixelBuffer(ONE_TIME_USAGE_HINT), + upload_pbo_default_size: 512 * 512 * 4, + batched_upload_threshold: 512 * 512, + workers: None, + enable_multithreading: true, + blob_image_handler: None, + crash_annotator: None, + size_of_op: None, + enclosing_size_of_op: None, + renderer_id: None, + cached_programs: None, + scene_builder_hooks: None, + sampler: None, + support_low_priority_transactions: false, + namespace_alloc_by_client: false, + shared_font_namespace: None, + testing: false, + gpu_supports_fast_clears: false, + allow_dual_source_blending: true, + allow_advanced_blend_equation: false, + allow_texture_storage_support: true, + allow_texture_swizzling: true, + clear_caches_with_quads: true, + dump_shader_source: None, + surface_origin_is_top_left: false, + compositor_config: CompositorConfig::default(), + enable_gpu_markers: true, + panic_on_gl_error: false, + picture_tile_size: None, + texture_cache_config: TextureCacheConfig::DEFAULT, + // Disabling instancing means more vertex data to upload and potentially + // process by the vertex shaders. 
+ enable_instancing: true, + reject_software_rasterizer: false, + low_quality_pinch_zoom: false, + max_shared_surface_size: 2048, + } + } +} + +/// Initializes WebRender and creates a `Renderer` and `RenderApiSender`. +/// +/// # Examples +/// Initializes a `Renderer` with some reasonable values. For more information see +/// [`WebRenderOptions`][WebRenderOptions]. +/// +/// ```rust,ignore +/// # use webrender::renderer::Renderer; +/// # use std::path::PathBuf; +/// let opts = webrender::WebRenderOptions { +/// device_pixel_ratio: 1.0, +/// resource_override_path: None, +/// enable_aa: false, +/// }; +/// let (renderer, sender) = Renderer::new(opts); +/// ``` +/// [WebRenderOptions]: struct.WebRenderOptions.html +pub fn create_webrender_instance( + gl: Rc<dyn gl::Gl>, + notifier: Box<dyn RenderNotifier>, + mut options: WebRenderOptions, + shaders: Option<&SharedShaders>, +) -> Result<(Renderer, RenderApiSender), RendererError> { + if !wr_has_been_initialized() { + // If the profiler feature is enabled, try to load the profiler shared library + // if the path was provided. 
+ #[cfg(feature = "profiler")] + unsafe { + if let Ok(ref tracy_path) = std::env::var("WR_TRACY_PATH") { + let ok = tracy_rs::load(tracy_path); + info!("Load tracy from {} -> {}", tracy_path, ok); + } + } + + register_thread_with_profiler("Compositor".to_owned()); + } + + HAS_BEEN_INITIALIZED.store(true, Ordering::SeqCst); + + let (api_tx, api_rx) = unbounded_channel(); + let (result_tx, result_rx) = unbounded_channel(); + let gl_type = gl.get_type(); + + let mut device = Device::new( + gl, + options.crash_annotator.clone(), + options.resource_override_path.clone(), + options.use_optimized_shaders, + options.upload_method.clone(), + options.batched_upload_threshold, + options.cached_programs.take(), + options.allow_texture_storage_support, + options.allow_texture_swizzling, + options.dump_shader_source.take(), + options.surface_origin_is_top_left, + options.panic_on_gl_error, + ); + + let color_cache_formats = device.preferred_color_formats(); + let swizzle_settings = device.swizzle_settings(); + let use_dual_source_blending = + device.get_capabilities().supports_dual_source_blending && + options.allow_dual_source_blending; + let ext_blend_equation_advanced = + options.allow_advanced_blend_equation && + device.get_capabilities().supports_advanced_blend_equation; + let ext_blend_equation_advanced_coherent = + device.supports_extension("GL_KHR_blend_equation_advanced_coherent"); + + let enable_clear_scissor = options + .enable_clear_scissor + .unwrap_or(device.get_capabilities().prefers_clear_scissor); + + // 2048 is the minimum that the texture cache can work with. + const MIN_TEXTURE_SIZE: i32 = 2048; + let mut max_internal_texture_size = device.max_texture_size(); + if max_internal_texture_size < MIN_TEXTURE_SIZE { + // Broken GL contexts can return a max texture size of zero (See #1260). + // Better to gracefully fail now than panic as soon as a texture is allocated. 
+ error!( + "Device reporting insufficient max texture size ({})", + max_internal_texture_size + ); + return Err(RendererError::MaxTextureSize); + } + if let Some(internal_limit) = options.max_internal_texture_size { + assert!(internal_limit >= MIN_TEXTURE_SIZE); + max_internal_texture_size = max_internal_texture_size.min(internal_limit); + } + + if options.reject_software_rasterizer { + let renderer_name_lc = device.get_capabilities().renderer_name.to_lowercase(); + if renderer_name_lc.contains("llvmpipe") || renderer_name_lc.contains("softpipe") || renderer_name_lc.contains("software rasterizer") { + return Err(RendererError::SoftwareRasterizer); + } + } + + let image_tiling_threshold = options.image_tiling_threshold + .min(max_internal_texture_size); + + device.begin_frame(); + + let shaders = match shaders { + Some(shaders) => Rc::clone(shaders), + None => Rc::new(RefCell::new(Shaders::new(&mut device, gl_type, &options)?)), + }; + + let dither_matrix_texture = if options.enable_dithering { + let dither_matrix: [u8; 64] = [ + 0, + 48, + 12, + 60, + 3, + 51, + 15, + 63, + 32, + 16, + 44, + 28, + 35, + 19, + 47, + 31, + 8, + 56, + 4, + 52, + 11, + 59, + 7, + 55, + 40, + 24, + 36, + 20, + 43, + 27, + 39, + 23, + 2, + 50, + 14, + 62, + 1, + 49, + 13, + 61, + 34, + 18, + 46, + 30, + 33, + 17, + 45, + 29, + 10, + 58, + 6, + 54, + 9, + 57, + 5, + 53, + 42, + 26, + 38, + 22, + 41, + 25, + 37, + 21, + ]; + + let texture = device.create_texture( + ImageBufferKind::Texture2D, + ImageFormat::R8, + 8, + 8, + TextureFilter::Nearest, + None, + ); + device.upload_texture_immediate(&texture, &dither_matrix); + + Some(texture) + } else { + None + }; + + let max_primitive_instance_count = + WebRenderOptions::MAX_INSTANCE_BUFFER_SIZE / mem::size_of::<PrimitiveInstanceData>(); + let vaos = vertex::RendererVAOs::new( + &mut device, + if options.enable_instancing { None } else { NonZeroUsize::new(max_primitive_instance_count) }, + ); + + let texture_upload_pbo_pool = 
UploadPBOPool::new(&mut device, options.upload_pbo_default_size); + let staging_texture_pool = UploadTexturePool::new(); + let texture_resolver = TextureResolver::new(&mut device); + + let mut vertex_data_textures = Vec::new(); + for _ in 0 .. VERTEX_DATA_TEXTURE_COUNT { + vertex_data_textures.push(vertex::VertexDataTextures::new()); + } + + // On some (mostly older, integrated) GPUs, the normal GPU texture cache update path + // doesn't work well when running on ANGLE, causing CPU stalls inside D3D and/or the + // GPU driver. See https://bugzilla.mozilla.org/show_bug.cgi?id=1576637 for much + // more detail. To reduce the number of code paths we have active that require testing, + // we will enable the GPU cache scatter update path on all devices running with ANGLE. + // We want a better solution long-term, but for now this is a significant performance + // improvement on HD4600 era GPUs, and shouldn't hurt performance in a noticeable + // way on other systems running under ANGLE. + let is_software = device.get_capabilities().renderer_name.starts_with("Software"); + + // On other GL platforms, like macOS or Android, creating many PBOs is very inefficient. + // This is what happens in GPU cache updates in PBO path. Instead, we switch everything + // except software GL to use the GPU scattered updates. 
+ let supports_scatter = device.get_capabilities().supports_color_buffer_float; + let gpu_cache_texture = gpu_cache::GpuCacheTexture::new( + &mut device, + supports_scatter && !is_software, + )?; + + device.end_frame(); + + let backend_notifier = notifier.clone(); + + let clear_alpha_targets_with_quads = !device.get_capabilities().supports_alpha_target_clears; + + let prefer_subpixel_aa = options.enable_subpixel_aa && use_dual_source_blending; + let default_font_render_mode = match (options.enable_aa, prefer_subpixel_aa) { + (true, true) => FontRenderMode::Subpixel, + (true, false) => FontRenderMode::Alpha, + (false, _) => FontRenderMode::Mono, + }; + + let compositor_kind = match options.compositor_config { + CompositorConfig::Draw { max_partial_present_rects, draw_previous_partial_present_regions, .. } => { + CompositorKind::Draw { max_partial_present_rects, draw_previous_partial_present_regions } + } + CompositorConfig::Native { ref compositor } => { + let capabilities = compositor.get_capabilities(&mut device); + + CompositorKind::Native { + capabilities, + } + } + }; + + let config = FrameBuilderConfig { + default_font_render_mode, + dual_source_blending_is_supported: use_dual_source_blending, + testing: options.testing, + gpu_supports_fast_clears: options.gpu_supports_fast_clears, + gpu_supports_advanced_blend: ext_blend_equation_advanced, + advanced_blend_is_coherent: ext_blend_equation_advanced_coherent, + gpu_supports_render_target_partial_update: device.get_capabilities().supports_render_target_partial_update, + external_images_require_copy: !device.get_capabilities().supports_image_external_essl3, + batch_lookback_count: WebRenderOptions::BATCH_LOOKBACK_COUNT, + background_color: Some(options.clear_color), + compositor_kind, + tile_size_override: None, + max_surface_override: None, + max_depth_ids: device.max_depth_ids(), + max_target_size: max_internal_texture_size, + force_invalidation: false, + is_software, + low_quality_pinch_zoom: 
options.low_quality_pinch_zoom, + max_shared_surface_size: options.max_shared_surface_size, + }; + info!("WR {:?}", config); + + let debug_flags = options.debug_flags; + let size_of_op = options.size_of_op; + let enclosing_size_of_op = options.enclosing_size_of_op; + let make_size_of_ops = + move || size_of_op.map(|o| MallocSizeOfOps::new(o, enclosing_size_of_op)); + let workers = options + .workers + .take() + .unwrap_or_else(|| { + let worker = ThreadPoolBuilder::new() + .thread_name(|idx|{ format!("WRWorker#{}", idx) }) + .start_handler(move |idx| { + register_thread_with_profiler(format!("WRWorker#{}", idx)); + profiler::register_thread(&format!("WRWorker#{}", idx)); + }) + .exit_handler(move |_idx| { + profiler::unregister_thread(); + }) + .build(); + Arc::new(worker.unwrap()) + }); + let sampler = options.sampler; + let namespace_alloc_by_client = options.namespace_alloc_by_client; + + // Ensure shared font keys exist within their own unique namespace so + // that they don't accidentally collide across Renderer instances. 
+ let font_namespace = if namespace_alloc_by_client { + options.shared_font_namespace.expect("Shared font namespace must be allocated by client") + } else { + RenderBackend::next_namespace_id() + }; + let fonts = SharedFontResources::new(font_namespace); + + let blob_image_handler = options.blob_image_handler.take(); + let scene_builder_hooks = options.scene_builder_hooks; + let rb_thread_name = format!("WRRenderBackend#{}", options.renderer_id.unwrap_or(0)); + let scene_thread_name = format!("WRSceneBuilder#{}", options.renderer_id.unwrap_or(0)); + let lp_scene_thread_name = format!("WRSceneBuilderLP#{}", options.renderer_id.unwrap_or(0)); + let glyph_rasterizer = GlyphRasterizer::new(workers, device.get_capabilities().supports_r8_texture_upload); + + let (scene_builder_channels, scene_tx) = + SceneBuilderThreadChannels::new(api_tx.clone()); + + let sb_fonts = fonts.clone(); + + thread::Builder::new().name(scene_thread_name.clone()).spawn(move || { + register_thread_with_profiler(scene_thread_name.clone()); + profiler::register_thread(&scene_thread_name); + + let mut scene_builder = SceneBuilderThread::new( + config, + sb_fonts, + make_size_of_ops(), + scene_builder_hooks, + scene_builder_channels, + ); + scene_builder.run(); + + profiler::unregister_thread(); + })?; + + let low_priority_scene_tx = if options.support_low_priority_transactions { + let (low_priority_scene_tx, low_priority_scene_rx) = unbounded_channel(); + let lp_builder = LowPrioritySceneBuilderThread { + rx: low_priority_scene_rx, + tx: scene_tx.clone(), + }; + + thread::Builder::new().name(lp_scene_thread_name.clone()).spawn(move || { + register_thread_with_profiler(lp_scene_thread_name.clone()); + profiler::register_thread(&lp_scene_thread_name); + + let mut scene_builder = lp_builder; + scene_builder.run(); + + profiler::unregister_thread(); + })?; + + low_priority_scene_tx + } else { + scene_tx.clone() + }; + + let rb_blob_handler = blob_image_handler + .as_ref() + .map(|handler| 
handler.create_similar()); + + let texture_cache_config = options.texture_cache_config.clone(); + let mut picture_tile_size = options.picture_tile_size.unwrap_or(picture::TILE_SIZE_DEFAULT); + // Clamp the picture tile size to reasonable values. + picture_tile_size.width = picture_tile_size.width.max(128).min(4096); + picture_tile_size.height = picture_tile_size.height.max(128).min(4096); + + let picture_texture_filter = if options.low_quality_pinch_zoom { + TextureFilter::Linear + } else { + TextureFilter::Nearest + }; + + let rb_scene_tx = scene_tx.clone(); + let rb_fonts = fonts.clone(); + let enable_multithreading = options.enable_multithreading; + thread::Builder::new().name(rb_thread_name.clone()).spawn(move || { + register_thread_with_profiler(rb_thread_name.clone()); + profiler::register_thread(&rb_thread_name); + + let texture_cache = TextureCache::new( + max_internal_texture_size, + image_tiling_threshold, + color_cache_formats, + swizzle_settings, + &texture_cache_config, + ); + + let picture_textures = PictureTextures::new( + picture_tile_size, + picture_texture_filter, + ); + + let glyph_cache = GlyphCache::new(); + + let mut resource_cache = ResourceCache::new( + texture_cache, + picture_textures, + glyph_rasterizer, + glyph_cache, + rb_fonts, + rb_blob_handler, + ); + + resource_cache.enable_multithreading(enable_multithreading); + + let mut backend = RenderBackend::new( + api_rx, + result_tx, + rb_scene_tx, + resource_cache, + backend_notifier, + config, + sampler, + make_size_of_ops(), + debug_flags, + namespace_alloc_by_client, + ); + backend.run(); + profiler::unregister_thread(); + })?; + + let debug_method = if !options.enable_gpu_markers { + // The GPU markers are disabled. 
+ GpuDebugMethod::None + } else if device.get_capabilities().supports_khr_debug { + GpuDebugMethod::KHR + } else if device.supports_extension("GL_EXT_debug_marker") { + GpuDebugMethod::MarkerEXT + } else { + warn!("asking to enable_gpu_markers but no supporting extension was found"); + GpuDebugMethod::None + }; + + info!("using {:?}", debug_method); + + let gpu_profiler = GpuProfiler::new(Rc::clone(device.rc_gl()), debug_method); + #[cfg(feature = "capture")] + let read_fbo = device.create_fbo(); + + let mut renderer = Renderer { + result_rx, + api_tx: api_tx.clone(), + device, + active_documents: FastHashMap::default(), + pending_texture_updates: Vec::new(), + pending_texture_cache_updates: false, + pending_native_surface_updates: Vec::new(), + pending_gpu_cache_updates: Vec::new(), + pending_gpu_cache_clear: false, + pending_shader_updates: Vec::new(), + shaders, + debug: debug::LazyInitializedDebugRenderer::new(), + debug_flags: DebugFlags::empty(), + profile: TransactionProfile::new(), + frame_counter: 0, + resource_upload_time: 0.0, + gpu_cache_upload_time: 0.0, + profiler: Profiler::new(), + max_recorded_profiles: options.max_recorded_profiles, + clear_color: options.clear_color, + enable_clear_scissor, + enable_advanced_blend_barriers: !ext_blend_equation_advanced_coherent, + clear_caches_with_quads: options.clear_caches_with_quads, + clear_alpha_targets_with_quads, + last_time: 0, + gpu_profiler, + vaos, + vertex_data_textures, + current_vertex_data_textures: 0, + pipeline_info: PipelineInfo::default(), + dither_matrix_texture, + external_image_handler: None, + size_of_ops: make_size_of_ops(), + cpu_profiles: VecDeque::new(), + gpu_profiles: VecDeque::new(), + gpu_cache_texture, + gpu_cache_debug_chunks: Vec::new(), + gpu_cache_frame_id: FrameId::INVALID, + gpu_cache_overflow: false, + texture_upload_pbo_pool, + staging_texture_pool, + texture_resolver, + renderer_errors: Vec::new(), + async_frame_recorder: None, + async_screenshots: None, + #[cfg(feature = 
"capture")] + read_fbo, + #[cfg(feature = "replay")] + owned_external_images: FastHashMap::default(), + notifications: Vec::new(), + device_size: None, + zoom_debug_texture: None, + cursor_position: DeviceIntPoint::zero(), + shared_texture_cache_cleared: false, + documents_seen: FastHashSet::default(), + force_redraw: true, + compositor_config: options.compositor_config, + current_compositor_kind: compositor_kind, + allocated_native_surfaces: FastHashSet::default(), + debug_overlay_state: DebugOverlayState::new(), + buffer_damage_tracker: BufferDamageTracker::default(), + max_primitive_instance_count, + enable_instancing: options.enable_instancing, + consecutive_oom_frames: 0, + target_frame_publish_id: None, + pending_result_msg: None, + }; + + // We initially set the flags to default and then now call set_debug_flags + // to ensure any potential transition when enabling a flag is run. + renderer.set_debug_flags(debug_flags); + + let sender = RenderApiSender::new( + api_tx, + scene_tx, + low_priority_scene_tx, + blob_image_handler, + fonts, + ); + Ok((renderer, sender)) +} diff --git a/gfx/wr/webrender/src/renderer/mod.rs b/gfx/wr/webrender/src/renderer/mod.rs new file mode 100644 index 0000000000..0eded7ffb4 --- /dev/null +++ b/gfx/wr/webrender/src/renderer/mod.rs @@ -0,0 +1,5994 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +//! The high-level module responsible for interfacing with the GPU. +//! +//! Much of WebRender's design is driven by separating work into different +//! threads. To avoid the complexities of multi-threaded GPU access, we restrict +//! all communication with the GPU to one thread, the render thread. But since +//! issuing GPU commands is often a bottleneck, we move everything else (i.e. +//! the computation of what commands to issue) to another thread, the +//! 
RenderBackend thread. The RenderBackend, in turn, may delegate work to other +//! threads (like the SceneBuilder threads or Rayon workers), but the +//! Render-vs-RenderBackend distinction is the most important. +//! +//! The consumer is responsible for initializing the render thread before +//! calling into WebRender, which means that this module also serves as the +//! initial entry point into WebRender, and is responsible for spawning the +//! various other threads discussed above. That said, WebRender initialization +//! returns both the `Renderer` instance as well as a channel for communicating +//! directly with the `RenderBackend`. Aside from a few high-level operations +//! like 'render now', most of the interesting commands from the consumer go over +//! that channel and operate on the `RenderBackend`. +//! +//! ## Space conversion guidelines +//! At this stage, we should be operating with `DevicePixel` and `FramebufferPixel` only. +//! "Framebuffer" space represents the final destination of our rendering, +//! and it happens to be Y-flipped on OpenGL. The conversion is done as follows: +//! - for rasterized primitives, the orthographic projection transforms +//! the content rectangle to -1 to 1 +//! - the viewport transformation is set up to map the whole range to +//! the framebuffer rectangle provided by the document view, stored in `DrawTarget` +//! - all the direct framebuffer operations, like blitting, reading pixels, and setting +//! up the scissor, accept already transformed coordinates, which we can get by +//!
calling `DrawTarget::to_framebuffer_rect` + +use api::{ColorF, ColorU, MixBlendMode}; +use api::{DocumentId, Epoch, ExternalImageHandler, RenderReasons}; +#[cfg(feature = "replay")] +use api::ExternalImageId; +use api::{ExternalImageSource, ExternalImageType, ImageFormat, PremultipliedColorF}; +use api::{PipelineId, ImageRendering, Checkpoint, NotificationRequest, ImageBufferKind}; +#[cfg(feature = "replay")] +use api::ExternalImage; +use api::FramePublishId; +use api::units::*; +use api::channel::{Sender, Receiver}; +pub use api::DebugFlags; +use core::time::Duration; + +use crate::render_api::{DebugCommand, ApiMsg, MemoryReport}; +use crate::batch::{AlphaBatchContainer, BatchKind, BatchFeatures, BatchTextures, BrushBatchKind, ClipBatchList}; +use crate::batch::{ClipMaskInstanceList}; +#[cfg(any(feature = "capture", feature = "replay"))] +use crate::capture::{CaptureConfig, ExternalCaptureImage, PlainExternalImage}; +use crate::composite::{CompositeState, CompositeTileSurface, ResolvedExternalSurface, CompositorSurfaceTransform}; +use crate::composite::{CompositorKind, Compositor, NativeTileId, CompositeFeatures, CompositeSurfaceFormat, ResolvedExternalSurfaceColorData}; +use crate::composite::{CompositorConfig, NativeSurfaceOperationDetails, NativeSurfaceId, NativeSurfaceOperation}; +use crate::composite::{TileKind}; +use crate::debug_colors; +use crate::device::{DepthFunction, Device, DrawTarget, ExternalTexture, GpuFrameId, UploadPBOPool}; +use crate::device::{ReadTarget, ShaderError, Texture, TextureFilter, TextureFlags, TextureSlot}; +use crate::device::query::{GpuSampler, GpuTimer}; +#[cfg(feature = "capture")] +use crate::device::FBOId; +use crate::debug_item::DebugItem; +use crate::frame_builder::Frame; +use glyph_rasterizer::GlyphFormat; +use crate::gpu_cache::{GpuCacheUpdate, GpuCacheUpdateList}; +use crate::gpu_cache::{GpuCacheDebugChunk, GpuCacheDebugCmd}; +use crate::gpu_types::{ScalingInstance, SvgFilterInstance, CopyInstance, PrimitiveInstanceData}; 
+use crate::gpu_types::{BlurInstance, ClearInstance, CompositeInstance, CompositorTransform}; +use crate::internal_types::{TextureSource, TextureCacheCategory, FrameId}; +#[cfg(any(feature = "capture", feature = "replay"))] +use crate::internal_types::DebugOutput; +use crate::internal_types::{CacheTextureId, FastHashMap, FastHashSet, RenderedDocument, ResultMsg}; +use crate::internal_types::{TextureCacheAllocInfo, TextureCacheAllocationKind, TextureUpdateList}; +use crate::internal_types::{RenderTargetInfo, Swizzle, DeferredResolveIndex}; +use crate::picture::ResolvedSurfaceTexture; +use crate::prim_store::DeferredResolve; +use crate::profiler::{self, GpuProfileTag, TransactionProfile}; +use crate::profiler::{Profiler, add_event_marker, add_text_marker, thread_is_being_profiled}; +use crate::device::query::GpuProfiler; +use crate::render_target::{ResolveOp}; +use crate::render_task_graph::{RenderTaskGraph}; +use crate::render_task::{RenderTask, RenderTaskKind, ReadbackTask}; +use crate::screen_capture::AsyncScreenshotGrabber; +use crate::render_target::{AlphaRenderTarget, ColorRenderTarget, PictureCacheTarget, PictureCacheTargetKind}; +use crate::render_target::{RenderTarget, TextureCacheRenderTarget}; +use crate::render_target::{RenderTargetKind, BlitJob}; +use crate::telemetry::Telemetry; +use crate::tile_cache::PictureCacheDebugInfo; +use crate::util::drain_filter; +use crate::rectangle_occlusion as occlusion; +use upload::{upload_to_texture_cache, UploadTexturePool}; +use init::*; + +use euclid::{rect, Transform3D, Scale, default}; +use gleam::gl; +use malloc_size_of::MallocSizeOfOps; + +#[cfg(feature = "replay")] +use std::sync::Arc; + +use std::{ + cell::RefCell, + collections::VecDeque, + f32, + ffi::c_void, + mem, + num::NonZeroUsize, + path::PathBuf, + rc::Rc, +}; +#[cfg(any(feature = "capture", feature = "replay"))] +use std::collections::hash_map::Entry; +use time::precise_time_ns; + +mod debug; +mod gpu_buffer; +mod gpu_cache; +mod shade; +mod vertex; 
+mod upload; +pub(crate) mod init; + +pub use debug::DebugRenderer; +pub use shade::{Shaders, SharedShaders}; +pub use vertex::{desc, VertexArrayKind, MAX_VERTEX_TEXTURE_WIDTH}; +pub use gpu_buffer::{GpuBuffer, GpuBufferBuilder, GpuBufferAddress}; + +/// The size of the array of each type of vertex data texture that +/// is round-robin-ed each frame during bind_frame_data. Doing this +/// helps avoid driver stalls while updating the texture in some +/// drivers. The size of these textures are typically very small +/// (e.g. < 16 kB) so it's not a huge waste of memory. Despite that, +/// this is a short-term solution - we want to find a better way +/// to provide this frame data, which will likely involve some +/// combination of UBO/SSBO usage. Although this only affects some +/// platforms, it's enabled on all platforms to reduce testing +/// differences between platforms. +pub const VERTEX_DATA_TEXTURE_COUNT: usize = 3; + +/// Number of GPU blocks per UV rectangle provided for an image. 
+pub const BLOCKS_PER_UV_RECT: usize = 2; + +const GPU_TAG_BRUSH_OPACITY: GpuProfileTag = GpuProfileTag { + label: "B_Opacity", + color: debug_colors::DARKMAGENTA, +}; +const GPU_TAG_BRUSH_LINEAR_GRADIENT: GpuProfileTag = GpuProfileTag { + label: "B_LinearGradient", + color: debug_colors::POWDERBLUE, +}; +const GPU_TAG_BRUSH_YUV_IMAGE: GpuProfileTag = GpuProfileTag { + label: "B_YuvImage", + color: debug_colors::DARKGREEN, +}; +const GPU_TAG_BRUSH_MIXBLEND: GpuProfileTag = GpuProfileTag { + label: "B_MixBlend", + color: debug_colors::MAGENTA, +}; +const GPU_TAG_BRUSH_BLEND: GpuProfileTag = GpuProfileTag { + label: "B_Blend", + color: debug_colors::ORANGE, +}; +const GPU_TAG_BRUSH_IMAGE: GpuProfileTag = GpuProfileTag { + label: "B_Image", + color: debug_colors::SPRINGGREEN, +}; +const GPU_TAG_BRUSH_SOLID: GpuProfileTag = GpuProfileTag { + label: "B_Solid", + color: debug_colors::RED, +}; +const GPU_TAG_CACHE_CLIP: GpuProfileTag = GpuProfileTag { + label: "C_Clip", + color: debug_colors::PURPLE, +}; +const GPU_TAG_CACHE_BORDER: GpuProfileTag = GpuProfileTag { + label: "C_Border", + color: debug_colors::CORNSILK, +}; +const GPU_TAG_CACHE_LINE_DECORATION: GpuProfileTag = GpuProfileTag { + label: "C_LineDecoration", + color: debug_colors::YELLOWGREEN, +}; +const GPU_TAG_CACHE_FAST_LINEAR_GRADIENT: GpuProfileTag = GpuProfileTag { + label: "C_FastLinearGradient", + color: debug_colors::BROWN, +}; +const GPU_TAG_CACHE_LINEAR_GRADIENT: GpuProfileTag = GpuProfileTag { + label: "C_LinearGradient", + color: debug_colors::BROWN, +}; +const GPU_TAG_CACHE_RADIAL_GRADIENT: GpuProfileTag = GpuProfileTag { + label: "C_RadialGradient", + color: debug_colors::BROWN, +}; +const GPU_TAG_CACHE_CONIC_GRADIENT: GpuProfileTag = GpuProfileTag { + label: "C_ConicGradient", + color: debug_colors::BROWN, +}; +const GPU_TAG_SETUP_TARGET: GpuProfileTag = GpuProfileTag { + label: "target init", + color: debug_colors::SLATEGREY, +}; +const GPU_TAG_SETUP_DATA: GpuProfileTag = GpuProfileTag { + 
label: "data init", + color: debug_colors::LIGHTGREY, +}; +const GPU_TAG_PRIM_SPLIT_COMPOSITE: GpuProfileTag = GpuProfileTag { + label: "SplitComposite", + color: debug_colors::DARKBLUE, +}; +const GPU_TAG_PRIM_TEXT_RUN: GpuProfileTag = GpuProfileTag { + label: "TextRun", + color: debug_colors::BLUE, +}; +const GPU_TAG_PRIMITIVE: GpuProfileTag = GpuProfileTag { + label: "Primitive", + color: debug_colors::RED, +}; +const GPU_TAG_INDIRECT_PRIM: GpuProfileTag = GpuProfileTag { + label: "Primitive (indirect)", + color: debug_colors::YELLOWGREEN, +}; +const GPU_TAG_INDIRECT_MASK: GpuProfileTag = GpuProfileTag { + label: "Mask (indirect)", + color: debug_colors::IVORY, +}; +const GPU_TAG_BLUR: GpuProfileTag = GpuProfileTag { + label: "Blur", + color: debug_colors::VIOLET, +}; +const GPU_TAG_BLIT: GpuProfileTag = GpuProfileTag { + label: "Blit", + color: debug_colors::LIME, +}; +const GPU_TAG_SCALE: GpuProfileTag = GpuProfileTag { + label: "Scale", + color: debug_colors::GHOSTWHITE, +}; +const GPU_SAMPLER_TAG_ALPHA: GpuProfileTag = GpuProfileTag { + label: "Alpha targets", + color: debug_colors::BLACK, +}; +const GPU_SAMPLER_TAG_OPAQUE: GpuProfileTag = GpuProfileTag { + label: "Opaque pass", + color: debug_colors::BLACK, +}; +const GPU_SAMPLER_TAG_TRANSPARENT: GpuProfileTag = GpuProfileTag { + label: "Transparent pass", + color: debug_colors::BLACK, +}; +const GPU_TAG_SVG_FILTER: GpuProfileTag = GpuProfileTag { + label: "SvgFilter", + color: debug_colors::LEMONCHIFFON, +}; +const GPU_TAG_COMPOSITE: GpuProfileTag = GpuProfileTag { + label: "Composite", + color: debug_colors::TOMATO, +}; +const GPU_TAG_CLEAR: GpuProfileTag = GpuProfileTag { + label: "Clear", + color: debug_colors::CHOCOLATE, +}; + +/// The clear color used for the texture cache when the debug display is enabled. +/// We use a shade of blue so that we can still identify completely blue items in +/// the texture cache. 
+pub const TEXTURE_CACHE_DBG_CLEAR_COLOR: [f32; 4] = [0.0, 0.0, 0.8, 1.0]; + +impl BatchKind { + fn sampler_tag(&self) -> GpuProfileTag { + match *self { + BatchKind::SplitComposite => GPU_TAG_PRIM_SPLIT_COMPOSITE, + BatchKind::Brush(kind) => { + match kind { + BrushBatchKind::Solid => GPU_TAG_BRUSH_SOLID, + BrushBatchKind::Image(..) => GPU_TAG_BRUSH_IMAGE, + BrushBatchKind::Blend => GPU_TAG_BRUSH_BLEND, + BrushBatchKind::MixBlend { .. } => GPU_TAG_BRUSH_MIXBLEND, + BrushBatchKind::YuvImage(..) => GPU_TAG_BRUSH_YUV_IMAGE, + BrushBatchKind::LinearGradient => GPU_TAG_BRUSH_LINEAR_GRADIENT, + BrushBatchKind::Opacity => GPU_TAG_BRUSH_OPACITY, + } + } + BatchKind::TextRun(_) => GPU_TAG_PRIM_TEXT_RUN, + BatchKind::Primitive => GPU_TAG_PRIMITIVE, + } + } +} + +fn flag_changed(before: DebugFlags, after: DebugFlags, select: DebugFlags) -> Option<bool> { + if before & select != after & select { + Some(after.contains(select)) + } else { + None + } +} + +#[repr(C)] +#[derive(Copy, Clone, Debug)] +pub enum ShaderColorMode { + Alpha = 0, + SubpixelDualSource = 1, + BitmapShadow = 2, + ColorBitmap = 3, + Image = 4, + MultiplyDualSource = 5, +} + +impl From<GlyphFormat> for ShaderColorMode { + fn from(format: GlyphFormat) -> ShaderColorMode { + match format { + GlyphFormat::Alpha | + GlyphFormat::TransformedAlpha | + GlyphFormat::Bitmap => ShaderColorMode::Alpha, + GlyphFormat::Subpixel | GlyphFormat::TransformedSubpixel => { + panic!("Subpixel glyph formats must be handled separately."); + } + GlyphFormat::ColorBitmap => ShaderColorMode::ColorBitmap, + } + } +} + +/// Enumeration of the texture samplers used across the various WebRender shaders. +/// +/// Each variant corresponds to a uniform declared in shader source. We only bind +/// the variants we need for a given shader, so not every variant is bound for every +/// batch. 
+#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub(crate) enum TextureSampler { + Color0, + Color1, + Color2, + GpuCache, + TransformPalette, + RenderTasks, + Dither, + PrimitiveHeadersF, + PrimitiveHeadersI, + ClipMask, + GpuBuffer, +} + +impl TextureSampler { + pub(crate) fn color(n: usize) -> TextureSampler { + match n { + 0 => TextureSampler::Color0, + 1 => TextureSampler::Color1, + 2 => TextureSampler::Color2, + _ => { + panic!("There are only 3 color samplers."); + } + } + } +} + +impl Into<TextureSlot> for TextureSampler { + fn into(self) -> TextureSlot { + match self { + TextureSampler::Color0 => TextureSlot(0), + TextureSampler::Color1 => TextureSlot(1), + TextureSampler::Color2 => TextureSlot(2), + TextureSampler::GpuCache => TextureSlot(3), + TextureSampler::TransformPalette => TextureSlot(4), + TextureSampler::RenderTasks => TextureSlot(5), + TextureSampler::Dither => TextureSlot(6), + TextureSampler::PrimitiveHeadersF => TextureSlot(7), + TextureSampler::PrimitiveHeadersI => TextureSlot(8), + TextureSampler::ClipMask => TextureSlot(9), + TextureSampler::GpuBuffer => TextureSlot(10), + } + } +} + +#[derive(Clone, Debug, PartialEq)] +pub enum GraphicsApi { + OpenGL, +} + +#[derive(Clone, Debug)] +pub struct GraphicsApiInfo { + pub kind: GraphicsApi, + pub renderer: String, + pub version: String, +} + +#[derive(Debug)] +pub struct GpuProfile { + pub frame_id: GpuFrameId, + pub paint_time_ns: u64, +} + +impl GpuProfile { + fn new(frame_id: GpuFrameId, timers: &[GpuTimer]) -> GpuProfile { + let mut paint_time_ns = 0; + for timer in timers { + paint_time_ns += timer.time_ns; + } + GpuProfile { + frame_id, + paint_time_ns, + } + } +} + +#[derive(Debug)] +pub struct CpuProfile { + pub frame_id: GpuFrameId, + pub backend_time_ns: u64, + pub composite_time_ns: u64, + pub draw_calls: usize, +} + +impl CpuProfile { + fn new( + frame_id: GpuFrameId, + backend_time_ns: u64, + composite_time_ns: u64, + draw_calls: usize, + ) -> CpuProfile { + CpuProfile { + frame_id, 
+ backend_time_ns, + composite_time_ns, + draw_calls, + } + } +} + +/// The selected partial present mode for a given frame. +#[derive(Debug, Copy, Clone)] +enum PartialPresentMode { + /// The device supports fewer dirty rects than the number of dirty rects + /// that WR produced. In this case, the WR dirty rects are union'ed into + /// a single dirty rect, that is provided to the caller. + Single { + dirty_rect: DeviceRect, + }, +} + +struct CacheTexture { + texture: Texture, + category: TextureCacheCategory, +} + +/// Helper struct for resolving device Textures for use during rendering passes. +/// +/// Manages the mapping between the at-a-distance texture handles used by the +/// `RenderBackend` (which does not directly interface with the GPU) and actual +/// device texture handles. +struct TextureResolver { + /// A map to resolve texture cache IDs to native textures. + texture_cache_map: FastHashMap<CacheTextureId, CacheTexture>, + + /// Map of external image IDs to native textures. + external_images: FastHashMap<DeferredResolveIndex, ExternalTexture>, + + /// A special 1x1 dummy texture used for shaders that expect to work with + /// the output of the previous pass but are actually running in the first + /// pass. 
+ dummy_cache_texture: Texture, +} + +impl TextureResolver { + fn new(device: &mut Device) -> TextureResolver { + let dummy_cache_texture = device + .create_texture( + ImageBufferKind::Texture2D, + ImageFormat::RGBA8, + 1, + 1, + TextureFilter::Linear, + None, + ); + device.upload_texture_immediate( + &dummy_cache_texture, + &[0xff, 0xff, 0xff, 0xff], + ); + + TextureResolver { + texture_cache_map: FastHashMap::default(), + external_images: FastHashMap::default(), + dummy_cache_texture, + } + } + + fn deinit(self, device: &mut Device) { + device.delete_texture(self.dummy_cache_texture); + + for (_id, item) in self.texture_cache_map { + device.delete_texture(item.texture); + } + } + + fn begin_frame(&mut self) { + } + + fn end_pass( + &mut self, + device: &mut Device, + textures_to_invalidate: &[CacheTextureId], + ) { + // For any texture that is no longer needed, immediately + // invalidate it so that tiled GPUs don't need to resolve it + // back to memory. + for texture_id in textures_to_invalidate { + let render_target = &self.texture_cache_map[texture_id].texture; + device.invalidate_render_target(render_target); + } + } + + // Bind a source texture to the device. + fn bind(&self, texture_id: &TextureSource, sampler: TextureSampler, device: &mut Device) -> Swizzle { + match *texture_id { + TextureSource::Invalid => { + Swizzle::default() + } + TextureSource::Dummy => { + let swizzle = Swizzle::default(); + device.bind_texture(sampler, &self.dummy_cache_texture, swizzle); + swizzle + } + TextureSource::External(ref index, _) => { + let texture = self.external_images + .get(index) + .expect("BUG: External image should be resolved by now"); + device.bind_external_texture(sampler, texture); + Swizzle::default() + } + TextureSource::TextureCache(index, swizzle) => { + let texture = &self.texture_cache_map[&index].texture; + device.bind_texture(sampler, texture, swizzle); + swizzle + } + } + } + + // Get the real (OpenGL) texture ID for a given source texture. 
+ // For a texture cache texture, the IDs are stored in a vector + // map for fast access. + fn resolve(&self, texture_id: &TextureSource) -> Option<(&Texture, Swizzle)> { + match *texture_id { + TextureSource::Invalid => None, + TextureSource::Dummy => { + Some((&self.dummy_cache_texture, Swizzle::default())) + } + TextureSource::External(..) => { + panic!("BUG: External textures cannot be resolved, they can only be bound."); + } + TextureSource::TextureCache(index, swizzle) => { + Some((&self.texture_cache_map[&index].texture, swizzle)) + } + } + } + + // Retrieve the deferred / resolved UV rect if an external texture, otherwise + // return the default supplied UV rect. + fn get_uv_rect( + &self, + source: &TextureSource, + default_value: TexelRect, + ) -> TexelRect { + match source { + TextureSource::External(ref index, _) => { + let texture = self.external_images + .get(index) + .expect("BUG: External image should be resolved by now"); + texture.get_uv_rect() + } + _ => { + default_value + } + } + } + + /// Returns the size of the texture in pixels + fn get_texture_size(&self, texture: &TextureSource) -> DeviceIntSize { + match *texture { + TextureSource::Invalid => DeviceIntSize::zero(), + TextureSource::TextureCache(id, _) => { + self.texture_cache_map[&id].texture.get_dimensions() + }, + TextureSource::External(index, _) => { + let uv_rect = self.external_images[&index].get_uv_rect(); + (uv_rect.uv1 - uv_rect.uv0).abs().to_size().to_i32() + }, + TextureSource::Dummy => DeviceIntSize::new(1, 1), + } + } + + fn report_memory(&self) -> MemoryReport { + let mut report = MemoryReport::default(); + + // We're reporting GPU memory rather than heap-allocations, so we don't + // use size_of_op. 
+ for item in self.texture_cache_map.values() { + let counter = match item.category { + TextureCacheCategory::Atlas => &mut report.atlas_textures, + TextureCacheCategory::Standalone => &mut report.standalone_textures, + TextureCacheCategory::PictureTile => &mut report.picture_tile_textures, + TextureCacheCategory::RenderTarget => &mut report.render_target_textures, + }; + *counter += item.texture.size_in_bytes(); + } + + report + } + + fn update_profile(&self, profile: &mut TransactionProfile) { + let mut external_image_bytes = 0; + for img in self.external_images.values() { + let uv_rect = img.get_uv_rect(); + let size = (uv_rect.uv1 - uv_rect.uv0).abs().to_size().to_i32(); + + // Assume 4 bytes per pixels which is true most of the time but + // not always. + let bpp = 4; + external_image_bytes += size.area() as usize * bpp; + } + + profile.set(profiler::EXTERNAL_IMAGE_BYTES, profiler::bytes_to_mb(external_image_bytes)); + } + + fn get_cache_texture_mut(&mut self, id: &CacheTextureId) -> &mut Texture { + &mut self.texture_cache_map + .get_mut(id) + .expect("bug: texture not allocated") + .texture + } +} + +#[derive(Debug, Copy, Clone, PartialEq)] +#[cfg_attr(feature = "capture", derive(Serialize))] +#[cfg_attr(feature = "replay", derive(Deserialize))] +pub enum BlendMode { + None, + Alpha, + PremultipliedAlpha, + PremultipliedDestOut, + SubpixelDualSource, + Advanced(MixBlendMode), + MultiplyDualSource, + Screen, + Exclusion, + PlusLighter, +} + +impl BlendMode { + /// Decides when a given mix-blend-mode can be implemented in terms of + /// simple blending, dual-source blending, advanced blending, or not at + /// all based on available capabilities. 
+ pub fn from_mix_blend_mode( + mode: MixBlendMode, + advanced_blend: bool, + coherent: bool, + dual_source: bool, + ) -> Option<BlendMode> { + // If we emulate a mix-blend-mode via simple or dual-source blending, + // care must be taken to output alpha As + Ad*(1-As) regardless of what + // the RGB output is to comply with the mix-blend-mode spec. + Some(match mode { + // If we have coherent advanced blend, just use that. + _ if advanced_blend && coherent => BlendMode::Advanced(mode), + // Screen can be implemented as Cs + Cd - Cs*Cd => Cs + Cd*(1-Cs) + MixBlendMode::Screen => BlendMode::Screen, + // Exclusion can be implemented as Cs + Cd - 2*Cs*Cd => Cs*(1-Cd) + Cd*(1-Cs) + MixBlendMode::Exclusion => BlendMode::Exclusion, + // PlusLighter is basically a clamped add. + MixBlendMode::PlusLighter => BlendMode::PlusLighter, + // Multiply can be implemented as Cs*Cd + Cs*(1-Ad) + Cd*(1-As) => Cs*(1-Ad) + Cd*(1 - SRC1=(As-Cs)) + MixBlendMode::Multiply if dual_source => BlendMode::MultiplyDualSource, + // Otherwise, use advanced blend without coherency if available. + _ if advanced_blend => BlendMode::Advanced(mode), + // If advanced blend is not available, then we have to use brush_mix_blend. + _ => return None, + }) + } +} + +/// Information about the state of the debugging / profiler overlay in native compositing mode. +struct DebugOverlayState { + /// True if any of the current debug flags will result in drawing a debug overlay. + is_enabled: bool, + + /// The current size of the debug overlay surface. None implies that the + /// debug surface isn't currently allocated. + current_size: Option<DeviceIntSize>, +} + +impl DebugOverlayState { + fn new() -> Self { + DebugOverlayState { + is_enabled: false, + current_size: None, + } + } +} + +/// Tracks buffer damage rects over a series of frames. 
+#[derive(Debug, Default)] +pub(crate) struct BufferDamageTracker { + damage_rects: [DeviceRect; 2], + current_offset: usize, +} + +impl BufferDamageTracker { + /// Sets the damage rect for the current frame. Should only be called *after* + /// get_damage_rect() has been called to get the current backbuffer's damage rect. + fn push_dirty_rect(&mut self, rect: &DeviceRect) { + self.damage_rects[self.current_offset] = rect.clone(); + self.current_offset = match self.current_offset { + 0 => self.damage_rects.len() - 1, + n => n - 1, + } + } + + /// Gets the damage rect for the current backbuffer, given the backbuffer's age. + /// (The number of frames since it was previously the backbuffer.) + /// Returns an empty rect if the buffer is valid, and None if the entire buffer is invalid. + fn get_damage_rect(&self, buffer_age: usize) -> Option<DeviceRect> { + match buffer_age { + // 0 means this is a new buffer, so is completely invalid. + 0 => None, + // 1 means this backbuffer was also the previous frame's backbuffer + // (so must have been copied to the frontbuffer). It is therefore entirely valid. + 1 => Some(DeviceRect::zero()), + // We must calculate the union of the damage rects since this buffer was previously + // the backbuffer. + n if n <= self.damage_rects.len() + 1 => { + Some( + self.damage_rects.iter() + .cycle() + .skip(self.current_offset + 1) + .take(n - 1) + .fold(DeviceRect::zero(), |acc, r| acc.union(r)) + ) + } + // The backbuffer is older than the number of frames for which we track, + // so we treat it as entirely invalid. + _ => None, + } + } +} + +/// The renderer is responsible for submitting to the GPU the work prepared by the +/// RenderBackend. +/// +/// We have a separate `Renderer` instance for each instance of WebRender (generally +/// one per OS window), and all instances share the same thread. 
+pub struct Renderer { + result_rx: Receiver<ResultMsg>, + api_tx: Sender<ApiMsg>, + pub device: Device, + pending_texture_updates: Vec<TextureUpdateList>, + /// True if there are any TextureCacheUpdate pending. + pending_texture_cache_updates: bool, + pending_native_surface_updates: Vec<NativeSurfaceOperation>, + pending_gpu_cache_updates: Vec<GpuCacheUpdateList>, + pending_gpu_cache_clear: bool, + pending_shader_updates: Vec<PathBuf>, + active_documents: FastHashMap<DocumentId, RenderedDocument>, + + shaders: Rc<RefCell<Shaders>>, + + max_recorded_profiles: usize, + + clear_color: ColorF, + enable_clear_scissor: bool, + enable_advanced_blend_barriers: bool, + clear_caches_with_quads: bool, + clear_alpha_targets_with_quads: bool, + + debug: debug::LazyInitializedDebugRenderer, + debug_flags: DebugFlags, + profile: TransactionProfile, + frame_counter: u64, + resource_upload_time: f64, + gpu_cache_upload_time: f64, + profiler: Profiler, + + last_time: u64, + + pub gpu_profiler: GpuProfiler, + vaos: vertex::RendererVAOs, + + gpu_cache_texture: gpu_cache::GpuCacheTexture, + vertex_data_textures: Vec<vertex::VertexDataTextures>, + current_vertex_data_textures: usize, + + /// When the GPU cache debugger is enabled, we keep track of the live blocks + /// in the GPU cache so that we can use them for the debug display. This + /// member stores those live blocks, indexed by row. + gpu_cache_debug_chunks: Vec<Vec<GpuCacheDebugChunk>>, + + gpu_cache_frame_id: FrameId, + gpu_cache_overflow: bool, + + pipeline_info: PipelineInfo, + + // Manages and resolves source textures IDs to real texture IDs. + texture_resolver: TextureResolver, + + texture_upload_pbo_pool: UploadPBOPool, + staging_texture_pool: UploadTexturePool, + + dither_matrix_texture: Option<Texture>, + + /// Optional trait object that allows the client + /// application to provide external buffers for image data. 
+ external_image_handler: Option<Box<dyn ExternalImageHandler>>, + + /// Optional function pointers for measuring memory used by a given + /// heap-allocated pointer. + size_of_ops: Option<MallocSizeOfOps>, + + pub renderer_errors: Vec<RendererError>, + + pub(in crate) async_frame_recorder: Option<AsyncScreenshotGrabber>, + pub(in crate) async_screenshots: Option<AsyncScreenshotGrabber>, + + /// List of profile results from previous frames. Can be retrieved + /// via get_frame_profiles(). + cpu_profiles: VecDeque<CpuProfile>, + gpu_profiles: VecDeque<GpuProfile>, + + /// Notification requests to be fulfilled after rendering. + notifications: Vec<NotificationRequest>, + + device_size: Option<DeviceIntSize>, + + /// A lazily created texture for the zoom debugging widget. + zoom_debug_texture: Option<Texture>, + + /// The current mouse position. This is used for debugging + /// functionality only, such as the debug zoom widget. + cursor_position: DeviceIntPoint, + + /// Guards to check if we might be rendering a frame with expired texture + /// cache entries. + shared_texture_cache_cleared: bool, + + /// The set of documents which we've seen a publish for since last render. + documents_seen: FastHashSet<DocumentId>, + + #[cfg(feature = "capture")] + read_fbo: FBOId, + #[cfg(feature = "replay")] + owned_external_images: FastHashMap<(ExternalImageId, u8), ExternalTexture>, + + /// The compositing config, affecting how WR composites into the final scene. + compositor_config: CompositorConfig, + + current_compositor_kind: CompositorKind, + + /// Maintains a set of allocated native composite surfaces. This allows any + /// currently allocated surfaces to be cleaned up as soon as deinit() is + /// called (the normal bookkeeping for native surfaces exists in the + /// render backend thread). + allocated_native_surfaces: FastHashSet<NativeSurfaceId>, + + /// If true, partial present state has been reset and everything needs to + /// be drawn on the next render. 
+ force_redraw: bool, + + /// State related to the debug / profiling overlays + debug_overlay_state: DebugOverlayState, + + /// Tracks the dirty rectangles from previous frames. Used on platforms + /// that require keeping the front buffer fully correct when doing + /// partial present (e.g. unix desktop with EGL_EXT_buffer_age). + buffer_damage_tracker: BufferDamageTracker, + + max_primitive_instance_count: usize, + enable_instancing: bool, + + /// Count consecutive oom frames to detectif we are stuck unable to render + /// in a loop. + consecutive_oom_frames: u32, + + /// update() defers processing of ResultMsg, if frame_publish_id of + /// ResultMsg::PublishDocument exceeds target_frame_publish_id. + target_frame_publish_id: Option<FramePublishId>, + + /// Hold a next ResultMsg that will be handled by update(). + pending_result_msg: Option<ResultMsg>, +} + +#[derive(Debug)] +pub enum RendererError { + Shader(ShaderError), + Thread(std::io::Error), + MaxTextureSize, + SoftwareRasterizer, + OutOfMemory, +} + +impl From<ShaderError> for RendererError { + fn from(err: ShaderError) -> Self { + RendererError::Shader(err) + } +} + +impl From<std::io::Error> for RendererError { + fn from(err: std::io::Error) -> Self { + RendererError::Thread(err) + } +} + +impl Renderer { + pub fn device_size(&self) -> Option<DeviceIntSize> { + self.device_size + } + + /// Update the current position of the debug cursor. 
+ pub fn set_cursor_position( + &mut self, + position: DeviceIntPoint, + ) { + self.cursor_position = position; + } + + pub fn get_max_texture_size(&self) -> i32 { + self.device.max_texture_size() + } + + pub fn get_graphics_api_info(&self) -> GraphicsApiInfo { + GraphicsApiInfo { + kind: GraphicsApi::OpenGL, + version: self.device.gl().get_string(gl::VERSION), + renderer: self.device.gl().get_string(gl::RENDERER), + } + } + + pub fn preferred_color_format(&self) -> ImageFormat { + self.device.preferred_color_formats().external + } + + pub fn required_texture_stride_alignment(&self, format: ImageFormat) -> usize { + self.device.required_pbo_stride().num_bytes(format).get() + } + + pub fn set_clear_color(&mut self, color: ColorF) { + self.clear_color = color; + } + + pub fn flush_pipeline_info(&mut self) -> PipelineInfo { + mem::replace(&mut self.pipeline_info, PipelineInfo::default()) + } + + /// Returns the Epoch of the current frame in a pipeline. + pub fn current_epoch(&self, document_id: DocumentId, pipeline_id: PipelineId) -> Option<Epoch> { + self.pipeline_info.epochs.get(&(pipeline_id, document_id)).cloned() + } + + fn get_next_result_msg(&mut self) -> Option<ResultMsg> { + if self.pending_result_msg.is_none() { + if let Ok(msg) = self.result_rx.try_recv() { + self.pending_result_msg = Some(msg); + } + } + + match (&self.pending_result_msg, &self.target_frame_publish_id) { + (Some(ResultMsg::PublishDocument(frame_publish_id, _, _, _)), Some(target_id)) => { + if frame_publish_id > target_id { + return None; + } + } + _ => {} + } + + self.pending_result_msg.take() + } + + /// Processes the result queue. + /// + /// Should be called before `render()`, as texture cache updates are done here. + pub fn update(&mut self) { + profile_scope!("update"); + + // Pull any pending results and return the most recent. 
+ while let Some(msg) = self.get_next_result_msg() { + match msg { + ResultMsg::PublishPipelineInfo(mut pipeline_info) => { + for ((pipeline_id, document_id), epoch) in pipeline_info.epochs { + self.pipeline_info.epochs.insert((pipeline_id, document_id), epoch); + } + self.pipeline_info.removed_pipelines.extend(pipeline_info.removed_pipelines.drain(..)); + } + ResultMsg::PublishDocument( + _, + document_id, + mut doc, + resource_update_list, + ) => { + // Add a new document to the active set + + // If the document we are replacing must be drawn (in order to + // update the texture cache), issue a render just to + // off-screen targets, ie pass None to render_impl. We do this + // because a) we don't need to render to the main framebuffer + // so it is cheaper not to, and b) doing so without a + // subsequent present would break partial present. + if let Some(mut prev_doc) = self.active_documents.remove(&document_id) { + doc.profile.merge(&mut prev_doc.profile); + + if prev_doc.frame.must_be_drawn() { + prev_doc.render_reasons |= RenderReasons::TEXTURE_CACHE_FLUSH; + self.render_impl( + document_id, + &mut prev_doc, + None, + 0, + ).ok(); + } + } + + self.active_documents.insert(document_id, doc); + + // IMPORTANT: The pending texture cache updates must be applied + // *after* the previous frame has been rendered above + // (if neceessary for a texture cache update). For + // an example of why this is required: + // 1) Previous frame contains a render task that + // targets Texture X. + // 2) New frame contains a texture cache update which + // frees Texture X. + // 3) bad stuff happens. 
+ + //TODO: associate `document_id` with target window + self.pending_texture_cache_updates |= !resource_update_list.texture_updates.updates.is_empty(); + self.pending_texture_updates.push(resource_update_list.texture_updates); + self.pending_native_surface_updates.extend(resource_update_list.native_surface_updates); + self.documents_seen.insert(document_id); + } + ResultMsg::UpdateGpuCache(mut list) => { + if list.clear { + self.pending_gpu_cache_clear = true; + } + if list.clear { + self.gpu_cache_debug_chunks = Vec::new(); + } + for cmd in mem::replace(&mut list.debug_commands, Vec::new()) { + match cmd { + GpuCacheDebugCmd::Alloc(chunk) => { + let row = chunk.address.v as usize; + if row >= self.gpu_cache_debug_chunks.len() { + self.gpu_cache_debug_chunks.resize(row + 1, Vec::new()); + } + self.gpu_cache_debug_chunks[row].push(chunk); + }, + GpuCacheDebugCmd::Free(address) => { + let chunks = &mut self.gpu_cache_debug_chunks[address.v as usize]; + let pos = chunks.iter() + .position(|x| x.address == address).unwrap(); + chunks.remove(pos); + }, + } + } + self.pending_gpu_cache_updates.push(list); + } + ResultMsg::UpdateResources { + resource_updates, + memory_pressure, + } => { + if memory_pressure { + // If a memory pressure event arrives _after_ a new scene has + // been published that writes persistent targets (i.e. cached + // render tasks to the texture cache, or picture cache tiles) + // but _before_ the next update/render loop, those targets + // will not be updated due to the active_documents list being + // cleared at the end of this message. To work around that, + // if any of the existing documents have not rendered yet, and + // have picture/texture cache targets, force a render so that + // those targets are updated. 
+ let active_documents = mem::replace( + &mut self.active_documents, + FastHashMap::default(), + ); + for (doc_id, mut doc) in active_documents { + if doc.frame.must_be_drawn() { + // As this render will not be presented, we must pass None to + // render_impl. This avoids interfering with partial present + // logic, as well as being more efficient. + self.render_impl( + doc_id, + &mut doc, + None, + 0, + ).ok(); + } + } + } + + self.pending_texture_cache_updates |= !resource_updates.texture_updates.updates.is_empty(); + self.pending_texture_updates.push(resource_updates.texture_updates); + self.pending_native_surface_updates.extend(resource_updates.native_surface_updates); + self.device.begin_frame(); + + self.update_texture_cache(); + self.update_native_surfaces(); + + // Flush the render target pool on memory pressure. + // + // This needs to be separate from the block below because + // the device module asserts if we delete textures while + // not in a frame. + if memory_pressure { + self.texture_upload_pbo_pool.on_memory_pressure(&mut self.device); + self.staging_texture_pool.delete_textures(&mut self.device); + } + + self.device.end_frame(); + } + ResultMsg::AppendNotificationRequests(mut notifications) => { + // We need to know specifically if there are any pending + // TextureCacheUpdate updates in any of the entries in + // pending_texture_updates. They may simply be nops, which do not + // need to prevent issuing the notification, and if so, may not + // cause a timely frame render to occur to wake up any listeners. 
+ if !self.pending_texture_cache_updates { + drain_filter( + &mut notifications, + |n| { n.when() == Checkpoint::FrameTexturesUpdated }, + |n| { n.notify(); }, + ); + } + self.notifications.append(&mut notifications); + } + ResultMsg::ForceRedraw => { + self.force_redraw = true; + } + ResultMsg::RefreshShader(path) => { + self.pending_shader_updates.push(path); + } + ResultMsg::SetParameter(ref param) => { + self.device.set_parameter(param); + } + ResultMsg::DebugOutput(output) => match output { + #[cfg(feature = "capture")] + DebugOutput::SaveCapture(config, deferred) => { + self.save_capture(config, deferred); + } + #[cfg(feature = "replay")] + DebugOutput::LoadCapture(config, plain_externals) => { + self.active_documents.clear(); + self.load_capture(config, plain_externals); + } + }, + ResultMsg::DebugCommand(command) => { + self.handle_debug_command(command); + } + } + } + } + + /// update() defers processing of ResultMsg, if frame_publish_id of + /// ResultMsg::PublishDocument exceeds target_frame_publish_id. + pub fn set_target_frame_publish_id(&mut self, publish_id: FramePublishId) { + self.target_frame_publish_id = Some(publish_id); + } + + fn handle_debug_command(&mut self, command: DebugCommand) { + match command { + DebugCommand::SetPictureTileSize(_) | + DebugCommand::SetMaximumSurfaceSize(_) => { + panic!("Should be handled by render backend"); + } + DebugCommand::SaveCapture(..) | + DebugCommand::LoadCapture(..) | + DebugCommand::StartCaptureSequence(..) | + DebugCommand::StopCaptureSequence => { + panic!("Capture commands are not welcome here! 
Did you build with 'capture' feature?") + } + DebugCommand::ClearCaches(_) + | DebugCommand::SimulateLongSceneBuild(_) + | DebugCommand::EnableNativeCompositor(_) + | DebugCommand::SetBatchingLookback(_) => {} + DebugCommand::InvalidateGpuCache => { + self.gpu_cache_texture.invalidate(); + } + DebugCommand::SetFlags(flags) => { + self.set_debug_flags(flags); + } + } + } + + /// Set a callback for handling external images. + pub fn set_external_image_handler(&mut self, handler: Box<dyn ExternalImageHandler>) { + self.external_image_handler = Some(handler); + } + + /// Retrieve (and clear) the current list of recorded frame profiles. + pub fn get_frame_profiles(&mut self) -> (Vec<CpuProfile>, Vec<GpuProfile>) { + let cpu_profiles = self.cpu_profiles.drain(..).collect(); + let gpu_profiles = self.gpu_profiles.drain(..).collect(); + (cpu_profiles, gpu_profiles) + } + + /// Reset the current partial present state. This forces the entire framebuffer + /// to be refreshed next time `render` is called. + pub fn force_redraw(&mut self) { + self.force_redraw = true; + } + + /// Renders the current frame. + /// + /// A Frame is supplied by calling [`generate_frame()`][webrender_api::Transaction::generate_frame]. + /// buffer_age is the age of the current backbuffer. It is only relevant if partial present + /// is active, otherwise 0 should be passed here. + pub fn render( + &mut self, + device_size: DeviceIntSize, + buffer_age: usize, + ) -> Result<RenderResults, Vec<RendererError>> { + self.device_size = Some(device_size); + + // TODO(gw): We want to make the active document that is + // being rendered configurable via the public + // API in future. For now, just select the last + // added document as the active one to render + // (Gecko only ever creates a single document + // per renderer right now). 
+ let doc_id = self.active_documents.keys().last().cloned(); + + let result = match doc_id { + Some(doc_id) => { + // Remove the doc from the map to appease the borrow checker + let mut doc = self.active_documents + .remove(&doc_id) + .unwrap(); + + let result = self.render_impl( + doc_id, + &mut doc, + Some(device_size), + buffer_age, + ); + + self.active_documents.insert(doc_id, doc); + + result + } + None => { + self.last_time = precise_time_ns(); + Ok(RenderResults::default()) + } + }; + + drain_filter( + &mut self.notifications, + |n| { n.when() == Checkpoint::FrameRendered }, + |n| { n.notify(); }, + ); + + let mut oom = false; + if let Err(ref errors) = result { + for error in errors { + if matches!(error, &RendererError::OutOfMemory) { + oom = true; + break; + } + } + } + + if oom { + let _ = self.api_tx.send(ApiMsg::MemoryPressure); + // Ensure we don't get stuck in a loop. + self.consecutive_oom_frames += 1; + assert!(self.consecutive_oom_frames < 5, "Renderer out of memory"); + } else { + self.consecutive_oom_frames = 0; + } + + // This is the end of the rendering pipeline. If some notifications are still there, + // just clear them and they will automatically fire the Checkpoint::TransactionDropped + // event. Otherwise they would just pile up in this vector forever. + self.notifications.clear(); + + tracy_frame_marker!(); + + result + } + + /// Update the state of any debug / profiler overlays. This is currently only needed + /// when running with the native compositor enabled. + fn update_debug_overlay( + &mut self, + framebuffer_size: DeviceIntSize, + has_debug_items: bool, + ) { + // If any of the following debug flags are set, something will be drawn on the debug overlay. 
+ self.debug_overlay_state.is_enabled = has_debug_items || self.debug_flags.intersects( + DebugFlags::PROFILER_DBG | + DebugFlags::RENDER_TARGET_DBG | + DebugFlags::TEXTURE_CACHE_DBG | + DebugFlags::EPOCHS | + DebugFlags::GPU_CACHE_DBG | + DebugFlags::PICTURE_CACHING_DBG | + DebugFlags::PRIMITIVE_DBG | + DebugFlags::ZOOM_DBG | + DebugFlags::WINDOW_VISIBILITY_DBG + ); + + // Update the debug overlay surface, if we are running in native compositor mode. + if let CompositorKind::Native { .. } = self.current_compositor_kind { + let compositor = self.compositor_config.compositor().unwrap(); + + // If there is a current surface, destroy it if we don't need it for this frame, or if + // the size has changed. + if let Some(current_size) = self.debug_overlay_state.current_size { + if !self.debug_overlay_state.is_enabled || current_size != framebuffer_size { + compositor.destroy_surface(&mut self.device, NativeSurfaceId::DEBUG_OVERLAY); + self.debug_overlay_state.current_size = None; + } + } + + // Allocate a new surface, if we need it and there isn't one. + if self.debug_overlay_state.is_enabled && self.debug_overlay_state.current_size.is_none() { + compositor.create_surface( + &mut self.device, + NativeSurfaceId::DEBUG_OVERLAY, + DeviceIntPoint::zero(), + framebuffer_size, + false, + ); + compositor.create_tile( + &mut self.device, + NativeTileId::DEBUG_OVERLAY, + ); + self.debug_overlay_state.current_size = Some(framebuffer_size); + } + } + } + + /// Bind a draw target for the debug / profiler overlays, if required. + fn bind_debug_overlay(&mut self, device_size: DeviceIntSize) -> Option<DrawTarget> { + // Debug overlay setup are only required in native compositing mode + if self.debug_overlay_state.is_enabled { + if let CompositorKind::Native { .. 
} = self.current_compositor_kind { + let compositor = self.compositor_config.compositor().unwrap(); + let surface_size = self.debug_overlay_state.current_size.unwrap(); + + // Ensure old surface is invalidated before binding + compositor.invalidate_tile( + &mut self.device, + NativeTileId::DEBUG_OVERLAY, + DeviceIntRect::from_size(surface_size), + ); + // Bind the native surface + let surface_info = compositor.bind( + &mut self.device, + NativeTileId::DEBUG_OVERLAY, + DeviceIntRect::from_size(surface_size), + DeviceIntRect::from_size(surface_size), + ); + + // Bind the native surface to current FBO target + let draw_target = DrawTarget::NativeSurface { + offset: surface_info.origin, + external_fbo_id: surface_info.fbo_id, + dimensions: surface_size, + }; + self.device.bind_draw_target(draw_target); + + // When native compositing, clear the debug overlay each frame. + self.device.clear_target( + Some([0.0, 0.0, 0.0, 0.0]), + None, // debug renderer does not use depth + None, + ); + + Some(draw_target) + } else { + // If we're not using the native compositor, then the default + // frame buffer is already bound. Create a DrawTarget for it and + // return it. + Some(DrawTarget::new_default(device_size, self.device.surface_origin_is_top_left())) + } + } else { + None + } + } + + /// Unbind the draw target for debug / profiler overlays, if required. + fn unbind_debug_overlay(&mut self) { + // Debug overlay setup are only required in native compositing mode + if self.debug_overlay_state.is_enabled { + if let CompositorKind::Native { .. 
} = self.current_compositor_kind { + let compositor = self.compositor_config.compositor().unwrap(); + // Unbind the draw target and add it to the visual tree to be composited + compositor.unbind(&mut self.device); + + compositor.add_surface( + &mut self.device, + NativeSurfaceId::DEBUG_OVERLAY, + CompositorSurfaceTransform::identity(), + DeviceIntRect::from_size( + self.debug_overlay_state.current_size.unwrap(), + ), + ImageRendering::Auto, + ); + } + } + } + + // If device_size is None, don't render to the main frame buffer. This is useful to + // update texture cache render tasks but avoid doing a full frame render. If the + // render is not going to be presented, then this must be set to None, as performing a + // composite without a present will confuse partial present. + fn render_impl( + &mut self, + doc_id: DocumentId, + active_doc: &mut RenderedDocument, + device_size: Option<DeviceIntSize>, + buffer_age: usize, + ) -> Result<RenderResults, Vec<RendererError>> { + profile_scope!("render"); + let mut results = RenderResults::default(); + self.profile.start_time(profiler::RENDERER_TIME); + + self.staging_texture_pool.begin_frame(); + + let compositor_kind = active_doc.frame.composite_state.compositor_kind; + // CompositorKind is updated + if self.current_compositor_kind != compositor_kind { + let enable = match (self.current_compositor_kind, compositor_kind) { + (CompositorKind::Native { .. }, CompositorKind::Draw { .. }) => { + if self.debug_overlay_state.current_size.is_some() { + self.compositor_config + .compositor() + .unwrap() + .destroy_surface(&mut self.device, NativeSurfaceId::DEBUG_OVERLAY); + self.debug_overlay_state.current_size = None; + } + false + } + (CompositorKind::Draw { .. }, CompositorKind::Native { .. }) => { + true + } + (current_compositor_kind, active_doc_compositor_kind) => { + warn!("Compositor mismatch, assuming this is Wrench running. 
Current {:?}, active {:?}", + current_compositor_kind, active_doc_compositor_kind); + false + } + }; + + if let Some(config) = self.compositor_config.compositor() { + config.enable_native_compositor(&mut self.device, enable); + } + self.current_compositor_kind = compositor_kind; + } + + // The texture resolver scope should be outside of any rendering, including + // debug rendering. This ensures that when we return render targets to the + // pool via glInvalidateFramebuffer, we don't do any debug rendering after + // that point. Otherwise, the bind / invalidate / bind logic trips up the + // render pass logic in tiled / mobile GPUs, resulting in an extra copy / + // resolve step when the debug overlay is enabled. + self.texture_resolver.begin_frame(); + + if let Some(device_size) = device_size { + self.update_gpu_profile(device_size); + } + + let cpu_frame_id = { + let _gm = self.gpu_profiler.start_marker("begin frame"); + let frame_id = self.device.begin_frame(); + self.gpu_profiler.begin_frame(frame_id); + + self.device.disable_scissor(); + self.device.disable_depth(); + self.set_blend(false, FramebufferKind::Main); + //self.update_shaders(); + + self.update_texture_cache(); + self.update_native_surfaces(); + + frame_id + }; + + if let Some(device_size) = device_size { + // Inform the client that we are starting a composition transaction if native + // compositing is enabled. This needs to be done early in the frame, so that + // we can create debug overlays after drawing the main surfaces. + if let CompositorKind::Native { .. } = self.current_compositor_kind { + let compositor = self.compositor_config.compositor().unwrap(); + compositor.begin_frame(&mut self.device); + } + + // Update the state of the debug overlay surface, ensuring that + // the compositor mode has a suitable surface to draw to, if required. 
+ self.update_debug_overlay(device_size, !active_doc.frame.debug_items.is_empty()); + } + + let frame = &mut active_doc.frame; + let profile = &mut active_doc.profile; + assert!(self.current_compositor_kind == frame.composite_state.compositor_kind); + + if self.shared_texture_cache_cleared { + assert!(self.documents_seen.contains(&doc_id), + "Cleared texture cache without sending new document frame."); + } + + match self.prepare_gpu_cache(&frame.deferred_resolves) { + Ok(..) => { + assert!(frame.gpu_cache_frame_id <= self.gpu_cache_frame_id, + "Received frame depends on a later GPU cache epoch ({:?}) than one we received last via `UpdateGpuCache` ({:?})", + frame.gpu_cache_frame_id, self.gpu_cache_frame_id); + + { + profile_scope!("gl.flush"); + self.device.gl().flush(); // early start on gpu cache updates + } + + self.draw_frame( + frame, + device_size, + buffer_age, + &mut results, + ); + + // TODO(nical): do this automatically by selecting counters in the wr profiler + // Profile marker for the number of invalidated picture cache + if thread_is_being_profiled() { + let duration = Duration::new(0,0); + if let Some(n) = self.profile.get(profiler::RENDERED_PICTURE_TILES) { + let message = (n as usize).to_string(); + add_text_marker("NumPictureCacheInvalidated", &message, duration); + } + } + + if device_size.is_some() { + self.draw_frame_debug_items(&frame.debug_items); + } + + self.profile.merge(profile); + } + Err(e) => { + self.renderer_errors.push(e); + } + } + + self.unlock_external_images(&frame.deferred_resolves); + + let _gm = self.gpu_profiler.start_marker("end frame"); + self.gpu_profiler.end_frame(); + + let debug_overlay = device_size.and_then(|device_size| { + // Bind a surface to draw the debug / profiler information to. 
+ self.bind_debug_overlay(device_size).map(|draw_target| { + self.draw_render_target_debug(&draw_target); + self.draw_texture_cache_debug(&draw_target); + self.draw_gpu_cache_debug(device_size); + self.draw_zoom_debug(device_size); + self.draw_epoch_debug(); + self.draw_window_visibility_debug(); + draw_target + }) + }); + + let t = self.profile.end_time(profiler::RENDERER_TIME); + self.profile.end_time_if_started(profiler::TOTAL_FRAME_CPU_TIME); + Telemetry::record_renderer_time(Duration::from_micros((t * 1000.00) as u64)); + if self.profile.get(profiler::SHADER_BUILD_TIME).is_none() { + Telemetry::record_renderer_time_no_sc(Duration::from_micros((t * 1000.00) as u64)); + } + + let current_time = precise_time_ns(); + if device_size.is_some() { + let time = profiler::ns_to_ms(current_time - self.last_time); + self.profile.set(profiler::FRAME_TIME, time); + } + + if self.max_recorded_profiles > 0 { + while self.cpu_profiles.len() >= self.max_recorded_profiles { + self.cpu_profiles.pop_front(); + } + let cpu_profile = CpuProfile::new( + cpu_frame_id, + (self.profile.get_or(profiler::FRAME_BUILDING_TIME, 0.0) * 1000000.0) as u64, + (self.profile.get_or(profiler::RENDERER_TIME, 0.0) * 1000000.0) as u64, + self.profile.get_or(profiler::DRAW_CALLS, 0.0) as usize, + ); + self.cpu_profiles.push_back(cpu_profile); + } + + if thread_is_being_profiled() { + let duration = Duration::new(0,0); + let message = (self.profile.get_or(profiler::DRAW_CALLS, 0.0) as usize).to_string(); + add_text_marker("NumDrawCalls", &message, duration); + } + + let report = self.texture_resolver.report_memory(); + self.profile.set(profiler::RENDER_TARGET_MEM, profiler::bytes_to_mb(report.render_target_textures)); + self.profile.set(profiler::PICTURE_TILES_MEM, profiler::bytes_to_mb(report.picture_tile_textures)); + self.profile.set(profiler::ATLAS_TEXTURES_MEM, profiler::bytes_to_mb(report.atlas_textures)); + self.profile.set(profiler::STANDALONE_TEXTURES_MEM, 
profiler::bytes_to_mb(report.standalone_textures)); + + self.profile.set(profiler::DEPTH_TARGETS_MEM, profiler::bytes_to_mb(self.device.depth_targets_memory())); + + self.profile.set(profiler::TEXTURES_CREATED, self.device.textures_created); + self.profile.set(profiler::TEXTURES_DELETED, self.device.textures_deleted); + + results.stats.texture_upload_mb = self.profile.get_or(profiler::TEXTURE_UPLOADS_MEM, 0.0); + self.frame_counter += 1; + results.stats.resource_upload_time = self.resource_upload_time; + self.resource_upload_time = 0.0; + results.stats.gpu_cache_upload_time = self.gpu_cache_upload_time; + self.gpu_cache_upload_time = 0.0; + + if let Some(stats) = active_doc.frame_stats.take() { + // Copy the full frame stats to RendererStats + results.stats.merge(&stats); + + self.profiler.update_frame_stats(stats); + } + + // Turn the render reasons bitflags into something we can see in the profiler. + // For now this is just a binary yes/no for each bit, which means that when looking + // at "Render reasons" in the profiler HUD the average view indicates the proportion + // of frames that had the bit set over a half second window whereas max shows whether + // the bit has been set at least once during that time window. + // We could implement better ways to visualize this information. + let add_markers = thread_is_being_profiled(); + for i in 0..RenderReasons::NUM_BITS { + let counter = profiler::RENDER_REASON_FIRST + i as usize; + let mut val = 0.0; + let reason_bit = RenderReasons::from_bits_truncate(1 << i); + if active_doc.render_reasons.contains(reason_bit) { + val = 1.0; + if add_markers { + let event_str = format!("Render reason {:?}", reason_bit); + add_event_marker(&event_str); + } + } + self.profile.set(counter, val); + } + active_doc.render_reasons = RenderReasons::empty(); + + + self.texture_resolver.update_profile(&mut self.profile); + + // Note: this clears the values in self.profile. 
+ self.profiler.set_counters(&mut self.profile); + + // Note: profile counters must be set before this or they will count for next frame. + self.profiler.update(); + + if self.debug_flags.intersects(DebugFlags::PROFILER_DBG | DebugFlags::PROFILER_CAPTURE) { + if let Some(device_size) = device_size { + //TODO: take device/pixel ratio into equation? + if let Some(debug_renderer) = self.debug.get_mut(&mut self.device) { + self.profiler.draw_profile( + self.frame_counter, + debug_renderer, + device_size, + ); + } + } + } + + if self.debug_flags.contains(DebugFlags::ECHO_DRIVER_MESSAGES) { + self.device.echo_driver_messages(); + } + + if let Some(debug_renderer) = self.debug.try_get_mut() { + let small_screen = self.debug_flags.contains(DebugFlags::SMALL_SCREEN); + let scale = if small_screen { 1.6 } else { 1.0 }; + // TODO(gw): Tidy this up so that compositor config integrates better + // with the (non-compositor) surface y-flip options. + let surface_origin_is_top_left = match self.current_compositor_kind { + CompositorKind::Native { .. } => true, + CompositorKind::Draw { .. } => self.device.surface_origin_is_top_left(), + }; + // If there is a debug overlay, render it. Otherwise, just clear + // the debug renderer. + debug_renderer.render( + &mut self.device, + debug_overlay.and(device_size), + scale, + surface_origin_is_top_left, + ); + } + + self.staging_texture_pool.end_frame(&mut self.device); + self.texture_upload_pbo_pool.end_frame(&mut self.device); + self.device.end_frame(); + + if debug_overlay.is_some() { + self.last_time = current_time; + + // Unbind the target for the debug overlay. No debug or profiler drawing + // can occur after this point. + self.unbind_debug_overlay(); + } + + if device_size.is_some() { + // Inform the client that we are finished this composition transaction if native + // compositing is enabled. This must be called after any debug / profiling compositor + // surfaces have been drawn and added to the visual tree. 
+ if let CompositorKind::Native { .. } = self.current_compositor_kind { + profile_scope!("compositor.end_frame"); + let compositor = self.compositor_config.compositor().unwrap(); + compositor.end_frame(&mut self.device); + } + } + + self.documents_seen.clear(); + self.shared_texture_cache_cleared = false; + + self.check_gl_errors(); + + if self.renderer_errors.is_empty() { + Ok(results) + } else { + Err(mem::replace(&mut self.renderer_errors, Vec::new())) + } + } + + fn update_gpu_profile(&mut self, device_size: DeviceIntSize) { + let _gm = self.gpu_profiler.start_marker("build samples"); + // Block CPU waiting for last frame's GPU profiles to arrive. + // In general this shouldn't block unless heavily GPU limited. + let (gpu_frame_id, timers, samplers) = self.gpu_profiler.build_samples(); + + if self.max_recorded_profiles > 0 { + while self.gpu_profiles.len() >= self.max_recorded_profiles { + self.gpu_profiles.pop_front(); + } + + self.gpu_profiles.push_back(GpuProfile::new(gpu_frame_id, &timers)); + } + + self.profiler.set_gpu_time_queries(timers); + + if !samplers.is_empty() { + let screen_fraction = 1.0 / device_size.to_f32().area(); + + fn accumulate_sampler_value(description: &str, samplers: &[GpuSampler]) -> f32 { + let mut accum = 0.0; + for sampler in samplers { + if sampler.tag.label != description { + continue; + } + + accum += sampler.count as f32; + } + + accum + } + + let alpha_targets = accumulate_sampler_value(&"Alpha targets", &samplers) * screen_fraction; + let transparent_pass = accumulate_sampler_value(&"Transparent pass", &samplers) * screen_fraction; + let opaque_pass = accumulate_sampler_value(&"Opaque pass", &samplers) * screen_fraction; + self.profile.set(profiler::ALPHA_TARGETS_SAMPLERS, alpha_targets); + self.profile.set(profiler::TRANSPARENT_PASS_SAMPLERS, transparent_pass); + self.profile.set(profiler::OPAQUE_PASS_SAMPLERS, opaque_pass); + self.profile.set(profiler::TOTAL_SAMPLERS, alpha_targets + transparent_pass + opaque_pass); + } + 
} + + fn update_texture_cache(&mut self) { + profile_scope!("update_texture_cache"); + + let _gm = self.gpu_profiler.start_marker("texture cache update"); + let mut pending_texture_updates = mem::replace(&mut self.pending_texture_updates, vec![]); + self.pending_texture_cache_updates = false; + + self.profile.start_time(profiler::TEXTURE_CACHE_UPDATE_TIME); + + let mut create_cache_texture_time = 0; + let mut delete_cache_texture_time = 0; + + for update_list in pending_texture_updates.drain(..) { + // Handle copies from one texture to another. + for ((src_tex, dst_tex), copies) in &update_list.copies { + + let dest_texture = &self.texture_resolver.texture_cache_map[&dst_tex].texture; + let dst_texture_size = dest_texture.get_dimensions().to_f32(); + + let mut copy_instances = Vec::new(); + for copy in copies { + copy_instances.push(CopyInstance { + src_rect: copy.src_rect.to_f32(), + dst_rect: copy.dst_rect.to_f32(), + dst_texture_size, + }); + } + + let draw_target = DrawTarget::from_texture(dest_texture, false); + self.device.bind_draw_target(draw_target); + + self.shaders + .borrow_mut() + .ps_copy + .bind( + &mut self.device, + &Transform3D::identity(), + None, + &mut self.renderer_errors, + &mut self.profile, + ); + + self.draw_instanced_batch( + &copy_instances, + VertexArrayKind::Copy, + &BatchTextures::composite_rgb( + TextureSource::TextureCache(*src_tex, Swizzle::default()) + ), + &mut RendererStats::default(), + ); + } + + // Find any textures that will need to be deleted in this group of allocations. 
+ let mut pending_deletes = Vec::new(); + for allocation in &update_list.allocations { + let old = self.texture_resolver.texture_cache_map.remove(&allocation.id); + match allocation.kind { + TextureCacheAllocationKind::Alloc(_) => { + assert!(old.is_none(), "Renderer and backend disagree!"); + } + TextureCacheAllocationKind::Reset(_) | + TextureCacheAllocationKind::Free => { + assert!(old.is_some(), "Renderer and backend disagree!"); + } + } + if let Some(old) = old { + + // Regenerate the cache allocation info so we can search through deletes for reuse. + let size = old.texture.get_dimensions(); + let info = TextureCacheAllocInfo { + width: size.width, + height: size.height, + format: old.texture.get_format(), + filter: old.texture.get_filter(), + target: old.texture.get_target(), + is_shared_cache: old.texture.flags().contains(TextureFlags::IS_SHARED_TEXTURE_CACHE), + has_depth: old.texture.supports_depth(), + category: old.category, + }; + pending_deletes.push((old.texture, info)); + } + } + // Look for any alloc or reset that has matching alloc info and save it from being deleted. + let mut reused_textures = VecDeque::with_capacity(pending_deletes.len()); + for allocation in &update_list.allocations { + match allocation.kind { + TextureCacheAllocationKind::Alloc(ref info) | + TextureCacheAllocationKind::Reset(ref info) => { + reused_textures.push_back( + pending_deletes.iter() + .position(|(_, old_info)| *old_info == *info) + .map(|index| pending_deletes.swap_remove(index).0) + ); + } + TextureCacheAllocationKind::Free => {} + } + } + + // Now that we've saved as many deletions for reuse as we can, actually delete whatever is left. 
+ if !pending_deletes.is_empty() { + let delete_texture_start = precise_time_ns(); + for (texture, _) in pending_deletes { + add_event_marker("TextureCacheFree"); + self.device.delete_texture(texture); + } + delete_cache_texture_time += precise_time_ns() - delete_texture_start; + } + + for allocation in update_list.allocations { + match allocation.kind { + TextureCacheAllocationKind::Alloc(_) => add_event_marker("TextureCacheAlloc"), + TextureCacheAllocationKind::Reset(_) => add_event_marker("TextureCacheReset"), + TextureCacheAllocationKind::Free => {} + }; + match allocation.kind { + TextureCacheAllocationKind::Alloc(ref info) | + TextureCacheAllocationKind::Reset(ref info) => { + let create_cache_texture_start = precise_time_ns(); + // Create a new native texture, as requested by the texture cache. + // If we managed to reuse a deleted texture, then prefer that instead. + // + // Ensure no PBO is bound when creating the texture storage, + // or GL will attempt to read data from there. + let mut texture = reused_textures.pop_front().unwrap_or(None).unwrap_or_else(|| { + self.device.create_texture( + info.target, + info.format, + info.width, + info.height, + info.filter, + // This needs to be a render target because some render + // tasks get rendered into the texture cache. + Some(RenderTargetInfo { has_depth: info.has_depth }), + ) + }); + + if info.is_shared_cache { + texture.flags_mut() + .insert(TextureFlags::IS_SHARED_TEXTURE_CACHE); + + // On Mali-Gxx devices we use batched texture uploads as it performs much better. + // However, due to another driver bug we must ensure the textures are fully cleared, + // otherwise we get visual artefacts when blitting to the texture cache. 
+ if self.device.use_batched_texture_uploads() && + !self.device.get_capabilities().supports_render_target_partial_update + { + self.clear_texture(&texture, [0.0; 4]); + } + + // Textures in the cache generally don't need to be cleared, + // but we do so if the debug display is active to make it + // easier to identify unallocated regions. + if self.debug_flags.contains(DebugFlags::TEXTURE_CACHE_DBG) { + self.clear_texture(&texture, TEXTURE_CACHE_DBG_CLEAR_COLOR); + } + } + + create_cache_texture_time += precise_time_ns() - create_cache_texture_start; + + self.texture_resolver.texture_cache_map.insert(allocation.id, CacheTexture { + texture, + category: info.category, + }); + } + TextureCacheAllocationKind::Free => {} + }; + } + + upload_to_texture_cache(self, update_list.updates); + + self.check_gl_errors(); + } + + if create_cache_texture_time > 0 { + self.profile.set( + profiler::CREATE_CACHE_TEXTURE_TIME, + profiler::ns_to_ms(create_cache_texture_time) + ); + } + if delete_cache_texture_time > 0 { + self.profile.set( + profiler::DELETE_CACHE_TEXTURE_TIME, + profiler::ns_to_ms(delete_cache_texture_time) + ) + } + + let t = self.profile.end_time(profiler::TEXTURE_CACHE_UPDATE_TIME); + self.resource_upload_time += t; + Telemetry::record_texture_cache_update_time(Duration::from_micros((t * 1000.00) as u64)); + + drain_filter( + &mut self.notifications, + |n| { n.when() == Checkpoint::FrameTexturesUpdated }, + |n| { n.notify(); }, + ); + } + + fn check_gl_errors(&mut self) { + let err = self.device.gl().get_error(); + if err == gl::OUT_OF_MEMORY { + self.renderer_errors.push(RendererError::OutOfMemory); + } + + // Probably should check for other errors? + } + + fn bind_textures(&mut self, textures: &BatchTextures) { + for i in 0 .. 
3 { + self.texture_resolver.bind( + &textures.input.colors[i], + TextureSampler::color(i), + &mut self.device, + ); + } + + self.texture_resolver.bind( + &textures.clip_mask, + TextureSampler::ClipMask, + &mut self.device, + ); + + // TODO: this probably isn't the best place for this. + if let Some(ref texture) = self.dither_matrix_texture { + self.device.bind_texture(TextureSampler::Dither, texture, Swizzle::default()); + } + } + + fn draw_instanced_batch<T: Clone>( + &mut self, + data: &[T], + vertex_array_kind: VertexArrayKind, + textures: &BatchTextures, + stats: &mut RendererStats, + ) { + self.bind_textures(textures); + + // If we end up with an empty draw call here, that means we have + // probably introduced unnecessary batch breaks during frame + // building - so we should be catching this earlier and removing + // the batch. + debug_assert!(!data.is_empty()); + + let vao = &self.vaos[vertex_array_kind]; + self.device.bind_vao(vao); + + let chunk_size = if self.debug_flags.contains(DebugFlags::DISABLE_BATCHING) { + 1 + } else if vertex_array_kind == VertexArrayKind::Primitive { + self.max_primitive_instance_count + } else { + data.len() + }; + + for chunk in data.chunks(chunk_size) { + if self.enable_instancing { + self.device + .update_vao_instances(vao, chunk, ONE_TIME_USAGE_HINT, None); + self.device + .draw_indexed_triangles_instanced_u16(6, chunk.len() as i32); + } else { + self.device + .update_vao_instances(vao, chunk, ONE_TIME_USAGE_HINT, NonZeroUsize::new(4)); + self.device + .draw_indexed_triangles(6 * chunk.len() as i32); + } + self.profile.inc(profiler::DRAW_CALLS); + stats.total_draw_calls += 1; + } + + self.profile.add(profiler::VERTICES, 6 * data.len()); + } + + fn handle_readback_composite( + &mut self, + draw_target: DrawTarget, + uses_scissor: bool, + backdrop: &RenderTask, + readback: &RenderTask, + ) { + // Extract the rectangle in the backdrop surface's device space of where + // we need to read from. 
+ let readback_origin = match readback.kind { + RenderTaskKind::Readback(ReadbackTask { readback_origin: Some(o), .. }) => o, + RenderTaskKind::Readback(ReadbackTask { readback_origin: None, .. }) => { + // If this is a dummy readback, just early out. We know that the + // clear of the target will ensure the task rect is already zero alpha, + // so it won't affect the rendering output. + return; + } + _ => unreachable!(), + }; + + if uses_scissor { + self.device.disable_scissor(); + } + + let texture_source = TextureSource::TextureCache( + readback.get_target_texture(), + Swizzle::default(), + ); + let (cache_texture, _) = self.texture_resolver + .resolve(&texture_source).expect("bug: no source texture"); + + // Before submitting the composite batch, do the + // framebuffer readbacks that are needed for each + // composite operation in this batch. + let readback_rect = readback.get_target_rect(); + let backdrop_rect = backdrop.get_target_rect(); + let (backdrop_screen_origin, _) = match backdrop.kind { + RenderTaskKind::Picture(ref task_info) => (task_info.content_origin, task_info.device_pixel_scale), + _ => panic!("bug: composite on non-picture?"), + }; + + // Bind the FBO to blit the backdrop to. + // Called per-instance in case the FBO changes. The device will skip + // the GL call if the requested target is already bound. + let cache_draw_target = DrawTarget::from_texture( + cache_texture, + false, + ); + + // Get the rect that we ideally want, in space of the parent surface + let wanted_rect = DeviceRect::from_origin_and_size( + readback_origin, + readback_rect.size().to_f32(), + ); + + // Get the rect that is available on the parent surface. It may be smaller + // than desired because this is a picture cache tile covering only part of + // the wanted rect and/or because the parent surface was clipped. 
+ let avail_rect = DeviceRect::from_origin_and_size( + backdrop_screen_origin, + backdrop_rect.size().to_f32(), + ); + + if let Some(int_rect) = wanted_rect.intersection(&avail_rect) { + // If there is a valid intersection, work out the correct origins and + // sizes of the copy rects, and do the blit. + let copy_size = int_rect.size().to_i32(); + + let src_origin = backdrop_rect.min.to_f32() + + int_rect.min.to_vector() - + backdrop_screen_origin.to_vector(); + + let src = DeviceIntRect::from_origin_and_size( + src_origin.to_i32(), + copy_size, + ); + + let dest_origin = readback_rect.min.to_f32() + + int_rect.min.to_vector() - + readback_origin.to_vector(); + + let dest = DeviceIntRect::from_origin_and_size( + dest_origin.to_i32(), + copy_size, + ); + + // Should always be drawing to picture cache tiles or off-screen surface! + debug_assert!(!draw_target.is_default()); + let device_to_framebuffer = Scale::new(1i32); + + self.device.blit_render_target( + draw_target.into(), + src * device_to_framebuffer, + cache_draw_target, + dest * device_to_framebuffer, + TextureFilter::Linear, + ); + } + + // Restore draw target to current pass render target, and reset + // the read target. 
+ self.device.bind_draw_target(draw_target); + self.device.reset_read_target(); + + if uses_scissor { + self.device.enable_scissor(); + } + } + + fn handle_resolves( + &mut self, + resolve_ops: &[ResolveOp], + render_tasks: &RenderTaskGraph, + draw_target: DrawTarget, + ) { + if resolve_ops.is_empty() { + return; + } + + let _timer = self.gpu_profiler.start_timer(GPU_TAG_BLIT); + + for resolve_op in resolve_ops { + self.handle_resolve( + resolve_op, + render_tasks, + draw_target, + ); + } + + self.device.reset_read_target(); + } + + fn handle_prims( + &mut self, + draw_target: &DrawTarget, + prim_instances: &[PrimitiveInstanceData], + prim_instances_with_scissor: &FastHashMap<DeviceIntRect, Vec<PrimitiveInstanceData>>, + projection: &default::Transform3D<f32>, + stats: &mut RendererStats, + ) { + self.device.disable_depth_write(); + + { + let _timer = self.gpu_profiler.start_timer(GPU_TAG_INDIRECT_PRIM); + + if !prim_instances.is_empty() { + self.set_blend(false, FramebufferKind::Other); + + self.shaders.borrow_mut().ps_quad_textured.bind( + &mut self.device, + projection, + None, + &mut self.renderer_errors, + &mut self.profile, + ); + + self.draw_instanced_batch( + prim_instances, + VertexArrayKind::Primitive, + &BatchTextures::empty(), + stats, + ); + } + + if !prim_instances_with_scissor.is_empty() { + self.set_blend(true, FramebufferKind::Other); + self.device.set_blend_mode_premultiplied_alpha(); + self.device.enable_scissor(); + + self.shaders.borrow_mut().ps_quad_textured.bind( + &mut self.device, + projection, + None, + &mut self.renderer_errors, + &mut self.profile, + ); + + for (scissor_rect, prim_instances) in prim_instances_with_scissor { + self.device.set_scissor_rect(draw_target.to_framebuffer_rect(*scissor_rect)); + + self.draw_instanced_batch( + prim_instances, + VertexArrayKind::Primitive, + &BatchTextures::empty(), + stats, + ); + } + + self.device.disable_scissor(); + } + } + } + + fn handle_clips( + &mut self, + draw_target: &DrawTarget, + 
masks: &ClipMaskInstanceList, + projection: &default::Transform3D<f32>, + stats: &mut RendererStats, + ) { + self.device.disable_depth_write(); + + { + let _timer = self.gpu_profiler.start_timer(GPU_TAG_INDIRECT_MASK); + + self.set_blend(true, FramebufferKind::Other); + self.set_blend_mode_multiply(FramebufferKind::Other); + + if !masks.mask_instances_fast.is_empty() { + self.shaders.borrow_mut().ps_mask_fast.bind( + &mut self.device, + projection, + None, + &mut self.renderer_errors, + &mut self.profile, + ); + + self.draw_instanced_batch( + &masks.mask_instances_fast, + VertexArrayKind::Mask, + &BatchTextures::empty(), + stats, + ); + } + + if !masks.mask_instances_fast_with_scissor.is_empty() { + self.shaders.borrow_mut().ps_mask_fast.bind( + &mut self.device, + projection, + None, + &mut self.renderer_errors, + &mut self.profile, + ); + + self.device.enable_scissor(); + + for (scissor_rect, instances) in &masks.mask_instances_fast_with_scissor { + self.device.set_scissor_rect(draw_target.to_framebuffer_rect(*scissor_rect)); + + self.draw_instanced_batch( + instances, + VertexArrayKind::Mask, + &BatchTextures::empty(), + stats, + ); + } + + self.device.disable_scissor(); + } + + if !masks.image_mask_instances.is_empty() { + self.shaders.borrow_mut().ps_quad_textured.bind( + &mut self.device, + projection, + None, + &mut self.renderer_errors, + &mut self.profile, + ); + + for (texture, prim_instances) in &masks.image_mask_instances { + self.draw_instanced_batch( + prim_instances, + VertexArrayKind::Primitive, + &BatchTextures::composite_rgb(*texture), + stats, + ); + } + } + + if !masks.image_mask_instances_with_scissor.is_empty() { + self.device.enable_scissor(); + + self.shaders.borrow_mut().ps_quad_textured.bind( + &mut self.device, + projection, + None, + &mut self.renderer_errors, + &mut self.profile, + ); + + for ((scissor_rect, texture), prim_instances) in &masks.image_mask_instances_with_scissor { + 
self.device.set_scissor_rect(draw_target.to_framebuffer_rect(*scissor_rect)); + + self.draw_instanced_batch( + prim_instances, + VertexArrayKind::Primitive, + &BatchTextures::composite_rgb(*texture), + stats, + ); + } + + self.device.disable_scissor(); + } + + if !masks.mask_instances_slow.is_empty() { + self.shaders.borrow_mut().ps_mask.bind( + &mut self.device, + projection, + None, + &mut self.renderer_errors, + &mut self.profile, + ); + + self.draw_instanced_batch( + &masks.mask_instances_slow, + VertexArrayKind::Mask, + &BatchTextures::empty(), + stats, + ); + } + + if !masks.mask_instances_slow_with_scissor.is_empty() { + self.shaders.borrow_mut().ps_mask.bind( + &mut self.device, + projection, + None, + &mut self.renderer_errors, + &mut self.profile, + ); + + self.device.enable_scissor(); + + for (scissor_rect, instances) in &masks.mask_instances_slow_with_scissor { + self.device.set_scissor_rect(draw_target.to_framebuffer_rect(*scissor_rect)); + + self.draw_instanced_batch( + instances, + VertexArrayKind::Mask, + &BatchTextures::empty(), + stats, + ); + } + + self.device.disable_scissor(); + } + } + } + + fn handle_blits( + &mut self, + blits: &[BlitJob], + render_tasks: &RenderTaskGraph, + draw_target: DrawTarget, + ) { + if blits.is_empty() { + return; + } + + let _timer = self.gpu_profiler.start_timer(GPU_TAG_BLIT); + + // TODO(gw): For now, we don't bother batching these by source texture. + // If it ever shows up as an issue, we can easily batch them. + for blit in blits { + let (source, source_rect) = { + // A blit from the child render task into this target. + // TODO(gw): Support R8 format here once we start + // creating mips for alpha masks. 
+ let task = &render_tasks[blit.source]; + let source_rect = task.get_target_rect(); + let source_texture = task.get_texture_source(); + + (source_texture, source_rect) + }; + + debug_assert_eq!(source_rect.size(), blit.target_rect.size()); + let (texture, swizzle) = self.texture_resolver + .resolve(&source) + .expect("BUG: invalid source texture"); + + if swizzle != Swizzle::default() { + error!("Swizzle {:?} can't be handled by a blit", swizzle); + } + + let read_target = DrawTarget::from_texture( + texture, + false, + ); + + self.device.blit_render_target( + read_target.into(), + read_target.to_framebuffer_rect(source_rect), + draw_target, + draw_target.to_framebuffer_rect(blit.target_rect), + TextureFilter::Linear, + ); + } + } + + fn handle_scaling( + &mut self, + scalings: &FastHashMap<TextureSource, Vec<ScalingInstance>>, + projection: &default::Transform3D<f32>, + stats: &mut RendererStats, + ) { + if scalings.is_empty() { + return + } + + let _timer = self.gpu_profiler.start_timer(GPU_TAG_SCALE); + + for (source, instances) in scalings { + let buffer_kind = source.image_buffer_kind(); + + // When the source texture is an external texture, the UV rect is not known + // when the external surface descriptor is created, because external textures + // are not resolved until the lock() callback is invoked at the start of the + // frame render. We must therefore override the source rects now. + let uv_override_instances; + let instances = match source { + TextureSource::External(..) 
=> { + uv_override_instances = instances.iter().map(|instance| { + let texel_rect: TexelRect = self.texture_resolver.get_uv_rect( + &source, + instance.source_rect.cast().into() + ).into(); + ScalingInstance { + target_rect: instance.target_rect, + source_rect: DeviceRect::new(texel_rect.uv0, texel_rect.uv1), + } + }).collect::<Vec<_>>(); + &uv_override_instances + } + _ => &instances + }; + + self.shaders + .borrow_mut() + .get_scale_shader(buffer_kind) + .bind( + &mut self.device, + &projection, + Some(self.texture_resolver.get_texture_size(source).to_f32()), + &mut self.renderer_errors, + &mut self.profile, + ); + + self.draw_instanced_batch( + instances, + VertexArrayKind::Scale, + &BatchTextures::composite_rgb(*source), + stats, + ); + } + } + + fn handle_svg_filters( + &mut self, + textures: &BatchTextures, + svg_filters: &[SvgFilterInstance], + projection: &default::Transform3D<f32>, + stats: &mut RendererStats, + ) { + if svg_filters.is_empty() { + return; + } + + let _timer = self.gpu_profiler.start_timer(GPU_TAG_SVG_FILTER); + + self.shaders.borrow_mut().cs_svg_filter.bind( + &mut self.device, + &projection, + None, + &mut self.renderer_errors, + &mut self.profile, + ); + + self.draw_instanced_batch( + &svg_filters, + VertexArrayKind::SvgFilter, + textures, + stats, + ); + } + + fn handle_resolve( + &mut self, + resolve_op: &ResolveOp, + render_tasks: &RenderTaskGraph, + draw_target: DrawTarget, + ) { + for src_task_id in &resolve_op.src_task_ids { + let src_task = &render_tasks[*src_task_id]; + let src_info = match src_task.kind { + RenderTaskKind::Picture(ref info) => info, + _ => panic!("bug: not a picture"), + }; + let src_task_rect = src_task.get_target_rect().to_f32(); + + let dest_task = &render_tasks[resolve_op.dest_task_id]; + let dest_info = match dest_task.kind { + RenderTaskKind::Picture(ref info) => info, + _ => panic!("bug: not a picture"), + }; + let dest_task_rect = dest_task.get_target_rect().to_f32(); + + // Get the rect that we ideally 
want, in space of the parent surface + let wanted_rect = DeviceRect::from_origin_and_size( + dest_info.content_origin, + dest_task_rect.size().to_f32(), + ).cast_unit() * dest_info.device_pixel_scale.inverse(); + + // Get the rect that is available on the parent surface. It may be smaller + // than desired because this is a picture cache tile covering only part of + // the wanted rect and/or because the parent surface was clipped. + let avail_rect = DeviceRect::from_origin_and_size( + src_info.content_origin, + src_task_rect.size().to_f32(), + ).cast_unit() * src_info.device_pixel_scale.inverse(); + + if let Some(device_int_rect) = wanted_rect.intersection(&avail_rect) { + let src_int_rect = (device_int_rect * src_info.device_pixel_scale).cast_unit(); + let dest_int_rect = (device_int_rect * dest_info.device_pixel_scale).cast_unit(); + + // If there is a valid intersection, work out the correct origins and + // sizes of the copy rects, and do the blit. + + let src_origin = src_task_rect.min.to_f32() + + src_int_rect.min.to_vector() - + src_info.content_origin.to_vector(); + + let src = DeviceIntRect::from_origin_and_size( + src_origin.to_i32(), + src_int_rect.size().round().to_i32(), + ); + + let dest_origin = dest_task_rect.min.to_f32() + + dest_int_rect.min.to_vector() - + dest_info.content_origin.to_vector(); + + let dest = DeviceIntRect::from_origin_and_size( + dest_origin.to_i32(), + dest_int_rect.size().round().to_i32(), + ); + + let texture_source = TextureSource::TextureCache( + src_task.get_target_texture(), + Swizzle::default(), + ); + let (cache_texture, _) = self.texture_resolver + .resolve(&texture_source).expect("bug: no source texture"); + + let read_target = ReadTarget::from_texture(cache_texture); + + // Should always be drawing to picture cache tiles or off-screen surface! 
+ debug_assert!(!draw_target.is_default()); + let device_to_framebuffer = Scale::new(1i32); + + self.device.blit_render_target( + read_target, + src * device_to_framebuffer, + draw_target, + dest * device_to_framebuffer, + TextureFilter::Linear, + ); + } + } + } + + fn draw_picture_cache_target( + &mut self, + target: &PictureCacheTarget, + draw_target: DrawTarget, + projection: &default::Transform3D<f32>, + render_tasks: &RenderTaskGraph, + stats: &mut RendererStats, + ) { + profile_scope!("draw_picture_cache_target"); + + self.profile.inc(profiler::RENDERED_PICTURE_TILES); + let _gm = self.gpu_profiler.start_marker("picture cache target"); + let framebuffer_kind = FramebufferKind::Other; + + { + let _timer = self.gpu_profiler.start_timer(GPU_TAG_SETUP_TARGET); + self.device.bind_draw_target(draw_target); + + if self.device.get_capabilities().supports_qcom_tiled_rendering { + self.device.gl().start_tiling_qcom( + target.dirty_rect.min.x.max(0) as _, + target.dirty_rect.min.y.max(0) as _, + target.dirty_rect.width() as _, + target.dirty_rect.height() as _, + 0, + ); + } + + self.device.enable_depth_write(); + self.set_blend(false, framebuffer_kind); + + let clear_color = target.clear_color.map(|c| c.to_array()); + let scissor_rect = if self.device.get_capabilities().supports_render_target_partial_update + && (target.dirty_rect != target.valid_rect + || self.device.get_capabilities().prefers_clear_scissor) + { + Some(target.dirty_rect) + } else { + None + }; + match scissor_rect { + // If updating only a dirty rect within a picture cache target, the + // clear must also be scissored to that dirty region. + Some(r) if self.clear_caches_with_quads => { + self.device.enable_depth(DepthFunction::Always); + // Save the draw call count so that our reftests don't get confused... 
+ let old_draw_call_count = stats.total_draw_calls; + if clear_color.is_none() { + self.device.disable_color_write(); + } + let instance = ClearInstance { + rect: [ + r.min.x as f32, r.min.y as f32, + r.max.x as f32, r.max.y as f32, + ], + color: clear_color.unwrap_or([0.0; 4]), + }; + self.shaders.borrow_mut().ps_clear.bind( + &mut self.device, + &projection, + None, + &mut self.renderer_errors, + &mut self.profile, + ); + self.draw_instanced_batch( + &[instance], + VertexArrayKind::Clear, + &BatchTextures::empty(), + stats, + ); + if clear_color.is_none() { + self.device.enable_color_write(); + } + stats.total_draw_calls = old_draw_call_count; + self.device.disable_depth(); + } + other => { + let scissor_rect = other.map(|rect| { + draw_target.build_scissor_rect(Some(rect)) + }); + self.device.clear_target(clear_color, Some(1.0), scissor_rect); + } + }; + self.device.disable_depth_write(); + } + + match target.kind { + PictureCacheTargetKind::Draw { ref alpha_batch_container } => { + self.draw_alpha_batch_container( + alpha_batch_container, + draw_target, + framebuffer_kind, + projection, + render_tasks, + stats, + ); + } + PictureCacheTargetKind::Blit { task_id, sub_rect_offset } => { + let src_task = &render_tasks[task_id]; + let (texture, _swizzle) = self.texture_resolver + .resolve(&src_task.get_texture_source()) + .expect("BUG: invalid source texture"); + + let src_task_rect = src_task.get_target_rect(); + + let p0 = src_task_rect.min + sub_rect_offset; + let p1 = p0 + target.dirty_rect.size(); + let src_rect = DeviceIntRect::new(p0, p1); + + // TODO(gw): In future, it'd be tidier to have the draw target offset + // for DC surfaces handled by `blit_render_target`. However, + // for now they are only ever written to here. 
+ let target_rect = target + .dirty_rect + .translate(draw_target.offset().to_vector()) + .cast_unit(); + + self.device.blit_render_target( + ReadTarget::from_texture(texture), + src_rect.cast_unit(), + draw_target, + target_rect, + TextureFilter::Nearest, + ); + } + } + + self.device.invalidate_depth_target(); + if self.device.get_capabilities().supports_qcom_tiled_rendering { + self.device.gl().end_tiling_qcom(gl::COLOR_BUFFER_BIT0_QCOM); + } + } + + /// Draw an alpha batch container into a given draw target. This is used + /// by both color and picture cache target kinds. + fn draw_alpha_batch_container( + &mut self, + alpha_batch_container: &AlphaBatchContainer, + draw_target: DrawTarget, + framebuffer_kind: FramebufferKind, + projection: &default::Transform3D<f32>, + render_tasks: &RenderTaskGraph, + stats: &mut RendererStats, + ) { + let uses_scissor = alpha_batch_container.task_scissor_rect.is_some(); + + if uses_scissor { + self.device.enable_scissor(); + let scissor_rect = draw_target.build_scissor_rect( + alpha_batch_container.task_scissor_rect, + ); + self.device.set_scissor_rect(scissor_rect) + } + + if !alpha_batch_container.opaque_batches.is_empty() + && !self.debug_flags.contains(DebugFlags::DISABLE_OPAQUE_PASS) { + let _gl = self.gpu_profiler.start_marker("opaque batches"); + let opaque_sampler = self.gpu_profiler.start_sampler(GPU_SAMPLER_TAG_OPAQUE); + self.set_blend(false, framebuffer_kind); + //Note: depth equality is needed for split planes + self.device.enable_depth(DepthFunction::LessEqual); + self.device.enable_depth_write(); + + // Draw opaque batches front-to-back for maximum + // z-buffer efficiency! 
+ for batch in alpha_batch_container + .opaque_batches + .iter() + .rev() + { + if should_skip_batch(&batch.key.kind, self.debug_flags) { + continue; + } + + self.shaders.borrow_mut() + .get(&batch.key, batch.features, self.debug_flags, &self.device) + .bind( + &mut self.device, projection, None, + &mut self.renderer_errors, + &mut self.profile, + ); + + let _timer = self.gpu_profiler.start_timer(batch.key.kind.sampler_tag()); + self.draw_instanced_batch( + &batch.instances, + VertexArrayKind::Primitive, + &batch.key.textures, + stats + ); + } + + self.device.disable_depth_write(); + self.gpu_profiler.finish_sampler(opaque_sampler); + } else { + self.device.disable_depth(); + } + + if !alpha_batch_container.alpha_batches.is_empty() + && !self.debug_flags.contains(DebugFlags::DISABLE_ALPHA_PASS) { + let _gl = self.gpu_profiler.start_marker("alpha batches"); + let transparent_sampler = self.gpu_profiler.start_sampler(GPU_SAMPLER_TAG_TRANSPARENT); + self.set_blend(true, framebuffer_kind); + + let mut prev_blend_mode = BlendMode::None; + let shaders_rc = self.shaders.clone(); + + for batch in &alpha_batch_container.alpha_batches { + if should_skip_batch(&batch.key.kind, self.debug_flags) { + continue; + } + + let mut shaders = shaders_rc.borrow_mut(); + let shader = shaders.get( + &batch.key, + batch.features | BatchFeatures::ALPHA_PASS, + self.debug_flags, + &self.device, + ); + + if batch.key.blend_mode != prev_blend_mode { + match batch.key.blend_mode { + _ if self.debug_flags.contains(DebugFlags::SHOW_OVERDRAW) && + framebuffer_kind == FramebufferKind::Main => { + self.device.set_blend_mode_show_overdraw(); + } + BlendMode::None => { + unreachable!("bug: opaque blend in alpha pass"); + } + BlendMode::Alpha => { + self.device.set_blend_mode_alpha(); + } + BlendMode::PremultipliedAlpha => { + self.device.set_blend_mode_premultiplied_alpha(); + } + BlendMode::PremultipliedDestOut => { + self.device.set_blend_mode_premultiplied_dest_out(); + } + 
BlendMode::SubpixelDualSource => { + self.device.set_blend_mode_subpixel_dual_source(); + } + BlendMode::Advanced(mode) => { + if self.enable_advanced_blend_barriers { + self.device.gl().blend_barrier_khr(); + } + self.device.set_blend_mode_advanced(mode); + } + BlendMode::MultiplyDualSource => { + self.device.set_blend_mode_multiply_dual_source(); + } + BlendMode::Screen => { + self.device.set_blend_mode_screen(); + } + BlendMode::Exclusion => { + self.device.set_blend_mode_exclusion(); + } + BlendMode::PlusLighter => { + self.device.set_blend_mode_plus_lighter(); + } + } + prev_blend_mode = batch.key.blend_mode; + } + + // Handle special case readback for composites. + if let BatchKind::Brush(BrushBatchKind::MixBlend { task_id, backdrop_id }) = batch.key.kind { + // composites can't be grouped together because + // they may overlap and affect each other. + debug_assert_eq!(batch.instances.len(), 1); + self.handle_readback_composite( + draw_target, + uses_scissor, + &render_tasks[task_id], + &render_tasks[backdrop_id], + ); + } + + let _timer = self.gpu_profiler.start_timer(batch.key.kind.sampler_tag()); + shader.bind( + &mut self.device, + projection, + None, + &mut self.renderer_errors, + &mut self.profile, + ); + + self.draw_instanced_batch( + &batch.instances, + VertexArrayKind::Primitive, + &batch.key.textures, + stats + ); + } + + self.set_blend(false, framebuffer_kind); + self.gpu_profiler.finish_sampler(transparent_sampler); + } + + self.device.disable_depth(); + if uses_scissor { + self.device.disable_scissor(); + } + } + + /// Rasterize any external compositor surfaces that require updating + fn update_external_native_surfaces( + &mut self, + external_surfaces: &[ResolvedExternalSurface], + results: &mut RenderResults, + ) { + if external_surfaces.is_empty() { + return; + } + + let opaque_sampler = self.gpu_profiler.start_sampler(GPU_SAMPLER_TAG_OPAQUE); + + self.device.disable_depth(); + self.set_blend(false, FramebufferKind::Main); + + for surface in 
external_surfaces { + // See if this surface needs to be updated + let (native_surface_id, surface_size) = match surface.update_params { + Some(params) => params, + None => continue, + }; + + // When updating an external surface, the entire surface rect is used + // for all of the draw, dirty, valid and clip rect parameters. + let surface_rect = surface_size.into(); + + // Bind the native compositor surface to update + let surface_info = self.compositor_config + .compositor() + .unwrap() + .bind( + &mut self.device, + NativeTileId { + surface_id: native_surface_id, + x: 0, + y: 0, + }, + surface_rect, + surface_rect, + ); + + // Bind the native surface to current FBO target + let draw_target = DrawTarget::NativeSurface { + offset: surface_info.origin, + external_fbo_id: surface_info.fbo_id, + dimensions: surface_size, + }; + self.device.bind_draw_target(draw_target); + + let projection = Transform3D::ortho( + 0.0, + surface_size.width as f32, + 0.0, + surface_size.height as f32, + self.device.ortho_near_plane(), + self.device.ortho_far_plane(), + ); + + let ( textures, instance ) = match surface.color_data { + ResolvedExternalSurfaceColorData::Yuv{ + ref planes, color_space, format, channel_bit_depth, .. } => { + + // Bind an appropriate YUV shader for the texture format kind + self.shaders + .borrow_mut() + .get_composite_shader( + CompositeSurfaceFormat::Yuv, + surface.image_buffer_kind, + CompositeFeatures::empty(), + ).bind( + &mut self.device, + &projection, + None, + &mut self.renderer_errors, + &mut self.profile, + ); + + let textures = BatchTextures::composite_yuv( + planes[0].texture, + planes[1].texture, + planes[2].texture, + ); + + // When the texture is an external texture, the UV rect is not known when + // the external surface descriptor is created, because external textures + // are not resolved until the lock() callback is invoked at the start of + // the frame render. 
To handle this, query the texture resolver for the + // UV rect if it's an external texture, otherwise use the default UV rect. + let uv_rects = [ + self.texture_resolver.get_uv_rect(&textures.input.colors[0], planes[0].uv_rect), + self.texture_resolver.get_uv_rect(&textures.input.colors[1], planes[1].uv_rect), + self.texture_resolver.get_uv_rect(&textures.input.colors[2], planes[2].uv_rect), + ]; + + let instance = CompositeInstance::new_yuv( + surface_rect.cast_unit().to_f32(), + surface_rect.to_f32(), + // z-id is not relevant when updating a native compositor surface. + // TODO(gw): Support compositor surfaces without z-buffer, for memory / perf win here. + color_space, + format, + channel_bit_depth, + uv_rects, + CompositorTransform::identity(), + ); + + ( textures, instance ) + }, + ResolvedExternalSurfaceColorData::Rgb{ ref plane, .. } => { + self.shaders + .borrow_mut() + .get_composite_shader( + CompositeSurfaceFormat::Rgba, + surface.image_buffer_kind, + CompositeFeatures::empty(), + ).bind( + &mut self.device, + &projection, + None, + &mut self.renderer_errors, + &mut self.profile, + ); + + let textures = BatchTextures::composite_rgb(plane.texture); + let uv_rect = self.texture_resolver.get_uv_rect(&textures.input.colors[0], plane.uv_rect); + let instance = CompositeInstance::new_rgb( + surface_rect.cast_unit().to_f32(), + surface_rect.to_f32(), + PremultipliedColorF::WHITE, + uv_rect, + CompositorTransform::identity(), + ); + + ( textures, instance ) + }, + }; + + self.draw_instanced_batch( + &[instance], + VertexArrayKind::Composite, + &textures, + &mut results.stats, + ); + + self.compositor_config + .compositor() + .unwrap() + .unbind(&mut self.device); + } + + self.gpu_profiler.finish_sampler(opaque_sampler); + } + + /// Draw a list of tiles to the framebuffer + fn draw_tile_list<'a, I: Iterator<Item = &'a occlusion::Item>>( + &mut self, + tiles_iter: I, + composite_state: &CompositeState, + external_surfaces: &[ResolvedExternalSurface], + 
projection: &default::Transform3D<f32>, + stats: &mut RendererStats, + ) { + let mut current_shader_params = ( + CompositeSurfaceFormat::Rgba, + ImageBufferKind::Texture2D, + CompositeFeatures::empty(), + None, + ); + let mut current_textures = BatchTextures::empty(); + let mut instances = Vec::new(); + + self.shaders + .borrow_mut() + .get_composite_shader( + current_shader_params.0, + current_shader_params.1, + current_shader_params.2, + ).bind( + &mut self.device, + projection, + None, + &mut self.renderer_errors, + &mut self.profile, + ); + + for item in tiles_iter { + let tile = &composite_state.tiles[item.key]; + + let clip_rect = item.rectangle; + let tile_rect = tile.local_rect; + let transform = composite_state.get_device_transform(tile.transform_index).into(); + + // Work out the draw params based on the tile surface + let (instance, textures, shader_params) = match tile.surface { + CompositeTileSurface::Color { color } => { + let dummy = TextureSource::Dummy; + let image_buffer_kind = dummy.image_buffer_kind(); + let instance = CompositeInstance::new( + tile_rect, + clip_rect, + color.premultiplied(), + transform, + ); + let features = instance.get_rgb_features(); + ( + instance, + BatchTextures::composite_rgb(dummy), + (CompositeSurfaceFormat::Rgba, image_buffer_kind, features, None), + ) + } + CompositeTileSurface::Texture { surface: ResolvedSurfaceTexture::TextureCache { texture } } => { + let instance = CompositeInstance::new( + tile_rect, + clip_rect, + PremultipliedColorF::WHITE, + transform, + ); + let features = instance.get_rgb_features(); + ( + instance, + BatchTextures::composite_rgb(texture), + ( + CompositeSurfaceFormat::Rgba, + ImageBufferKind::Texture2D, + features, + None, + ), + ) + } + CompositeTileSurface::ExternalSurface { external_surface_index } => { + let surface = &external_surfaces[external_surface_index.0]; + + match surface.color_data { + ResolvedExternalSurfaceColorData::Yuv{ ref planes, color_space, format, channel_bit_depth, 
.. } => { + let textures = BatchTextures::composite_yuv( + planes[0].texture, + planes[1].texture, + planes[2].texture, + ); + + // When the texture is an external texture, the UV rect is not known when + // the external surface descriptor is created, because external textures + // are not resolved until the lock() callback is invoked at the start of + // the frame render. To handle this, query the texture resolver for the + // UV rect if it's an external texture, otherwise use the default UV rect. + let uv_rects = [ + self.texture_resolver.get_uv_rect(&textures.input.colors[0], planes[0].uv_rect), + self.texture_resolver.get_uv_rect(&textures.input.colors[1], planes[1].uv_rect), + self.texture_resolver.get_uv_rect(&textures.input.colors[2], planes[2].uv_rect), + ]; + + ( + CompositeInstance::new_yuv( + tile_rect, + clip_rect, + color_space, + format, + channel_bit_depth, + uv_rects, + transform, + ), + textures, + ( + CompositeSurfaceFormat::Yuv, + surface.image_buffer_kind, + CompositeFeatures::empty(), + None + ), + ) + }, + ResolvedExternalSurfaceColorData::Rgb { ref plane, .. 
} => { + let uv_rect = self.texture_resolver.get_uv_rect(&plane.texture, plane.uv_rect); + let instance = CompositeInstance::new_rgb( + tile_rect, + clip_rect, + PremultipliedColorF::WHITE, + uv_rect, + transform, + ); + let features = instance.get_rgb_features(); + ( + instance, + BatchTextures::composite_rgb(plane.texture), + ( + CompositeSurfaceFormat::Rgba, + surface.image_buffer_kind, + features, + Some(self.texture_resolver.get_texture_size(&plane.texture).to_f32()), + ), + ) + }, + } + } + CompositeTileSurface::Clear => { + let dummy = TextureSource::Dummy; + let image_buffer_kind = dummy.image_buffer_kind(); + let instance = CompositeInstance::new( + tile_rect, + clip_rect, + PremultipliedColorF::BLACK, + transform, + ); + let features = instance.get_rgb_features(); + ( + instance, + BatchTextures::composite_rgb(dummy), + (CompositeSurfaceFormat::Rgba, image_buffer_kind, features, None), + ) + } + CompositeTileSurface::Texture { surface: ResolvedSurfaceTexture::Native { .. } } => { + unreachable!("bug: found native surface in simple composite path"); + } + }; + + // Flush batch if shader params or textures changed + let flush_batch = !current_textures.is_compatible_with(&textures) || + shader_params != current_shader_params; + + if flush_batch { + if !instances.is_empty() { + self.draw_instanced_batch( + &instances, + VertexArrayKind::Composite, + &current_textures, + stats, + ); + instances.clear(); + } + } + + if shader_params != current_shader_params { + self.shaders + .borrow_mut() + .get_composite_shader(shader_params.0, shader_params.1, shader_params.2) + .bind( + &mut self.device, + projection, + shader_params.3, + &mut self.renderer_errors, + &mut self.profile, + ); + + current_shader_params = shader_params; + } + + current_textures = textures; + + // Add instance to current batch + instances.push(instance); + } + + // Flush the last batch + if !instances.is_empty() { + self.draw_instanced_batch( + &instances, + VertexArrayKind::Composite, + &current_textures, 
+ stats, + ); + } + } + + /// Composite picture cache tiles into the framebuffer. This is currently + /// the only way that picture cache tiles get drawn. In future, the tiles + /// will often be handed to the OS compositor, and this method will be + /// rarely used. + fn composite_simple( + &mut self, + composite_state: &CompositeState, + draw_target: DrawTarget, + projection: &default::Transform3D<f32>, + results: &mut RenderResults, + partial_present_mode: Option<PartialPresentMode>, + ) { + let _gm = self.gpu_profiler.start_marker("framebuffer"); + let _timer = self.gpu_profiler.start_timer(GPU_TAG_COMPOSITE); + + self.device.bind_draw_target(draw_target); + self.device.disable_depth_write(); + self.device.disable_depth(); + + // If using KHR_partial_update, call eglSetDamageRegion. + // This must be called exactly once per frame, and prior to any rendering to the main + // framebuffer. Additionally, on Mali-G77 we encountered rendering issues when calling + // this earlier in the frame, during offscreen render passes. So call it now, immediately + // before rendering to the main framebuffer. See bug 1685276 for details. + if let Some(partial_present) = self.compositor_config.partial_present() { + if let Some(PartialPresentMode::Single { dirty_rect }) = partial_present_mode { + partial_present.set_buffer_damage_region(&[dirty_rect.to_i32()]); + } + } + + let cap = composite_state.tiles.len(); + + let mut occlusion = occlusion::FrontToBackBuilder::with_capacity(cap, cap); + let mut clear_tiles = Vec::new(); + + for (idx, tile) in composite_state.tiles.iter().enumerate() { + // Clear tiles overwrite whatever is under them, so they are treated as opaque. + let is_opaque = tile.kind != TileKind::Alpha; + + let device_tile_box = composite_state.get_device_rect( + &tile.local_rect, + tile.transform_index + ); + + // Determine a clip rect to apply to this tile, depending on what + // the partial present mode is. 
+ let partial_clip_rect = match partial_present_mode { + Some(PartialPresentMode::Single { dirty_rect }) => dirty_rect, + None => device_tile_box, + }; + + // Simple compositor needs the valid rect in device space to match clip rect + let device_valid_rect = composite_state + .get_device_rect(&tile.local_valid_rect, tile.transform_index); + + let rect = device_tile_box + .intersection_unchecked(&tile.device_clip_rect) + .intersection_unchecked(&partial_clip_rect) + .intersection_unchecked(&device_valid_rect); + + if rect.is_empty() { + continue; + } + + if tile.kind == TileKind::Clear { + // Clear tiles are specific to how we render the window buttons on + // Windows 8. They clobber what's under them so they can be treated as opaque, + // but require a different blend state so they will be rendered after the opaque + // tiles and before transparent ones. + clear_tiles.push(occlusion::Item { rectangle: rect, key: idx }); + continue; + } + + occlusion.add(&rect, is_opaque, idx); + } + + // Clear the framebuffer + let clear_color = Some(self.clear_color.to_array()); + + match partial_present_mode { + Some(PartialPresentMode::Single { dirty_rect }) => { + // There is no need to clear if the dirty rect is occluded. Additionally, + // on Mali-G77 we have observed artefacts when calling glClear (even with + // the empty scissor rect set) after calling eglSetDamageRegion with an + // empty damage region. So avoid clearing in that case. See bug 1709548. + if !dirty_rect.is_empty() && occlusion.test(&dirty_rect) { + // We have a single dirty rect, so clear only that + self.device.clear_target(clear_color, + None, + Some(draw_target.to_framebuffer_rect(dirty_rect.to_i32()))); + } + } + None => { + // Partial present is disabled, so clear the entire framebuffer + self.device.clear_target(clear_color, + None, + None); + } + } + + // We are only interested in tiles backed with actual cached pixels so we don't + // count clear tiles here. 
+ let num_tiles = composite_state.tiles + .iter() + .filter(|tile| tile.kind != TileKind::Clear).count(); + self.profile.set(profiler::PICTURE_TILES, num_tiles); + + if !occlusion.opaque_items().is_empty() { + let opaque_sampler = self.gpu_profiler.start_sampler(GPU_SAMPLER_TAG_OPAQUE); + self.set_blend(false, FramebufferKind::Main); + self.draw_tile_list( + occlusion.opaque_items().iter(), + &composite_state, + &composite_state.external_surfaces, + projection, + &mut results.stats, + ); + self.gpu_profiler.finish_sampler(opaque_sampler); + } + + if !clear_tiles.is_empty() { + let transparent_sampler = self.gpu_profiler.start_sampler(GPU_SAMPLER_TAG_TRANSPARENT); + self.set_blend(true, FramebufferKind::Main); + self.device.set_blend_mode_premultiplied_dest_out(); + self.draw_tile_list( + clear_tiles.iter(), + &composite_state, + &composite_state.external_surfaces, + projection, + &mut results.stats, + ); + self.gpu_profiler.finish_sampler(transparent_sampler); + } + + // Draw alpha tiles + if !occlusion.alpha_items().is_empty() { + let transparent_sampler = self.gpu_profiler.start_sampler(GPU_SAMPLER_TAG_TRANSPARENT); + self.set_blend(true, FramebufferKind::Main); + self.set_blend_mode_premultiplied_alpha(FramebufferKind::Main); + self.draw_tile_list( + occlusion.alpha_items().iter().rev(), + &composite_state, + &composite_state.external_surfaces, + projection, + &mut results.stats, + ); + self.gpu_profiler.finish_sampler(transparent_sampler); + } + } + + fn draw_color_target( + &mut self, + draw_target: DrawTarget, + target: &ColorRenderTarget, + clear_depth: Option<f32>, + render_tasks: &RenderTaskGraph, + projection: &default::Transform3D<f32>, + stats: &mut RendererStats, + ) { + profile_scope!("draw_color_target"); + + self.profile.inc(profiler::COLOR_PASSES); + let _gm = self.gpu_profiler.start_marker("color target"); + + // sanity check for the depth buffer + if let DrawTarget::Texture { with_depth, .. 
} = draw_target { + assert!(with_depth >= target.needs_depth()); + } + + let framebuffer_kind = if draw_target.is_default() { + FramebufferKind::Main + } else { + FramebufferKind::Other + }; + + { + let _timer = self.gpu_profiler.start_timer(GPU_TAG_SETUP_TARGET); + self.device.bind_draw_target(draw_target); + + if self.device.get_capabilities().supports_qcom_tiled_rendering { + let preserve_mask = match target.clear_color { + Some(_) => 0, + None => gl::COLOR_BUFFER_BIT0_QCOM, + }; + self.device.gl().start_tiling_qcom( + target.used_rect.min.x.max(0) as _, + target.used_rect.min.y.max(0) as _, + target.used_rect.width() as _, + target.used_rect.height() as _, + preserve_mask, + ); + } + + self.device.disable_depth(); + self.set_blend(false, framebuffer_kind); + + if clear_depth.is_some() { + self.device.enable_depth_write(); + } + + let clear_color = target + .clear_color + .map(|color| color.to_array()); + + let clear_rect = match draw_target { + DrawTarget::NativeSurface { .. } => { + unreachable!("bug: native compositor surface in child target"); + } + DrawTarget::Default { rect, total_size, .. } if rect.min == FramebufferIntPoint::zero() && rect.size() == total_size => { + // whole screen is covered, no need for scissor + None + } + DrawTarget::Default { rect, .. } => { + Some(rect) + } + DrawTarget::Texture { .. } if self.enable_clear_scissor => { + // TODO(gw): Applying a scissor rect and minimal clear here + // is a very large performance win on the Intel and nVidia + // GPUs that I have tested with. It's possible it may be a + // performance penalty on other GPU types - we should test this + // and consider different code paths. + // + // Note: The above measurements were taken when render + // target slices were minimum 2048x2048. Now that we size + // them adaptively, this may be less of a win (except perhaps + // on a mostly-unused last slice of a large texture array). 
+ Some(draw_target.to_framebuffer_rect(target.used_rect)) + } + DrawTarget::Texture { .. } | DrawTarget::External { .. } => { + None + } + }; + + self.device.clear_target( + clear_color, + clear_depth, + clear_rect, + ); + + if clear_depth.is_some() { + self.device.disable_depth_write(); + } + } + + // Handle any resolves from parent pictures to this target + self.handle_resolves( + &target.resolve_ops, + render_tasks, + draw_target, + ); + + // Handle any blits from the texture cache to this target. + self.handle_blits( + &target.blits, + render_tasks, + draw_target, + ); + + // Draw any blurs for this target. + // Blurs are rendered as a standard 2-pass + // separable implementation. + // TODO(gw): In the future, consider having + // fast path blur shaders for common + // blur radii with fixed weights. + if !target.vertical_blurs.is_empty() || !target.horizontal_blurs.is_empty() { + let _timer = self.gpu_profiler.start_timer(GPU_TAG_BLUR); + + self.set_blend(false, framebuffer_kind); + self.shaders.borrow_mut().cs_blur_rgba8 + .bind(&mut self.device, projection, None, &mut self.renderer_errors, &mut self.profile); + + if !target.vertical_blurs.is_empty() { + self.draw_blurs( + &target.vertical_blurs, + stats, + ); + } + + if !target.horizontal_blurs.is_empty() { + self.draw_blurs( + &target.horizontal_blurs, + stats, + ); + } + } + + self.handle_scaling( + &target.scalings, + projection, + stats, + ); + + for (ref textures, ref filters) in &target.svg_filters { + self.handle_svg_filters( + textures, + filters, + projection, + stats, + ); + } + + for alpha_batch_container in &target.alpha_batch_containers { + self.draw_alpha_batch_container( + alpha_batch_container, + draw_target, + framebuffer_kind, + projection, + render_tasks, + stats, + ); + } + + self.handle_prims( + &draw_target, + &target.prim_instances, + &target.prim_instances_with_scissor, + projection, + stats, + ); + + for clip_masks in &target.clip_masks { + self.handle_clips( + &draw_target, + 
clip_masks, + projection, + stats, + ); + } + + if clear_depth.is_some() { + self.device.invalidate_depth_target(); + } + if self.device.get_capabilities().supports_qcom_tiled_rendering { + self.device.gl().end_tiling_qcom(gl::COLOR_BUFFER_BIT0_QCOM); + } + } + + fn draw_blurs( + &mut self, + blurs: &FastHashMap<TextureSource, Vec<BlurInstance>>, + stats: &mut RendererStats, + ) { + for (texture, blurs) in blurs { + let textures = BatchTextures::composite_rgb( + *texture, + ); + + self.draw_instanced_batch( + blurs, + VertexArrayKind::Blur, + &textures, + stats, + ); + } + } + + /// Draw all the instances in a clip batcher list to the current target. + fn draw_clip_batch_list( + &mut self, + list: &ClipBatchList, + draw_target: &DrawTarget, + projection: &default::Transform3D<f32>, + stats: &mut RendererStats, + ) { + if self.debug_flags.contains(DebugFlags::DISABLE_CLIP_MASKS) { + return; + } + + // draw rounded cornered rectangles + if !list.slow_rectangles.is_empty() { + let _gm2 = self.gpu_profiler.start_marker("slow clip rectangles"); + self.shaders.borrow_mut().cs_clip_rectangle_slow.bind( + &mut self.device, + projection, + None, + &mut self.renderer_errors, + &mut self.profile, + ); + self.draw_instanced_batch( + &list.slow_rectangles, + VertexArrayKind::ClipRect, + &BatchTextures::empty(), + stats, + ); + } + if !list.fast_rectangles.is_empty() { + let _gm2 = self.gpu_profiler.start_marker("fast clip rectangles"); + self.shaders.borrow_mut().cs_clip_rectangle_fast.bind( + &mut self.device, + projection, + None, + &mut self.renderer_errors, + &mut self.profile, + ); + self.draw_instanced_batch( + &list.fast_rectangles, + VertexArrayKind::ClipRect, + &BatchTextures::empty(), + stats, + ); + } + + // draw box-shadow clips + for (mask_texture_id, items) in list.box_shadows.iter() { + let _gm2 = self.gpu_profiler.start_marker("box-shadows"); + let textures = BatchTextures::composite_rgb(*mask_texture_id); + self.shaders.borrow_mut().cs_clip_box_shadow + 
.bind(&mut self.device, projection, None, &mut self.renderer_errors, &mut self.profile); + self.draw_instanced_batch( + items, + VertexArrayKind::ClipBoxShadow, + &textures, + stats, + ); + } + + // draw image masks + let mut using_scissor = false; + for ((mask_texture_id, clip_rect), items) in list.images.iter() { + let _gm2 = self.gpu_profiler.start_marker("clip images"); + // Some image masks may require scissoring to ensure they don't draw + // outside their task's target bounds. Axis-aligned primitives will + // be clamped inside the shader and should not require scissoring. + // TODO: We currently assume scissor state is off by default for + // alpha targets here, but in the future we may want to track the + // current scissor state so that this can be properly saved and + // restored here. + if let Some(clip_rect) = clip_rect { + if !using_scissor { + self.device.enable_scissor(); + using_scissor = true; + } + let scissor_rect = draw_target.build_scissor_rect(Some(*clip_rect)); + self.device.set_scissor_rect(scissor_rect); + } else if using_scissor { + self.device.disable_scissor(); + using_scissor = false; + } + let textures = BatchTextures::composite_rgb(*mask_texture_id); + self.shaders.borrow_mut().cs_clip_image + .bind(&mut self.device, projection, None, &mut self.renderer_errors, &mut self.profile); + self.draw_instanced_batch( + items, + VertexArrayKind::ClipImage, + &textures, + stats, + ); + } + if using_scissor { + self.device.disable_scissor(); + } + } + + fn draw_alpha_target( + &mut self, + draw_target: DrawTarget, + target: &AlphaRenderTarget, + projection: &default::Transform3D<f32>, + render_tasks: &RenderTaskGraph, + stats: &mut RendererStats, + ) { + profile_scope!("draw_alpha_target"); + + self.profile.inc(profiler::ALPHA_PASSES); + let _gm = self.gpu_profiler.start_marker("alpha target"); + let alpha_sampler = self.gpu_profiler.start_sampler(GPU_SAMPLER_TAG_ALPHA); + + { + let _timer = self.gpu_profiler.start_timer(GPU_TAG_SETUP_TARGET); 
+ self.device.bind_draw_target(draw_target); + self.device.disable_depth(); + self.device.disable_depth_write(); + self.set_blend(false, FramebufferKind::Other); + + let zero_color = [0.0, 0.0, 0.0, 0.0]; + let one_color = [1.0, 1.0, 1.0, 1.0]; + + // On some Adreno 4xx devices we have seen render tasks to alpha targets have no + // effect unless the target is fully cleared prior to rendering. See bug 1714227. + if self.device.get_capabilities().requires_alpha_target_full_clear { + self.device.clear_target( + Some(zero_color), + None, + None, + ); + } + + // On some Mali-T devices we have observed crashes in subsequent draw calls + // immediately after clearing the alpha render target regions with glClear(). + // Using the shader to clear the regions avoids the crash. See bug 1638593. + if self.clear_alpha_targets_with_quads + && !(target.zero_clears.is_empty() && target.one_clears.is_empty()) + { + let zeroes = target.zero_clears + .iter() + .map(|task_id| { + let rect = render_tasks[*task_id].get_target_rect().to_f32(); + ClearInstance { + rect: [ + rect.min.x, rect.min.y, + rect.max.x, rect.max.y, + ], + color: zero_color, + } + }); + + let ones = target.one_clears + .iter() + .map(|task_id| { + let rect = render_tasks[*task_id].get_target_rect().to_f32(); + ClearInstance { + rect: [ + rect.min.x, rect.min.y, + rect.max.x, rect.max.y, + ], + color: one_color, + } + }); + + let instances = zeroes.chain(ones).collect::<Vec<_>>(); + self.shaders.borrow_mut().ps_clear.bind( + &mut self.device, + &projection, + None, + &mut self.renderer_errors, + &mut self.profile, + ); + self.draw_instanced_batch( + &instances, + VertexArrayKind::Clear, + &BatchTextures::empty(), + stats, + ); + } else { + // TODO(gw): Applying a scissor rect and minimal clear here + // is a very large performance win on the Intel and nVidia + // GPUs that I have tested with. 
It's possible it may be a + // performance penalty on other GPU types - we should test this + // and consider different code paths. + for &task_id in &target.zero_clears { + let rect = render_tasks[task_id].get_target_rect(); + self.device.clear_target( + Some(zero_color), + None, + Some(draw_target.to_framebuffer_rect(rect)), + ); + } + + for &task_id in &target.one_clears { + let rect = render_tasks[task_id].get_target_rect(); + self.device.clear_target( + Some(one_color), + None, + Some(draw_target.to_framebuffer_rect(rect)), + ); + } + } + } + + // Draw any blurs for this target. + // Blurs are rendered as a standard 2-pass + // separable implementation. + // TODO(gw): In the future, consider having + // fast path blur shaders for common + // blur radii with fixed weights. + if !target.vertical_blurs.is_empty() || !target.horizontal_blurs.is_empty() { + let _timer = self.gpu_profiler.start_timer(GPU_TAG_BLUR); + + self.shaders.borrow_mut().cs_blur_a8 + .bind(&mut self.device, projection, None, &mut self.renderer_errors, &mut self.profile); + + if !target.vertical_blurs.is_empty() { + self.draw_blurs( + &target.vertical_blurs, + stats, + ); + } + + if !target.horizontal_blurs.is_empty() { + self.draw_blurs( + &target.horizontal_blurs, + stats, + ); + } + } + + self.handle_scaling( + &target.scalings, + projection, + stats, + ); + + // Draw the clip items into the tiled alpha mask. + { + let _timer = self.gpu_profiler.start_timer(GPU_TAG_CACHE_CLIP); + + // TODO(gw): Consider grouping multiple clip masks per shader + // invocation here to reduce memory bandwidth further? + + // Draw the primary clip mask - since this is the first mask + // for the task, we can disable blending, knowing that it will + // overwrite every pixel in the mask area. 
+ self.set_blend(false, FramebufferKind::Other); + self.draw_clip_batch_list( + &target.clip_batcher.primary_clips, + &draw_target, + projection, + stats, + ); + + // switch to multiplicative blending for secondary masks, using + // multiplicative blending to accumulate clips into the mask. + self.set_blend(true, FramebufferKind::Other); + self.set_blend_mode_multiply(FramebufferKind::Other); + self.draw_clip_batch_list( + &target.clip_batcher.secondary_clips, + &draw_target, + projection, + stats, + ); + + for clip_masks in &target.clip_masks { + self.handle_clips( + &draw_target, + clip_masks, + projection, + stats, + ); + } + } + + self.gpu_profiler.finish_sampler(alpha_sampler); + } + + fn draw_texture_cache_target( + &mut self, + texture: &CacheTextureId, + target: &TextureCacheRenderTarget, + render_tasks: &RenderTaskGraph, + stats: &mut RendererStats, + ) { + profile_scope!("draw_texture_cache_target"); + + self.device.disable_depth(); + self.device.disable_depth_write(); + + self.set_blend(false, FramebufferKind::Other); + + let texture = &self.texture_resolver.texture_cache_map[texture].texture; + let target_size = texture.get_dimensions(); + + let projection = Transform3D::ortho( + 0.0, + target_size.width as f32, + 0.0, + target_size.height as f32, + self.device.ortho_near_plane(), + self.device.ortho_far_plane(), + ); + + let draw_target = DrawTarget::from_texture( + texture, + false, + ); + self.device.bind_draw_target(draw_target); + + { + let _timer = self.gpu_profiler.start_timer(GPU_TAG_CLEAR); + + self.device.disable_depth(); + self.device.disable_depth_write(); + self.set_blend(false, FramebufferKind::Other); + + let color = [0.0, 0.0, 0.0, 0.0]; + if self.clear_caches_with_quads && !target.clears.is_empty() { + let instances = target.clears + .iter() + .map(|r| ClearInstance { + rect: [ + r.min.x as f32, r.min.y as f32, + r.max.x as f32, r.max.y as f32, + ], + color, + }) + .collect::<Vec<_>>(); + self.shaders.borrow_mut().ps_clear.bind( + &mut 
self.device, + &projection, + None, + &mut self.renderer_errors, + &mut self.profile, + ); + self.draw_instanced_batch( + &instances, + VertexArrayKind::Clear, + &BatchTextures::empty(), + stats, + ); + } else { + for rect in &target.clears { + self.device.clear_target( + Some(color), + None, + Some(draw_target.to_framebuffer_rect(*rect)), + ); + } + } + + // Handle any blits to this texture from child tasks. + self.handle_blits( + &target.blits, + render_tasks, + draw_target, + ); + } + + // Draw any borders for this target. + if !target.border_segments_solid.is_empty() || + !target.border_segments_complex.is_empty() + { + let _timer = self.gpu_profiler.start_timer(GPU_TAG_CACHE_BORDER); + + self.set_blend(true, FramebufferKind::Other); + self.set_blend_mode_premultiplied_alpha(FramebufferKind::Other); + + if !target.border_segments_solid.is_empty() { + self.shaders.borrow_mut().cs_border_solid.bind( + &mut self.device, + &projection, + None, + &mut self.renderer_errors, + &mut self.profile, + ); + + self.draw_instanced_batch( + &target.border_segments_solid, + VertexArrayKind::Border, + &BatchTextures::empty(), + stats, + ); + } + + if !target.border_segments_complex.is_empty() { + self.shaders.borrow_mut().cs_border_segment.bind( + &mut self.device, + &projection, + None, + &mut self.renderer_errors, + &mut self.profile, + ); + + self.draw_instanced_batch( + &target.border_segments_complex, + VertexArrayKind::Border, + &BatchTextures::empty(), + stats, + ); + } + + self.set_blend(false, FramebufferKind::Other); + } + + // Draw any line decorations for this target. 
+ if !target.line_decorations.is_empty() { + let _timer = self.gpu_profiler.start_timer(GPU_TAG_CACHE_LINE_DECORATION); + + self.set_blend(true, FramebufferKind::Other); + self.set_blend_mode_premultiplied_alpha(FramebufferKind::Other); + + self.shaders.borrow_mut().cs_line_decoration.bind( + &mut self.device, + &projection, + None, + &mut self.renderer_errors, + &mut self.profile, + ); + + self.draw_instanced_batch( + &target.line_decorations, + VertexArrayKind::LineDecoration, + &BatchTextures::empty(), + stats, + ); + + self.set_blend(false, FramebufferKind::Other); + } + + // Draw any fast path linear gradients for this target. + if !target.fast_linear_gradients.is_empty() { + let _timer = self.gpu_profiler.start_timer(GPU_TAG_CACHE_FAST_LINEAR_GRADIENT); + + self.set_blend(false, FramebufferKind::Other); + + self.shaders.borrow_mut().cs_fast_linear_gradient.bind( + &mut self.device, + &projection, + None, + &mut self.renderer_errors, + &mut self.profile, + ); + + self.draw_instanced_batch( + &target.fast_linear_gradients, + VertexArrayKind::FastLinearGradient, + &BatchTextures::empty(), + stats, + ); + } + + // Draw any linear gradients for this target. + if !target.linear_gradients.is_empty() { + let _timer = self.gpu_profiler.start_timer(GPU_TAG_CACHE_LINEAR_GRADIENT); + + self.set_blend(false, FramebufferKind::Other); + + self.shaders.borrow_mut().cs_linear_gradient.bind( + &mut self.device, + &projection, + None, + &mut self.renderer_errors, + &mut self.profile, + ); + + if let Some(ref texture) = self.dither_matrix_texture { + self.device.bind_texture(TextureSampler::Dither, texture, Swizzle::default()); + } + + self.draw_instanced_batch( + &target.linear_gradients, + VertexArrayKind::LinearGradient, + &BatchTextures::empty(), + stats, + ); + } + + // Draw any radial gradients for this target. 
+ if !target.radial_gradients.is_empty() { + let _timer = self.gpu_profiler.start_timer(GPU_TAG_CACHE_RADIAL_GRADIENT); + + self.set_blend(false, FramebufferKind::Other); + + self.shaders.borrow_mut().cs_radial_gradient.bind( + &mut self.device, + &projection, + None, + &mut self.renderer_errors, + &mut self.profile, + ); + + if let Some(ref texture) = self.dither_matrix_texture { + self.device.bind_texture(TextureSampler::Dither, texture, Swizzle::default()); + } + + self.draw_instanced_batch( + &target.radial_gradients, + VertexArrayKind::RadialGradient, + &BatchTextures::empty(), + stats, + ); + } + + // Draw any conic gradients for this target. + if !target.conic_gradients.is_empty() { + let _timer = self.gpu_profiler.start_timer(GPU_TAG_CACHE_CONIC_GRADIENT); + + self.set_blend(false, FramebufferKind::Other); + + self.shaders.borrow_mut().cs_conic_gradient.bind( + &mut self.device, + &projection, + None, + &mut self.renderer_errors, + &mut self.profile, + ); + + if let Some(ref texture) = self.dither_matrix_texture { + self.device.bind_texture(TextureSampler::Dither, texture, Swizzle::default()); + } + + self.draw_instanced_batch( + &target.conic_gradients, + VertexArrayKind::ConicGradient, + &BatchTextures::empty(), + stats, + ); + } + + // Draw any blurs for this target. 
+ if !target.horizontal_blurs.is_empty() { + let _timer = self.gpu_profiler.start_timer(GPU_TAG_BLUR); + + { + let mut shaders = self.shaders.borrow_mut(); + match target.target_kind { + RenderTargetKind::Alpha => &mut shaders.cs_blur_a8, + RenderTargetKind::Color => &mut shaders.cs_blur_rgba8, + }.bind(&mut self.device, &projection, None, &mut self.renderer_errors, &mut self.profile); + } + + self.draw_blurs( + &target.horizontal_blurs, + stats, + ); + } + } + + fn update_deferred_resolves(&mut self, deferred_resolves: &[DeferredResolve]) -> Option<GpuCacheUpdateList> { + // The first thing we do is run through any pending deferred + // resolves, and use a callback to get the UV rect for this + // custom item. Then we patch the resource_rects structure + // here before it's uploaded to the GPU. + if deferred_resolves.is_empty() { + return None; + } + + let handler = self.external_image_handler + .as_mut() + .expect("Found external image, but no handler set!"); + + let mut list = GpuCacheUpdateList { + frame_id: FrameId::INVALID, + clear: false, + height: self.gpu_cache_texture.get_height(), + blocks: Vec::new(), + updates: Vec::new(), + debug_commands: Vec::new(), + }; + + for (i, deferred_resolve) in deferred_resolves.iter().enumerate() { + self.gpu_profiler.place_marker("deferred resolve"); + let props = &deferred_resolve.image_properties; + let ext_image = props + .external_image + .expect("BUG: Deferred resolves must be external images!"); + // Provide rendering information for NativeTexture external images. + let image = handler.lock(ext_image.id, ext_image.channel_index); + let texture_target = match ext_image.image_type { + ExternalImageType::TextureHandle(target) => target, + ExternalImageType::Buffer => { + panic!("not a suitable image type in update_deferred_resolves()"); + } + }; + + // In order to produce the handle, the external image handler may call into + // the GL context and change some states. 
+ self.device.reset_state(); + + let texture = match image.source { + ExternalImageSource::NativeTexture(texture_id) => { + ExternalTexture::new( + texture_id, + texture_target, + image.uv, + deferred_resolve.rendering, + ) + } + ExternalImageSource::Invalid => { + warn!("Invalid ext-image"); + debug!( + "For ext_id:{:?}, channel:{}.", + ext_image.id, + ext_image.channel_index + ); + // Just use 0 as the gl handle for this failed case. + ExternalTexture::new( + 0, + texture_target, + image.uv, + deferred_resolve.rendering, + ) + } + ExternalImageSource::RawData(_) => { + panic!("Raw external data is not expected for deferred resolves!"); + } + }; + + self.texture_resolver + .external_images + .insert(DeferredResolveIndex(i as u32), texture); + + list.updates.push(GpuCacheUpdate::Copy { + block_index: list.blocks.len(), + block_count: BLOCKS_PER_UV_RECT, + address: deferred_resolve.address, + }); + list.blocks.push(image.uv.into()); + list.blocks.push([0f32; 4].into()); + } + + Some(list) + } + + fn unlock_external_images( + &mut self, + deferred_resolves: &[DeferredResolve], + ) { + if !self.texture_resolver.external_images.is_empty() { + let handler = self.external_image_handler + .as_mut() + .expect("Found external image, but no handler set!"); + + for (index, _) in self.texture_resolver.external_images.drain() { + let props = &deferred_resolves[index.0 as usize].image_properties; + let ext_image = props + .external_image + .expect("BUG: Deferred resolves must be external images!"); + handler.unlock(ext_image.id, ext_image.channel_index); + } + } + } + + /// Update the dirty rects based on current compositing mode and config + // TODO(gw): This can be tidied up significantly once the Draw compositor + // is implemented in terms of the compositor trait. 
+ fn calculate_dirty_rects( + &mut self, + buffer_age: usize, + composite_state: &CompositeState, + draw_target_dimensions: DeviceIntSize, + results: &mut RenderResults, + ) -> Option<PartialPresentMode> { + let mut partial_present_mode = None; + + let (max_partial_present_rects, draw_previous_partial_present_regions) = match self.current_compositor_kind { + CompositorKind::Native { .. } => { + // Assume that we can return a single dirty rect for native + // compositor for now, and that there is no buffer-age functionality. + // These params can be exposed by the compositor capabilities struct + // as the Draw compositor is ported to use it. + (1, false) + } + CompositorKind::Draw { draw_previous_partial_present_regions, max_partial_present_rects } => { + (max_partial_present_rects, draw_previous_partial_present_regions) + } + }; + + if max_partial_present_rects > 0 { + let prev_frames_damage_rect = if let Some(..) = self.compositor_config.partial_present() { + self.buffer_damage_tracker + .get_damage_rect(buffer_age) + .or_else(|| Some(DeviceRect::from_size(draw_target_dimensions.to_f32()))) + } else { + None + }; + + let can_use_partial_present = + composite_state.dirty_rects_are_valid && + !self.force_redraw && + !(prev_frames_damage_rect.is_none() && draw_previous_partial_present_regions) && + !self.debug_overlay_state.is_enabled; + + if can_use_partial_present { + let mut combined_dirty_rect = DeviceRect::zero(); + let fb_rect = DeviceRect::from_size(draw_target_dimensions.to_f32()); + + // Work out how many dirty rects WR produced, and if that's more than + // what the device supports. 
+ for tile in &composite_state.tiles { + if tile.kind == TileKind::Clear { + continue; + } + let dirty_rect = composite_state.get_device_rect( + &tile.local_dirty_rect, + tile.transform_index, + ); + + // In pathological cases where a tile is extremely zoomed, it + // may end up with device coords outside the range of an i32, + // so clamp it to the frame buffer rect here, before it gets + // cast to an i32 rect below. + if let Some(dirty_rect) = dirty_rect.intersection(&fb_rect) { + combined_dirty_rect = combined_dirty_rect.union(&dirty_rect); + } + } + + let combined_dirty_rect = combined_dirty_rect.round(); + let combined_dirty_rect_i32 = combined_dirty_rect.to_i32(); + // Return this frame's dirty region. If nothing has changed, don't return any dirty + // rects at all (the client can use this as a signal to skip present completely). + if !combined_dirty_rect.is_empty() { + results.dirty_rects.push(combined_dirty_rect_i32); + } + + // Track this frame's dirty region, for calculating subsequent frames' damage. + if draw_previous_partial_present_regions { + self.buffer_damage_tracker.push_dirty_rect(&combined_dirty_rect); + } + + // If the implementation requires manually keeping the buffer consistent, + // then we must combine this frame's dirty region with that of previous frames + // to determine the total_dirty_rect. This is used to determine what region we + // render to, and is what we send to the compositor as the buffer damage region + // (eg for KHR_partial_update). + let total_dirty_rect = if draw_previous_partial_present_regions { + combined_dirty_rect.union(&prev_frames_damage_rect.unwrap()) + } else { + combined_dirty_rect + }; + + partial_present_mode = Some(PartialPresentMode::Single { + dirty_rect: total_dirty_rect, + }); + } else { + // If we don't have a valid partial present scenario, return a single + // dirty rect to the client that covers the entire framebuffer. 
+ let fb_rect = DeviceIntRect::from_size( + draw_target_dimensions, + ); + results.dirty_rects.push(fb_rect); + + if draw_previous_partial_present_regions { + self.buffer_damage_tracker.push_dirty_rect(&fb_rect.to_f32()); + } + } + + self.force_redraw = false; + } + + partial_present_mode + } + + fn bind_frame_data(&mut self, frame: &mut Frame) { + profile_scope!("bind_frame_data"); + + let _timer = self.gpu_profiler.start_timer(GPU_TAG_SETUP_DATA); + + self.vertex_data_textures[self.current_vertex_data_textures].update( + &mut self.device, + &mut self.texture_upload_pbo_pool, + frame, + ); + self.current_vertex_data_textures = + (self.current_vertex_data_textures + 1) % VERTEX_DATA_TEXTURE_COUNT; + } + + fn update_native_surfaces(&mut self) { + profile_scope!("update_native_surfaces"); + + match self.compositor_config { + CompositorConfig::Native { ref mut compositor, .. } => { + for op in self.pending_native_surface_updates.drain(..) { + match op.details { + NativeSurfaceOperationDetails::CreateSurface { id, virtual_offset, tile_size, is_opaque } => { + let _inserted = self.allocated_native_surfaces.insert(id); + debug_assert!(_inserted, "bug: creating existing surface"); + compositor.create_surface( + &mut self.device, + id, + virtual_offset, + tile_size, + is_opaque, + ); + } + NativeSurfaceOperationDetails::CreateExternalSurface { id, is_opaque } => { + let _inserted = self.allocated_native_surfaces.insert(id); + debug_assert!(_inserted, "bug: creating existing surface"); + compositor.create_external_surface( + &mut self.device, + id, + is_opaque, + ); + } + NativeSurfaceOperationDetails::CreateBackdropSurface { id, color } => { + let _inserted = self.allocated_native_surfaces.insert(id); + debug_assert!(_inserted, "bug: creating existing surface"); + compositor.create_backdrop_surface( + &mut self.device, + id, + color, + ); + } + NativeSurfaceOperationDetails::DestroySurface { id } => { + let _existed = self.allocated_native_surfaces.remove(&id); + 
debug_assert!(_existed, "bug: removing unknown surface"); + compositor.destroy_surface(&mut self.device, id); + } + NativeSurfaceOperationDetails::CreateTile { id } => { + compositor.create_tile(&mut self.device, id); + } + NativeSurfaceOperationDetails::DestroyTile { id } => { + compositor.destroy_tile(&mut self.device, id); + } + NativeSurfaceOperationDetails::AttachExternalImage { id, external_image } => { + compositor.attach_external_image(&mut self.device, id, external_image); + } + } + } + } + CompositorConfig::Draw { .. } => { + // Ensure nothing is added in simple composite mode, since otherwise + // memory will leak as this doesn't get drained + debug_assert!(self.pending_native_surface_updates.is_empty()); + } + } + } + + fn draw_frame( + &mut self, + frame: &mut Frame, + device_size: Option<DeviceIntSize>, + buffer_age: usize, + results: &mut RenderResults, + ) { + profile_scope!("draw_frame"); + + // These markers seem to crash a lot on Android, see bug 1559834 + #[cfg(not(target_os = "android"))] + let _gm = self.gpu_profiler.start_marker("draw frame"); + + if frame.passes.is_empty() { + frame.has_been_rendered = true; + return; + } + + self.device.disable_depth_write(); + self.set_blend(false, FramebufferKind::Other); + self.device.disable_stencil(); + + self.bind_frame_data(frame); + + // Upload experimental GPU buffer texture if there is any data present + // TODO: Recycle these textures, upload via PBO or best approach for platform + let gpu_buffer_texture = if frame.gpu_buffer.is_empty() { + None + } else { + let gpu_buffer_texture = self.device.create_texture( + ImageBufferKind::Texture2D, + ImageFormat::RGBAF32, + frame.gpu_buffer.size.width, + frame.gpu_buffer.size.height, + TextureFilter::Nearest, + None, + ); + + self.device.bind_texture( + TextureSampler::GpuBuffer, + &gpu_buffer_texture, + Swizzle::default(), + ); + + self.device.upload_texture_immediate( + &gpu_buffer_texture, + &frame.gpu_buffer.data, + ); + + Some(gpu_buffer_texture) + 
}; + + // Determine the present mode and dirty rects, if device_size + // is Some(..). If it's None, no composite will occur and only + // picture cache and texture cache targets will be updated. + // TODO(gw): Split Frame so that it's clearer when a composite + // is occurring. + let present_mode = device_size.and_then(|device_size| { + self.calculate_dirty_rects( + buffer_age, + &frame.composite_state, + device_size, + results, + ) + }); + + // If we have a native OS compositor, then make use of that interface to + // specify how to composite each of the picture cache surfaces. First, we + // need to find each tile that may be bound and updated later in the frame + // and invalidate it so that the native render compositor knows that these + // tiles can't be composited early. Next, after all such tiles have been + // invalidated, then we queue surfaces for native composition by the render + // compositor before we actually update the tiles. This allows the render + // compositor to start early composition while the tiles are updating. + if let CompositorKind::Native { .. } = self.current_compositor_kind { + let compositor = self.compositor_config.compositor().unwrap(); + // Invalidate any native surface tiles that might be updated by passes. + if !frame.has_been_rendered { + for tile in &frame.composite_state.tiles { + if tile.kind == TileKind::Clear { + continue; + } + if !tile.local_dirty_rect.is_empty() { + if let CompositeTileSurface::Texture { surface: ResolvedSurfaceTexture::Native { id, .. } } = tile.surface { + let valid_rect = frame.composite_state.get_surface_rect( + &tile.local_valid_rect, + &tile.local_rect, + tile.transform_index, + ).to_i32(); + + compositor.invalidate_tile(&mut self.device, id, valid_rect); + } + } + } + } + // Ensure any external surfaces that might be used during early composition + // are invalidated first so that the native compositor can properly schedule + // composition to happen only when the external surface is updated. 
+ // See update_external_native_surfaces for more details. + for surface in &frame.composite_state.external_surfaces { + if let Some((native_surface_id, size)) = surface.update_params { + let surface_rect = size.into(); + compositor.invalidate_tile(&mut self.device, NativeTileId { surface_id: native_surface_id, x: 0, y: 0 }, surface_rect); + } + } + // Finally queue native surfaces for early composition, if applicable. By now, + // we have already invalidated any tiles that such surfaces may depend upon, so + // the native render compositor can keep track of when to actually schedule + // composition as surfaces are updated. + if device_size.is_some() { + frame.composite_state.composite_native( + self.clear_color, + &results.dirty_rects, + &mut self.device, + &mut **compositor, + ); + } + } + + for (_pass_index, pass) in frame.passes.iter_mut().enumerate() { + #[cfg(not(target_os = "android"))] + let _gm = self.gpu_profiler.start_marker(&format!("pass {}", _pass_index)); + + profile_scope!("offscreen target"); + + // If this frame has already been drawn, then any texture + // cache targets have already been updated and can be + // skipped this time. + if !frame.has_been_rendered { + for (&texture_id, target) in &pass.texture_cache { + self.draw_texture_cache_target( + &texture_id, + target, + &frame.render_tasks, + &mut results.stats, + ); + } + + if !pass.picture_cache.is_empty() { + self.profile.inc(profiler::COLOR_PASSES); + } + + // Draw picture caching tiles for this pass. + for picture_target in &pass.picture_cache { + results.stats.color_target_count += 1; + + let draw_target = match picture_target.surface { + ResolvedSurfaceTexture::TextureCache { ref texture } => { + let (texture, _) = self.texture_resolver + .resolve(texture) + .expect("bug"); + + DrawTarget::from_texture( + texture, + true, + ) + } + ResolvedSurfaceTexture::Native { id, size } => { + let surface_info = match self.current_compositor_kind { + CompositorKind::Native { .. 
} => { + let compositor = self.compositor_config.compositor().unwrap(); + compositor.bind( + &mut self.device, + id, + picture_target.dirty_rect, + picture_target.valid_rect, + ) + } + CompositorKind::Draw { .. } => { + unreachable!(); + } + }; + + DrawTarget::NativeSurface { + offset: surface_info.origin, + external_fbo_id: surface_info.fbo_id, + dimensions: size, + } + } + }; + + let projection = Transform3D::ortho( + 0.0, + draw_target.dimensions().width as f32, + 0.0, + draw_target.dimensions().height as f32, + self.device.ortho_near_plane(), + self.device.ortho_far_plane(), + ); + + self.draw_picture_cache_target( + picture_target, + draw_target, + &projection, + &frame.render_tasks, + &mut results.stats, + ); + + // Native OS surfaces must be unbound at the end of drawing to them + if let ResolvedSurfaceTexture::Native { .. } = picture_target.surface { + match self.current_compositor_kind { + CompositorKind::Native { .. } => { + let compositor = self.compositor_config.compositor().unwrap(); + compositor.unbind(&mut self.device); + } + CompositorKind::Draw { .. 
} => { + unreachable!(); + } + } + } + } + } + + for target in &pass.alpha.targets { + results.stats.alpha_target_count += 1; + + let texture_id = target.texture_id(); + + let alpha_tex = self.texture_resolver.get_cache_texture_mut(&texture_id); + + let draw_target = DrawTarget::from_texture( + alpha_tex, + false, + ); + + let projection = Transform3D::ortho( + 0.0, + draw_target.dimensions().width as f32, + 0.0, + draw_target.dimensions().height as f32, + self.device.ortho_near_plane(), + self.device.ortho_far_plane(), + ); + + self.draw_alpha_target( + draw_target, + target, + &projection, + &frame.render_tasks, + &mut results.stats, + ); + } + + let color_rt_info = RenderTargetInfo { has_depth: pass.color.needs_depth() }; + + for target in &pass.color.targets { + results.stats.color_target_count += 1; + + let texture_id = target.texture_id(); + + let color_tex = self.texture_resolver.get_cache_texture_mut(&texture_id); + + self.device.reuse_render_target::<u8>( + color_tex, + color_rt_info, + ); + + let draw_target = DrawTarget::from_texture( + color_tex, + target.needs_depth(), + ); + + let projection = Transform3D::ortho( + 0.0, + draw_target.dimensions().width as f32, + 0.0, + draw_target.dimensions().height as f32, + self.device.ortho_near_plane(), + self.device.ortho_far_plane(), + ); + + let clear_depth = if target.needs_depth() { + Some(1.0) + } else { + None + }; + + self.draw_color_target( + draw_target, + target, + clear_depth, + &frame.render_tasks, + &projection, + &mut results.stats, + ); + } + + // Only end the pass here and invalidate previous textures for + // off-screen targets. Deferring return of the inputs to the + // frame buffer until the implicit end_pass in end_frame allows + // debug draw overlays to be added without triggering a copy + // resolve stage in mobile / tiled GPUs. 
+ self.texture_resolver.end_pass( + &mut self.device, + &pass.textures_to_invalidate, + ); + { + profile_scope!("gl.flush"); + self.device.gl().flush(); + } + } + + self.composite_frame( + frame, + device_size, + results, + present_mode, + ); + + if let Some(gpu_buffer_texture) = gpu_buffer_texture { + self.device.delete_texture(gpu_buffer_texture); + } + + frame.has_been_rendered = true; + } + + fn composite_frame( + &mut self, + frame: &mut Frame, + device_size: Option<DeviceIntSize>, + results: &mut RenderResults, + present_mode: Option<PartialPresentMode>, + ) { + profile_scope!("main target"); + + if let Some(device_size) = device_size { + results.stats.color_target_count += 1; + results.picture_cache_debug = mem::replace( + &mut frame.composite_state.picture_cache_debug, + PictureCacheDebugInfo::new(), + ); + + let size = frame.device_rect.size().to_f32(); + let surface_origin_is_top_left = self.device.surface_origin_is_top_left(); + let (bottom, top) = if surface_origin_is_top_left { + (0.0, size.height) + } else { + (size.height, 0.0) + }; + + let projection = Transform3D::ortho( + 0.0, + size.width, + bottom, + top, + self.device.ortho_near_plane(), + self.device.ortho_far_plane(), + ); + + let fb_scale = Scale::<_, _, FramebufferPixel>::new(1i32); + let mut fb_rect = frame.device_rect * fb_scale; + + if !surface_origin_is_top_left { + let h = fb_rect.height(); + fb_rect.min.y = device_size.height - fb_rect.max.y; + fb_rect.max.y = fb_rect.min.y + h; + } + + let draw_target = DrawTarget::Default { + rect: fb_rect, + total_size: device_size * fb_scale, + surface_origin_is_top_left, + }; + + // If we have a native OS compositor, then make use of that interface + // to specify how to composite each of the picture cache surfaces. + match self.current_compositor_kind { + CompositorKind::Native { .. } => { + // We have already queued surfaces for early native composition by this point. 
+ // All that is left is to finally update any external native surfaces that were + // invalidated so that composition can complete. + self.update_external_native_surfaces( + &frame.composite_state.external_surfaces, + results, + ); + } + CompositorKind::Draw { .. } => { + self.composite_simple( + &frame.composite_state, + draw_target, + &projection, + results, + present_mode, + ); + } + } + } else { + // Rendering a frame without presenting it will confuse the partial + // present logic, so force a full present for the next frame. + self.force_redraw(); + } + } + + pub fn debug_renderer(&mut self) -> Option<&mut DebugRenderer> { + self.debug.get_mut(&mut self.device) + } + + pub fn get_debug_flags(&self) -> DebugFlags { + self.debug_flags + } + + pub fn set_debug_flags(&mut self, flags: DebugFlags) { + if let Some(enabled) = flag_changed(self.debug_flags, flags, DebugFlags::GPU_TIME_QUERIES) { + if enabled { + self.gpu_profiler.enable_timers(); + } else { + self.gpu_profiler.disable_timers(); + } + } + if let Some(enabled) = flag_changed(self.debug_flags, flags, DebugFlags::GPU_SAMPLE_QUERIES) { + if enabled { + self.gpu_profiler.enable_samplers(); + } else { + self.gpu_profiler.disable_samplers(); + } + } + + self.debug_flags = flags; + } + + pub fn set_profiler_ui(&mut self, ui_str: &str) { + self.profiler.set_ui(ui_str); + } + + fn draw_frame_debug_items(&mut self, items: &[DebugItem]) { + if items.is_empty() { + return; + } + + let debug_renderer = match self.debug.get_mut(&mut self.device) { + Some(render) => render, + None => return, + }; + + for item in items { + match item { + DebugItem::Rect { rect, outer_color, inner_color } => { + debug_renderer.add_quad( + rect.min.x, + rect.min.y, + rect.max.x, + rect.max.y, + (*inner_color).into(), + (*inner_color).into(), + ); + + debug_renderer.add_rect( + &rect.to_i32(), + (*outer_color).into(), + ); + } + DebugItem::Text { ref msg, position, color } => { + debug_renderer.add_text( + position.x, + position.y, + 
msg, + (*color).into(), + None, + ); + } + } + } + } + + fn draw_render_target_debug(&mut self, draw_target: &DrawTarget) { + if !self.debug_flags.contains(DebugFlags::RENDER_TARGET_DBG) { + return; + } + + let debug_renderer = match self.debug.get_mut(&mut self.device) { + Some(render) => render, + None => return, + }; + + let textures = self.texture_resolver + .texture_cache_map + .values() + .filter(|item| item.category == TextureCacheCategory::RenderTarget) + .map(|item| &item.texture) + .collect::<Vec<&Texture>>(); + + Self::do_debug_blit( + &mut self.device, + debug_renderer, + textures, + draw_target, + 0, + &|_| [0.0, 1.0, 0.0, 1.0], // Use green for all RTs. + ); + } + + fn draw_zoom_debug( + &mut self, + device_size: DeviceIntSize, + ) { + if !self.debug_flags.contains(DebugFlags::ZOOM_DBG) { + return; + } + + let debug_renderer = match self.debug.get_mut(&mut self.device) { + Some(render) => render, + None => return, + }; + + let source_size = DeviceIntSize::new(64, 64); + let target_size = DeviceIntSize::new(1024, 1024); + + let source_origin = DeviceIntPoint::new( + (self.cursor_position.x - source_size.width / 2) + .min(device_size.width - source_size.width) + .max(0), + (self.cursor_position.y - source_size.height / 2) + .min(device_size.height - source_size.height) + .max(0), + ); + + let source_rect = DeviceIntRect::from_origin_and_size( + source_origin, + source_size, + ); + + let target_rect = DeviceIntRect::from_origin_and_size( + DeviceIntPoint::new( + device_size.width - target_size.width - 64, + device_size.height - target_size.height - 64, + ), + target_size, + ); + + let texture_rect = FramebufferIntRect::from_size( + source_rect.size().cast_unit(), + ); + + debug_renderer.add_rect( + &target_rect.inflate(1, 1), + debug_colors::RED.into(), + ); + + if self.zoom_debug_texture.is_none() { + let texture = self.device.create_texture( + ImageBufferKind::Texture2D, + ImageFormat::BGRA8, + source_rect.width(), + source_rect.height(), + 
TextureFilter::Nearest, + Some(RenderTargetInfo { has_depth: false }), + ); + + self.zoom_debug_texture = Some(texture); + } + + // Copy frame buffer into the zoom texture + let read_target = DrawTarget::new_default(device_size, self.device.surface_origin_is_top_left()); + self.device.blit_render_target( + read_target.into(), + read_target.to_framebuffer_rect(source_rect), + DrawTarget::from_texture( + self.zoom_debug_texture.as_ref().unwrap(), + false, + ), + texture_rect, + TextureFilter::Nearest, + ); + + // Draw the zoom texture back to the framebuffer + self.device.blit_render_target( + ReadTarget::from_texture( + self.zoom_debug_texture.as_ref().unwrap(), + ), + texture_rect, + read_target, + read_target.to_framebuffer_rect(target_rect), + TextureFilter::Nearest, + ); + } + + fn draw_texture_cache_debug(&mut self, draw_target: &DrawTarget) { + if !self.debug_flags.contains(DebugFlags::TEXTURE_CACHE_DBG) { + return; + } + + let debug_renderer = match self.debug.get_mut(&mut self.device) { + Some(render) => render, + None => return, + }; + + let textures = self.texture_resolver + .texture_cache_map + .values() + .filter(|item| item.category == TextureCacheCategory::Atlas) + .map(|item| &item.texture) + .collect::<Vec<&Texture>>(); + + fn select_color(texture: &Texture) -> [f32; 4] { + if texture.flags().contains(TextureFlags::IS_SHARED_TEXTURE_CACHE) { + [1.0, 0.5, 0.0, 1.0] // Orange for shared. + } else { + [1.0, 0.0, 1.0, 1.0] // Fuchsia for standalone. 
+ } + } + + Self::do_debug_blit( + &mut self.device, + debug_renderer, + textures, + draw_target, + if self.debug_flags.contains(DebugFlags::RENDER_TARGET_DBG) { 544 } else { 0 }, + &select_color, + ); + } + + fn do_debug_blit( + device: &mut Device, + debug_renderer: &mut DebugRenderer, + mut textures: Vec<&Texture>, + draw_target: &DrawTarget, + bottom: i32, + select_color: &dyn Fn(&Texture) -> [f32; 4], + ) { + let mut spacing = 16; + let mut size = 512; + + let device_size = draw_target.dimensions(); + let fb_width = device_size.width; + let fb_height = device_size.height; + let surface_origin_is_top_left = draw_target.surface_origin_is_top_left(); + + let num_textures = textures.len() as i32; + + if num_textures * (size + spacing) > fb_width { + let factor = fb_width as f32 / (num_textures * (size + spacing)) as f32; + size = (size as f32 * factor) as i32; + spacing = (spacing as f32 * factor) as i32; + } + + let text_height = 14; // Visually approximated. + let text_margin = 1; + let tag_height = text_height + text_margin * 2; + let tag_y = fb_height - (bottom + spacing + tag_height); + let image_y = tag_y - size; + + // Sort the display by size (in bytes), so that left-to-right is + // largest-to-smallest. + // + // Note that the vec here is in increasing order, because the elements + // get drawn right-to-left. + textures.sort_by_key(|t| t.size_in_bytes()); + + let mut i = 0; + for texture in textures.iter() { + let dimensions = texture.get_dimensions(); + let src_rect = FramebufferIntRect::from_size( + FramebufferIntSize::new(dimensions.width as i32, dimensions.height as i32), + ); + + let x = fb_width - (spacing + size) * (i as i32 + 1); + + // If we have more targets than fit on one row in screen, just early exit. + if x > fb_width { + return; + } + + // Draw the info tag. 
+ let tag_rect = rect(x, tag_y, size, tag_height).to_box2d(); + let tag_color = select_color(texture); + device.clear_target( + Some(tag_color), + None, + Some(draw_target.to_framebuffer_rect(tag_rect)), + ); + + // Draw the dimensions onto the tag. + let dim = texture.get_dimensions(); + let text_rect = tag_rect.inflate(-text_margin, -text_margin); + debug_renderer.add_text( + text_rect.min.x as f32, + text_rect.max.y as f32, // Top-relative. + &format!("{}x{}", dim.width, dim.height), + ColorU::new(0, 0, 0, 255), + Some(tag_rect.to_f32()) + ); + + // Blit the contents of the texture. + let dest_rect = draw_target.to_framebuffer_rect(rect(x, image_y, size, size).to_box2d()); + let read_target = ReadTarget::from_texture(texture); + + if surface_origin_is_top_left { + device.blit_render_target( + read_target, + src_rect, + *draw_target, + dest_rect, + TextureFilter::Linear, + ); + } else { + // Invert y. + device.blit_render_target_invert_y( + read_target, + src_rect, + *draw_target, + dest_rect, + ); + } + i += 1; + } + } + + fn draw_epoch_debug(&mut self) { + if !self.debug_flags.contains(DebugFlags::EPOCHS) { + return; + } + + let debug_renderer = match self.debug.get_mut(&mut self.device) { + Some(render) => render, + None => return, + }; + + let dy = debug_renderer.line_height(); + let x0: f32 = 30.0; + let y0: f32 = 30.0; + let mut y = y0; + let mut text_width = 0.0; + for ((pipeline, document_id), epoch) in &self.pipeline_info.epochs { + y += dy; + let w = debug_renderer.add_text( + x0, y, + &format!("({:?}, {:?}): {:?}", pipeline, document_id, epoch), + ColorU::new(255, 255, 0, 255), + None, + ).size.width; + text_width = f32::max(text_width, w); + } + + let margin = 10.0; + debug_renderer.add_quad( + x0 - margin, + y0 - margin, + x0 + text_width + margin, + y + margin, + ColorU::new(25, 25, 25, 200), + ColorU::new(51, 51, 51, 200), + ); + } + + fn draw_window_visibility_debug(&mut self) { + if !self.debug_flags.contains(DebugFlags::WINDOW_VISIBILITY_DBG) { 
+ return; + } + + let debug_renderer = match self.debug.get_mut(&mut self.device) { + Some(render) => render, + None => return, + }; + + let x: f32 = 30.0; + let y: f32 = 40.0; + + if let CompositorConfig::Native { ref mut compositor, .. } = self.compositor_config { + let visibility = compositor.get_window_visibility(&mut self.device); + let color = if visibility.is_fully_occluded { + ColorU::new(255, 0, 0, 255) + + } else { + ColorU::new(0, 0, 255, 255) + }; + + debug_renderer.add_text( + x, y, + &format!("{:?}", visibility), + color, + None, + ); + } + + + } + + fn draw_gpu_cache_debug(&mut self, device_size: DeviceIntSize) { + if !self.debug_flags.contains(DebugFlags::GPU_CACHE_DBG) { + return; + } + + let debug_renderer = match self.debug.get_mut(&mut self.device) { + Some(render) => render, + None => return, + }; + + let (x_off, y_off) = (30f32, 30f32); + let height = self.gpu_cache_texture.get_height() + .min(device_size.height - (y_off as i32) * 2) as usize; + debug_renderer.add_quad( + x_off, + y_off, + x_off + MAX_VERTEX_TEXTURE_WIDTH as f32, + y_off + height as f32, + ColorU::new(80, 80, 80, 80), + ColorU::new(80, 80, 80, 80), + ); + + let upper = self.gpu_cache_debug_chunks.len().min(height); + for chunk in self.gpu_cache_debug_chunks[0..upper].iter().flatten() { + let color = ColorU::new(250, 0, 0, 200); + debug_renderer.add_quad( + x_off + chunk.address.u as f32, + y_off + chunk.address.v as f32, + x_off + chunk.address.u as f32 + chunk.size as f32, + y_off + chunk.address.v as f32 + 1.0, + color, + color, + ); + } + } + + /// Pass-through to `Device::read_pixels_into`, used by Gecko's WR bindings. 
pub fn read_pixels_into(&mut self, rect: FramebufferIntRect, format: ImageFormat, output: &mut [u8]) {
    self.device.read_pixels_into(rect, format, output);
}

/// Reads back `rect` as RGBA8 and returns the bytes in a freshly allocated
/// buffer of `rect.area() * 4` bytes (four bytes per pixel).
pub fn read_pixels_rgba8(&mut self, rect: FramebufferIntRect) -> Vec<u8> {
    // Widen to usize *before* scaling by the 4 bytes per pixel, so that the
    // multiplication is not carried out in the rect's (narrower, presumably
    // i32) integer type where it could overflow for very large rects.
    let byte_len = rect.area() as usize * 4;
    let mut pixels = vec![0; byte_len];
    self.device.read_pixels_into(rect, ImageFormat::RGBA8, &mut pixels);
    pixels
}

/// De-initialize the Renderer safely, assuming the GL is still alive and active.
///
/// Consumes the renderer. All releases below are issued between a
/// `begin_frame` / `end_frame` pair on the device.
pub fn deinit(mut self) {
    // Note: this is a fake frame, only needed because texture deletion is
    // required to happen inside a frame.
    self.device.begin_frame();
    // If we are using a native compositor, ensure that any remaining native
    // surfaces are freed.
    if let CompositorConfig::Native { mut compositor, .. } = self.compositor_config {
        for id in self.allocated_native_surfaces.drain() {
            compositor.destroy_surface(&mut self.device, id);
        }
        // Destroy the debug overlay surface, if currently allocated.
        if self.debug_overlay_state.current_size.is_some() {
            compositor.destroy_surface(&mut self.device, NativeSurfaceId::DEBUG_OVERLAY);
        }
        compositor.deinit(&mut self.device);
    }
    self.gpu_cache_texture.deinit(&mut self.device);
    if let Some(dither_matrix_texture) = self.dither_matrix_texture {
        self.device.delete_texture(dither_matrix_texture);
    }
    if let Some(zoom_debug_texture) = self.zoom_debug_texture {
        self.device.delete_texture(zoom_debug_texture);
    }
    for textures in self.vertex_data_textures.drain(..) {
        textures.deinit(&mut self.device);
    }
    self.texture_upload_pbo_pool.deinit(&mut self.device);
    self.staging_texture_pool.delete_textures(&mut self.device);
    self.texture_resolver.deinit(&mut self.device);
    self.vaos.deinit(&mut self.device);
    self.debug.deinit(&mut self.device);

    // The shaders are only de-initialized here when this renderer holds the
    // last reference to them; otherwise `try_unwrap` fails and they are left alone.
    if let Ok(shaders) = Rc::try_unwrap(self.shaders) {
        shaders.into_inner().deinit(&mut self.device);
    }

    if let Some(async_screenshots) = self.async_screenshots.take() {
        async_screenshots.deinit(&mut self.device);
    }

    if let Some(async_frame_recorder) = self.async_frame_recorder.take() {
        async_frame_recorder.deinit(&mut self.device);
    }

    #[cfg(feature = "capture")]
    self.device.delete_fbo(self.read_fbo);
    #[cfg(feature = "replay")]
    for (_, ext) in self.owned_external_images {
        self.device.delete_external_texture(ext);
    }
    self.device.end_frame();
}

/// Returns the size reported by the configured `size_of_ops` for the
/// allocation behind `ptr`.
///
/// Panics if `size_of_ops` has not been set.
fn size_of<T>(&self, ptr: *const T) -> usize {
    let ops = self.size_of_ops.as_ref().unwrap();
    // NOTE(review): soundness depends on `ptr` being something
    // `malloc_size_of` accepts -- presumably the start of a live heap
    // allocation; confirm against the callers.
    unsafe { ops.malloc_size_of(ptr) }
}

/// Collects a memory report.
///
/// Panics if `size_of_ops` has not been set.
pub fn report_memory(&self, swgl: *mut c_void) -> MemoryReport {
    let mut report = MemoryReport::default();
    // Looked up once and shared by every reporter below.
    let size_of_ops = self.size_of_ops.as_ref().unwrap();

    // GPU cache CPU memory.
    self.gpu_cache_texture.report_memory_to(&mut report, size_of_ops);

    self.staging_texture_pool.report_memory_to(&mut report, size_of_ops);

    // Render task CPU memory.
    for (_id, doc) in &self.active_documents {
        report.render_tasks += self.size_of(doc.frame.render_tasks.tasks.as_ptr());
        report.render_tasks += self.size_of(doc.frame.render_tasks.task_data.as_ptr());
    }

    // Vertex data GPU memory.
    for textures in &self.vertex_data_textures {
        report.vertex_data_textures += textures.size_in_bytes();
    }

    // Texture cache and render target GPU memory.
    report += self.texture_resolver.report_memory();

    // Texture upload PBO memory.
    report += self.texture_upload_pbo_pool.report_memory();

    // Textures held internally within the device layer.
    report += self.device.report_memory(size_of_ops, swgl);

    report
}

/// Sets the blend mode. Blend is unconditionally set if the "show overdraw"
/// debugging mode is enabled and the target is the main framebuffer.
fn set_blend(&mut self, mut blend: bool, framebuffer_kind: FramebufferKind) {
    if framebuffer_kind == FramebufferKind::Main &&
        self.debug_flags.contains(DebugFlags::SHOW_OVERDRAW) {
        blend = true
    }
    self.device.set_blend(blend)
}

/// Selects multiply blending, or the overdraw visualisation blend mode when
/// "show overdraw" is enabled and the target is the main framebuffer.
fn set_blend_mode_multiply(&mut self, framebuffer_kind: FramebufferKind) {
    if framebuffer_kind == FramebufferKind::Main &&
        self.debug_flags.contains(DebugFlags::SHOW_OVERDRAW) {
        self.device.set_blend_mode_show_overdraw();
    } else {
        self.device.set_blend_mode_multiply();
    }
}

/// Selects premultiplied-alpha blending, or the overdraw visualisation blend
/// mode when "show overdraw" is enabled and the target is the main framebuffer.
fn set_blend_mode_premultiplied_alpha(&mut self, framebuffer_kind: FramebufferKind) {
    if framebuffer_kind == FramebufferKind::Main &&
        self.debug_flags.contains(DebugFlags::SHOW_OVERDRAW) {
        self.device.set_blend_mode_show_overdraw();
    } else {
        self.device.set_blend_mode_premultiplied_alpha();
    }
}

/// Clears the texture with a given color.
///
/// Binds `texture` as the draw target (without depth) and leaves it bound.
fn clear_texture(&mut self, texture: &Texture, color: [f32; 4]) {
    self.device.bind_draw_target(DrawTarget::from_texture(
        texture,
        false,
    ));
    self.device.clear_target(Some(color), None, None);
}
} // Closes the enclosing `impl` block (its header is outside this chunk).

bitflags! {
    /// Flags that control how shaders are pre-cached, if at all.
    #[derive(Default, Debug, Copy, PartialEq, Eq, Clone, PartialOrd, Ord, Hash)]
    pub struct ShaderPrecacheFlags: u32 {
        /// Needed for const initialization
        const EMPTY = 0;

        /// Only start async compile
        const ASYNC_COMPILE = 1 << 2;

        /// Do a full compile/link during startup
        const FULL_COMPILE = 1 << 3;
    }
}

/// The cumulative times spent in each painting phase to generate this frame.
+#[derive(Debug, Default)] +pub struct FullFrameStats { + pub full_display_list: bool, + pub gecko_display_list_time: f64, + pub wr_display_list_time: f64, + pub scene_build_time: f64, + pub frame_build_time: f64, +} + +impl FullFrameStats { + pub fn merge(&self, other: &FullFrameStats) -> Self { + Self { + full_display_list: self.full_display_list || other.full_display_list, + gecko_display_list_time: self.gecko_display_list_time + other.gecko_display_list_time, + wr_display_list_time: self.wr_display_list_time + other.wr_display_list_time, + scene_build_time: self.scene_build_time + other.scene_build_time, + frame_build_time: self.frame_build_time + other.frame_build_time + } + } + + pub fn total(&self) -> f64 { + self.gecko_display_list_time + self.wr_display_list_time + self.scene_build_time + self.frame_build_time + } +} + +/// Some basic statistics about the rendered scene, used in Gecko, as +/// well as in wrench reftests to ensure that tests are batching and/or +/// allocating on render targets as we expect them to. +#[repr(C)] +#[derive(Debug, Default)] +pub struct RendererStats { + pub total_draw_calls: usize, + pub alpha_target_count: usize, + pub color_target_count: usize, + pub texture_upload_mb: f64, + pub resource_upload_time: f64, + pub gpu_cache_upload_time: f64, + pub gecko_display_list_time: f64, + pub wr_display_list_time: f64, + pub scene_build_time: f64, + pub frame_build_time: f64, + pub full_display_list: bool, + pub full_paint: bool, +} + +impl RendererStats { + pub fn merge(&mut self, stats: &FullFrameStats) { + self.gecko_display_list_time = stats.gecko_display_list_time; + self.wr_display_list_time = stats.wr_display_list_time; + self.scene_build_time = stats.scene_build_time; + self.frame_build_time = stats.frame_build_time; + self.full_display_list = stats.full_display_list; + self.full_paint = true; + } +} + +/// Return type from render(), which contains some repr(C) statistics as well as +/// some non-repr(C) data. 
+#[derive(Debug, Default)] +pub struct RenderResults { + /// Statistics about the frame that was rendered. + pub stats: RendererStats, + + /// A list of the device dirty rects that were updated + /// this frame. + /// TODO(gw): This is an initial interface, likely to change in future. + /// TODO(gw): The dirty rects here are currently only useful when scrolling + /// is not occurring. They are still correct in the case of + /// scrolling, but will be very large (until we expose proper + /// OS compositor support where the dirty rects apply to a + /// specific picture cache slice / OS compositor surface). + pub dirty_rects: Vec<DeviceIntRect>, + + /// Information about the state of picture cache tiles. This is only + /// allocated and stored if config.testing is true (such as wrench) + pub picture_cache_debug: PictureCacheDebugInfo, +} + +#[cfg(any(feature = "capture", feature = "replay"))] +#[cfg_attr(feature = "capture", derive(Serialize))] +#[cfg_attr(feature = "replay", derive(Deserialize))] +struct PlainTexture { + data: String, + size: DeviceIntSize, + format: ImageFormat, + filter: TextureFilter, + has_depth: bool, + category: Option<TextureCacheCategory>, +} + + +#[cfg(any(feature = "capture", feature = "replay"))] +#[cfg_attr(feature = "capture", derive(Serialize))] +#[cfg_attr(feature = "replay", derive(Deserialize))] +struct PlainRenderer { + device_size: Option<DeviceIntSize>, + gpu_cache: PlainTexture, + gpu_cache_frame_id: FrameId, + textures: FastHashMap<CacheTextureId, PlainTexture>, +} + +#[cfg(any(feature = "capture", feature = "replay"))] +#[cfg_attr(feature = "capture", derive(Serialize))] +#[cfg_attr(feature = "replay", derive(Deserialize))] +struct PlainExternalResources { + images: Vec<ExternalCaptureImage> +} + +#[cfg(feature = "replay")] +enum CapturedExternalImageData { + NativeTexture(gl::GLuint), + Buffer(Arc<Vec<u8>>), +} + +#[cfg(feature = "replay")] +struct DummyExternalImageHandler { + data: FastHashMap<(ExternalImageId, u8), 
(CapturedExternalImageData, TexelRect)>, +} + +#[cfg(feature = "replay")] +impl ExternalImageHandler for DummyExternalImageHandler { + fn lock(&mut self, key: ExternalImageId, channel_index: u8) -> ExternalImage { + let (ref captured_data, ref uv) = self.data[&(key, channel_index)]; + ExternalImage { + uv: *uv, + source: match *captured_data { + CapturedExternalImageData::NativeTexture(tid) => ExternalImageSource::NativeTexture(tid), + CapturedExternalImageData::Buffer(ref arc) => ExternalImageSource::RawData(&*arc), + } + } + } + fn unlock(&mut self, _key: ExternalImageId, _channel_index: u8) {} +} + +#[derive(Default)] +pub struct PipelineInfo { + pub epochs: FastHashMap<(PipelineId, DocumentId), Epoch>, + pub removed_pipelines: Vec<(PipelineId, DocumentId)>, +} + +impl Renderer { + #[cfg(feature = "capture")] + fn save_texture( + texture: &Texture, category: Option<TextureCacheCategory>, name: &str, root: &PathBuf, device: &mut Device + ) -> PlainTexture { + use std::fs; + use std::io::Write; + + let short_path = format!("textures/{}.raw", name); + + let bytes_per_pixel = texture.get_format().bytes_per_pixel(); + let read_format = texture.get_format(); + let rect_size = texture.get_dimensions(); + + let mut file = fs::File::create(root.join(&short_path)) + .expect(&format!("Unable to create {}", short_path)); + let bytes_per_texture = (rect_size.width * rect_size.height * bytes_per_pixel) as usize; + let mut data = vec![0; bytes_per_texture]; + + //TODO: instead of reading from an FBO with `read_pixels*`, we could + // read from textures directly with `get_tex_image*`. 
+ + let rect = device_size_as_framebuffer_size(rect_size).into(); + + device.attach_read_texture(texture); + #[cfg(feature = "png")] + { + let mut png_data; + let (data_ref, format) = match texture.get_format() { + ImageFormat::RGBAF32 => { + png_data = vec![0; (rect_size.width * rect_size.height * 4) as usize]; + device.read_pixels_into(rect, ImageFormat::RGBA8, &mut png_data); + (&png_data, ImageFormat::RGBA8) + } + fm => (&data, fm), + }; + CaptureConfig::save_png( + root.join(format!("textures/{}-{}.png", name, 0)), + rect_size, format, + None, + data_ref, + ); + } + device.read_pixels_into(rect, read_format, &mut data); + file.write_all(&data) + .unwrap(); + + PlainTexture { + data: short_path, + size: rect_size, + format: texture.get_format(), + filter: texture.get_filter(), + has_depth: texture.supports_depth(), + category, + } + } + + #[cfg(feature = "replay")] + fn load_texture( + target: ImageBufferKind, + plain: &PlainTexture, + rt_info: Option<RenderTargetInfo>, + root: &PathBuf, + device: &mut Device + ) -> (Texture, Vec<u8>) + { + use std::fs::File; + use std::io::Read; + + let mut texels = Vec::new(); + File::open(root.join(&plain.data)) + .expect(&format!("Unable to open texture at {}", plain.data)) + .read_to_end(&mut texels) + .unwrap(); + + let texture = device.create_texture( + target, + plain.format, + plain.size.width, + plain.size.height, + plain.filter, + rt_info, + ); + device.upload_texture_immediate(&texture, &texels); + + (texture, texels) + } + + #[cfg(feature = "capture")] + fn save_capture( + &mut self, + config: CaptureConfig, + deferred_images: Vec<ExternalCaptureImage>, + ) { + use std::fs; + use std::io::Write; + use api::ExternalImageData; + use crate::render_api::CaptureBits; + + let root = config.resource_root(); + + self.device.begin_frame(); + let _gm = self.gpu_profiler.start_marker("read GPU data"); + self.device.bind_read_target_impl(self.read_fbo, DeviceIntPoint::zero()); + + if 
config.bits.contains(CaptureBits::EXTERNAL_RESOURCES) && !deferred_images.is_empty() { + info!("saving external images"); + let mut arc_map = FastHashMap::<*const u8, String>::default(); + let mut tex_map = FastHashMap::<u32, String>::default(); + let handler = self.external_image_handler + .as_mut() + .expect("Unable to lock the external image handler!"); + for def in &deferred_images { + info!("\t{}", def.short_path); + let ExternalImageData { id, channel_index, image_type } = def.external; + // The image rendering parameter is irrelevant because no filtering happens during capturing. + let ext_image = handler.lock(id, channel_index); + let (data, short_path) = match ext_image.source { + ExternalImageSource::RawData(data) => { + let arc_id = arc_map.len() + 1; + match arc_map.entry(data.as_ptr()) { + Entry::Occupied(e) => { + (None, e.get().clone()) + } + Entry::Vacant(e) => { + let short_path = format!("externals/d{}.raw", arc_id); + (Some(data.to_vec()), e.insert(short_path).clone()) + } + } + } + ExternalImageSource::NativeTexture(gl_id) => { + let tex_id = tex_map.len() + 1; + match tex_map.entry(gl_id) { + Entry::Occupied(e) => { + (None, e.get().clone()) + } + Entry::Vacant(e) => { + let target = match image_type { + ExternalImageType::TextureHandle(target) => target, + ExternalImageType::Buffer => unreachable!(), + }; + info!("\t\tnative texture of target {:?}", target); + self.device.attach_read_texture_external(gl_id, target); + let data = self.device.read_pixels(&def.descriptor); + let short_path = format!("externals/t{}.raw", tex_id); + (Some(data), e.insert(short_path).clone()) + } + } + } + ExternalImageSource::Invalid => { + info!("\t\tinvalid source!"); + (None, String::new()) + } + }; + if let Some(bytes) = data { + fs::File::create(root.join(&short_path)) + .expect(&format!("Unable to create {}", short_path)) + .write_all(&bytes) + .unwrap(); + #[cfg(feature = "png")] + CaptureConfig::save_png( + root.join(&short_path).with_extension("png"), + 
def.descriptor.size, + def.descriptor.format, + def.descriptor.stride, + &bytes, + ); + } + let plain = PlainExternalImage { + data: short_path, + external: def.external, + uv: ext_image.uv, + }; + config.serialize_for_resource(&plain, &def.short_path); + } + for def in &deferred_images { + handler.unlock(def.external.id, def.external.channel_index); + } + let plain_external = PlainExternalResources { + images: deferred_images, + }; + config.serialize_for_resource(&plain_external, "external_resources"); + } + + if config.bits.contains(CaptureBits::FRAME) { + let path_textures = root.join("textures"); + if !path_textures.is_dir() { + fs::create_dir(&path_textures).unwrap(); + } + + info!("saving GPU cache"); + self.update_gpu_cache(); // flush pending updates + let mut plain_self = PlainRenderer { + device_size: self.device_size, + gpu_cache: Self::save_texture( + self.gpu_cache_texture.get_texture(), + None, "gpu", &root, &mut self.device, + ), + gpu_cache_frame_id: self.gpu_cache_frame_id, + textures: FastHashMap::default(), + }; + + info!("saving cached textures"); + for (id, item) in &self.texture_resolver.texture_cache_map { + let file_name = format!("cache-{}", plain_self.textures.len() + 1); + info!("\t{}", file_name); + let plain = Self::save_texture(&item.texture, Some(item.category), &file_name, &root, &mut self.device); + plain_self.textures.insert(*id, plain); + } + + config.serialize_for_resource(&plain_self, "renderer"); + } + + self.device.reset_read_target(); + self.device.end_frame(); + + let mut stats_file = fs::File::create(config.root.join("profiler-stats.txt")) + .expect(&format!("Unable to create profiler-stats.txt")); + if self.debug_flags.intersects(DebugFlags::PROFILER_DBG | DebugFlags::PROFILER_CAPTURE) { + self.profiler.dump_stats(&mut stats_file).unwrap(); + } else { + writeln!(stats_file, "Turn on PROFILER_DBG or PROFILER_CAPTURE to get stats here!").unwrap(); + } + + info!("done."); + } + + #[cfg(feature = "replay")] + fn load_capture( 
+ &mut self, + config: CaptureConfig, + plain_externals: Vec<PlainExternalImage>, + ) { + use std::{fs::File, io::Read}; + + info!("loading external buffer-backed images"); + assert!(self.texture_resolver.external_images.is_empty()); + let mut raw_map = FastHashMap::<String, Arc<Vec<u8>>>::default(); + let mut image_handler = DummyExternalImageHandler { + data: FastHashMap::default(), + }; + + let root = config.resource_root(); + + // Note: this is a `SCENE` level population of the external image handlers + // It would put both external buffers and texture into the map. + // But latter are going to be overwritten later in this function + // if we are in the `FRAME` level. + for plain_ext in plain_externals { + let data = match raw_map.entry(plain_ext.data) { + Entry::Occupied(e) => e.get().clone(), + Entry::Vacant(e) => { + let mut buffer = Vec::new(); + File::open(root.join(e.key())) + .expect(&format!("Unable to open {}", e.key())) + .read_to_end(&mut buffer) + .unwrap(); + e.insert(Arc::new(buffer)).clone() + } + }; + let ext = plain_ext.external; + let value = (CapturedExternalImageData::Buffer(data), plain_ext.uv); + image_handler.data.insert((ext.id, ext.channel_index), value); + } + + if let Some(external_resources) = config.deserialize_for_resource::<PlainExternalResources, _>("external_resources") { + info!("loading external texture-backed images"); + let mut native_map = FastHashMap::<String, gl::GLuint>::default(); + for ExternalCaptureImage { short_path, external, descriptor } in external_resources.images { + let target = match external.image_type { + ExternalImageType::TextureHandle(target) => target, + ExternalImageType::Buffer => continue, + }; + let plain_ext = config.deserialize_for_resource::<PlainExternalImage, _>(&short_path) + .expect(&format!("Unable to read {}.ron", short_path)); + let key = (external.id, external.channel_index); + + let tid = match native_map.entry(plain_ext.data) { + Entry::Occupied(e) => e.get().clone(), + Entry::Vacant(e) 
=> { + let plain_tex = PlainTexture { + data: e.key().clone(), + size: descriptor.size, + format: descriptor.format, + filter: TextureFilter::Linear, + has_depth: false, + category: None, + }; + let t = Self::load_texture( + target, + &plain_tex, + None, + &root, + &mut self.device + ); + let extex = t.0.into_external(); + self.owned_external_images.insert(key, extex.clone()); + e.insert(extex.internal_id()).clone() + } + }; + + let value = (CapturedExternalImageData::NativeTexture(tid), plain_ext.uv); + image_handler.data.insert(key, value); + } + } + + self.device.begin_frame(); + self.gpu_cache_texture.remove_texture(&mut self.device); + + if let Some(renderer) = config.deserialize_for_resource::<PlainRenderer, _>("renderer") { + info!("loading cached textures"); + self.device_size = renderer.device_size; + + for (_id, item) in self.texture_resolver.texture_cache_map.drain() { + self.device.delete_texture(item.texture); + } + for (id, texture) in renderer.textures { + info!("\t{}", texture.data); + let target = ImageBufferKind::Texture2D; + let t = Self::load_texture( + target, + &texture, + Some(RenderTargetInfo { has_depth: texture.has_depth }), + &root, + &mut self.device + ); + self.texture_resolver.texture_cache_map.insert(id, CacheTexture { + texture: t.0, + category: texture.category.unwrap_or(TextureCacheCategory::Standalone), + }); + } + + info!("loading gpu cache"); + let (t, gpu_cache_data) = Self::load_texture( + ImageBufferKind::Texture2D, + &renderer.gpu_cache, + Some(RenderTargetInfo { has_depth: false }), + &root, + &mut self.device, + ); + self.gpu_cache_texture.load_from_data(t, gpu_cache_data); + self.gpu_cache_frame_id = renderer.gpu_cache_frame_id; + } else { + info!("loading cached textures"); + self.device.begin_frame(); + for (_id, item) in self.texture_resolver.texture_cache_map.drain() { + self.device.delete_texture(item.texture); + } + } + self.device.end_frame(); + + self.external_image_handler = Some(Box::new(image_handler) as 
Box<_>); + info!("done."); + } +} + +#[derive(Clone, Copy, PartialEq)] +enum FramebufferKind { + Main, + Other, +} + +fn should_skip_batch(kind: &BatchKind, flags: DebugFlags) -> bool { + match kind { + BatchKind::TextRun(_) => { + flags.contains(DebugFlags::DISABLE_TEXT_PRIMS) + } + BatchKind::Brush(BrushBatchKind::LinearGradient) => { + flags.contains(DebugFlags::DISABLE_GRADIENT_PRIMS) + } + _ => false, + } +} + +impl CompositeState { + /// Use the client provided native compositor interface to add all picture + /// cache tiles to the OS compositor + fn composite_native( + &self, + clear_color: ColorF, + dirty_rects: &[DeviceIntRect], + device: &mut Device, + compositor: &mut dyn Compositor, + ) { + // Add each surface to the visual tree. z-order is implicit based on + // order added. Offset and clip rect apply to all tiles within this + // surface. + for surface in &self.descriptor.surfaces { + compositor.add_surface( + device, + surface.surface_id.expect("bug: no native surface allocated"), + surface.transform, + surface.clip_rect.to_i32(), + surface.image_rendering, + ); + } + compositor.start_compositing(device, clear_color, dirty_rects, &[]); + } +} + +mod tests { + #[test] + fn test_buffer_damage_tracker() { + use super::BufferDamageTracker; + use api::units::{DevicePoint, DeviceRect, DeviceSize}; + + let mut tracker = BufferDamageTracker::default(); + assert_eq!(tracker.get_damage_rect(0), None); + assert_eq!(tracker.get_damage_rect(1), Some(DeviceRect::zero())); + assert_eq!(tracker.get_damage_rect(2), Some(DeviceRect::zero())); + assert_eq!(tracker.get_damage_rect(3), Some(DeviceRect::zero())); + assert_eq!(tracker.get_damage_rect(4), None); + + let damage1 = DeviceRect::from_origin_and_size(DevicePoint::new(10.0, 10.0), DeviceSize::new(10.0, 10.0)); + let damage2 = DeviceRect::from_origin_and_size(DevicePoint::new(20.0, 20.0), DeviceSize::new(10.0, 10.0)); + let combined = damage1.union(&damage2); + + tracker.push_dirty_rect(&damage1); + 
assert_eq!(tracker.get_damage_rect(0), None); + assert_eq!(tracker.get_damage_rect(1), Some(DeviceRect::zero())); + assert_eq!(tracker.get_damage_rect(2), Some(damage1)); + assert_eq!(tracker.get_damage_rect(3), Some(damage1)); + assert_eq!(tracker.get_damage_rect(4), None); + + tracker.push_dirty_rect(&damage2); + assert_eq!(tracker.get_damage_rect(0), None); + assert_eq!(tracker.get_damage_rect(1), Some(DeviceRect::zero())); + assert_eq!(tracker.get_damage_rect(2), Some(damage2)); + assert_eq!(tracker.get_damage_rect(3), Some(combined)); + assert_eq!(tracker.get_damage_rect(4), None); + } +} diff --git a/gfx/wr/webrender/src/renderer/shade.rs b/gfx/wr/webrender/src/renderer/shade.rs new file mode 100644 index 0000000000..30baee0a19 --- /dev/null +++ b/gfx/wr/webrender/src/renderer/shade.rs @@ -0,0 +1,1507 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +use api::{ImageBufferKind, units::DeviceSize}; +use crate::batch::{BatchKey, BatchKind, BrushBatchKind, BatchFeatures}; +use crate::composite::{CompositeFeatures, CompositeSurfaceFormat}; +use crate::device::{Device, Program, ShaderError}; +use euclid::default::Transform3D; +use glyph_rasterizer::GlyphFormat; +use crate::renderer::{ + desc, + BlendMode, DebugFlags, RendererError, WebRenderOptions, + TextureSampler, VertexArrayKind, ShaderPrecacheFlags, +}; +use crate::profiler::{self, TransactionProfile, ns_to_ms}; + +use gleam::gl::GlType; +use time::precise_time_ns; + +use std::cell::RefCell; +use std::rc::Rc; + +use webrender_build::shader::{ShaderFeatures, ShaderFeatureFlags, get_shader_features}; + +/// Which extension version to use for texture external support. 
+#[derive(Clone, Copy, Debug, PartialEq)] +enum TextureExternalVersion { + // GL_OES_EGL_image_external_essl3 (Compatible with ESSL 3.0 and + // later shaders, but not supported on all GLES 3 devices.) + ESSL3, + // GL_OES_EGL_image_external (Compatible with ESSL 1.0 shaders) + ESSL1, +} + +fn get_feature_string(kind: ImageBufferKind, texture_external_version: TextureExternalVersion) -> &'static str { + match (kind, texture_external_version) { + (ImageBufferKind::Texture2D, _) => "TEXTURE_2D", + (ImageBufferKind::TextureRect, _) => "TEXTURE_RECT", + (ImageBufferKind::TextureExternal, TextureExternalVersion::ESSL3) => "TEXTURE_EXTERNAL", + (ImageBufferKind::TextureExternal, TextureExternalVersion::ESSL1) => "TEXTURE_EXTERNAL_ESSL1", + (ImageBufferKind::TextureExternalBT709, _) => "TEXTURE_EXTERNAL_BT709", + } +} + +fn has_platform_support(kind: ImageBufferKind, device: &Device) -> bool { + match (kind, device.gl().get_type()) { + (ImageBufferKind::Texture2D, _) => true, + (ImageBufferKind::TextureRect, GlType::Gles) => false, + (ImageBufferKind::TextureRect, GlType::Gl) => true, + (ImageBufferKind::TextureExternal, GlType::Gles) => true, + (ImageBufferKind::TextureExternal, GlType::Gl) => false, + (ImageBufferKind::TextureExternalBT709, GlType::Gles) => device.supports_extension("GL_EXT_YUV_target"), + (ImageBufferKind::TextureExternalBT709, GlType::Gl) => false, + } +} + +pub const IMAGE_BUFFER_KINDS: [ImageBufferKind; 4] = [ + ImageBufferKind::Texture2D, + ImageBufferKind::TextureRect, + ImageBufferKind::TextureExternal, + ImageBufferKind::TextureExternalBT709, +]; + +const ADVANCED_BLEND_FEATURE: &str = "ADVANCED_BLEND"; +const ALPHA_FEATURE: &str = "ALPHA_PASS"; +const DEBUG_OVERDRAW_FEATURE: &str = "DEBUG_OVERDRAW"; +const DITHERING_FEATURE: &str = "DITHERING"; +const DUAL_SOURCE_FEATURE: &str = "DUAL_SOURCE_BLENDING"; +const FAST_PATH_FEATURE: &str = "FAST_PATH"; + +pub(crate) enum ShaderKind { + Primitive, + Cache(VertexArrayKind), + 
ClipCache(VertexArrayKind), + Brush, + Text, + #[allow(dead_code)] + VectorStencil, + #[allow(dead_code)] + VectorCover, + #[allow(dead_code)] + Resolve, + Composite, + Clear, + Copy, +} + +pub struct LazilyCompiledShader { + program: Option<Program>, + name: &'static str, + kind: ShaderKind, + cached_projection: Transform3D<f32>, + features: Vec<&'static str>, +} + +impl LazilyCompiledShader { + pub(crate) fn new( + kind: ShaderKind, + name: &'static str, + unsorted_features: &[&'static str], + device: &mut Device, + precache_flags: ShaderPrecacheFlags, + shader_list: &ShaderFeatures, + profile: &mut TransactionProfile, + ) -> Result<Self, ShaderError> { + + let mut features = unsorted_features.to_vec(); + features.sort(); + + // Ensure this shader config is in the available shader list so that we get + // alerted if the list gets out-of-date when shaders or features are added. + let config = features.join(","); + assert!( + shader_list.get(name).map_or(false, |f| f.contains(&config)), + "shader \"{}\" with features \"{}\" not in available shader list", + name, + config, + ); + + let mut shader = LazilyCompiledShader { + program: None, + name, + kind, + //Note: this isn't really the default state, but there is no chance + // an actual projection passed here would accidentally match. 
+ cached_projection: Transform3D::identity(), + features, + }; + + if precache_flags.intersects(ShaderPrecacheFlags::ASYNC_COMPILE | ShaderPrecacheFlags::FULL_COMPILE) { + let t0 = precise_time_ns(); + shader.get_internal(device, precache_flags, profile)?; + let t1 = precise_time_ns(); + debug!("[C: {:.1} ms ] Precache {} {:?}", + (t1 - t0) as f64 / 1000000.0, + name, + unsorted_features + ); + } + + Ok(shader) + } + + pub fn bind( + &mut self, + device: &mut Device, + projection: &Transform3D<f32>, + texture_size: Option<DeviceSize>, + renderer_errors: &mut Vec<RendererError>, + profile: &mut TransactionProfile, + ) { + let update_projection = self.cached_projection != *projection; + let program = match self.get_internal(device, ShaderPrecacheFlags::FULL_COMPILE, profile) { + Ok(program) => program, + Err(e) => { + renderer_errors.push(RendererError::from(e)); + return; + } + }; + device.bind_program(program); + if let Some(texture_size) = texture_size { + device.set_shader_texture_size(program, texture_size); + } + if update_projection { + device.set_uniforms(program, projection); + // thanks NLL for this (`program` technically borrows `self`) + self.cached_projection = *projection; + } + } + + fn get_internal( + &mut self, + device: &mut Device, + precache_flags: ShaderPrecacheFlags, + profile: &mut TransactionProfile, + ) -> Result<&mut Program, ShaderError> { + if self.program.is_none() { + let start_time = precise_time_ns(); + let program = match self.kind { + ShaderKind::Primitive | ShaderKind::Brush | ShaderKind::Text | ShaderKind::Resolve | ShaderKind::Clear | ShaderKind::Copy => { + create_prim_shader( + self.name, + device, + &self.features, + ) + } + ShaderKind::Cache(..) 
=> { + create_prim_shader( + self.name, + device, + &self.features, + ) + } + ShaderKind::VectorStencil => { + create_prim_shader( + self.name, + device, + &self.features, + ) + } + ShaderKind::VectorCover => { + create_prim_shader( + self.name, + device, + &self.features, + ) + } + ShaderKind::Composite => { + create_prim_shader( + self.name, + device, + &self.features, + ) + } + ShaderKind::ClipCache(..) => { + create_clip_shader( + self.name, + device, + &self.features, + ) + } + }; + self.program = Some(program?); + + let end_time = precise_time_ns(); + profile.add(profiler::SHADER_BUILD_TIME, ns_to_ms(end_time - start_time)); + } + + let program = self.program.as_mut().unwrap(); + + if precache_flags.contains(ShaderPrecacheFlags::FULL_COMPILE) && !program.is_initialized() { + let start_time = precise_time_ns(); + + let vertex_format = match self.kind { + ShaderKind::Primitive | + ShaderKind::Brush | + ShaderKind::Text => VertexArrayKind::Primitive, + ShaderKind::Cache(format) => format, + ShaderKind::VectorStencil => VertexArrayKind::VectorStencil, + ShaderKind::VectorCover => VertexArrayKind::VectorCover, + ShaderKind::ClipCache(format) => format, + ShaderKind::Resolve => VertexArrayKind::Resolve, + ShaderKind::Composite => VertexArrayKind::Composite, + ShaderKind::Clear => VertexArrayKind::Clear, + ShaderKind::Copy => VertexArrayKind::Copy, + }; + + let vertex_descriptor = match vertex_format { + VertexArrayKind::Primitive => &desc::PRIM_INSTANCES, + VertexArrayKind::LineDecoration => &desc::LINE, + VertexArrayKind::FastLinearGradient => &desc::FAST_LINEAR_GRADIENT, + VertexArrayKind::LinearGradient => &desc::LINEAR_GRADIENT, + VertexArrayKind::RadialGradient => &desc::RADIAL_GRADIENT, + VertexArrayKind::ConicGradient => &desc::CONIC_GRADIENT, + VertexArrayKind::Blur => &desc::BLUR, + VertexArrayKind::ClipImage => &desc::CLIP_IMAGE, + VertexArrayKind::ClipRect => &desc::CLIP_RECT, + VertexArrayKind::ClipBoxShadow => &desc::CLIP_BOX_SHADOW, + 
VertexArrayKind::VectorStencil => &desc::VECTOR_STENCIL, + VertexArrayKind::VectorCover => &desc::VECTOR_COVER, + VertexArrayKind::Border => &desc::BORDER, + VertexArrayKind::Scale => &desc::SCALE, + VertexArrayKind::Resolve => &desc::RESOLVE, + VertexArrayKind::SvgFilter => &desc::SVG_FILTER, + VertexArrayKind::Composite => &desc::COMPOSITE, + VertexArrayKind::Clear => &desc::CLEAR, + VertexArrayKind::Copy => &desc::COPY, + VertexArrayKind::Mask => &desc::MASK, + }; + + device.link_program(program, vertex_descriptor)?; + device.bind_program(program); + match self.kind { + ShaderKind::ClipCache(..) => { + device.bind_shader_samplers( + &program, + &[ + ("sColor0", TextureSampler::Color0), + ("sTransformPalette", TextureSampler::TransformPalette), + ("sRenderTasks", TextureSampler::RenderTasks), + ("sGpuCache", TextureSampler::GpuCache), + ("sPrimitiveHeadersF", TextureSampler::PrimitiveHeadersF), + ("sPrimitiveHeadersI", TextureSampler::PrimitiveHeadersI), + ("sGpuBuffer", TextureSampler::GpuBuffer), + ], + ); + } + _ => { + device.bind_shader_samplers( + &program, + &[ + ("sColor0", TextureSampler::Color0), + ("sColor1", TextureSampler::Color1), + ("sColor2", TextureSampler::Color2), + ("sDither", TextureSampler::Dither), + ("sTransformPalette", TextureSampler::TransformPalette), + ("sRenderTasks", TextureSampler::RenderTasks), + ("sGpuCache", TextureSampler::GpuCache), + ("sPrimitiveHeadersF", TextureSampler::PrimitiveHeadersF), + ("sPrimitiveHeadersI", TextureSampler::PrimitiveHeadersI), + ("sClipMask", TextureSampler::ClipMask), + ("sGpuBuffer", TextureSampler::GpuBuffer), + ], + ); + } + } + + let end_time = precise_time_ns(); + profile.add(profiler::SHADER_BUILD_TIME, ns_to_ms(end_time - start_time)); + } + + Ok(program) + } + + fn deinit(self, device: &mut Device) { + if let Some(program) = self.program { + device.delete_program(program); + } + } +} + +// A brush shader supports two modes: +// opaque: +// Used for completely opaque primitives, +// or inside 
segments of partially +// opaque primitives. Assumes no need +// for clip masks, AA etc. +// alpha: +// Used for brush primitives in the alpha +// pass. Assumes that AA should be applied +// along the primitive edge, and also that +// clip mask is present. +struct BrushShader { + opaque: LazilyCompiledShader, + alpha: LazilyCompiledShader, + advanced_blend: Option<LazilyCompiledShader>, + dual_source: Option<LazilyCompiledShader>, + debug_overdraw: LazilyCompiledShader, +} + +impl BrushShader { + fn new( + name: &'static str, + device: &mut Device, + features: &[&'static str], + precache_flags: ShaderPrecacheFlags, + shader_list: &ShaderFeatures, + use_advanced_blend: bool, + use_dual_source: bool, + profile: &mut TransactionProfile, + ) -> Result<Self, ShaderError> { + let opaque_features = features.to_vec(); + let opaque = LazilyCompiledShader::new( + ShaderKind::Brush, + name, + &opaque_features, + device, + precache_flags, + &shader_list, + profile, + )?; + + let mut alpha_features = opaque_features.to_vec(); + alpha_features.push(ALPHA_FEATURE); + + let alpha = LazilyCompiledShader::new( + ShaderKind::Brush, + name, + &alpha_features, + device, + precache_flags, + &shader_list, + profile, + )?; + + let advanced_blend = if use_advanced_blend { + let mut advanced_blend_features = alpha_features.to_vec(); + advanced_blend_features.push(ADVANCED_BLEND_FEATURE); + + let shader = LazilyCompiledShader::new( + ShaderKind::Brush, + name, + &advanced_blend_features, + device, + precache_flags, + &shader_list, + profile, + )?; + + Some(shader) + } else { + None + }; + + let dual_source = if use_dual_source { + let mut dual_source_features = alpha_features.to_vec(); + dual_source_features.push(DUAL_SOURCE_FEATURE); + + let shader = LazilyCompiledShader::new( + ShaderKind::Brush, + name, + &dual_source_features, + device, + precache_flags, + &shader_list, + profile, + )?; + + Some(shader) + } else { + None + }; + + let mut debug_overdraw_features = features.to_vec(); + 
debug_overdraw_features.push(DEBUG_OVERDRAW_FEATURE); + + let debug_overdraw = LazilyCompiledShader::new( + ShaderKind::Brush, + name, + &debug_overdraw_features, + device, + precache_flags, + &shader_list, + profile, + )?; + + Ok(BrushShader { + opaque, + alpha, + advanced_blend, + dual_source, + debug_overdraw, + }) + } + + fn get(&mut self, blend_mode: BlendMode, features: BatchFeatures, debug_flags: DebugFlags) + -> &mut LazilyCompiledShader { + match blend_mode { + _ if debug_flags.contains(DebugFlags::SHOW_OVERDRAW) => &mut self.debug_overdraw, + BlendMode::None => &mut self.opaque, + BlendMode::Alpha | + BlendMode::PremultipliedAlpha | + BlendMode::PremultipliedDestOut | + BlendMode::Screen | + BlendMode::PlusLighter | + BlendMode::Exclusion => { + if features.contains(BatchFeatures::ALPHA_PASS) { + &mut self.alpha + } else { + &mut self.opaque + } + } + BlendMode::Advanced(_) => { + self.advanced_blend + .as_mut() + .expect("bug: no advanced blend shader loaded") + } + BlendMode::SubpixelDualSource | + BlendMode::MultiplyDualSource => { + self.dual_source + .as_mut() + .expect("bug: no dual source shader loaded") + } + } + } + + fn deinit(self, device: &mut Device) { + self.opaque.deinit(device); + self.alpha.deinit(device); + if let Some(advanced_blend) = self.advanced_blend { + advanced_blend.deinit(device); + } + if let Some(dual_source) = self.dual_source { + dual_source.deinit(device); + } + self.debug_overdraw.deinit(device); + } +} + +pub struct TextShader { + simple: LazilyCompiledShader, + glyph_transform: LazilyCompiledShader, + debug_overdraw: LazilyCompiledShader, +} + +impl TextShader { + fn new( + name: &'static str, + device: &mut Device, + features: &[&'static str], + precache_flags: ShaderPrecacheFlags, + shader_list: &ShaderFeatures, + profile: &mut TransactionProfile, + ) -> Result<Self, ShaderError> { + let mut simple_features = features.to_vec(); + simple_features.push("ALPHA_PASS"); + simple_features.push("TEXTURE_2D"); + + let simple 
= LazilyCompiledShader::new( + ShaderKind::Text, + name, + &simple_features, + device, + precache_flags, + &shader_list, + profile, + )?; + + let mut glyph_transform_features = features.to_vec(); + glyph_transform_features.push("GLYPH_TRANSFORM"); + glyph_transform_features.push("ALPHA_PASS"); + glyph_transform_features.push("TEXTURE_2D"); + + let glyph_transform = LazilyCompiledShader::new( + ShaderKind::Text, + name, + &glyph_transform_features, + device, + precache_flags, + &shader_list, + profile, + )?; + + let mut debug_overdraw_features = features.to_vec(); + debug_overdraw_features.push("DEBUG_OVERDRAW"); + debug_overdraw_features.push("TEXTURE_2D"); + + let debug_overdraw = LazilyCompiledShader::new( + ShaderKind::Text, + name, + &debug_overdraw_features, + device, + precache_flags, + &shader_list, + profile, + )?; + + Ok(TextShader { simple, glyph_transform, debug_overdraw }) + } + + pub fn get( + &mut self, + glyph_format: GlyphFormat, + debug_flags: DebugFlags, + ) -> &mut LazilyCompiledShader { + match glyph_format { + _ if debug_flags.contains(DebugFlags::SHOW_OVERDRAW) => &mut self.debug_overdraw, + GlyphFormat::Alpha | + GlyphFormat::Subpixel | + GlyphFormat::Bitmap | + GlyphFormat::ColorBitmap => &mut self.simple, + GlyphFormat::TransformedAlpha | + GlyphFormat::TransformedSubpixel => &mut self.glyph_transform, + } + } + + fn deinit(self, device: &mut Device) { + self.simple.deinit(device); + self.glyph_transform.deinit(device); + self.debug_overdraw.deinit(device); + } +} + +fn create_prim_shader( + name: &'static str, + device: &mut Device, + features: &[&'static str], +) -> Result<Program, ShaderError> { + debug!("PrimShader {}", name); + + device.create_program(name, features) +} + +fn create_clip_shader( + name: &'static str, + device: &mut Device, + features: &[&'static str], +) -> Result<Program, ShaderError> { + debug!("ClipShader {}", name); + + device.create_program(name, features) +} + +// NB: If you add a new shader here, make sure to 
deinitialize it +// in `Shaders::deinit()` below. +pub struct Shaders { + // These are "cache shaders". These shaders are used to + // draw intermediate results to cache targets. The results + // of these shaders are then used by the primitive shaders. + pub cs_blur_a8: LazilyCompiledShader, + pub cs_blur_rgba8: LazilyCompiledShader, + pub cs_border_segment: LazilyCompiledShader, + pub cs_border_solid: LazilyCompiledShader, + pub cs_scale: Vec<Option<LazilyCompiledShader>>, + pub cs_line_decoration: LazilyCompiledShader, + pub cs_fast_linear_gradient: LazilyCompiledShader, + pub cs_linear_gradient: LazilyCompiledShader, + pub cs_radial_gradient: LazilyCompiledShader, + pub cs_conic_gradient: LazilyCompiledShader, + pub cs_svg_filter: LazilyCompiledShader, + + // Brush shaders + brush_solid: BrushShader, + brush_image: Vec<Option<BrushShader>>, + brush_fast_image: Vec<Option<BrushShader>>, + brush_blend: BrushShader, + brush_mix_blend: BrushShader, + brush_yuv_image: Vec<Option<BrushShader>>, + brush_linear_gradient: BrushShader, + brush_opacity: BrushShader, + brush_opacity_aa: BrushShader, + + /// These are "cache clip shaders". These shaders are used to + /// draw clip instances into the cached clip mask. The results + /// of these shaders are also used by the primitive shaders. + pub cs_clip_rectangle_slow: LazilyCompiledShader, + pub cs_clip_rectangle_fast: LazilyCompiledShader, + pub cs_clip_box_shadow: LazilyCompiledShader, + pub cs_clip_image: LazilyCompiledShader, + + // The are "primitive shaders". These shaders draw and blend + // final results on screen. They are aware of tile boundaries. + // Most draw directly to the framebuffer, but some use inputs + // from the cache shaders to draw. Specifically, the box + // shadow primitive shader stretches the box shadow cache + // output, and the cache_image shader blits the results of + // a cache shader (e.g. blur) to the screen. 
+ pub ps_text_run: TextShader, + pub ps_text_run_dual_source: Option<TextShader>, + + ps_split_composite: LazilyCompiledShader, + pub ps_quad_textured: LazilyCompiledShader, + pub ps_mask: LazilyCompiledShader, + pub ps_mask_fast: LazilyCompiledShader, + pub ps_clear: LazilyCompiledShader, + pub ps_copy: LazilyCompiledShader, + + pub composite: CompositorShaders, +} + +impl Shaders { + pub fn new( + device: &mut Device, + gl_type: GlType, + options: &WebRenderOptions, + ) -> Result<Self, ShaderError> { + // We have to pass a profile around a bunch but we aren't recording the initialization + // so use a dummy one. + let profile = &mut TransactionProfile::new(); + + let use_dual_source_blending = + device.get_capabilities().supports_dual_source_blending && + options.allow_dual_source_blending; + let use_advanced_blend_equation = + device.get_capabilities().supports_advanced_blend_equation && + options.allow_advanced_blend_equation; + + let texture_external_version = if device.get_capabilities().supports_image_external_essl3 { + TextureExternalVersion::ESSL3 + } else { + TextureExternalVersion::ESSL1 + }; + let mut shader_flags = get_shader_feature_flags(gl_type, texture_external_version, device); + shader_flags.set(ShaderFeatureFlags::ADVANCED_BLEND_EQUATION, use_advanced_blend_equation); + shader_flags.set(ShaderFeatureFlags::DUAL_SOURCE_BLENDING, use_dual_source_blending); + shader_flags.set(ShaderFeatureFlags::DITHERING, options.enable_dithering); + let shader_list = get_shader_features(shader_flags); + + let brush_solid = BrushShader::new( + "brush_solid", + device, + &[], + options.precache_flags, + &shader_list, + false /* advanced blend */, + false /* dual source */, + profile, + )?; + + let brush_blend = BrushShader::new( + "brush_blend", + device, + &[], + options.precache_flags, + &shader_list, + false /* advanced blend */, + false /* dual source */, + profile, + )?; + + let brush_mix_blend = BrushShader::new( + "brush_mix_blend", + device, + &[], + 
options.precache_flags, + &shader_list, + false /* advanced blend */, + false /* dual source */, + profile, + )?; + + let brush_linear_gradient = BrushShader::new( + "brush_linear_gradient", + device, + if options.enable_dithering { + &[DITHERING_FEATURE] + } else { + &[] + }, + options.precache_flags, + &shader_list, + false /* advanced blend */, + false /* dual source */, + profile, + )?; + + let brush_opacity_aa = BrushShader::new( + "brush_opacity", + device, + &["ANTIALIASING"], + options.precache_flags, + &shader_list, + false /* advanced blend */, + false /* dual source */, + profile, + )?; + + let brush_opacity = BrushShader::new( + "brush_opacity", + device, + &[], + options.precache_flags, + &shader_list, + false /* advanced blend */, + false /* dual source */, + profile, + )?; + + let cs_blur_a8 = LazilyCompiledShader::new( + ShaderKind::Cache(VertexArrayKind::Blur), + "cs_blur", + &["ALPHA_TARGET"], + device, + options.precache_flags, + &shader_list, + profile, + )?; + + let cs_blur_rgba8 = LazilyCompiledShader::new( + ShaderKind::Cache(VertexArrayKind::Blur), + "cs_blur", + &["COLOR_TARGET"], + device, + options.precache_flags, + &shader_list, + profile, + )?; + + let cs_svg_filter = LazilyCompiledShader::new( + ShaderKind::Cache(VertexArrayKind::SvgFilter), + "cs_svg_filter", + &[], + device, + options.precache_flags, + &shader_list, + profile, + )?; + + let ps_mask = LazilyCompiledShader::new( + ShaderKind::Cache(VertexArrayKind::Mask), + "ps_quad_mask", + &[], + device, + options.precache_flags, + &shader_list, + profile, + )?; + + let ps_mask_fast = LazilyCompiledShader::new( + ShaderKind::Cache(VertexArrayKind::Mask), + "ps_quad_mask", + &[FAST_PATH_FEATURE], + device, + options.precache_flags, + &shader_list, + profile, + )?; + + let cs_clip_rectangle_slow = LazilyCompiledShader::new( + ShaderKind::ClipCache(VertexArrayKind::ClipRect), + "cs_clip_rectangle", + &[], + device, + options.precache_flags, + &shader_list, + profile, + )?; + + let 
cs_clip_rectangle_fast = LazilyCompiledShader::new( + ShaderKind::ClipCache(VertexArrayKind::ClipRect), + "cs_clip_rectangle", + &[FAST_PATH_FEATURE], + device, + options.precache_flags, + &shader_list, + profile, + )?; + + let cs_clip_box_shadow = LazilyCompiledShader::new( + ShaderKind::ClipCache(VertexArrayKind::ClipBoxShadow), + "cs_clip_box_shadow", + &["TEXTURE_2D"], + device, + options.precache_flags, + &shader_list, + profile, + )?; + + let cs_clip_image = LazilyCompiledShader::new( + ShaderKind::ClipCache(VertexArrayKind::ClipImage), + "cs_clip_image", + &["TEXTURE_2D"], + device, + options.precache_flags, + &shader_list, + profile, + )?; + + let mut cs_scale = Vec::new(); + let scale_shader_num = IMAGE_BUFFER_KINDS.len(); + // PrimitiveShader is not clonable. Use push() to initialize the vec. + for _ in 0 .. scale_shader_num { + cs_scale.push(None); + } + for image_buffer_kind in &IMAGE_BUFFER_KINDS { + if has_platform_support(*image_buffer_kind, device) { + let feature_string = get_feature_string( + *image_buffer_kind, + texture_external_version, + ); + + let mut features = Vec::new(); + if feature_string != "" { + features.push(feature_string); + } + + let shader = LazilyCompiledShader::new( + ShaderKind::Cache(VertexArrayKind::Scale), + "cs_scale", + &features, + device, + options.precache_flags, + &shader_list, + profile, + )?; + + let index = Self::get_compositing_shader_index( + *image_buffer_kind, + ); + cs_scale[index] = Some(shader); + } + } + + // TODO(gw): The split composite + text shader are special cases - the only + // shaders used during normal scene rendering that aren't a brush + // shader. Perhaps we can unify these in future? 
+ + let ps_text_run = TextShader::new("ps_text_run", + device, + &[], + options.precache_flags, + &shader_list, + profile, + )?; + + let ps_text_run_dual_source = if use_dual_source_blending { + let dual_source_features = vec![DUAL_SOURCE_FEATURE]; + Some(TextShader::new("ps_text_run", + device, + &dual_source_features, + options.precache_flags, + &shader_list, + profile, + )?) + } else { + None + }; + + let ps_quad_textured = LazilyCompiledShader::new( + ShaderKind::Primitive, + "ps_quad_textured", + &[], + device, + options.precache_flags, + &shader_list, + profile, + )?; + + let ps_split_composite = LazilyCompiledShader::new( + ShaderKind::Primitive, + "ps_split_composite", + &[], + device, + options.precache_flags, + &shader_list, + profile, + )?; + + let ps_clear = LazilyCompiledShader::new( + ShaderKind::Clear, + "ps_clear", + &[], + device, + options.precache_flags, + &shader_list, + profile, + )?; + + let ps_copy = LazilyCompiledShader::new( + ShaderKind::Copy, + "ps_copy", + &[], + device, + options.precache_flags, + &shader_list, + profile, + )?; + + // All image configuration. + let mut image_features = Vec::new(); + let mut brush_image = Vec::new(); + let mut brush_fast_image = Vec::new(); + // PrimitiveShader is not clonable. Use push() to initialize the vec. + for _ in 0 .. IMAGE_BUFFER_KINDS.len() { + brush_image.push(None); + brush_fast_image.push(None); + } + for buffer_kind in 0 .. 
IMAGE_BUFFER_KINDS.len() { + if !has_platform_support(IMAGE_BUFFER_KINDS[buffer_kind], device) + // Brush shaders are not ESSL1 compatible + || (IMAGE_BUFFER_KINDS[buffer_kind] == ImageBufferKind::TextureExternal + && texture_external_version == TextureExternalVersion::ESSL1) + { + continue; + } + + let feature_string = get_feature_string( + IMAGE_BUFFER_KINDS[buffer_kind], + texture_external_version, + ); + if feature_string != "" { + image_features.push(feature_string); + } + + brush_fast_image[buffer_kind] = Some(BrushShader::new( + "brush_image", + device, + &image_features, + options.precache_flags, + &shader_list, + use_advanced_blend_equation, + use_dual_source_blending, + profile, + )?); + + image_features.push("REPETITION"); + image_features.push("ANTIALIASING"); + + brush_image[buffer_kind] = Some(BrushShader::new( + "brush_image", + device, + &image_features, + options.precache_flags, + &shader_list, + use_advanced_blend_equation, + use_dual_source_blending, + profile, + )?); + + image_features.clear(); + } + + // All yuv_image configuration. + let mut yuv_features = Vec::new(); + let mut rgba_features = Vec::new(); + let mut fast_path_features = Vec::new(); + let yuv_shader_num = IMAGE_BUFFER_KINDS.len(); + let mut brush_yuv_image = Vec::new(); + // PrimitiveShader is not clonable. Use push() to initialize the vec. + for _ in 0 .. 
yuv_shader_num { + brush_yuv_image.push(None); + } + for image_buffer_kind in &IMAGE_BUFFER_KINDS { + if has_platform_support(*image_buffer_kind, device) { + yuv_features.push("YUV"); + fast_path_features.push("FAST_PATH"); + + let index = Self::get_compositing_shader_index( + *image_buffer_kind, + ); + + let feature_string = get_feature_string( + *image_buffer_kind, + texture_external_version, + ); + if feature_string != "" { + yuv_features.push(feature_string); + rgba_features.push(feature_string); + fast_path_features.push(feature_string); + } + + // YUV shaders are not compatible with ESSL1 + if *image_buffer_kind != ImageBufferKind::TextureExternal || + texture_external_version == TextureExternalVersion::ESSL3 { + let brush_shader = BrushShader::new( + "brush_yuv_image", + device, + &yuv_features, + options.precache_flags, + &shader_list, + false /* advanced blend */, + false /* dual source */, + profile, + )?; + brush_yuv_image[index] = Some(brush_shader); + } + + yuv_features.clear(); + rgba_features.clear(); + fast_path_features.clear(); + } + } + + let cs_line_decoration = LazilyCompiledShader::new( + ShaderKind::Cache(VertexArrayKind::LineDecoration), + "cs_line_decoration", + &[], + device, + options.precache_flags, + &shader_list, + profile, + )?; + + let cs_fast_linear_gradient = LazilyCompiledShader::new( + ShaderKind::Cache(VertexArrayKind::FastLinearGradient), + "cs_fast_linear_gradient", + &[], + device, + options.precache_flags, + &shader_list, + profile, + )?; + + let cs_linear_gradient = LazilyCompiledShader::new( + ShaderKind::Cache(VertexArrayKind::LinearGradient), + "cs_linear_gradient", + &[], + device, + options.precache_flags, + &shader_list, + profile, + )?; + + let cs_radial_gradient = LazilyCompiledShader::new( + ShaderKind::Cache(VertexArrayKind::RadialGradient), + "cs_radial_gradient", + &[], + device, + options.precache_flags, + &shader_list, + profile, + )?; + + let cs_conic_gradient = LazilyCompiledShader::new( + 
ShaderKind::Cache(VertexArrayKind::ConicGradient), + "cs_conic_gradient", + &[], + device, + options.precache_flags, + &shader_list, + profile, + )?; + + let cs_border_segment = LazilyCompiledShader::new( + ShaderKind::Cache(VertexArrayKind::Border), + "cs_border_segment", + &[], + device, + options.precache_flags, + &shader_list, + profile, + )?; + + let cs_border_solid = LazilyCompiledShader::new( + ShaderKind::Cache(VertexArrayKind::Border), + "cs_border_solid", + &[], + device, + options.precache_flags, + &shader_list, + profile, + )?; + + let composite = CompositorShaders::new(device, options.precache_flags, gl_type)?; + + Ok(Shaders { + cs_blur_a8, + cs_blur_rgba8, + cs_border_segment, + cs_line_decoration, + cs_fast_linear_gradient, + cs_linear_gradient, + cs_radial_gradient, + cs_conic_gradient, + cs_border_solid, + cs_scale, + cs_svg_filter, + brush_solid, + brush_image, + brush_fast_image, + brush_blend, + brush_mix_blend, + brush_yuv_image, + brush_linear_gradient, + brush_opacity, + brush_opacity_aa, + cs_clip_rectangle_slow, + cs_clip_rectangle_fast, + cs_clip_box_shadow, + cs_clip_image, + ps_text_run, + ps_text_run_dual_source, + ps_quad_textured, + ps_mask, + ps_mask_fast, + ps_split_composite, + ps_clear, + ps_copy, + composite, + }) + } + + fn get_compositing_shader_index(buffer_kind: ImageBufferKind) -> usize { + buffer_kind as usize + } + + pub fn get_composite_shader( + &mut self, + format: CompositeSurfaceFormat, + buffer_kind: ImageBufferKind, + features: CompositeFeatures, + ) -> &mut LazilyCompiledShader { + self.composite.get(format, buffer_kind, features) + } + + pub fn get_scale_shader( + &mut self, + buffer_kind: ImageBufferKind, + ) -> &mut LazilyCompiledShader { + let shader_index = Self::get_compositing_shader_index(buffer_kind); + self.cs_scale[shader_index] + .as_mut() + .expect("bug: unsupported scale shader requested") + } + + pub fn get(& + mut self, + key: &BatchKey, + mut features: BatchFeatures, + debug_flags: DebugFlags, + 
device: &Device, + ) -> &mut LazilyCompiledShader { + match key.kind { + BatchKind::Primitive => { + &mut self.ps_quad_textured + } + BatchKind::SplitComposite => { + &mut self.ps_split_composite + } + BatchKind::Brush(brush_kind) => { + // SWGL uses a native anti-aliasing implementation that bypasses the shader. + // Don't consider it in that case when deciding whether or not to use + // an alpha-pass shader. + if device.get_capabilities().uses_native_antialiasing { + features.remove(BatchFeatures::ANTIALIASING); + } + let brush_shader = match brush_kind { + BrushBatchKind::Solid => { + &mut self.brush_solid + } + BrushBatchKind::Image(image_buffer_kind) => { + if features.contains(BatchFeatures::ANTIALIASING) || + features.contains(BatchFeatures::REPETITION) { + + self.brush_image[image_buffer_kind as usize] + .as_mut() + .expect("Unsupported image shader kind") + } else { + self.brush_fast_image[image_buffer_kind as usize] + .as_mut() + .expect("Unsupported image shader kind") + } + } + BrushBatchKind::Blend => { + &mut self.brush_blend + } + BrushBatchKind::MixBlend { .. } => { + &mut self.brush_mix_blend + } + BrushBatchKind::LinearGradient => { + // SWGL uses a native clip mask implementation that bypasses the shader. + // Don't consider it in that case when deciding whether or not to use + // an alpha-pass shader. + if device.get_capabilities().uses_native_clip_mask { + features.remove(BatchFeatures::CLIP_MASK); + } + // Gradient brushes can optimistically use the opaque shader even + // with a blend mode if they don't require any features. + if !features.intersects( + BatchFeatures::ANTIALIASING + | BatchFeatures::REPETITION + | BatchFeatures::CLIP_MASK, + ) { + features.remove(BatchFeatures::ALPHA_PASS); + } + match brush_kind { + BrushBatchKind::LinearGradient => &mut self.brush_linear_gradient, + _ => panic!(), + } + } + BrushBatchKind::YuvImage(image_buffer_kind, ..) 
=> { + let shader_index = + Self::get_compositing_shader_index(image_buffer_kind); + self.brush_yuv_image[shader_index] + .as_mut() + .expect("Unsupported YUV shader kind") + } + BrushBatchKind::Opacity => { + if features.contains(BatchFeatures::ANTIALIASING) { + &mut self.brush_opacity_aa + } else { + &mut self.brush_opacity + } + } + }; + brush_shader.get(key.blend_mode, features, debug_flags) + } + BatchKind::TextRun(glyph_format) => { + let text_shader = match key.blend_mode { + BlendMode::SubpixelDualSource => self.ps_text_run_dual_source.as_mut().unwrap(), + _ => &mut self.ps_text_run, + }; + text_shader.get(glyph_format, debug_flags) + } + } + } + + pub fn deinit(mut self, device: &mut Device) { + for shader in self.cs_scale { + if let Some(shader) = shader { + shader.deinit(device); + } + } + self.cs_blur_a8.deinit(device); + self.cs_blur_rgba8.deinit(device); + self.cs_svg_filter.deinit(device); + self.brush_solid.deinit(device); + self.brush_blend.deinit(device); + self.brush_mix_blend.deinit(device); + self.brush_linear_gradient.deinit(device); + self.brush_opacity.deinit(device); + self.brush_opacity_aa.deinit(device); + self.cs_clip_rectangle_slow.deinit(device); + self.cs_clip_rectangle_fast.deinit(device); + self.cs_clip_box_shadow.deinit(device); + self.cs_clip_image.deinit(device); + self.ps_text_run.deinit(device); + if let Some(shader) = self.ps_text_run_dual_source { + shader.deinit(device); + } + for shader in self.brush_image { + if let Some(shader) = shader { + shader.deinit(device); + } + } + for shader in self.brush_fast_image { + if let Some(shader) = shader { + shader.deinit(device); + } + } + for shader in self.brush_yuv_image { + if let Some(shader) = shader { + shader.deinit(device); + } + } + self.cs_border_solid.deinit(device); + self.cs_fast_linear_gradient.deinit(device); + self.cs_linear_gradient.deinit(device); + self.cs_radial_gradient.deinit(device); + self.cs_conic_gradient.deinit(device); + 
self.cs_line_decoration.deinit(device); + self.cs_border_segment.deinit(device); + self.ps_split_composite.deinit(device); + self.ps_quad_textured.deinit(device); + self.ps_mask.deinit(device); + self.ps_mask_fast.deinit(device); + self.ps_clear.deinit(device); + self.ps_copy.deinit(device); + self.composite.deinit(device); + } +} + +pub type SharedShaders = Rc<RefCell<Shaders>>; + +pub struct CompositorShaders { + // Composite shaders. These are very simple shaders used to composite + // picture cache tiles into the framebuffer on platforms that do not have an + // OS Compositor (or we cannot use it). Such an OS Compositor (such as + // DirectComposite or CoreAnimation) handles the composition of the picture + // cache tiles at a lower level (e.g. in DWM for Windows); in that case we + // directly hand the picture cache surfaces over to the OS Compositor, and + // our own Composite shaders below never run. + // To composite external (RGB) surfaces we need various permutations of + // shaders with WR_FEATURE flags on or off based on the type of image + // buffer we're sourcing from (see IMAGE_BUFFER_KINDS). + rgba: Vec<Option<LazilyCompiledShader>>, + // A faster set of rgba composite shaders that do not support UV clamping + // or color modulation. + rgba_fast_path: Vec<Option<LazilyCompiledShader>>, + // The same set of composite shaders but with WR_FEATURE_YUV added. + yuv: Vec<Option<LazilyCompiledShader>>, +} + +impl CompositorShaders { + pub fn new( + device: &mut Device, + precache_flags: ShaderPrecacheFlags, + gl_type: GlType, + ) -> Result<Self, ShaderError> { + // We have to pass a profile around a bunch but we aren't recording the initialization + // so use a dummy one. 
+ let mut profile = TransactionProfile::new(); + + let mut yuv_features = Vec::new(); + let mut rgba_features = Vec::new(); + let mut fast_path_features = Vec::new(); + let mut rgba = Vec::new(); + let mut rgba_fast_path = Vec::new(); + let mut yuv = Vec::new(); + + let texture_external_version = if device.get_capabilities().supports_image_external_essl3 { + TextureExternalVersion::ESSL3 + } else { + TextureExternalVersion::ESSL1 + }; + + let feature_flags = get_shader_feature_flags(gl_type, texture_external_version, device); + let shader_list = get_shader_features(feature_flags); + + for _ in 0..IMAGE_BUFFER_KINDS.len() { + yuv.push(None); + rgba.push(None); + rgba_fast_path.push(None); + } + + for image_buffer_kind in &IMAGE_BUFFER_KINDS { + if !has_platform_support(*image_buffer_kind, device) { + continue; + } + + yuv_features.push("YUV"); + fast_path_features.push("FAST_PATH"); + + let index = Self::get_shader_index(*image_buffer_kind); + + let feature_string = get_feature_string( + *image_buffer_kind, + texture_external_version, + ); + if feature_string != "" { + yuv_features.push(feature_string); + rgba_features.push(feature_string); + fast_path_features.push(feature_string); + } + + // YUV shaders are not compatible with ESSL1 + if *image_buffer_kind != ImageBufferKind::TextureExternal || + texture_external_version == TextureExternalVersion::ESSL3 { + + yuv[index] = Some(LazilyCompiledShader::new( + ShaderKind::Composite, + "composite", + &yuv_features, + device, + precache_flags, + &shader_list, + &mut profile, + )?); + } + + rgba[index] = Some(LazilyCompiledShader::new( + ShaderKind::Composite, + "composite", + &rgba_features, + device, + precache_flags, + &shader_list, + &mut profile, + )?); + + rgba_fast_path[index] = Some(LazilyCompiledShader::new( + ShaderKind::Composite, + "composite", + &fast_path_features, + device, + precache_flags, + &shader_list, + &mut profile, + )?); + + yuv_features.clear(); + rgba_features.clear(); + 
fast_path_features.clear(); + } + + Ok(CompositorShaders { + rgba, + rgba_fast_path, + yuv, + }) + } + + pub fn get( + &mut self, + format: CompositeSurfaceFormat, + buffer_kind: ImageBufferKind, + features: CompositeFeatures, + ) -> &mut LazilyCompiledShader { + match format { + CompositeSurfaceFormat::Rgba => { + if features.contains(CompositeFeatures::NO_UV_CLAMP) + && features.contains(CompositeFeatures::NO_COLOR_MODULATION) + { + let shader_index = Self::get_shader_index(buffer_kind); + self.rgba_fast_path[shader_index] + .as_mut() + .expect("bug: unsupported rgba fast path shader requested") + } else { + let shader_index = Self::get_shader_index(buffer_kind); + self.rgba[shader_index] + .as_mut() + .expect("bug: unsupported rgba shader requested") + } + } + CompositeSurfaceFormat::Yuv => { + let shader_index = Self::get_shader_index(buffer_kind); + self.yuv[shader_index] + .as_mut() + .expect("bug: unsupported yuv shader requested") + } + } + } + + fn get_shader_index(buffer_kind: ImageBufferKind) -> usize { + buffer_kind as usize + } + + pub fn deinit(&mut self, device: &mut Device) { + for shader in self.rgba.drain(..) { + if let Some(shader) = shader { + shader.deinit(device); + } + } + for shader in self.rgba_fast_path.drain(..) { + if let Some(shader) = shader { + shader.deinit(device); + } + } + for shader in self.yuv.drain(..) 
{ + if let Some(shader) = shader { + shader.deinit(device); + } + } + } +} + +fn get_shader_feature_flags( + gl_type: GlType, + texture_external_version: TextureExternalVersion, + device: &Device +) -> ShaderFeatureFlags { + match gl_type { + GlType::Gl => ShaderFeatureFlags::GL, + GlType::Gles => { + let mut flags = ShaderFeatureFlags::GLES; + flags |= match texture_external_version { + TextureExternalVersion::ESSL3 => ShaderFeatureFlags::TEXTURE_EXTERNAL, + TextureExternalVersion::ESSL1 => ShaderFeatureFlags::TEXTURE_EXTERNAL_ESSL1, + }; + if device.supports_extension("GL_EXT_YUV_target") { + flags |= ShaderFeatureFlags::TEXTURE_EXTERNAL_BT709; + } + flags + } + } +} diff --git a/gfx/wr/webrender/src/renderer/upload.rs b/gfx/wr/webrender/src/renderer/upload.rs new file mode 100644 index 0000000000..0ba053cd76 --- /dev/null +++ b/gfx/wr/webrender/src/renderer/upload.rs @@ -0,0 +1,847 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +//! This module contains the convoluted logic that goes into uploading content into +//! the texture cache's textures. +//! +//! We need to support various combinations of code paths depending on the quirks of +//! each hardware/driver configuration: +//! - direct upload, +//! - staged upload via a pixel buffer object, +//! - staged upload via a direct upload to a staging texture where PBO's aren't supported, +//! - copy from the staging to destination textures, either via blits or batched draw calls. +//! +//! Conceptually a lot of this logic should probably be in the device module, but some code +//! here relies on submitting draw calls via the renderer. 
+ + +use std::mem; +use std::collections::VecDeque; +use std::sync::Arc; +use std::time::Duration; +use euclid::{Transform3D, point2}; +use time::precise_time_ns; +use malloc_size_of::MallocSizeOfOps; +use api::units::*; +use api::{ExternalImageSource, ImageBufferKind, ImageFormat}; +use crate::renderer::{ + Renderer, VertexArrayKind, RendererStats, TextureSampler, TEXTURE_CACHE_DBG_CLEAR_COLOR +}; +use crate::internal_types::{ + FastHashMap, TextureUpdateSource, Swizzle, TextureCacheUpdate, + CacheTextureId, RenderTargetInfo, +}; +use crate::device::{ + Device, UploadMethod, Texture, DrawTarget, UploadStagingBuffer, TextureFlags, TextureUploader, + TextureFilter, +}; +use crate::gpu_types::CopyInstance; +use crate::batch::BatchTextures; +use crate::texture_pack::{GuillotineAllocator, FreeRectSlice}; +use crate::profiler; +use crate::render_api::MemoryReport; + +pub const BATCH_UPLOAD_TEXTURE_SIZE: DeviceIntSize = DeviceIntSize::new(512, 512); + +/// Upload a number of items to texture cache textures. +/// +/// This is the main entry point of the texture cache upload code. +/// See also the module documentation for more information. +pub fn upload_to_texture_cache( + renderer: &mut Renderer, + update_list: FastHashMap<CacheTextureId, Vec<TextureCacheUpdate>>, +) { + + let mut stats = UploadStats { + num_draw_calls: 0, + upload_time: 0, + cpu_buffer_alloc_time: 0, + texture_alloc_time: 0, + cpu_copy_time: 0, + gpu_copy_commands_time: 0, + bytes_uploaded: 0, + items_uploaded: 0, + }; + + let upload_total_start = precise_time_ns(); + + let mut batch_upload_textures = Vec::new(); + + // A list of copies that must be performed from the temporary textures to the texture cache. + let mut batch_upload_copies = Vec::new(); + + // For each texture format, this stores a list of staging buffers + // and a texture allocator for packing the buffers. 
+ let mut batch_upload_buffers = FastHashMap::default(); + + // For best performance we use a single TextureUploader for all uploads. + // This allows us to fill PBOs more efficiently and therefore allocate fewer PBOs. + let mut uploader = renderer.device.upload_texture( + &mut renderer.texture_upload_pbo_pool, + ); + + let num_updates = update_list.len(); + + for (texture_id, updates) in update_list { + let texture = &renderer.texture_resolver.texture_cache_map[&texture_id].texture; + for update in updates { + let TextureCacheUpdate { rect, stride, offset, format_override, source } = update; + let mut arc_data = None; + let dummy_data; + let data = match source { + TextureUpdateSource::Bytes { ref data } => { + arc_data = Some(data.clone()); + &data[offset as usize ..] + } + TextureUpdateSource::External { id, channel_index } => { + let handler = renderer.external_image_handler + .as_mut() + .expect("Found external image, but no handler set!"); + // The filter is only relevant for NativeTexture external images. + match handler.lock(id, channel_index).source { + ExternalImageSource::RawData(data) => { + &data[offset as usize ..] + } + ExternalImageSource::Invalid => { + // Create a local buffer to fill the pbo. + let bpp = texture.get_format().bytes_per_pixel(); + let width = stride.unwrap_or(rect.width() * bpp); + let total_size = width * rect.height(); + // WR haven't support RGBAF32 format in texture_cache, so + // we use u8 type here. 
+ dummy_data = vec![0xFFu8; total_size as usize]; + &dummy_data + } + ExternalImageSource::NativeTexture(eid) => { + panic!("Unexpected external texture {:?} for the texture cache update of {:?}", eid, id); + } + } + } + TextureUpdateSource::DebugClear => { + let draw_target = DrawTarget::from_texture( + texture, + false, + ); + renderer.device.bind_draw_target(draw_target); + renderer.device.clear_target( + Some(TEXTURE_CACHE_DBG_CLEAR_COLOR), + None, + Some(draw_target.to_framebuffer_rect(update.rect.to_i32())) + ); + + continue; + } + }; + + stats.items_uploaded += 1; + + let use_batch_upload = renderer.device.use_batched_texture_uploads() && + texture.flags().contains(TextureFlags::IS_SHARED_TEXTURE_CACHE) && + rect.width() <= BATCH_UPLOAD_TEXTURE_SIZE.width && + rect.height() <= BATCH_UPLOAD_TEXTURE_SIZE.height && + rect.area() < renderer.device.batched_upload_threshold(); + + if use_batch_upload + && arc_data.is_some() + && matches!(renderer.device.upload_method(), &UploadMethod::Immediate) + && rect.area() > BATCH_UPLOAD_TEXTURE_SIZE.area() / 2 { + skip_staging_buffer( + &mut renderer.device, + &mut renderer.staging_texture_pool, + rect, + stride, + arc_data.unwrap(), + texture_id, + texture, + &mut batch_upload_buffers, + &mut batch_upload_textures, + &mut batch_upload_copies, + &mut stats, + ); + } else if use_batch_upload { + copy_into_staging_buffer( + &mut renderer.device, + &mut uploader, + &mut renderer.staging_texture_pool, + rect, + stride, + data, + texture_id, + texture, + &mut batch_upload_buffers, + &mut batch_upload_textures, + &mut batch_upload_copies, + &mut stats, + ); + } else { + let upload_start_time = precise_time_ns(); + + stats.bytes_uploaded += uploader.upload( + &mut renderer.device, + texture, + rect, + stride, + format_override, + data.as_ptr(), + data.len() + ); + + stats.upload_time += precise_time_ns() - upload_start_time; + } + + if let TextureUpdateSource::External { id, channel_index } = source { + let handler = 
renderer.external_image_handler + .as_mut() + .expect("Found external image, but no handler set!"); + handler.unlock(id, channel_index); + } + } + } + + let upload_start_time = precise_time_ns(); + // Upload batched texture updates to their temporary textures. + for batch_buffer in batch_upload_buffers.into_iter().map(|(_, (_, buffers))| buffers).flatten() { + let texture = &batch_upload_textures[batch_buffer.texture_index]; + match batch_buffer.staging_buffer { + StagingBufferKind::Pbo(pbo) => { + stats.bytes_uploaded += uploader.upload_staged( + &mut renderer.device, + texture, + DeviceIntRect::from_size(texture.get_dimensions()), + None, + pbo, + ); + } + StagingBufferKind::CpuBuffer { bytes, .. } => { + let bpp = texture.get_format().bytes_per_pixel(); + stats.bytes_uploaded += uploader.upload( + &mut renderer.device, + texture, + batch_buffer.upload_rect, + Some(BATCH_UPLOAD_TEXTURE_SIZE.width * bpp), + None, + bytes.as_ptr(), + bytes.len() + ); + renderer.staging_texture_pool.return_temporary_buffer(bytes); + } + StagingBufferKind::Image { bytes, stride } => { + stats.bytes_uploaded += uploader.upload( + &mut renderer.device, + texture, + batch_buffer.upload_rect, + stride, + None, + bytes.as_ptr(), + bytes.len() + ); + } + } + } + stats.upload_time += precise_time_ns() - upload_start_time; + + + // Flush all uploads, batched or otherwise. + let flush_start_time = precise_time_ns(); + uploader.flush(&mut renderer.device); + stats.upload_time += precise_time_ns() - flush_start_time; + + if !batch_upload_copies.is_empty() { + // Copy updates that were batch uploaded to their correct destination in the texture cache. + // Sort them by destination and source to minimize framebuffer binding changes. 
+ batch_upload_copies.sort_unstable_by_key(|b| (b.dest_texture_id.0, b.src_texture_index)); + + let gpu_copy_start = precise_time_ns(); + + if renderer.device.use_draw_calls_for_texture_copy() { + // Some drivers have a very high CPU overhead when submitting hundreds of small blit + // commands (low end intel drivers on Windows for example can take take 100+ ms submitting a + // few hundred blits). In this case we do the copy with batched draw calls. + copy_from_staging_to_cache_using_draw_calls( + renderer, + &mut stats, + &batch_upload_textures, + batch_upload_copies, + ); + } else { + copy_from_staging_to_cache( + renderer, + &batch_upload_textures, + batch_upload_copies, + ); + } + + stats.gpu_copy_commands_time += precise_time_ns() - gpu_copy_start; + } + + for texture in batch_upload_textures.drain(..) { + renderer.staging_texture_pool.return_texture(texture); + } + + // Update the profile counters. We use add instead of set because + // this function can be called several times per frame. + // We don't update the counters when their value is zero, so that + // the profiler can treat them as events and we can get notified + // when they happen. 
+ + let upload_total = precise_time_ns() - upload_total_start; + renderer.profile.add( + profiler::TOTAL_UPLOAD_TIME, + profiler::ns_to_ms(upload_total) + ); + + if num_updates > 0 { + renderer.profile.add(profiler::TEXTURE_UPLOADS, num_updates); + } + + if stats.bytes_uploaded > 0 { + renderer.profile.add( + profiler::TEXTURE_UPLOADS_MEM, + profiler::bytes_to_mb(stats.bytes_uploaded) + ); + } + + if stats.cpu_copy_time > 0 { + renderer.profile.add( + profiler::UPLOAD_CPU_COPY_TIME, + profiler::ns_to_ms(stats.cpu_copy_time) + ); + } + if stats.upload_time > 0 { + renderer.profile.add( + profiler::UPLOAD_TIME, + profiler::ns_to_ms(stats.upload_time) + ); + } + if stats.texture_alloc_time > 0 { + renderer.profile.add( + profiler::STAGING_TEXTURE_ALLOCATION_TIME, + profiler::ns_to_ms(stats.texture_alloc_time) + ); + } + if stats.cpu_buffer_alloc_time > 0 { + renderer.profile.add( + profiler::CPU_TEXTURE_ALLOCATION_TIME, + profiler::ns_to_ms(stats.cpu_buffer_alloc_time) + ); + } + if stats.num_draw_calls > 0{ + renderer.profile.add( + profiler::UPLOAD_NUM_COPY_BATCHES, + stats.num_draw_calls + ); + } + + if stats.gpu_copy_commands_time > 0 { + renderer.profile.add( + profiler::UPLOAD_GPU_COPY_TIME, + profiler::ns_to_ms(stats.gpu_copy_commands_time) + ); + } + + let add_markers = profiler::thread_is_being_profiled(); + if add_markers && stats.bytes_uploaded > 0 { + let details = format!("{} bytes uploaded, {} items", stats.bytes_uploaded, stats.items_uploaded); + profiler::add_text_marker(&"Texture uploads", &details, Duration::from_nanos(upload_total)); + } +} + +/// Copy an item into a batched upload staging buffer. 
+fn copy_into_staging_buffer<'a>( + device: &mut Device, + uploader: &mut TextureUploader< 'a>, + staging_texture_pool: &mut UploadTexturePool, + update_rect: DeviceIntRect, + update_stride: Option<i32>, + data: &[u8], + dest_texture_id: CacheTextureId, + texture: &Texture, + batch_upload_buffers: &mut FastHashMap<ImageFormat, (GuillotineAllocator, Vec<BatchUploadBuffer<'a>>)>, + batch_upload_textures: &mut Vec<Texture>, + batch_upload_copies: &mut Vec<BatchUploadCopy>, + stats: &mut UploadStats +) { + let (allocator, buffers) = batch_upload_buffers.entry(texture.get_format()) + .or_insert_with(|| (GuillotineAllocator::new(None), Vec::new())); + + // Allocate a region within the staging buffer for this update. If there is + // no room in an existing buffer then allocate another texture and buffer. + let (slice, origin) = match allocator.allocate(&update_rect.size()) { + Some((slice, origin)) => (slice, origin), + None => { + let new_slice = FreeRectSlice(buffers.len() as u32); + allocator.extend(new_slice, BATCH_UPLOAD_TEXTURE_SIZE, update_rect.size()); + + let texture_alloc_time_start = precise_time_ns(); + let staging_texture = staging_texture_pool.get_texture(device, texture.get_format()); + stats.texture_alloc_time = precise_time_ns() - texture_alloc_time_start; + + let texture_index = batch_upload_textures.len(); + batch_upload_textures.push(staging_texture); + + let cpu_buffer_alloc_start_time = precise_time_ns(); + let staging_buffer = match device.upload_method() { + UploadMethod::Immediate => StagingBufferKind::CpuBuffer { + bytes: staging_texture_pool.get_temporary_buffer(), + }, + UploadMethod::PixelBuffer(_) => { + let pbo = uploader.stage( + device, + texture.get_format(), + BATCH_UPLOAD_TEXTURE_SIZE, + ).unwrap(); + + StagingBufferKind::Pbo(pbo) + } + }; + stats.cpu_buffer_alloc_time += precise_time_ns() - cpu_buffer_alloc_start_time; + + buffers.push(BatchUploadBuffer { + staging_buffer, + texture_index, + upload_rect: DeviceIntRect::zero() + }); + + 
(new_slice, DeviceIntPoint::zero()) + } + }; + let buffer = &mut buffers[slice.0 as usize]; + let allocated_rect = DeviceIntRect::from_origin_and_size(origin, update_rect.size()); + buffer.upload_rect = buffer.upload_rect.union(&allocated_rect); + + batch_upload_copies.push(BatchUploadCopy { + src_texture_index: buffer.texture_index, + src_offset: allocated_rect.min, + dest_texture_id, + dest_offset: update_rect.min, + size: update_rect.size(), + }); + + unsafe { + let memcpy_start_time = precise_time_ns(); + let bpp = texture.get_format().bytes_per_pixel() as usize; + let width_bytes = update_rect.width() as usize * bpp; + let src_stride = update_stride.map_or(width_bytes, |stride| { + assert!(stride >= 0); + stride as usize + }); + let src_size = (update_rect.height() as usize - 1) * src_stride + width_bytes; + assert!(src_size <= data.len()); + + let src: &[mem::MaybeUninit<u8>] = std::slice::from_raw_parts(data.as_ptr() as *const _, src_size); + let (dst_stride, dst) = match &mut buffer.staging_buffer { + StagingBufferKind::Pbo(buffer) => ( + buffer.get_stride(), + buffer.get_mapping(), + ), + StagingBufferKind::CpuBuffer { bytes } => ( + BATCH_UPLOAD_TEXTURE_SIZE.width as usize * bpp, + &mut bytes[..], + ), + StagingBufferKind::Image { .. } => unreachable!(), + }; + + // copy the data line-by-line in to the buffer so that we do not overwrite + // any other region of the buffer. 
+ for y in 0..allocated_rect.height() as usize { + let src_start = y * src_stride; + let src_end = src_start + width_bytes; + let dst_start = (allocated_rect.min.y as usize + y as usize) * dst_stride + + allocated_rect.min.x as usize * bpp; + let dst_end = dst_start + width_bytes; + + dst[dst_start..dst_end].copy_from_slice(&src[src_start..src_end]) + } + + stats.cpu_copy_time += precise_time_ns() - memcpy_start_time; + } +} + +/// Take this code path instead of copying into a staging CPU buffer when the image +/// we would copy is large enough that it's unlikely anything else would fit in the +/// buffer, therefore we might as well copy directly from the source image's pixels. +fn skip_staging_buffer<'a>( + device: &mut Device, + staging_texture_pool: &mut UploadTexturePool, + update_rect: DeviceIntRect, + stride: Option<i32>, + data: Arc<Vec<u8>>, + dest_texture_id: CacheTextureId, + texture: &Texture, + batch_upload_buffers: &mut FastHashMap<ImageFormat, (GuillotineAllocator, Vec<BatchUploadBuffer<'a>>)>, + batch_upload_textures: &mut Vec<Texture>, + batch_upload_copies: &mut Vec<BatchUploadCopy>, + stats: &mut UploadStats +) { + let (_, buffers) = batch_upload_buffers.entry(texture.get_format()) + .or_insert_with(|| (GuillotineAllocator::new(None), Vec::new())); + + let texture_alloc_time_start = precise_time_ns(); + let staging_texture = staging_texture_pool.get_texture(device, texture.get_format()); + stats.texture_alloc_time = precise_time_ns() - texture_alloc_time_start; + + let texture_index = batch_upload_textures.len(); + batch_upload_textures.push(staging_texture); + + buffers.push(BatchUploadBuffer { + staging_buffer: StagingBufferKind::Image { bytes: data, stride }, + texture_index, + upload_rect: DeviceIntRect::from_size(update_rect.size()) + }); + + batch_upload_copies.push(BatchUploadCopy { + src_texture_index: texture_index, + src_offset: point2(0, 0), + dest_texture_id, + dest_offset: update_rect.min, + size: update_rect.size(), + }); +} + + +/// 
Copy from the staging PBOs or textures to texture cache textures using blit commands. +/// +/// Using blits instead of draw calls is supposedly more efficient but some drivers have +/// a very high per-command overhead so in some configurations we end up using +/// copy_from_staging_to_cache_using_draw_calls instead. +fn copy_from_staging_to_cache( + renderer: &mut Renderer, + batch_upload_textures: &[Texture], + batch_upload_copies: Vec<BatchUploadCopy>, +) { + for copy in batch_upload_copies { + let dest_texture = &renderer.texture_resolver.texture_cache_map[©.dest_texture_id].texture; + + renderer.device.copy_texture_sub_region( + &batch_upload_textures[copy.src_texture_index], + copy.src_offset.x as _, + copy.src_offset.y as _, + dest_texture, + copy.dest_offset.x as _, + copy.dest_offset.y as _, + copy.size.width as _, + copy.size.height as _, + ); + } +} + +/// Generate and submit composite shader batches to copy from +/// the staging textures to the destination cache textures. +/// +/// If this shows up in GPU time ptofiles we could replace it with +/// a simpler shader (composite.glsl is already quite simple). +fn copy_from_staging_to_cache_using_draw_calls( + renderer: &mut Renderer, + stats: &mut UploadStats, + batch_upload_textures: &[Texture], + batch_upload_copies: Vec<BatchUploadCopy>, +) { + let mut copy_instances = Vec::new(); + let mut prev_src = None; + let mut prev_dst = None; + let mut dst_texture_size = DeviceSize::new(0.0, 0.0); + + for copy in batch_upload_copies { + + let src_changed = prev_src != Some(copy.src_texture_index); + let dst_changed = prev_dst != Some(copy.dest_texture_id); + + if (src_changed || dst_changed) && !copy_instances.is_empty() { + renderer.draw_instanced_batch( + ©_instances, + VertexArrayKind::Copy, + // We bind the staging texture manually because it isn't known + // to the texture resolver. 
+ &BatchTextures::empty(), + &mut RendererStats::default(), + ); + + stats.num_draw_calls += 1; + copy_instances.clear(); + } + + if dst_changed { + let dest_texture = &renderer.texture_resolver.texture_cache_map[©.dest_texture_id].texture; + dst_texture_size = dest_texture.get_dimensions().to_f32(); + + let draw_target = DrawTarget::from_texture(dest_texture, false); + renderer.device.bind_draw_target(draw_target); + + renderer.shaders + .borrow_mut() + .ps_copy + .bind( + &mut renderer.device, + &Transform3D::identity(), + None, + &mut renderer.renderer_errors, + &mut renderer.profile, + ); + + prev_dst = Some(copy.dest_texture_id); + } + + if src_changed { + renderer.device.bind_texture( + TextureSampler::Color0, + &batch_upload_textures[copy.src_texture_index], + Swizzle::default(), + ); + + prev_src = Some(copy.src_texture_index) + } + + let src_rect = DeviceRect::from_origin_and_size( + copy.src_offset.to_f32(), + copy.size.to_f32(), + ); + + let dst_rect = DeviceRect::from_origin_and_size( + copy.dest_offset.to_f32(), + copy.size.to_f32(), + ); + + copy_instances.push(CopyInstance { + src_rect, + dst_rect, + dst_texture_size, + }); + } + + if !copy_instances.is_empty() { + renderer.draw_instanced_batch( + ©_instances, + VertexArrayKind::Copy, + &BatchTextures::empty(), + &mut RendererStats::default(), + ); + + stats.num_draw_calls += 1; + } +} + +/// A very basic pool to avoid reallocating staging textures as well as staging +/// CPU side buffers. +pub struct UploadTexturePool { + /// The textures in the pool associated with a last used frame index. + /// + /// The outer array corresponds to each of teh three supported texture formats. + textures: [VecDeque<(Texture, u64)>; 3], + // Frame at which to deallocate some textures if there are too many in the pool, + // for each format. + delay_texture_deallocation: [u64; 3], + current_frame: u64, + + /// Temporary buffers that are used when using staging uploads + glTexImage2D. 
+ /// + /// Temporary buffers aren't used asynchronously so they can be reused every frame. + /// To keep things simple we always allocate enough memory for formats with four bytes + /// per pixel (more than we need for alpha-only textures but it works just as well). + temporary_buffers: Vec<Vec<mem::MaybeUninit<u8>>>, + min_temporary_buffers: usize, + delay_buffer_deallocation: u64, +} + +impl UploadTexturePool { + pub fn new() -> Self { + UploadTexturePool { + textures: [VecDeque::new(), VecDeque::new(), VecDeque::new()], + delay_texture_deallocation: [0; 3], + current_frame: 0, + temporary_buffers: Vec::new(), + min_temporary_buffers: 0, + delay_buffer_deallocation: 0, + } + } + + fn format_index(&self, format: ImageFormat) -> usize { + match format { + ImageFormat::RGBA8 => 0, + ImageFormat::BGRA8 => 1, + ImageFormat::R8 => 2, + _ => { panic!("unexpected format"); } + } + } + + pub fn begin_frame(&mut self) { + self.current_frame += 1; + self.min_temporary_buffers = self.temporary_buffers.len(); + } + + /// Create or reuse a staging texture. + /// + /// See also return_texture. + pub fn get_texture(&mut self, device: &mut Device, format: ImageFormat) -> Texture { + + // First try to reuse a texture from the pool. + // "available" here means hasn't been used for 2 frames to avoid stalls. + // No need to scan the vector. Newer textures are always pushed at the back + // of the vector so we know the first element is the least recently used. + let format_idx = self.format_index(format); + let can_reuse = self.textures[format_idx].get(0) + .map(|tex| self.current_frame - tex.1 > 2) + .unwrap_or(false); + + if can_reuse { + return self.textures[format_idx].pop_front().unwrap().0; + } + + // If we couldn't find an available texture, create a new one. 
+ + device.create_texture( + ImageBufferKind::Texture2D, + format, + BATCH_UPLOAD_TEXTURE_SIZE.width, + BATCH_UPLOAD_TEXTURE_SIZE.height, + TextureFilter::Nearest, + // Currently we need render target support as we always use glBlitFramebuffer + // to copy the texture data. Instead, we should use glCopyImageSubData on some + // platforms, and avoid creating the FBOs in that case. + Some(RenderTargetInfo { has_depth: false }), + ) + } + + /// Hand the staging texture back to the pool after being done with uploads. + /// + /// The texture must have been obtained from this pool via get_texture. + pub fn return_texture(&mut self, texture: Texture) { + let format_idx = self.format_index(texture.get_format()); + self.textures[format_idx].push_back((texture, self.current_frame)); + } + + /// Create or reuse a temporary CPU buffer. + /// + /// These buffers are used in the batched upload path when PBOs are not supported. + /// Content is first written to the temporary buffer and uploaded via a single + /// glTexSubImage2D call. + pub fn get_temporary_buffer(&mut self) -> Vec<mem::MaybeUninit<u8>> { + let buffer = self.temporary_buffers.pop().unwrap_or_else(|| { + vec![mem::MaybeUninit::new(0); BATCH_UPLOAD_TEXTURE_SIZE.area() as usize * 4] + }); + self.min_temporary_buffers = self.min_temporary_buffers.min(self.temporary_buffers.len()); + buffer + } + + /// Return memory that was obtained from this pool via get_temporary_buffer. + pub fn return_temporary_buffer(&mut self, buffer: Vec<mem::MaybeUninit<u8>>) { + assert_eq!(buffer.len(), BATCH_UPLOAD_TEXTURE_SIZE.area() as usize * 4); + self.temporary_buffers.push(buffer); + } + + /// Deallocate this pool's CPU and GPU memory. + pub fn delete_textures(&mut self, device: &mut Device) { + for format in &mut self.textures { + while let Some(texture) = format.pop_back() { + device.delete_texture(texture.0) + } + } + self.temporary_buffers.clear(); + } + + /// Deallocate some textures if there are too many for a long time. 
+ pub fn end_frame(&mut self, device: &mut Device) { + for format_idx in 0..self.textures.len() { + // Count the number of reusable staging textures. + // if it stays high for a large number of frames, truncate it back to 8-ish + // over multiple frames. + + let mut num_reusable_textures = 0; + for texture in &self.textures[format_idx] { + if self.current_frame - texture.1 > 2 { + num_reusable_textures += 1; + } + } + + if num_reusable_textures < 8 { + // Don't deallocate textures for another 120 frames. + self.delay_texture_deallocation[format_idx] = self.current_frame + 120; + } + + // Deallocate up to 4 staging textures every frame. + let to_remove = if self.current_frame > self.delay_texture_deallocation[format_idx] { + num_reusable_textures.min(4) + } else { + 0 + }; + + for _ in 0..to_remove { + let texture = self.textures[format_idx].pop_front().unwrap().0; + device.delete_texture(texture); + } + } + + // Similar logic for temporary CPU buffers. Our calls to get and return + // temporary buffers should have been balanced for this frame, but the call + // get_temporary_buffer will allocate a buffer if the vec is empty. Since we + // carry these buffers from frame to frame, we keep track of the smallest + // length of the temporary_buffers vec that we encountered this frame. Those + // buffers were not touched and we deallocate some if there are a lot of them. + let unused_buffers = self.min_temporary_buffers; + if unused_buffers < 8 { + self.delay_buffer_deallocation = self.current_frame + 120; + } + let to_remove = if self.current_frame > self.delay_buffer_deallocation { + unused_buffers.min(4) + } else { + 0 + }; + for _ in 0..to_remove { + // Unlike textures it doesn't matter whether we pop from the front or back + // of the vector. 
+ self.temporary_buffers.pop(); + } + } + + pub fn report_memory_to(&self, report: &mut MemoryReport, size_op_funs: &MallocSizeOfOps) { + for buf in &self.temporary_buffers { + report.upload_staging_memory += unsafe { (size_op_funs.size_of_op)(buf.as_ptr() as *const _) }; + } + + for format in &self.textures { + for texture in format { + report.upload_staging_textures += texture.0.size_in_bytes(); + } + } + } +} + +struct UploadStats { + num_draw_calls: u32, + upload_time: u64, + cpu_buffer_alloc_time: u64, + texture_alloc_time: u64, + cpu_copy_time: u64, + gpu_copy_commands_time: u64, + bytes_uploaded: usize, + items_uploaded: usize, +} + +#[derive(Debug)] +enum StagingBufferKind<'a> { + Pbo(UploadStagingBuffer<'a>), + CpuBuffer { bytes: Vec<mem::MaybeUninit<u8>> }, + Image { bytes: Arc<Vec<u8>>, stride: Option<i32> }, +} +#[derive(Debug)] +struct BatchUploadBuffer<'a> { + staging_buffer: StagingBufferKind<'a>, + texture_index: usize, + // A rectangle containing all items going into this staging texture, so + // that we can avoid uploading the entire area if we are using glTexSubImage2d. + upload_rect: DeviceIntRect, +} + +// On some devices performing many small texture uploads is slow, so instead we batch +// updates in to a small number of uploads to temporary textures, then copy from those +// textures to the correct place in the texture cache. +// A list of temporary textures that batches of updates are uploaded to. +#[derive(Debug)] +struct BatchUploadCopy { + // Index within batch_upload_textures + src_texture_index: usize, + src_offset: DeviceIntPoint, + dest_texture_id: CacheTextureId, + dest_offset: DeviceIntPoint, + size: DeviceIntSize, +} diff --git a/gfx/wr/webrender/src/renderer/vertex.rs b/gfx/wr/webrender/src/renderer/vertex.rs new file mode 100644 index 0000000000..ff555363d8 --- /dev/null +++ b/gfx/wr/webrender/src/renderer/vertex.rs @@ -0,0 +1,1154 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +//! Rendering logic related to the vertex shaders and their states, including +//! - Vertex Array Objects +//! - vertex layout descriptors +//! - textures bound at vertex stage + +use std::{marker::PhantomData, mem, num::NonZeroUsize, ops}; +use api::units::*; +use crate::{ + device::{ + Device, Texture, TextureFilter, TextureUploader, UploadPBOPool, VertexUsageHint, VAO, + }, + frame_builder::Frame, + gpu_types::{PrimitiveHeaderI, PrimitiveHeaderF, TransformData}, + internal_types::Swizzle, + render_task::RenderTaskData, +}; + +pub const VERTEX_TEXTURE_EXTRA_ROWS: i32 = 10; + +pub const MAX_VERTEX_TEXTURE_WIDTH: usize = webrender_build::MAX_VERTEX_TEXTURE_WIDTH; + +pub mod desc { + use crate::device::{VertexAttribute, VertexAttributeKind, VertexDescriptor}; + + pub const PRIM_INSTANCES: VertexDescriptor = VertexDescriptor { + vertex_attributes: &[VertexAttribute { + name: "aPosition", + count: 2, + kind: VertexAttributeKind::U8Norm, + }], + instance_attributes: &[VertexAttribute { + name: "aData", + count: 4, + kind: VertexAttributeKind::I32, + }], + }; + + pub const BLUR: VertexDescriptor = VertexDescriptor { + vertex_attributes: &[VertexAttribute { + name: "aPosition", + count: 2, + kind: VertexAttributeKind::U8Norm, + }], + instance_attributes: &[ + VertexAttribute { + name: "aBlurRenderTaskAddress", + count: 1, + kind: VertexAttributeKind::U16, + }, + VertexAttribute { + name: "aBlurSourceTaskAddress", + count: 1, + kind: VertexAttributeKind::U16, + }, + VertexAttribute { + name: "aBlurDirection", + count: 1, + kind: VertexAttributeKind::I32, + }, + ], + }; + + pub const LINE: VertexDescriptor = VertexDescriptor { + vertex_attributes: &[VertexAttribute { + name: "aPosition", + count: 2, + kind: VertexAttributeKind::U8Norm, + }], + instance_attributes: &[ + VertexAttribute { + name: "aTaskRect", + count: 4, + kind: VertexAttributeKind::F32, + }, 
+ VertexAttribute { + name: "aLocalSize", + count: 2, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aWavyLineThickness", + count: 1, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aStyle", + count: 1, + kind: VertexAttributeKind::I32, + }, + VertexAttribute { + name: "aAxisSelect", + count: 1, + kind: VertexAttributeKind::F32, + }, + ], + }; + + pub const FAST_LINEAR_GRADIENT: VertexDescriptor = VertexDescriptor { + vertex_attributes: &[VertexAttribute { + name: "aPosition", + count: 2, + kind: VertexAttributeKind::U8Norm, + }], + instance_attributes: &[ + VertexAttribute { + name: "aTaskRect", + count: 4, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aColor0", + count: 4, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aColor1", + count: 4, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aAxisSelect", + count: 1, + kind: VertexAttributeKind::F32, + }, + ], + }; + + pub const LINEAR_GRADIENT: VertexDescriptor = VertexDescriptor { + vertex_attributes: &[VertexAttribute { + name: "aPosition", + count: 2, + kind: VertexAttributeKind::U8Norm, + }], + instance_attributes: &[ + VertexAttribute { + name: "aTaskRect", + count: 4, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aStartPoint", + count: 2, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aEndPoint", + count: 2, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aScale", + count: 2, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aExtendMode", + count: 1, + kind: VertexAttributeKind::I32, + }, + VertexAttribute { + name: "aGradientStopsAddress", + count: 1, + kind: VertexAttributeKind::I32, + }, + ], + }; + + pub const RADIAL_GRADIENT: VertexDescriptor = VertexDescriptor { + vertex_attributes: &[VertexAttribute { + name: "aPosition", + count: 2, + kind: VertexAttributeKind::U8Norm, + }], + instance_attributes: &[ + VertexAttribute { 
+ name: "aTaskRect", + count: 4, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aCenter", + count: 2, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aScale", + count: 2, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aStartRadius", + count: 1, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aEndRadius", + count: 1, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aXYRatio", + count: 1, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aExtendMode", + count: 1, + kind: VertexAttributeKind::I32, + }, + VertexAttribute { + name: "aGradientStopsAddress", + count: 1, + kind: VertexAttributeKind::I32, + }, + ], + }; + + pub const CONIC_GRADIENT: VertexDescriptor = VertexDescriptor { + vertex_attributes: &[VertexAttribute { + name: "aPosition", + count: 2, + kind: VertexAttributeKind::U8Norm, + }], + instance_attributes: &[ + VertexAttribute { + name: "aTaskRect", + count: 4, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aCenter", + count: 2, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aScale", + count: 2, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aStartOffset", + count: 1, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aEndOffset", + count: 1, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aAngle", + count: 1, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aExtendMode", + count: 1, + kind: VertexAttributeKind::I32, + }, + VertexAttribute { + name: "aGradientStopsAddress", + count: 1, + kind: VertexAttributeKind::I32, + }, + ], + }; + + pub const BORDER: VertexDescriptor = VertexDescriptor { + vertex_attributes: &[VertexAttribute { + name: "aPosition", + count: 2, + kind: VertexAttributeKind::U8Norm, + }], + instance_attributes: &[ + VertexAttribute { + name: "aTaskOrigin", + count: 2, + kind: VertexAttributeKind::F32, + 
}, + VertexAttribute { + name: "aRect", + count: 4, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aColor0", + count: 4, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aColor1", + count: 4, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aFlags", + count: 1, + kind: VertexAttributeKind::I32, + }, + VertexAttribute { + name: "aWidths", + count: 2, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aRadii", + count: 2, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aClipParams1", + count: 4, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aClipParams2", + count: 4, + kind: VertexAttributeKind::F32, + }, + ], + }; + + pub const SCALE: VertexDescriptor = VertexDescriptor { + vertex_attributes: &[VertexAttribute { + name: "aPosition", + count: 2, + kind: VertexAttributeKind::U8Norm, + }], + instance_attributes: &[ + VertexAttribute { + name: "aScaleTargetRect", + count: 4, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aScaleSourceRect", + count: 4, + kind: VertexAttributeKind::F32, + }, + ], + }; + + pub const CLIP_RECT: VertexDescriptor = VertexDescriptor { + vertex_attributes: &[VertexAttribute { + name: "aPosition", + count: 2, + kind: VertexAttributeKind::U8Norm, + }], + instance_attributes: &[ + // common clip attributes + VertexAttribute { + name: "aClipDeviceArea", + count: 4, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aClipOrigins", + count: 4, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aDevicePixelScale", + count: 1, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aTransformIds", + count: 2, + kind: VertexAttributeKind::I32, + }, + // specific clip attributes + VertexAttribute { + name: "aClipLocalPos", + count: 2, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aClipLocalRect", + count: 4, + kind: VertexAttributeKind::F32, + }, + 
VertexAttribute { + name: "aClipMode", + count: 1, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aClipRect_TL", + count: 4, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aClipRadii_TL", + count: 4, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aClipRect_TR", + count: 4, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aClipRadii_TR", + count: 4, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aClipRect_BL", + count: 4, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aClipRadii_BL", + count: 4, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aClipRect_BR", + count: 4, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aClipRadii_BR", + count: 4, + kind: VertexAttributeKind::F32, + }, + ], + }; + + pub const CLIP_BOX_SHADOW: VertexDescriptor = VertexDescriptor { + vertex_attributes: &[VertexAttribute { + name: "aPosition", + count: 2, + kind: VertexAttributeKind::U8Norm, + }], + instance_attributes: &[ + // common clip attributes + VertexAttribute { + name: "aClipDeviceArea", + count: 4, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aClipOrigins", + count: 4, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aDevicePixelScale", + count: 1, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aTransformIds", + count: 2, + kind: VertexAttributeKind::I32, + }, + // specific clip attributes + VertexAttribute { + name: "aClipDataResourceAddress", + count: 2, + kind: VertexAttributeKind::U16, + }, + VertexAttribute { + name: "aClipSrcRectSize", + count: 2, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aClipMode", + count: 1, + kind: VertexAttributeKind::I32, + }, + VertexAttribute { + name: "aStretchMode", + count: 2, + kind: VertexAttributeKind::I32, + }, + VertexAttribute { + name: "aClipDestRect", + count: 4, + kind: 
VertexAttributeKind::F32, + }, + ], + }; + + pub const CLIP_IMAGE: VertexDescriptor = VertexDescriptor { + vertex_attributes: &[VertexAttribute { + name: "aPosition", + count: 2, + kind: VertexAttributeKind::U8Norm, + }], + instance_attributes: &[ + // common clip attributes + VertexAttribute { + name: "aClipDeviceArea", + count: 4, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aClipOrigins", + count: 4, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aDevicePixelScale", + count: 1, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aTransformIds", + count: 2, + kind: VertexAttributeKind::I32, + }, + // specific clip attributes + VertexAttribute { + name: "aClipTileRect", + count: 4, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aClipDataResourceAddress", + count: 2, + kind: VertexAttributeKind::U16, + }, + VertexAttribute { + name: "aClipLocalRect", + count: 4, + kind: VertexAttributeKind::F32, + }, + ], + }; + + pub const GPU_CACHE_UPDATE: VertexDescriptor = VertexDescriptor { + vertex_attributes: &[ + VertexAttribute { + name: "aPosition", + count: 2, + kind: VertexAttributeKind::U16Norm, + }, + VertexAttribute { + name: "aValue", + count: 4, + kind: VertexAttributeKind::F32, + }, + ], + instance_attributes: &[], + }; + + pub const RESOLVE: VertexDescriptor = VertexDescriptor { + vertex_attributes: &[VertexAttribute { + name: "aPosition", + count: 2, + kind: VertexAttributeKind::U8Norm, + }], + instance_attributes: &[VertexAttribute { + name: "aRect", + count: 4, + kind: VertexAttributeKind::F32, + }], + }; + + pub const SVG_FILTER: VertexDescriptor = VertexDescriptor { + vertex_attributes: &[VertexAttribute { + name: "aPosition", + count: 2, + kind: VertexAttributeKind::U8Norm, + }], + instance_attributes: &[ + VertexAttribute { + name: "aFilterRenderTaskAddress", + count: 1, + kind: VertexAttributeKind::U16, + }, + VertexAttribute { + name: "aFilterInput1TaskAddress", + count: 1, 
+ kind: VertexAttributeKind::U16, + }, + VertexAttribute { + name: "aFilterInput2TaskAddress", + count: 1, + kind: VertexAttributeKind::U16, + }, + VertexAttribute { + name: "aFilterKind", + count: 1, + kind: VertexAttributeKind::U16, + }, + VertexAttribute { + name: "aFilterInputCount", + count: 1, + kind: VertexAttributeKind::U16, + }, + VertexAttribute { + name: "aFilterGenericInt", + count: 1, + kind: VertexAttributeKind::U16, + }, + VertexAttribute { + name: "aFilterExtraDataAddress", + count: 2, + kind: VertexAttributeKind::U16, + }, + ], + }; + + pub const MASK: VertexDescriptor = VertexDescriptor { + vertex_attributes: &[VertexAttribute { + name: "aPosition", + count: 2, + kind: VertexAttributeKind::U8Norm, + }], + instance_attributes: &[ + VertexAttribute { + name: "aData", + count: 4, + kind: VertexAttributeKind::I32, + }, + VertexAttribute { + name: "aClipData", + count: 4, + kind: VertexAttributeKind::I32, + }, + ], + }; + + pub const VECTOR_STENCIL: VertexDescriptor = VertexDescriptor { + vertex_attributes: &[VertexAttribute { + name: "aPosition", + count: 2, + kind: VertexAttributeKind::U8Norm, + }], + instance_attributes: &[ + VertexAttribute { + name: "aFromPosition", + count: 2, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aCtrlPosition", + count: 2, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aToPosition", + count: 2, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aFromNormal", + count: 2, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aCtrlNormal", + count: 2, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aToNormal", + count: 2, + kind: VertexAttributeKind::F32, + }, + VertexAttribute { + name: "aPathID", + count: 1, + kind: VertexAttributeKind::U16, + }, + VertexAttribute { + name: "aPad", + count: 1, + kind: VertexAttributeKind::U16, + }, + ], + }; + + pub const VECTOR_COVER: VertexDescriptor = VertexDescriptor { + vertex_attributes: 
&[VertexAttribute {
            name: "aPosition",
            count: 2,
            kind: VertexAttributeKind::U8Norm,
        }],
        instance_attributes: &[
            VertexAttribute {
                name: "aTargetRect",
                count: 4,
                kind: VertexAttributeKind::I32,
            },
            VertexAttribute {
                name: "aStencilOrigin",
                count: 2,
                kind: VertexAttributeKind::I32,
            },
            VertexAttribute {
                name: "aSubpixel",
                count: 1,
                kind: VertexAttributeKind::U16,
            },
            VertexAttribute {
                name: "aPad",
                count: 1,
                kind: VertexAttributeKind::U16,
            },
        ],
    };

    /// Layout used to build the composite VAO: a two-component normalized-u8
    /// `aPosition` per vertex, plus eight four-component `f32` attributes per
    /// instance.
    pub const COMPOSITE: VertexDescriptor = VertexDescriptor {
        vertex_attributes: &[VertexAttribute {
            name: "aPosition",
            count: 2,
            kind: VertexAttributeKind::U8Norm,
        }],
        instance_attributes: &[
            VertexAttribute {
                name: "aLocalRect",
                count: 4,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "aDeviceClipRect",
                count: 4,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "aColor",
                count: 4,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "aParams",
                count: 4,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "aUvRect0",
                count: 4,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "aUvRect1",
                count: 4,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "aUvRect2",
                count: 4,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "aTransform",
                count: 4,
                kind: VertexAttributeKind::F32,
            },
        ],
    };

    /// Layout used to build the clear VAO: `aPosition` per vertex, and a
    /// four-component `f32` `aRect` and `aColor` per instance.
    pub const CLEAR: VertexDescriptor = VertexDescriptor {
        vertex_attributes: &[VertexAttribute {
            name: "aPosition",
            count: 2,
            kind: VertexAttributeKind::U8Norm,
        }],
        instance_attributes: &[
            VertexAttribute {
                name: "aRect",
                count: 4,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "aColor",
                count: 4,
                kind: VertexAttributeKind::F32,
            },
        ],
    };

    /// Layout used to build the copy VAO: `aPosition` per vertex, and per
    /// instance a source rect, a destination rect (four `f32` each) and a
    /// two-component `f32` destination texture size.
    pub const COPY: VertexDescriptor = VertexDescriptor {
        vertex_attributes: &[VertexAttribute {
            name: "aPosition",
            count: 2,
            kind: VertexAttributeKind::U8Norm,
        }],
        instance_attributes: &[
            VertexAttribute {
                name: "a_src_rect",
                count: 4,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "a_dst_rect",
                count: 4,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "a_dst_texture_size",
                count: 2,
                kind: VertexAttributeKind::F32,
            },
        ],
    };
}

/// Identifies a vertex array layout.
///
/// Values of this enum are used to index into [`RendererVAOs`]. Note that
/// `VectorStencil` and `VectorCover` have no VAO stored there: indexing
/// `RendererVAOs` with either of them hits `unreachable!()`.
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum VertexArrayKind {
    Primitive,
    Blur,
    ClipImage,
    ClipRect,
    ClipBoxShadow,
    VectorStencil,
    VectorCover,
    Border,
    Scale,
    LineDecoration,
    FastLinearGradient,
    LinearGradient,
    RadialGradient,
    ConicGradient,
    Resolve,
    SvgFilter,
    Composite,
    Clear,
    Copy,
    Mask,
}

/// A lazily created GPU texture that stores an array of `T` items.
///
/// The texture does not exist until the first call to [`update`](Self::update);
/// after that call it is always present until [`deinit`](Self::deinit).
pub struct VertexDataTexture<T> {
    /// The backing texture, `None` until the first `update`.
    texture: Option<Texture>,
    /// Format handed to `Device::create_texture` whenever the texture is (re)created.
    format: api::ImageFormat,
    /// Ties the texture to the item type without storing any `T`.
    _marker: PhantomData<T>,
}

impl<T> VertexDataTexture<T> {
    /// Creates an empty wrapper. No GPU texture is allocated here.
    pub fn new(format: api::ImageFormat) -> Self {
        Self {
            texture: None,
            format,
            _marker: PhantomData,
        }
    }

    /// Returns a borrow of the GPU texture. Panics if it hasn't been initialized.
    pub fn texture(&self) -> &Texture {
        self.texture.as_ref().unwrap()
    }

    /// Returns an estimate of the GPU memory consumed by this VertexDataTexture.
    ///
    /// Reports zero while no texture has been created yet.
    pub fn size_in_bytes(&self) -> usize {
        self.texture.as_ref().map_or(0, |t| t.size_in_bytes())
    }

    /// Uploads `data` into the texture, (re)allocating the texture if its
    /// current height does not suit the amount of data.
    ///
    /// * `device` - used to delete and create the backing texture.
    /// * `texture_uploader` - receives the upload request; the request borrows
    ///   the texture for `'a`.
    /// * `data` - the items to upload. It is taken mutably because extra
    ///   capacity may be reserved so that a whole number of rows can be read
    ///   from its buffer; its length is never changed.
    ///
    /// If `data` is empty and a texture already exists, nothing happens.
    ///
    /// `size_of::<T>()` is expected to be a multiple of 16 bytes (one texel),
    /// and at least one item must fit in a row; both are checked with debug
    /// assertions only.
    pub fn update<'a>(
        &'a mut self,
        device: &mut Device,
        texture_uploader: &mut TextureUploader<'a>,
        data: &mut Vec<T>,
    ) {
        debug_assert!(mem::size_of::<T>() % 16 == 0);
        let texels_per_item = mem::size_of::<T>() / 16;
        let items_per_row = MAX_VERTEX_TEXTURE_WIDTH / texels_per_item;
        debug_assert_ne!(items_per_row, 0);

        // Ensure we always end up with a texture when leaving this method.
        let mut len = data.len();
        if len == 0 {
            if self.texture.is_some() {
                return;
            }
            // No data and no texture yet: pretend there is one full row so
            // that a texture gets created below.
            data.reserve(items_per_row);
            len = items_per_row;
        } else {
            // Extend the data array to have enough capacity to upload at least
            // a multiple of the row size. This ensures memory safety when the
            // array is passed to OpenGL to upload to the GPU.
            let extra = len % items_per_row;
            if extra != 0 {
                let padding = items_per_row - extra;
                data.reserve(padding);
                len += padding;
            }
        }

        // `len` is a multiple of `items_per_row` at this point.
        let needed_height = (len / items_per_row) as i32;
        let existing_height = self
            .texture
            .as_ref()
            .map_or(0, |t| t.get_dimensions().height);

        // Create a new texture if needed.
        //
        // These textures are generally very small, which is why we don't bother
        // with incremental updates and just re-upload every frame. For most pages
        // they're one row each, and on stress tests like css-francine they end up
        // in the 6-14 range. So we size the texture tightly to what we need (usually
        // 1), and shrink it if the waste would be more than `VERTEX_TEXTURE_EXTRA_ROWS`
        // rows. This helps with memory overhead, especially because there are several
        // instances of these textures per Renderer.
        if needed_height > existing_height
            || needed_height + VERTEX_TEXTURE_EXTRA_ROWS < existing_height
        {
            // Drop the existing texture, if any.
            if let Some(t) = self.texture.take() {
                device.delete_texture(t);
            }

            let texture = device.create_texture(
                api::ImageBufferKind::Texture2D,
                self.format,
                MAX_VERTEX_TEXTURE_WIDTH as i32,
                // Ensure height is at least two to work around
                // https://bugs.chromium.org/p/angleproject/issues/detail?id=3039
                needed_height.max(2),
                TextureFilter::Nearest,
                None,
            );
            self.texture = Some(texture);
        }

        // Note: the actual width can be larger than the logical one, with a few texels
        // of each row unused at the tail. This is needed because there is still hardware
        // (like Intel iGPUs) that prefers power-of-two sizes of textures ([1]).
        //
        // [1] https://software.intel.com/en-us/articles/opengl-performance-tips-power-of-two-textures-have-better-performance
        let logical_width = if needed_height == 1 {
            // Single row: only as wide as the real (unpadded) data.
            data.len() * texels_per_item
        } else {
            // Several rows: the widest whole number of items that fits in a row.
            MAX_VERTEX_TEXTURE_WIDTH - (MAX_VERTEX_TEXTURE_WIDTH % texels_per_item)
        };

        let rect = DeviceIntRect::from_size(
            DeviceIntSize::new(logical_width as i32, needed_height),
        );

        // `len` may exceed `data.len()` because of the row padding above; the
        // reserved capacity is what keeps the read within the allocation.
        debug_assert!(len <= data.capacity(), "CPU copy will read out of bounds");
        texture_uploader.upload(
            device,
            self.texture(),
            rect,
            None,
            None,
            data.as_ptr(),
            len,
        );
    }

    /// Consumes the wrapper and deletes the GPU texture, if one was created.
    pub fn deinit(mut self, device: &mut Device) {
        if let Some(t) = self.texture.take() {
            device.delete_texture(t);
        }
    }
}

/// The set of per-frame data textures: primitive headers (float and integer),
/// the transform palette and the render task data.
pub struct VertexDataTextures {
    prim_header_f_texture: VertexDataTexture<PrimitiveHeaderF>,
    prim_header_i_texture: VertexDataTexture<PrimitiveHeaderI>,
    transforms_texture: VertexDataTexture<TransformData>,
    render_task_texture: VertexDataTexture<RenderTaskData>,
}

impl VertexDataTextures {
    /// Creates the four wrappers. The integer primitive headers use
    /// `RGBAI32`; the other three use `RGBAF32`. No GPU textures are
    /// allocated until `update` is called.
    pub fn new() -> Self {
        VertexDataTextures {
            prim_header_f_texture: VertexDataTexture::new(api::ImageFormat::RGBAF32),
            prim_header_i_texture: VertexDataTexture::new(api::ImageFormat::RGBAI32),
            transforms_texture: VertexDataTexture::new(api::ImageFormat::RGBAF32),
            render_task_texture: VertexDataTexture::new(api::ImageFormat::RGBAF32),
        }
    }

    /// Uploads the frame's data into all four textures, flushes the uploads,
    /// and binds each texture to its sampler slot.
    ///
    /// `frame` is taken mutably because the per-texture update may reserve
    /// extra capacity in the frame's vectors.
    pub fn update(&mut self, device: &mut Device, pbo_pool: &mut UploadPBOPool, frame: &mut Frame) {
        let mut texture_uploader = device.upload_texture(pbo_pool);
        self.prim_header_f_texture.update(
            device,
            &mut texture_uploader,
            &mut frame.prim_headers.headers_float,
        );
        self.prim_header_i_texture.update(
            device,
            &mut texture_uploader,
            &mut frame.prim_headers.headers_int,
        );
        self.transforms_texture
            .update(device, &mut texture_uploader, &mut frame.transform_palette);
        self.render_task_texture.update(
            device,
            &mut texture_uploader,
            &mut frame.render_tasks.task_data,
        );

        // Flush and drop the texture uploader now, so that
        // we can borrow the textures to bind them.
        texture_uploader.flush(device);

        device.bind_texture(
            super::TextureSampler::PrimitiveHeadersF,
            &self.prim_header_f_texture.texture(),
            Swizzle::default(),
        );
        device.bind_texture(
            super::TextureSampler::PrimitiveHeadersI,
            &self.prim_header_i_texture.texture(),
            Swizzle::default(),
        );
        device.bind_texture(
            super::TextureSampler::TransformPalette,
            &self.transforms_texture.texture(),
            Swizzle::default(),
        );
        device.bind_texture(
            super::TextureSampler::RenderTasks,
            &self.render_task_texture.texture(),
            Swizzle::default(),
        );
    }

    /// Returns the summed GPU memory estimate of the four textures.
    pub fn size_in_bytes(&self) -> usize {
        self.prim_header_f_texture.size_in_bytes()
            + self.prim_header_i_texture.size_in_bytes()
            + self.transforms_texture.size_in_bytes()
            + self.render_task_texture.size_in_bytes()
    }

    /// Consumes the set and deletes every texture that was created.
    pub fn deinit(self, device: &mut Device) {
        self.transforms_texture.deinit(device);
        self.prim_header_f_texture.deinit(device);
        self.prim_header_i_texture.deinit(device);
        self.render_task_texture.deinit(device);
    }
}

/// One VAO per vertex array layout used by the renderer.
///
/// Look up a VAO by indexing with a [`VertexArrayKind`].
pub struct RendererVAOs {
    prim_vao: VAO,
    blur_vao: VAO,
    clip_rect_vao: VAO,
    clip_box_shadow_vao: VAO,
    clip_image_vao: VAO,
    border_vao: VAO,
    line_vao: VAO,
    scale_vao: VAO,
    fast_linear_gradient_vao: VAO,
    linear_gradient_vao: VAO,
    radial_gradient_vao: VAO,
    conic_gradient_vao: VAO,
    resolve_vao: VAO,
    svg_filter_vao: VAO,
    composite_vao: VAO,
    clear_vao: VAO,
    copy_vao: VAO,
    mask_vao: VAO,
}

impl RendererVAOs {
    /// Creates all VAOs.
    ///
    /// The primitive VAO is created first and filled with quad geometry; every
    /// other VAO is then created from its own descriptor together with the
    /// primitive VAO.
    /// NOTE(review): presumably the other VAOs reuse the primitive VAO's
    /// vertex/index buffers - confirm against `Device::create_vao_with_new_instances`.
    ///
    /// * `indexed_quads` - when `Some(count)`, the instance divisor is 0 and
    ///   the index and vertex buffers hold `count` copies of the unit quad;
    ///   when `None`, the divisor is 1 and a single quad is stored.
    ///
    /// # Panics
    ///
    /// Panics if `indexed_quads` is larger than 16384: each quad uses four
    /// vertices and the indices are 16-bit, so more quads than that cannot be
    /// addressed.
    pub fn new(device: &mut Device, indexed_quads: Option<NonZeroUsize>) -> Self {
        const QUAD_INDICES: [u16; 6] = [0, 1, 2, 2, 1, 3];
        const QUAD_VERTICES: [[u8; 2]; 4] = [[0, 0], [0xFF, 0], [0, 0xFF], [0xFF, 0xFF]];
        // Four vertices per quad, and the largest emitted index
        // (`count * 4 - 1`) has to be representable as `u16`.
        const MAX_INDEXED_QUADS: usize = (u16::MAX as usize + 1) / 4;

        let instance_divisor = if indexed_quads.is_some() { 0 } else { 1 };
        let prim_vao = device.create_vao(&desc::PRIM_INSTANCES, instance_divisor);

        device.bind_vao(&prim_vao);
        match indexed_quads {
            Some(count) => {
                // Checking only `count < u16::MAX` is not enough: the index
                // arithmetic below is done in `u16` and would overflow for
                // any count above `MAX_INDEXED_QUADS`.
                assert!(
                    count.get() <= MAX_INDEXED_QUADS,
                    "too many indexed quads for 16-bit indices: {} (max {})",
                    count.get(),
                    MAX_INDEXED_QUADS,
                );
                let quad_indices = (0 .. count.get() as u16)
                    .flat_map(|instance| QUAD_INDICES.iter().map(move |&index| instance * 4 + index))
                    .collect::<Vec<_>>();
                device.update_vao_indices(&prim_vao, &quad_indices, VertexUsageHint::Static);
                let quad_vertices = (0 .. count.get() as u16)
                    .flat_map(|_| QUAD_VERTICES.iter().cloned())
                    .collect::<Vec<_>>();
                device.update_vao_main_vertices(&prim_vao, &quad_vertices, VertexUsageHint::Static);
            }
            None => {
                device.update_vao_indices(&prim_vao, &QUAD_INDICES, VertexUsageHint::Static);
                device.update_vao_main_vertices(&prim_vao, &QUAD_VERTICES, VertexUsageHint::Static);
            }
        }

        // `prim_vao` is moved into the struct last because every other field
        // initializer borrows it.
        RendererVAOs {
            blur_vao: device.create_vao_with_new_instances(&desc::BLUR, &prim_vao),
            clip_rect_vao: device.create_vao_with_new_instances(&desc::CLIP_RECT, &prim_vao),
            clip_box_shadow_vao: device
                .create_vao_with_new_instances(&desc::CLIP_BOX_SHADOW, &prim_vao),
            clip_image_vao: device.create_vao_with_new_instances(&desc::CLIP_IMAGE, &prim_vao),
            border_vao: device.create_vao_with_new_instances(&desc::BORDER, &prim_vao),
            scale_vao: device.create_vao_with_new_instances(&desc::SCALE, &prim_vao),
            line_vao: device.create_vao_with_new_instances(&desc::LINE, &prim_vao),
            fast_linear_gradient_vao: device.create_vao_with_new_instances(&desc::FAST_LINEAR_GRADIENT, &prim_vao),
            linear_gradient_vao: device.create_vao_with_new_instances(&desc::LINEAR_GRADIENT, &prim_vao),
            radial_gradient_vao: device.create_vao_with_new_instances(&desc::RADIAL_GRADIENT, &prim_vao),
            conic_gradient_vao: device.create_vao_with_new_instances(&desc::CONIC_GRADIENT, &prim_vao),
            resolve_vao: device.create_vao_with_new_instances(&desc::RESOLVE, &prim_vao),
            svg_filter_vao: device.create_vao_with_new_instances(&desc::SVG_FILTER, &prim_vao),
            composite_vao: device.create_vao_with_new_instances(&desc::COMPOSITE, &prim_vao),
            clear_vao: device.create_vao_with_new_instances(&desc::CLEAR, &prim_vao),
            copy_vao: device.create_vao_with_new_instances(&desc::COPY, &prim_vao),
            mask_vao: device.create_vao_with_new_instances(&desc::MASK, &prim_vao),
            prim_vao,
        }
    }

    /// Consumes the set and deletes every VAO through `device`.
    pub fn deinit(self, device: &mut Device) {
        device.delete_vao(self.prim_vao);
        device.delete_vao(self.resolve_vao);
        device.delete_vao(self.clip_rect_vao);
        device.delete_vao(self.clip_box_shadow_vao);
        device.delete_vao(self.clip_image_vao);
        device.delete_vao(self.fast_linear_gradient_vao);
        device.delete_vao(self.linear_gradient_vao);
        device.delete_vao(self.radial_gradient_vao);
        device.delete_vao(self.conic_gradient_vao);
        device.delete_vao(self.blur_vao);
        device.delete_vao(self.line_vao);
        device.delete_vao(self.border_vao);
        device.delete_vao(self.scale_vao);
        device.delete_vao(self.svg_filter_vao);
        device.delete_vao(self.composite_vao);
        device.delete_vao(self.clear_vao);
        device.delete_vao(self.copy_vao);
        device.delete_vao(self.mask_vao);
    }
}

/// Looks up the VAO that belongs to a [`VertexArrayKind`].
///
/// The match is exhaustive on purpose, so that adding a variant forces a
/// decision here. `VectorStencil` and `VectorCover` have no VAO and panic via
/// `unreachable!()`.
impl ops::Index<VertexArrayKind> for RendererVAOs {
    type Output = VAO;
    fn index(&self, kind: VertexArrayKind) -> &VAO {
        match kind {
            VertexArrayKind::Primitive => &self.prim_vao,
            VertexArrayKind::ClipImage => &self.clip_image_vao,
            VertexArrayKind::ClipRect => &self.clip_rect_vao,
            VertexArrayKind::ClipBoxShadow => &self.clip_box_shadow_vao,
            VertexArrayKind::Blur => &self.blur_vao,
            VertexArrayKind::VectorStencil | VertexArrayKind::VectorCover => unreachable!(),
            VertexArrayKind::Border => &self.border_vao,
            VertexArrayKind::Scale => &self.scale_vao,
            VertexArrayKind::LineDecoration => &self.line_vao,
            VertexArrayKind::FastLinearGradient => &self.fast_linear_gradient_vao,
            VertexArrayKind::LinearGradient => &self.linear_gradient_vao,
            VertexArrayKind::RadialGradient => &self.radial_gradient_vao,
            VertexArrayKind::ConicGradient => &self.conic_gradient_vao,
            VertexArrayKind::Resolve => &self.resolve_vao,
            VertexArrayKind::SvgFilter => &self.svg_filter_vao,
            VertexArrayKind::Composite => &self.composite_vao,
            VertexArrayKind::Clear => &self.clear_vao,
            VertexArrayKind::Copy => &self.copy_vao,
            VertexArrayKind::Mask => &self.mask_vao,
        }
    }
}