summary | refs | log | tree | commit | diff | stats
path: root/gfx/wr/webrender/src/renderer/upload.rs
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 19:33:14 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 19:33:14 +0000
commit: 36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree: 105e8c98ddea1c1e4784a60a5a6410fa416be2de /gfx/wr/webrender/src/renderer/upload.rs
parent: Initial commit. (diff)
download: firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz
firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip
Adding upstream version 115.7.0esr. (tag: upstream/115.7.0esr)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'gfx/wr/webrender/src/renderer/upload.rs')
-rw-r--r-- gfx/wr/webrender/src/renderer/upload.rs 847
1 files changed, 847 insertions, 0 deletions
diff --git a/gfx/wr/webrender/src/renderer/upload.rs b/gfx/wr/webrender/src/renderer/upload.rs
new file mode 100644
index 0000000000..0ba053cd76
--- /dev/null
+++ b/gfx/wr/webrender/src/renderer/upload.rs
@@ -0,0 +1,847 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! This module contains the convoluted logic that goes into uploading content into
+//! the texture cache's textures.
+//!
+//! We need to support various combinations of code paths depending on the quirks of
+//! each hardware/driver configuration:
+//! - direct upload,
+//! - staged upload via a pixel buffer object,
+//! - staged upload via a direct upload to a staging texture where PBOs aren't supported,
+//! - copy from the staging to destination textures, either via blits or batched draw calls.
+//!
+//! Conceptually a lot of this logic should probably be in the device module, but some code
+//! here relies on submitting draw calls via the renderer.
+
+
+use std::mem;
+use std::collections::VecDeque;
+use std::sync::Arc;
+use std::time::Duration;
+use euclid::{Transform3D, point2};
+use time::precise_time_ns;
+use malloc_size_of::MallocSizeOfOps;
+use api::units::*;
+use api::{ExternalImageSource, ImageBufferKind, ImageFormat};
+use crate::renderer::{
+ Renderer, VertexArrayKind, RendererStats, TextureSampler, TEXTURE_CACHE_DBG_CLEAR_COLOR
+};
+use crate::internal_types::{
+ FastHashMap, TextureUpdateSource, Swizzle, TextureCacheUpdate,
+ CacheTextureId, RenderTargetInfo,
+};
+use crate::device::{
+ Device, UploadMethod, Texture, DrawTarget, UploadStagingBuffer, TextureFlags, TextureUploader,
+ TextureFilter,
+};
+use crate::gpu_types::CopyInstance;
+use crate::batch::BatchTextures;
+use crate::texture_pack::{GuillotineAllocator, FreeRectSlice};
+use crate::profiler;
+use crate::render_api::MemoryReport;
+
+/// Dimensions of the staging textures used for batched uploads.
+///
+/// Updates that are wider or taller than this are never batched, and the
+/// temporary CPU buffers handed out by `UploadTexturePool` are sized for a
+/// texture of these dimensions at four bytes per pixel.
+pub const BATCH_UPLOAD_TEXTURE_SIZE: DeviceIntSize = DeviceIntSize::new(512, 512);
+
+/// Upload a number of items to texture cache textures.
+///
+/// This is the main entry point of the texture cache upload code.
+/// See also the module documentation for more information.
+///
+/// `update_list` maps each destination cache texture to the updates targeting it.
+/// Each update is either uploaded directly through the `TextureUploader`, or (when
+/// batched uploads are enabled and the update is small enough) staged into a
+/// temporary `BATCH_UPLOAD_TEXTURE_SIZE` texture and copied into place afterwards.
+/// `TextureUpdateSource::DebugClear` updates are not uploads at all: the target rect
+/// is cleared to `TEXTURE_CACHE_DBG_CLEAR_COLOR` instead.
+///
+/// Timing and size statistics gathered along the way are added to the renderer's
+/// profile counters at the end.
+///
+/// # Panics
+///
+/// Panics if an update comes from an external image while no external image handler
+/// is set, or if the external image handler hands back a native texture.
+pub fn upload_to_texture_cache(
+ renderer: &mut Renderer,
+ update_list: FastHashMap<CacheTextureId, Vec<TextureCacheUpdate>>,
+) {
+
+ let mut stats = UploadStats {
+ num_draw_calls: 0,
+ upload_time: 0,
+ cpu_buffer_alloc_time: 0,
+ texture_alloc_time: 0,
+ cpu_copy_time: 0,
+ gpu_copy_commands_time: 0,
+ bytes_uploaded: 0,
+ items_uploaded: 0,
+ };
+
+ let upload_total_start = precise_time_ns();
+
+ // The temporary (staging) textures that batched updates are uploaded to.
+ let mut batch_upload_textures = Vec::new();
+
+ // A list of copies that must be performed from the temporary textures to the texture cache.
+ let mut batch_upload_copies = Vec::new();
+
+ // For each texture format, this stores a list of staging buffers
+ // and a texture allocator for packing the buffers.
+ let mut batch_upload_buffers = FastHashMap::default();
+
+ // For best performance we use a single TextureUploader for all uploads.
+ // This allows us to fill PBOs more efficiently and therefore allocate fewer PBOs.
+ let mut uploader = renderer.device.upload_texture(
+ &mut renderer.texture_upload_pbo_pool,
+ );
+
+ // NOTE(review): this is the number of entries in the map (textures with pending
+ // updates), not the number of individual updates; `stats.items_uploaded` counts
+ // the latter. Confirm which one TEXTURE_UPLOADS is meant to report.
+ let num_updates = update_list.len();
+
+ for (texture_id, updates) in update_list {
+ let texture = &renderer.texture_resolver.texture_cache_map[&texture_id].texture;
+ for update in updates {
+ let TextureCacheUpdate { rect, stride, offset, format_override, source } = update;
+ // Set when the source bytes are reference counted, so that the
+ // skip_staging_buffer path can keep them alive until the deferred upload.
+ let mut arc_data = None;
+ // Declared out here so that the placeholder buffer created for invalid
+ // external images lives as long as the `data` slice borrowing it.
+ let dummy_data;
+ let data = match source {
+ TextureUpdateSource::Bytes { ref data } => {
+ arc_data = Some(data.clone());
+ &data[offset as usize ..]
+ }
+ TextureUpdateSource::External { id, channel_index } => {
+ let handler = renderer.external_image_handler
+ .as_mut()
+ .expect("Found external image, but no handler set!");
+ // The filter is only relevant for NativeTexture external images.
+ match handler.lock(id, channel_index).source {
+ ExternalImageSource::RawData(data) => {
+ &data[offset as usize ..]
+ }
+ ExternalImageSource::Invalid => {
+ // Create a local buffer to fill the pbo.
+ let bpp = texture.get_format().bytes_per_pixel();
+ let width = stride.unwrap_or(rect.width() * bpp);
+ let total_size = width * rect.height();
+ // WR doesn't support the RGBAF32 format in the texture cache yet,
+ // so we use the u8 type here.
+ dummy_data = vec![0xFFu8; total_size as usize];
+ &dummy_data
+ }
+ ExternalImageSource::NativeTexture(eid) => {
+ panic!("Unexpected external texture {:?} for the texture cache update of {:?}", eid, id);
+ }
+ }
+ }
+ TextureUpdateSource::DebugClear => {
+ // Nothing to upload: clear the update's rect in the destination
+ // texture and move on to the next update.
+ let draw_target = DrawTarget::from_texture(
+ texture,
+ false,
+ );
+ renderer.device.bind_draw_target(draw_target);
+ renderer.device.clear_target(
+ Some(TEXTURE_CACHE_DBG_CLEAR_COLOR),
+ None,
+ Some(draw_target.to_framebuffer_rect(update.rect.to_i32()))
+ );
+
+ continue;
+ }
+ };
+
+ stats.items_uploaded += 1;
+
+ // Batching is only considered for shared texture cache textures, and only
+ // for updates that fit in a staging texture and are below the device's
+ // batched upload threshold.
+ let use_batch_upload = renderer.device.use_batched_texture_uploads() &&
+ texture.flags().contains(TextureFlags::IS_SHARED_TEXTURE_CACHE) &&
+ rect.width() <= BATCH_UPLOAD_TEXTURE_SIZE.width &&
+ rect.height() <= BATCH_UPLOAD_TEXTURE_SIZE.height &&
+ rect.area() < renderer.device.batched_upload_threshold();
+
+ // With immediate uploads, an update covering more than half of a staging
+ // texture is uploaded straight from the source bytes rather than being
+ // copied into a staging CPU buffer first.
+ if use_batch_upload
+ && arc_data.is_some()
+ && matches!(renderer.device.upload_method(), &UploadMethod::Immediate)
+ && rect.area() > BATCH_UPLOAD_TEXTURE_SIZE.area() / 2 {
+ skip_staging_buffer(
+ &mut renderer.device,
+ &mut renderer.staging_texture_pool,
+ rect,
+ stride,
+ arc_data.unwrap(),
+ texture_id,
+ texture,
+ &mut batch_upload_buffers,
+ &mut batch_upload_textures,
+ &mut batch_upload_copies,
+ &mut stats,
+ );
+ } else if use_batch_upload {
+ copy_into_staging_buffer(
+ &mut renderer.device,
+ &mut uploader,
+ &mut renderer.staging_texture_pool,
+ rect,
+ stride,
+ data,
+ texture_id,
+ texture,
+ &mut batch_upload_buffers,
+ &mut batch_upload_textures,
+ &mut batch_upload_copies,
+ &mut stats,
+ );
+ } else {
+ // Direct upload into the destination texture.
+ let upload_start_time = precise_time_ns();
+
+ stats.bytes_uploaded += uploader.upload(
+ &mut renderer.device,
+ texture,
+ rect,
+ stride,
+ format_override,
+ data.as_ptr(),
+ data.len()
+ );
+
+ stats.upload_time += precise_time_ns() - upload_start_time;
+ }
+
+ // Release the lock taken on the external image above.
+ if let TextureUpdateSource::External { id, channel_index } = source {
+ let handler = renderer.external_image_handler
+ .as_mut()
+ .expect("Found external image, but no handler set!");
+ handler.unlock(id, channel_index);
+ }
+ }
+ }
+
+ let upload_start_time = precise_time_ns();
+ // Upload batched texture updates to their temporary textures.
+ for batch_buffer in batch_upload_buffers.into_iter().map(|(_, (_, buffers))| buffers).flatten() {
+ let texture = &batch_upload_textures[batch_buffer.texture_index];
+ match batch_buffer.staging_buffer {
+ StagingBufferKind::Pbo(pbo) => {
+ stats.bytes_uploaded += uploader.upload_staged(
+ &mut renderer.device,
+ texture,
+ DeviceIntRect::from_size(texture.get_dimensions()),
+ None,
+ pbo,
+ );
+ }
+ StagingBufferKind::CpuBuffer { bytes, .. } => {
+ let bpp = texture.get_format().bytes_per_pixel();
+ stats.bytes_uploaded += uploader.upload(
+ &mut renderer.device,
+ texture,
+ batch_buffer.upload_rect,
+ Some(BATCH_UPLOAD_TEXTURE_SIZE.width * bpp),
+ None,
+ bytes.as_ptr(),
+ bytes.len()
+ );
+ renderer.staging_texture_pool.return_temporary_buffer(bytes);
+ }
+ StagingBufferKind::Image { bytes, stride } => {
+ stats.bytes_uploaded += uploader.upload(
+ &mut renderer.device,
+ texture,
+ batch_buffer.upload_rect,
+ stride,
+ None,
+ bytes.as_ptr(),
+ bytes.len()
+ );
+ }
+ }
+ }
+ stats.upload_time += precise_time_ns() - upload_start_time;
+
+
+ // Flush all uploads, batched or otherwise.
+ let flush_start_time = precise_time_ns();
+ uploader.flush(&mut renderer.device);
+ stats.upload_time += precise_time_ns() - flush_start_time;
+
+ if !batch_upload_copies.is_empty() {
+ // Copy updates that were batch uploaded to their correct destination in the texture cache.
+ // Sort them by destination and source to minimize framebuffer binding changes.
+ batch_upload_copies.sort_unstable_by_key(|b| (b.dest_texture_id.0, b.src_texture_index));
+
+ let gpu_copy_start = precise_time_ns();
+
+ if renderer.device.use_draw_calls_for_texture_copy() {
+ // Some drivers have a very high CPU overhead when submitting hundreds of small blit
+ // commands (low end intel drivers on Windows for example can take 100+ ms submitting a
+ // few hundred blits). In this case we do the copy with batched draw calls.
+ copy_from_staging_to_cache_using_draw_calls(
+ renderer,
+ &mut stats,
+ &batch_upload_textures,
+ batch_upload_copies,
+ );
+ } else {
+ copy_from_staging_to_cache(
+ renderer,
+ &batch_upload_textures,
+ batch_upload_copies,
+ );
+ }
+
+ stats.gpu_copy_commands_time += precise_time_ns() - gpu_copy_start;
+ }
+
+ // Hand the staging textures back to the pool so that later frames can reuse them.
+ for texture in batch_upload_textures.drain(..) {
+ renderer.staging_texture_pool.return_texture(texture);
+ }
+
+ // Update the profile counters. We use add instead of set because
+ // this function can be called several times per frame.
+ // We don't update the counters when their value is zero, so that
+ // the profiler can treat them as events and we can get notified
+ // when they happen.
+
+ let upload_total = precise_time_ns() - upload_total_start;
+ renderer.profile.add(
+ profiler::TOTAL_UPLOAD_TIME,
+ profiler::ns_to_ms(upload_total)
+ );
+
+ if num_updates > 0 {
+ renderer.profile.add(profiler::TEXTURE_UPLOADS, num_updates);
+ }
+
+ if stats.bytes_uploaded > 0 {
+ renderer.profile.add(
+ profiler::TEXTURE_UPLOADS_MEM,
+ profiler::bytes_to_mb(stats.bytes_uploaded)
+ );
+ }
+
+ if stats.cpu_copy_time > 0 {
+ renderer.profile.add(
+ profiler::UPLOAD_CPU_COPY_TIME,
+ profiler::ns_to_ms(stats.cpu_copy_time)
+ );
+ }
+ if stats.upload_time > 0 {
+ renderer.profile.add(
+ profiler::UPLOAD_TIME,
+ profiler::ns_to_ms(stats.upload_time)
+ );
+ }
+ if stats.texture_alloc_time > 0 {
+ renderer.profile.add(
+ profiler::STAGING_TEXTURE_ALLOCATION_TIME,
+ profiler::ns_to_ms(stats.texture_alloc_time)
+ );
+ }
+ if stats.cpu_buffer_alloc_time > 0 {
+ renderer.profile.add(
+ profiler::CPU_TEXTURE_ALLOCATION_TIME,
+ profiler::ns_to_ms(stats.cpu_buffer_alloc_time)
+ );
+ }
+ if stats.num_draw_calls > 0{
+ renderer.profile.add(
+ profiler::UPLOAD_NUM_COPY_BATCHES,
+ stats.num_draw_calls
+ );
+ }
+
+ if stats.gpu_copy_commands_time > 0 {
+ renderer.profile.add(
+ profiler::UPLOAD_GPU_COPY_TIME,
+ profiler::ns_to_ms(stats.gpu_copy_commands_time)
+ );
+ }
+
+ // Only build the marker text when a profiler is actually listening.
+ let add_markers = profiler::thread_is_being_profiled();
+ if add_markers && stats.bytes_uploaded > 0 {
+ let details = format!("{} bytes uploaded, {} items", stats.bytes_uploaded, stats.items_uploaded);
+ profiler::add_text_marker(&"Texture uploads", &details, Duration::from_nanos(upload_total));
+ }
+}
+
+/// Copy an item into a batched upload staging buffer.
+///
+/// A region of `update_rect`'s size is reserved in one of the staging buffers kept
+/// for the destination texture's format. When none of the existing buffers has room,
+/// a new staging texture and a matching staging buffer (a PBO mapping or a temporary
+/// CPU buffer, depending on the device's upload method) are allocated first.
+///
+/// The pixels in `data` are then copied row by row into the reserved region, and a
+/// `BatchUploadCopy` describing the later staging-to-cache copy is appended to
+/// `batch_upload_copies`.
+///
+/// `update_stride` is the distance in bytes between two rows of `data`; `None` means
+/// that the rows are tightly packed.
+///
+/// # Panics
+///
+/// Panics if `update_stride` is negative, if `data` is too short for the described
+/// image, or if a PBO staging buffer cannot be obtained.
+fn copy_into_staging_buffer<'a>(
+    device: &mut Device,
+    uploader: &mut TextureUploader<'a>,
+    staging_texture_pool: &mut UploadTexturePool,
+    update_rect: DeviceIntRect,
+    update_stride: Option<i32>,
+    data: &[u8],
+    dest_texture_id: CacheTextureId,
+    texture: &Texture,
+    batch_upload_buffers: &mut FastHashMap<ImageFormat, (GuillotineAllocator, Vec<BatchUploadBuffer<'a>>)>,
+    batch_upload_textures: &mut Vec<Texture>,
+    batch_upload_copies: &mut Vec<BatchUploadCopy>,
+    stats: &mut UploadStats
+) {
+    let (allocator, buffers) = batch_upload_buffers.entry(texture.get_format())
+        .or_insert_with(|| (GuillotineAllocator::new(None), Vec::new()));
+
+    // Allocate a region within the staging buffer for this update. If there is
+    // no room in an existing buffer then allocate another texture and buffer.
+    let (slice, origin) = match allocator.allocate(&update_rect.size()) {
+        Some((slice, origin)) => (slice, origin),
+        None => {
+            // The allocator's slice index doubles as the index into `buffers`.
+            let new_slice = FreeRectSlice(buffers.len() as u32);
+            allocator.extend(new_slice, BATCH_UPLOAD_TEXTURE_SIZE, update_rect.size());
+
+            let texture_alloc_time_start = precise_time_ns();
+            let staging_texture = staging_texture_pool.get_texture(device, texture.get_format());
+            // Accumulate rather than overwrite: several staging textures can be
+            // allocated while processing a single list of updates.
+            stats.texture_alloc_time += precise_time_ns() - texture_alloc_time_start;
+
+            let texture_index = batch_upload_textures.len();
+            batch_upload_textures.push(staging_texture);
+
+            let cpu_buffer_alloc_start_time = precise_time_ns();
+            let staging_buffer = match device.upload_method() {
+                UploadMethod::Immediate => StagingBufferKind::CpuBuffer {
+                    bytes: staging_texture_pool.get_temporary_buffer(),
+                },
+                UploadMethod::PixelBuffer(_) => {
+                    let pbo = uploader.stage(
+                        device,
+                        texture.get_format(),
+                        BATCH_UPLOAD_TEXTURE_SIZE,
+                    ).unwrap();
+
+                    StagingBufferKind::Pbo(pbo)
+                }
+            };
+            stats.cpu_buffer_alloc_time += precise_time_ns() - cpu_buffer_alloc_start_time;
+
+            buffers.push(BatchUploadBuffer {
+                staging_buffer,
+                texture_index,
+                upload_rect: DeviceIntRect::zero()
+            });
+
+            (new_slice, DeviceIntPoint::zero())
+        }
+    };
+    let buffer = &mut buffers[slice.0 as usize];
+    let allocated_rect = DeviceIntRect::from_origin_and_size(origin, update_rect.size());
+    // Grow the area that needs uploading so that it covers the new item.
+    buffer.upload_rect = buffer.upload_rect.union(&allocated_rect);
+
+    batch_upload_copies.push(BatchUploadCopy {
+        src_texture_index: buffer.texture_index,
+        src_offset: allocated_rect.min,
+        dest_texture_id,
+        dest_offset: update_rect.min,
+        size: update_rect.size(),
+    });
+
+    // SAFETY: the only operation in this block that relies on `unsafe` is
+    // `slice::from_raw_parts`. The pointer comes from `data`, which stays borrowed
+    // for the whole block, and the assertion right before the call guarantees that
+    // `src_size` does not exceed `data.len()`. Viewing initialized `u8`s as
+    // `MaybeUninit<u8>` is always valid since both types share the same layout.
+    unsafe {
+        let memcpy_start_time = precise_time_ns();
+        let bpp = texture.get_format().bytes_per_pixel() as usize;
+        let width_bytes = update_rect.width() as usize * bpp;
+        let src_stride = update_stride.map_or(width_bytes, |stride| {
+            assert!(stride >= 0);
+            stride as usize
+        });
+        // The last row only needs `width_bytes`, not a full stride.
+        // NOTE(review): this assumes `update_rect` has a positive height; an empty
+        // rect would underflow here. Confirm that callers never pass one.
+        let src_size = (update_rect.height() as usize - 1) * src_stride + width_bytes;
+        assert!(src_size <= data.len());
+
+        let src: &[mem::MaybeUninit<u8>] = std::slice::from_raw_parts(data.as_ptr() as *const _, src_size);
+        let (dst_stride, dst) = match &mut buffer.staging_buffer {
+            StagingBufferKind::Pbo(buffer) => (
+                buffer.get_stride(),
+                buffer.get_mapping(),
+            ),
+            StagingBufferKind::CpuBuffer { bytes } => (
+                BATCH_UPLOAD_TEXTURE_SIZE.width as usize * bpp,
+                &mut bytes[..],
+            ),
+            // Image buffers have no allocator slice of their own, so the allocator
+            // never hands out an index that refers to one.
+            StagingBufferKind::Image { .. } => unreachable!(),
+        };
+
+        // copy the data line-by-line in to the buffer so that we do not overwrite
+        // any other region of the buffer.
+        for y in 0..allocated_rect.height() as usize {
+            let src_start = y * src_stride;
+            let src_end = src_start + width_bytes;
+            let dst_start = (allocated_rect.min.y as usize + y) * dst_stride +
+                allocated_rect.min.x as usize * bpp;
+            let dst_end = dst_start + width_bytes;
+
+            dst[dst_start..dst_end].copy_from_slice(&src[src_start..src_end])
+        }
+
+        stats.cpu_copy_time += precise_time_ns() - memcpy_start_time;
+    }
+}
+
+/// Take this code path instead of copying into a staging CPU buffer when the image
+/// we would copy is large enough that it's unlikely anything else would fit in the
+/// buffer, therefore we might as well copy directly from the source image's pixels.
+///
+/// A dedicated staging texture is taken from `staging_texture_pool` and registered
+/// in `batch_upload_textures`. The source bytes themselves (`data`, shared through
+/// the `Arc`) are recorded as the staging buffer, and a `BatchUploadCopy` from the
+/// staging texture's origin to `update_rect` in the destination texture is queued
+/// in `batch_upload_copies`.
+fn skip_staging_buffer<'a>(
+    device: &mut Device,
+    staging_texture_pool: &mut UploadTexturePool,
+    update_rect: DeviceIntRect,
+    stride: Option<i32>,
+    data: Arc<Vec<u8>>,
+    dest_texture_id: CacheTextureId,
+    texture: &Texture,
+    batch_upload_buffers: &mut FastHashMap<ImageFormat, (GuillotineAllocator, Vec<BatchUploadBuffer<'a>>)>,
+    batch_upload_textures: &mut Vec<Texture>,
+    batch_upload_copies: &mut Vec<BatchUploadCopy>,
+    stats: &mut UploadStats
+) {
+    // The allocator is left untouched: the image gets a staging texture to itself.
+    let (_, buffers) = batch_upload_buffers.entry(texture.get_format())
+        .or_insert_with(|| (GuillotineAllocator::new(None), Vec::new()));
+
+    let texture_alloc_time_start = precise_time_ns();
+    let staging_texture = staging_texture_pool.get_texture(device, texture.get_format());
+    // Accumulate rather than overwrite: several staging textures can be
+    // allocated while processing a single list of updates.
+    stats.texture_alloc_time += precise_time_ns() - texture_alloc_time_start;
+
+    let texture_index = batch_upload_textures.len();
+    batch_upload_textures.push(staging_texture);
+
+    buffers.push(BatchUploadBuffer {
+        staging_buffer: StagingBufferKind::Image { bytes: data, stride },
+        texture_index,
+        // The image is uploaded at the origin of its staging texture.
+        upload_rect: DeviceIntRect::from_size(update_rect.size())
+    });
+
+    batch_upload_copies.push(BatchUploadCopy {
+        src_texture_index: texture_index,
+        src_offset: point2(0, 0),
+        dest_texture_id,
+        dest_offset: update_rect.min,
+        size: update_rect.size(),
+    });
+}
+
+
+/// Move batched uploads from their staging textures into the texture cache, issuing
+/// one texture sub-region copy per queued item.
+///
+/// This is expected to be the cheaper option on the GPU side, but the per-command
+/// overhead of some drivers is high enough that certain configurations go through
+/// copy_from_staging_to_cache_using_draw_calls instead.
+fn copy_from_staging_to_cache(
+    renderer: &mut Renderer,
+    batch_upload_textures: &[Texture],
+    batch_upload_copies: Vec<BatchUploadCopy>,
+) {
+    for queued in batch_upload_copies {
+        let BatchUploadCopy {
+            src_texture_index,
+            src_offset,
+            dest_texture_id,
+            dest_offset,
+            size,
+        } = queued;
+
+        // Resolve both ends of the copy before touching the device.
+        let cache_texture = &renderer.texture_resolver.texture_cache_map[&dest_texture_id].texture;
+        let staging_texture = &batch_upload_textures[src_texture_index];
+
+        renderer.device.copy_texture_sub_region(
+            staging_texture,
+            src_offset.x as _,
+            src_offset.y as _,
+            cache_texture,
+            dest_offset.x as _,
+            dest_offset.y as _,
+            size.width as _,
+            size.height as _,
+        );
+    }
+}
+
+/// Move batched uploads from their staging textures into the texture cache by
+/// drawing them with the copy shader.
+///
+/// Consecutive copies that share both their staging texture and their destination
+/// texture are gathered into a single instanced draw call.
+///
+/// If this shows up in GPU time profiles we could replace it with
+/// a simpler shader (composite.glsl is already quite simple).
+fn copy_from_staging_to_cache_using_draw_calls(
+    renderer: &mut Renderer,
+    stats: &mut UploadStats,
+    batch_upload_textures: &[Texture],
+    batch_upload_copies: Vec<BatchUploadCopy>,
+) {
+    let mut pending: Vec<CopyInstance> = Vec::new();
+    let mut bound_src: Option<usize> = None;
+    let mut bound_dst: Option<CacheTextureId> = None;
+    let mut dst_texture_size = DeviceSize::new(0.0, 0.0);
+
+    for copy in batch_upload_copies {
+        let rebind_src = bound_src != Some(copy.src_texture_index);
+        let rebind_dst = bound_dst != Some(copy.dest_texture_id);
+
+        // Anything gathered so far has to be drawn before the bindings change.
+        if !pending.is_empty() && (rebind_src || rebind_dst) {
+            submit_copy_batch(renderer, stats, &pending);
+            pending.clear();
+        }
+
+        if rebind_dst {
+            let cache_texture = &renderer.texture_resolver.texture_cache_map[&copy.dest_texture_id].texture;
+            dst_texture_size = cache_texture.get_dimensions().to_f32();
+
+            let target = DrawTarget::from_texture(cache_texture, false);
+            renderer.device.bind_draw_target(target);
+
+            renderer.shaders
+                .borrow_mut()
+                .ps_copy
+                .bind(
+                    &mut renderer.device,
+                    &Transform3D::identity(),
+                    None,
+                    &mut renderer.renderer_errors,
+                    &mut renderer.profile,
+                );
+
+            bound_dst = Some(copy.dest_texture_id);
+        }
+
+        if rebind_src {
+            let staging_texture = &batch_upload_textures[copy.src_texture_index];
+            renderer.device.bind_texture(
+                TextureSampler::Color0,
+                staging_texture,
+                Swizzle::default(),
+            );
+
+            bound_src = Some(copy.src_texture_index);
+        }
+
+        // Source and destination rects share their size, only the origin differs.
+        let extent = copy.size.to_f32();
+        pending.push(CopyInstance {
+            src_rect: DeviceRect::from_origin_and_size(copy.src_offset.to_f32(), extent),
+            dst_rect: DeviceRect::from_origin_and_size(copy.dest_offset.to_f32(), extent),
+            dst_texture_size,
+        });
+    }
+
+    // Draw whatever is left over from the last run of copies.
+    if !pending.is_empty() {
+        submit_copy_batch(renderer, stats, &pending);
+    }
+}
+
+/// Draw one batch of copy instances with the currently bound staging texture and
+/// draw target, and count it in the upload statistics.
+fn submit_copy_batch(
+    renderer: &mut Renderer,
+    stats: &mut UploadStats,
+    instances: &[CopyInstance],
+) {
+    renderer.draw_instanced_batch(
+        instances,
+        VertexArrayKind::Copy,
+        // We bind the staging texture manually because it isn't known
+        // to the texture resolver.
+        &BatchTextures::empty(),
+        &mut RendererStats::default(),
+    );
+
+    stats.num_draw_calls += 1;
+}
+
+/// A very basic pool to avoid reallocating staging textures as well as staging
+/// CPU side buffers.
+pub struct UploadTexturePool {
+ /// The textures in the pool associated with a last used frame index.
+ ///
+ /// The outer array corresponds to each of the three supported texture formats
+ /// (RGBA8, BGRA8 and R8, in that order).
+ textures: [VecDeque<(Texture, u64)>; 3],
+ /// Frame at which to deallocate some textures if there are too many in the pool,
+ /// for each format.
+ delay_texture_deallocation: [u64; 3],
+ /// Index of the current frame, incremented by `begin_frame`.
+ current_frame: u64,
+
+ /// Temporary buffers that are used when using staging uploads + glTexImage2D.
+ ///
+ /// Temporary buffers aren't used asynchronously so they can be reused every frame.
+ /// To keep things simple we always allocate enough memory for formats with four bytes
+ /// per pixel (more than we need for alpha-only textures but it works just as well).
+ temporary_buffers: Vec<Vec<mem::MaybeUninit<u8>>>,
+ /// The smallest length of `temporary_buffers` observed since the beginning of the
+ /// current frame, in other words the number of buffers that were left unused.
+ min_temporary_buffers: usize,
+ /// Frame at which to deallocate some temporary buffers if too many are unused.
+ delay_buffer_deallocation: u64,
+}
+
+impl UploadTexturePool {
+    /// Build a pool that does not own any texture or CPU buffer yet.
+    pub fn new() -> Self {
+        Self {
+            textures: Default::default(),
+            delay_texture_deallocation: [0, 0, 0],
+            current_frame: 0,
+            temporary_buffers: Vec::new(),
+            min_temporary_buffers: 0,
+            delay_buffer_deallocation: 0,
+        }
+    }
+
+    /// Map a texture format to its slot in the per-format arrays.
+    ///
+    /// Panics for any format other than RGBA8, BGRA8 and R8.
+    fn format_index(&self, format: ImageFormat) -> usize {
+        match format {
+            ImageFormat::RGBA8 => 0,
+            ImageFormat::BGRA8 => 1,
+            ImageFormat::R8 => 2,
+            _ => panic!("unexpected format"),
+        }
+    }
+
+    /// Advance to the next frame and restart the tracking of unused CPU buffers.
+    pub fn begin_frame(&mut self) {
+        self.min_temporary_buffers = self.temporary_buffers.len();
+        self.current_frame += 1;
+    }
+
+    /// Hand out a staging texture of the requested format, recycling a pooled one
+    /// when possible and creating a new one otherwise.
+    ///
+    /// See also return_texture.
+    pub fn get_texture(&mut self, device: &mut Device, format: ImageFormat) -> Texture {
+        let slot = self.format_index(format);
+        let now = self.current_frame;
+        let pooled = &mut self.textures[slot];
+
+        // Textures are pushed at the back when they are returned, so the front
+        // entry is always the least recently used one and there is no need to look
+        // any further. It only counts as available once it has been left alone for
+        // more than 2 frames, to avoid stalls.
+        let front_is_available = matches!(
+            pooled.front(),
+            Some((_, last_used)) if now - *last_used > 2
+        );
+
+        if front_is_available {
+            let (texture, _) = pooled.pop_front().unwrap();
+            return texture;
+        }
+
+        // Nothing to recycle: allocate a fresh staging texture.
+        let size = BATCH_UPLOAD_TEXTURE_SIZE;
+        device.create_texture(
+            ImageBufferKind::Texture2D,
+            format,
+            size.width,
+            size.height,
+            TextureFilter::Nearest,
+            // Currently we need render target support as we always use glBlitFramebuffer
+            // to copy the texture data. Instead, we should use glCopyImageSubData on some
+            // platforms, and avoid creating the FBOs in that case.
+            Some(RenderTargetInfo { has_depth: false }),
+        )
+    }
+
+    /// Give a staging texture back to the pool once its uploads are done.
+    ///
+    /// The texture must have been obtained from this pool via get_texture.
+    pub fn return_texture(&mut self, texture: Texture) {
+        let slot = self.format_index(texture.get_format());
+        let last_used = self.current_frame;
+        self.textures[slot].push_back((texture, last_used));
+    }
+
+    /// Hand out a temporary CPU buffer, recycling a pooled one when possible.
+    ///
+    /// These buffers are used in the batched upload path when PBOs are not supported.
+    /// Content is first written to the temporary buffer and uploaded via a single
+    /// glTexSubImage2D call.
+    pub fn get_temporary_buffer(&mut self) -> Vec<mem::MaybeUninit<u8>> {
+        let buffer = match self.temporary_buffers.pop() {
+            Some(recycled) => recycled,
+            None => vec![mem::MaybeUninit::new(0); BATCH_UPLOAD_TEXTURE_SIZE.area() as usize * 4],
+        };
+
+        // Remember the lowest number of idle buffers seen during this frame.
+        let idle = self.temporary_buffers.len();
+        if idle < self.min_temporary_buffers {
+            self.min_temporary_buffers = idle;
+        }
+
+        buffer
+    }
+
+    /// Give back memory that was obtained from this pool via get_temporary_buffer.
+    pub fn return_temporary_buffer(&mut self, buffer: Vec<mem::MaybeUninit<u8>>) {
+        let expected_len = BATCH_UPLOAD_TEXTURE_SIZE.area() as usize * 4;
+        assert_eq!(buffer.len(), expected_len);
+        self.temporary_buffers.push(buffer);
+    }
+
+    /// Release all of the CPU and GPU memory held by this pool.
+    pub fn delete_textures(&mut self, device: &mut Device) {
+        for pooled in self.textures.iter_mut() {
+            // Most recently returned textures go first.
+            for (texture, _) in pooled.drain(..).rev() {
+                device.delete_texture(texture);
+            }
+        }
+        self.temporary_buffers.clear();
+    }
+
+    /// Trim the pool when it has been holding on to too many idle resources for a
+    /// long time.
+    pub fn end_frame(&mut self, device: &mut Device) {
+        let now = self.current_frame;
+
+        let per_format = self.textures
+            .iter_mut()
+            .zip(self.delay_texture_deallocation.iter_mut());
+
+        for (pooled, deallocate_after) in per_format {
+            // Count the staging textures that could be reused right away. If that
+            // number stays high for a large number of frames, bring it back to
+            // 8-ish over multiple frames.
+            let reusable = pooled
+                .iter()
+                .filter(|entry| now - entry.1 > 2)
+                .count();
+
+            if reusable < 8 {
+                // Don't deallocate textures for another 120 frames.
+                *deallocate_after = now + 120;
+            }
+
+            // Deallocate up to 4 staging textures every frame, oldest first.
+            let to_remove = if now > *deallocate_after { reusable.min(4) } else { 0 };
+            for (texture, _) in pooled.drain(..to_remove) {
+                device.delete_texture(texture);
+            }
+        }
+
+        // Similar logic for temporary CPU buffers. Calls to get and return buffers
+        // should have been balanced for this frame, but get_temporary_buffer
+        // allocates whenever the vec is empty. As the buffers are carried over from
+        // frame to frame, the smallest length that the vec reached during this frame
+        // tells us how many buffers were never touched; some of those are
+        // deallocated if there are a lot of them.
+        let untouched = self.min_temporary_buffers;
+        if untouched < 8 {
+            self.delay_buffer_deallocation = now + 120;
+        }
+
+        if now > self.delay_buffer_deallocation {
+            // Unlike textures it doesn't matter which end of the vector the
+            // buffers are removed from.
+            let kept = self.temporary_buffers.len().saturating_sub(untouched.min(4));
+            self.temporary_buffers.truncate(kept);
+        }
+    }
+
+    /// Add the memory held by this pool to `report`: the heap size of the idle CPU
+    /// buffers as measured by `size_op_funs`, and the size of the pooled textures.
+    pub fn report_memory_to(&self, report: &mut MemoryReport, size_op_funs: &MallocSizeOfOps) {
+        for buffer in self.temporary_buffers.iter() {
+            // SAFETY (assumed, confirm against malloc_size_of): the pointer is the
+            // start of a live heap allocation owned by `buffer`.
+            let heap_bytes = unsafe { (size_op_funs.size_of_op)(buffer.as_ptr() as *const _) };
+            report.upload_staging_memory += heap_bytes;
+        }
+
+        for (texture, _) in self.textures.iter().flatten() {
+            report.upload_staging_textures += texture.size_in_bytes();
+        }
+    }
+}
+
+/// Counters gathered during a single call to `upload_to_texture_cache` and
+/// reported to the profiler at the end of it.
+///
+/// All durations are in nanoseconds.
+struct UploadStats {
+ // Number of instanced draw calls submitted to copy from staging textures.
+ num_draw_calls: u32,
+ // Time spent uploading to textures, including the final flush.
+ upload_time: u64,
+ // Time spent obtaining staging buffers (CPU buffers or PBOs).
+ cpu_buffer_alloc_time: u64,
+ // Time spent obtaining staging textures from the pool.
+ texture_alloc_time: u64,
+ // Time spent copying pixels into staging buffers.
+ cpu_copy_time: u64,
+ // Time spent issuing the staging-to-cache copy commands.
+ gpu_copy_commands_time: u64,
+ // Sum of the byte counts returned by the uploader.
+ bytes_uploaded: usize,
+ // Number of updates that were uploaded (debug clears are not counted).
+ items_uploaded: usize,
+}
+
+/// Where the pixels of a batched upload live before they are uploaded to a
+/// staging texture.
+#[derive(Debug)]
+enum StagingBufferKind<'a> {
+ // A staging buffer obtained from the texture uploader, used with
+ // UploadMethod::PixelBuffer.
+ Pbo(UploadStagingBuffer<'a>),
+ // A temporary buffer from the UploadTexturePool, used with
+ // UploadMethod::Immediate.
+ CpuBuffer { bytes: Vec<mem::MaybeUninit<u8>> },
+ // The source image's own bytes, uploaded without an intermediate copy
+ // (see skip_staging_buffer). `stride` is the source stride in bytes, if any.
+ Image { bytes: Arc<Vec<u8>>, stride: Option<i32> },
+}
+/// A staging buffer paired with the staging texture that it gets uploaded to.
+#[derive(Debug)]
+struct BatchUploadBuffer<'a> {
+ // The memory holding the pixels to upload.
+ staging_buffer: StagingBufferKind<'a>,
+ // Index of the destination staging texture within batch_upload_textures.
+ texture_index: usize,
+ // A rectangle containing all items going into this staging texture, so
+ // that we can avoid uploading the entire area if we are using glTexSubImage2d.
+ upload_rect: DeviceIntRect,
+}
+
+// On some devices performing many small texture uploads is slow, so instead we batch
+// updates in to a small number of uploads to temporary textures, then copy from those
+// textures to the correct place in the texture cache.
+// Each BatchUploadCopy describes one such copy from a temporary texture to its
+// final destination.
+#[derive(Debug)]
+struct BatchUploadCopy {
+ // Index within batch_upload_textures
+ src_texture_index: usize,
+ // Position of the item within the temporary texture.
+ src_offset: DeviceIntPoint,
+ // The texture cache texture to copy into.
+ dest_texture_id: CacheTextureId,
+ // Position of the item within the destination texture.
+ dest_offset: DeviceIntPoint,
+ // Size of the copied region, identical on both sides.
+ size: DeviceIntSize,
+}