summaryrefslogtreecommitdiffstats
path: root/third_party/rust/wgpu-hal/src
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/wgpu-hal/src')
-rw-r--r--third_party/rust/wgpu-hal/src/auxil/dxgi/conv.rs1
-rw-r--r--third_party/rust/wgpu-hal/src/dx12/adapter.rs99
-rw-r--r--third_party/rust/wgpu-hal/src/dx12/conv.rs2
-rw-r--r--third_party/rust/wgpu-hal/src/dx12/device.rs33
-rw-r--r--third_party/rust/wgpu-hal/src/dx12/mod.rs3
-rw-r--r--third_party/rust/wgpu-hal/src/dx12/types.rs22
-rw-r--r--third_party/rust/wgpu-hal/src/gles/adapter.rs33
-rw-r--r--third_party/rust/wgpu-hal/src/gles/conv.rs1
-rw-r--r--third_party/rust/wgpu-hal/src/gles/device.rs37
-rw-r--r--third_party/rust/wgpu-hal/src/gles/egl.rs74
-rw-r--r--third_party/rust/wgpu-hal/src/gles/mod.rs1
-rw-r--r--third_party/rust/wgpu-hal/src/gles/queue.rs71
-rw-r--r--third_party/rust/wgpu-hal/src/gles/wgl.rs2
-rw-r--r--third_party/rust/wgpu-hal/src/lib.rs346
-rw-r--r--third_party/rust/wgpu-hal/src/metal/adapter.rs22
-rw-r--r--third_party/rust/wgpu-hal/src/metal/conv.rs1
-rw-r--r--third_party/rust/wgpu-hal/src/metal/device.rs24
-rw-r--r--third_party/rust/wgpu-hal/src/metal/mod.rs12
-rw-r--r--third_party/rust/wgpu-hal/src/vulkan/adapter.rs123
-rw-r--r--third_party/rust/wgpu-hal/src/vulkan/command.rs5
-rw-r--r--third_party/rust/wgpu-hal/src/vulkan/conv.rs1
-rw-r--r--third_party/rust/wgpu-hal/src/vulkan/device.rs30
-rw-r--r--third_party/rust/wgpu-hal/src/vulkan/mod.rs109
23 files changed, 889 insertions, 163 deletions
diff --git a/third_party/rust/wgpu-hal/src/auxil/dxgi/conv.rs b/third_party/rust/wgpu-hal/src/auxil/dxgi/conv.rs
index 6af4b77bb3..e5162362f7 100644
--- a/third_party/rust/wgpu-hal/src/auxil/dxgi/conv.rs
+++ b/third_party/rust/wgpu-hal/src/auxil/dxgi/conv.rs
@@ -261,6 +261,7 @@ pub fn map_vertex_format(format: wgt::VertexFormat) -> dxgiformat::DXGI_FORMAT {
Vf::Uint32x4 => DXGI_FORMAT_R32G32B32A32_UINT,
Vf::Sint32x4 => DXGI_FORMAT_R32G32B32A32_SINT,
Vf::Float32x4 => DXGI_FORMAT_R32G32B32A32_FLOAT,
+ Vf::Unorm10_10_10_2 => DXGI_FORMAT_R10G10B10A2_UNORM,
Vf::Float64 | Vf::Float64x2 | Vf::Float64x3 | Vf::Float64x4 => unimplemented!(),
}
}
diff --git a/third_party/rust/wgpu-hal/src/dx12/adapter.rs b/third_party/rust/wgpu-hal/src/dx12/adapter.rs
index b417a88a6f..faf25cc852 100644
--- a/third_party/rust/wgpu-hal/src/dx12/adapter.rs
+++ b/third_party/rust/wgpu-hal/src/dx12/adapter.rs
@@ -115,18 +115,6 @@ impl super::Adapter {
)
});
- let mut shader_model_support: d3d12_ty::D3D12_FEATURE_DATA_SHADER_MODEL =
- d3d12_ty::D3D12_FEATURE_DATA_SHADER_MODEL {
- HighestShaderModel: d3d12_ty::D3D_SHADER_MODEL_6_0,
- };
- assert_eq!(0, unsafe {
- device.CheckFeatureSupport(
- d3d12_ty::D3D12_FEATURE_SHADER_MODEL,
- &mut shader_model_support as *mut _ as *mut _,
- mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_SHADER_MODEL>() as _,
- )
- });
-
let mut workarounds = super::Workarounds::default();
let info = wgt::AdapterInfo {
@@ -181,6 +169,53 @@ impl super::Adapter {
hr == 0 && features3.CastingFullyTypedFormatSupported != 0
};
+ let shader_model = if dxc_container.is_none() {
+ naga::back::hlsl::ShaderModel::V5_1
+ } else {
+ let mut versions = [
+ crate::dx12::types::D3D_SHADER_MODEL_6_7,
+ crate::dx12::types::D3D_SHADER_MODEL_6_6,
+ crate::dx12::types::D3D_SHADER_MODEL_6_5,
+ crate::dx12::types::D3D_SHADER_MODEL_6_4,
+ crate::dx12::types::D3D_SHADER_MODEL_6_3,
+ crate::dx12::types::D3D_SHADER_MODEL_6_2,
+ crate::dx12::types::D3D_SHADER_MODEL_6_1,
+ crate::dx12::types::D3D_SHADER_MODEL_6_0,
+ crate::dx12::types::D3D_SHADER_MODEL_5_1,
+ ]
+ .iter();
+ match loop {
+ if let Some(&sm) = versions.next() {
+ let mut sm = crate::dx12::types::D3D12_FEATURE_DATA_SHADER_MODEL {
+ HighestShaderModel: sm,
+ };
+ if 0 == unsafe {
+ device.CheckFeatureSupport(
+ 7, // D3D12_FEATURE_SHADER_MODEL
+ &mut sm as *mut _ as *mut _,
+ mem::size_of::<crate::dx12::types::D3D12_FEATURE_DATA_SHADER_MODEL>()
+ as _,
+ )
+ } {
+ break sm.HighestShaderModel;
+ }
+ } else {
+ break crate::dx12::types::D3D_SHADER_MODEL_5_1;
+ }
+ } {
+ crate::dx12::types::D3D_SHADER_MODEL_5_1 => naga::back::hlsl::ShaderModel::V5_1,
+ crate::dx12::types::D3D_SHADER_MODEL_6_0 => naga::back::hlsl::ShaderModel::V6_0,
+ crate::dx12::types::D3D_SHADER_MODEL_6_1 => naga::back::hlsl::ShaderModel::V6_1,
+ crate::dx12::types::D3D_SHADER_MODEL_6_2 => naga::back::hlsl::ShaderModel::V6_2,
+ crate::dx12::types::D3D_SHADER_MODEL_6_3 => naga::back::hlsl::ShaderModel::V6_3,
+ crate::dx12::types::D3D_SHADER_MODEL_6_4 => naga::back::hlsl::ShaderModel::V6_4,
+ crate::dx12::types::D3D_SHADER_MODEL_6_5 => naga::back::hlsl::ShaderModel::V6_5,
+ crate::dx12::types::D3D_SHADER_MODEL_6_6 => naga::back::hlsl::ShaderModel::V6_6,
+ crate::dx12::types::D3D_SHADER_MODEL_6_7 => naga::back::hlsl::ShaderModel::V6_7,
+ _ => unreachable!(),
+ }
+ };
+
let private_caps = super::PrivateCapabilities {
instance_flags,
heterogeneous_resource_heaps: options.ResourceHeapTier
@@ -196,6 +231,7 @@ impl super::Adapter {
casting_fully_typed_format_supported,
// See https://github.com/gfx-rs/wgpu/issues/3552
suballocation_supported: !info.name.contains("Iris(R) Xe"),
+ shader_model,
};
// Theoretically vram limited, but in practice 2^20 is the limit
@@ -273,7 +309,7 @@ impl super::Adapter {
wgt::Features::TEXTURE_BINDING_ARRAY
| wgt::Features::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING
| wgt::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING,
- shader_model_support.HighestShaderModel >= d3d12_ty::D3D_SHADER_MODEL_5_1,
+ shader_model >= naga::back::hlsl::ShaderModel::V5_1,
);
let bgra8unorm_storage_supported = {
@@ -295,21 +331,28 @@ impl super::Adapter {
bgra8unorm_storage_supported,
);
- // we must be using DXC because uint64_t was added with Shader Model 6
- // and FXC only supports up to 5.1
- let int64_shader_ops_supported = dxc_container.is_some() && {
- let mut features1: d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS1 =
- unsafe { mem::zeroed() };
- let hr = unsafe {
- device.CheckFeatureSupport(
- d3d12_ty::D3D12_FEATURE_D3D12_OPTIONS1,
- &mut features1 as *mut _ as *mut _,
- mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS1>() as _,
- )
- };
- hr == 0 && features1.Int64ShaderOps != 0
+ let mut features1: d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS1 = unsafe { mem::zeroed() };
+ let hr = unsafe {
+ device.CheckFeatureSupport(
+ d3d12_ty::D3D12_FEATURE_D3D12_OPTIONS1,
+ &mut features1 as *mut _ as *mut _,
+ mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS1>() as _,
+ )
};
- features.set(wgt::Features::SHADER_INT64, int64_shader_ops_supported);
+
+ features.set(
+ wgt::Features::SHADER_INT64,
+ shader_model >= naga::back::hlsl::ShaderModel::V6_0
+ && hr == 0
+ && features1.Int64ShaderOps != 0,
+ );
+
+ features.set(
+ wgt::Features::SUBGROUP,
+ shader_model >= naga::back::hlsl::ShaderModel::V6_0
+ && hr == 0
+ && features1.WaveOps != 0,
+ );
// float32-filterable should always be available on d3d12
features.set(wgt::Features::FLOAT32_FILTERABLE, true);
@@ -377,6 +420,8 @@ impl super::Adapter {
.min(crate::MAX_VERTEX_BUFFERS as u32),
max_vertex_attributes: d3d12_ty::D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT,
max_vertex_buffer_array_stride: d3d12_ty::D3D12_SO_BUFFER_MAX_STRIDE_IN_BYTES,
+ min_subgroup_size: 4, // Not using `features1.WaveLaneCountMin` as it is unreliable
+ max_subgroup_size: 128,
// The push constants are part of the root signature which
// has a limit of 64 DWORDS (256 bytes), but other resources
// also share the root signature:
diff --git a/third_party/rust/wgpu-hal/src/dx12/conv.rs b/third_party/rust/wgpu-hal/src/dx12/conv.rs
index 2b6c1d959e..b09ea76080 100644
--- a/third_party/rust/wgpu-hal/src/dx12/conv.rs
+++ b/third_party/rust/wgpu-hal/src/dx12/conv.rs
@@ -224,7 +224,7 @@ pub fn map_polygon_mode(mode: wgt::PolygonMode) -> d3d12_ty::D3D12_FILL_MODE {
}
/// D3D12 doesn't support passing factors ending in `_COLOR` for alpha blending
-/// (see https://learn.microsoft.com/en-us/windows/win32/api/d3d12/ns-d3d12-d3d12_render_target_blend_desc).
+/// (see <https://learn.microsoft.com/en-us/windows/win32/api/d3d12/ns-d3d12-d3d12_render_target_blend_desc>).
/// Therefore this function takes an additional `is_alpha` argument
/// which if set will return an equivalent `_ALPHA` factor.
fn map_blend_factor(factor: wgt::BlendFactor, is_alpha: bool) -> d3d12_ty::D3D12_BLEND {
diff --git a/third_party/rust/wgpu-hal/src/dx12/device.rs b/third_party/rust/wgpu-hal/src/dx12/device.rs
index 23bd409dc4..82075294ee 100644
--- a/third_party/rust/wgpu-hal/src/dx12/device.rs
+++ b/third_party/rust/wgpu-hal/src/dx12/device.rs
@@ -218,21 +218,39 @@ impl super::Device {
use naga::back::hlsl;
let stage_bit = crate::auxil::map_naga_stage(naga_stage);
- let module = &stage.module.naga.module;
+
+ let (module, info) = naga::back::pipeline_constants::process_overrides(
+ &stage.module.naga.module,
+ &stage.module.naga.info,
+ stage.constants,
+ )
+ .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("HLSL: {e:?}")))?;
+
+ let needs_temp_options = stage.zero_initialize_workgroup_memory
+ != layout.naga_options.zero_initialize_workgroup_memory;
+ let mut temp_options;
+ let naga_options = if needs_temp_options {
+ temp_options = layout.naga_options.clone();
+ temp_options.zero_initialize_workgroup_memory = stage.zero_initialize_workgroup_memory;
+ &temp_options
+ } else {
+ &layout.naga_options
+ };
+
//TODO: reuse the writer
let mut source = String::new();
- let mut writer = hlsl::Writer::new(&mut source, &layout.naga_options);
+ let mut writer = hlsl::Writer::new(&mut source, naga_options);
let reflection_info = {
profiling::scope!("naga::back::hlsl::write");
writer
- .write(module, &stage.module.naga.info)
+ .write(&module, &info)
.map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("HLSL: {e:?}")))?
};
let full_stage = format!(
"{}_{}\0",
naga_stage.to_hlsl_str(),
- layout.naga_options.shader_model.to_str()
+ naga_options.shader_model.to_str()
);
let ep_index = module
@@ -1062,12 +1080,7 @@ impl crate::Device for super::Device {
},
bind_group_infos,
naga_options: hlsl::Options {
- shader_model: match self.dxc_container {
- // DXC
- Some(_) => hlsl::ShaderModel::V6_0,
- // FXC doesn't support SM 6.0
- None => hlsl::ShaderModel::V5_1,
- },
+ shader_model: self.private_caps.shader_model,
binding_map,
fake_missing_bindings: false,
special_constants_binding,
diff --git a/third_party/rust/wgpu-hal/src/dx12/mod.rs b/third_party/rust/wgpu-hal/src/dx12/mod.rs
index 4f958943ca..9f021bc241 100644
--- a/third_party/rust/wgpu-hal/src/dx12/mod.rs
+++ b/third_party/rust/wgpu-hal/src/dx12/mod.rs
@@ -195,6 +195,7 @@ struct PrivateCapabilities {
heap_create_not_zeroed: bool,
casting_fully_typed_format_supported: bool,
suballocation_supported: bool,
+ shader_model: naga::back::hlsl::ShaderModel,
}
#[derive(Default)]
@@ -439,7 +440,7 @@ impl Texture {
}
}
- /// see https://learn.microsoft.com/en-us/windows/win32/direct3d12/subresources#plane-slice
+ /// see <https://learn.microsoft.com/en-us/windows/win32/direct3d12/subresources#plane-slice>
fn calc_subresource(&self, mip_level: u32, array_layer: u32, plane: u32) -> u32 {
mip_level + (array_layer + plane * self.array_layer_count()) * self.mip_level_count
}
diff --git a/third_party/rust/wgpu-hal/src/dx12/types.rs b/third_party/rust/wgpu-hal/src/dx12/types.rs
index b4ad38324a..17b608b840 100644
--- a/third_party/rust/wgpu-hal/src/dx12/types.rs
+++ b/third_party/rust/wgpu-hal/src/dx12/types.rs
@@ -41,3 +41,25 @@ winapi::STRUCT! {
BarycentricsSupported: winapi::shared::minwindef::BOOL,
}
}
+
+winapi::ENUM! {
+ enum D3D_SHADER_MODEL {
+ D3D_SHADER_MODEL_NONE = 0,
+ D3D_SHADER_MODEL_5_1 = 0x51,
+ D3D_SHADER_MODEL_6_0 = 0x60,
+ D3D_SHADER_MODEL_6_1 = 0x61,
+ D3D_SHADER_MODEL_6_2 = 0x62,
+ D3D_SHADER_MODEL_6_3 = 0x63,
+ D3D_SHADER_MODEL_6_4 = 0x64,
+ D3D_SHADER_MODEL_6_5 = 0x65,
+ D3D_SHADER_MODEL_6_6 = 0x66,
+ D3D_SHADER_MODEL_6_7 = 0x67,
+ D3D_HIGHEST_SHADER_MODEL = 0x67,
+ }
+}
+
+winapi::STRUCT! {
+ struct D3D12_FEATURE_DATA_SHADER_MODEL {
+ HighestShaderModel: D3D_SHADER_MODEL,
+ }
+}
diff --git a/third_party/rust/wgpu-hal/src/gles/adapter.rs b/third_party/rust/wgpu-hal/src/gles/adapter.rs
index b9d044337c..052c77006b 100644
--- a/third_party/rust/wgpu-hal/src/gles/adapter.rs
+++ b/third_party/rust/wgpu-hal/src/gles/adapter.rs
@@ -104,7 +104,7 @@ impl super::Adapter {
}
}
- fn make_info(vendor_orig: String, renderer_orig: String) -> wgt::AdapterInfo {
+ fn make_info(vendor_orig: String, renderer_orig: String, version: String) -> wgt::AdapterInfo {
let vendor = vendor_orig.to_lowercase();
let renderer = renderer_orig.to_lowercase();
@@ -179,13 +179,33 @@ impl super::Adapter {
0
};
+ let driver;
+ let driver_info;
+ if version.starts_with("WebGL ") || version.starts_with("OpenGL ") {
+ let es_sig = " ES";
+ match version.find(es_sig) {
+ Some(pos) => {
+ driver = version[..pos + es_sig.len()].to_owned();
+ driver_info = version[pos + es_sig.len() + 1..].to_owned();
+ }
+ None => {
+ let pos = version.find(' ').unwrap();
+ driver = version[..pos].to_owned();
+ driver_info = version[pos + 1..].to_owned();
+ }
+ }
+ } else {
+ driver = "OpenGL".to_owned();
+ driver_info = version;
+ }
+
wgt::AdapterInfo {
name: renderer_orig,
vendor: vendor_id,
device: 0,
device_type: inferred_device_type,
- driver: String::new(),
- driver_info: String::new(),
+ driver,
+ driver_info,
backend: wgt::Backend::Gl,
}
}
@@ -507,8 +527,7 @@ impl super::Adapter {
let has_etc = if cfg!(any(webgl, Emscripten)) {
extensions.contains("WEBGL_compressed_texture_etc")
} else {
- // This is a required part of GLES3, but not part of Desktop GL at all.
- es_ver.is_some()
+ es_ver.is_some() || extensions.contains("GL_ARB_ES3_compatibility")
};
features.set(wgt::Features::TEXTURE_COMPRESSION_ETC2, has_etc);
@@ -728,6 +747,8 @@ impl super::Adapter {
} else {
!0
},
+ min_subgroup_size: 0,
+ max_subgroup_size: 0,
max_push_constant_size: super::MAX_PUSH_CONSTANTS as u32 * 4,
min_uniform_buffer_offset_alignment,
min_storage_buffer_offset_alignment,
@@ -825,7 +846,7 @@ impl super::Adapter {
max_msaa_samples: max_samples,
}),
},
- info: Self::make_info(vendor, renderer),
+ info: Self::make_info(vendor, renderer, version),
features,
capabilities: crate::Capabilities {
limits,
diff --git a/third_party/rust/wgpu-hal/src/gles/conv.rs b/third_party/rust/wgpu-hal/src/gles/conv.rs
index bde69b8629..a6c924f162 100644
--- a/third_party/rust/wgpu-hal/src/gles/conv.rs
+++ b/third_party/rust/wgpu-hal/src/gles/conv.rs
@@ -212,6 +212,7 @@ pub(super) fn describe_vertex_format(vertex_format: wgt::VertexFormat) -> super:
Vf::Uint32x4 => (4, glow::UNSIGNED_INT, Vak::Integer),
Vf::Sint32x4 => (4, glow::INT, Vak::Integer),
Vf::Float32x4 => (4, glow::FLOAT, Vak::Float),
+ Vf::Unorm10_10_10_2 => (4, glow::UNSIGNED_INT_10_10_10_2, Vak::Float),
Vf::Float64 | Vf::Float64x2 | Vf::Float64x3 | Vf::Float64x4 => unimplemented!(),
};
diff --git a/third_party/rust/wgpu-hal/src/gles/device.rs b/third_party/rust/wgpu-hal/src/gles/device.rs
index 50c07f3ff0..a1e2736aa6 100644
--- a/third_party/rust/wgpu-hal/src/gles/device.rs
+++ b/third_party/rust/wgpu-hal/src/gles/device.rs
@@ -220,9 +220,17 @@ impl super::Device {
multiview: context.multiview,
};
- let shader = &stage.module.naga;
- let entry_point_index = shader
- .module
+ let (module, info) = naga::back::pipeline_constants::process_overrides(
+ &stage.module.naga.module,
+ &stage.module.naga.info,
+ stage.constants,
+ )
+ .map_err(|e| {
+ let msg = format!("{e}");
+ crate::PipelineError::Linkage(map_naga_stage(naga_stage), msg)
+ })?;
+
+ let entry_point_index = module
.entry_points
.iter()
.position(|ep| ep.name.as_str() == stage.entry_point)
@@ -247,11 +255,23 @@ impl super::Device {
};
let mut output = String::new();
+ let needs_temp_options = stage.zero_initialize_workgroup_memory
+ != context.layout.naga_options.zero_initialize_workgroup_memory;
+ let mut temp_options;
+ let naga_options = if needs_temp_options {
+ // We use a conditional here, as cloning the naga_options could be expensive
+ // That is, we want to avoid doing that unless we cannot avoid it
+ temp_options = context.layout.naga_options.clone();
+ temp_options.zero_initialize_workgroup_memory = stage.zero_initialize_workgroup_memory;
+ &temp_options
+ } else {
+ &context.layout.naga_options
+ };
let mut writer = glsl::Writer::new(
&mut output,
- &shader.module,
- &shader.info,
- &context.layout.naga_options,
+ &module,
+ &info,
+ naga_options,
&pipeline_options,
policies,
)
@@ -269,8 +289,8 @@ impl super::Device {
context.consume_reflection(
gl,
- &shader.module,
- shader.info.get_entry_point(entry_point_index),
+ &module,
+ info.get_entry_point(entry_point_index),
reflection_info,
naga_stage,
program,
@@ -297,6 +317,7 @@ impl super::Device {
naga_stage: naga_stage.to_owned(),
shader_id: stage.module.id,
entry_point: stage.entry_point.to_owned(),
+ zero_initialize_workgroup_memory: stage.zero_initialize_workgroup_memory,
});
}
let mut guard = self
diff --git a/third_party/rust/wgpu-hal/src/gles/egl.rs b/third_party/rust/wgpu-hal/src/gles/egl.rs
index b166f4f102..00ef70ba88 100644
--- a/third_party/rust/wgpu-hal/src/gles/egl.rs
+++ b/third_party/rust/wgpu-hal/src/gles/egl.rs
@@ -526,28 +526,51 @@ impl Inner {
}
let (config, supports_native_window) = choose_config(&egl, display, srgb_kind)?;
- egl.bind_api(khronos_egl::OPENGL_ES_API).unwrap();
+
+ let supports_opengl = if version >= (1, 4) {
+ let client_apis = egl
+ .query_string(Some(display), khronos_egl::CLIENT_APIS)
+ .unwrap()
+ .to_string_lossy();
+ client_apis
+ .split(' ')
+ .any(|client_api| client_api == "OpenGL")
+ } else {
+ false
+ };
+ egl.bind_api(if supports_opengl {
+ khronos_egl::OPENGL_API
+ } else {
+ khronos_egl::OPENGL_ES_API
+ })
+ .unwrap();
let needs_robustness = true;
let mut khr_context_flags = 0;
let supports_khr_context = display_extensions.contains("EGL_KHR_create_context");
- //TODO: make it so `Device` == EGL Context
- let mut context_attributes = vec![
- khronos_egl::CONTEXT_MAJOR_VERSION,
- 3, // Request GLES 3.0 or higher
- ];
-
- if force_gles_minor_version != wgt::Gles3MinorVersion::Automatic {
+ let mut context_attributes = vec![];
+ if supports_opengl {
+ context_attributes.push(khronos_egl::CONTEXT_MAJOR_VERSION);
+ context_attributes.push(3);
context_attributes.push(khronos_egl::CONTEXT_MINOR_VERSION);
- context_attributes.push(match force_gles_minor_version {
- wgt::Gles3MinorVersion::Version0 => 0,
- wgt::Gles3MinorVersion::Version1 => 1,
- wgt::Gles3MinorVersion::Version2 => 2,
- _ => unreachable!(),
- });
+ context_attributes.push(3);
+ if force_gles_minor_version != wgt::Gles3MinorVersion::Automatic {
+ log::warn!("Ignoring specified GLES minor version as OpenGL is used");
+ }
+ } else {
+ context_attributes.push(khronos_egl::CONTEXT_MAJOR_VERSION);
+ context_attributes.push(3); // Request GLES 3.0 or higher
+ if force_gles_minor_version != wgt::Gles3MinorVersion::Automatic {
+ context_attributes.push(khronos_egl::CONTEXT_MINOR_VERSION);
+ context_attributes.push(match force_gles_minor_version {
+ wgt::Gles3MinorVersion::Automatic => unreachable!(),
+ wgt::Gles3MinorVersion::Version0 => 0,
+ wgt::Gles3MinorVersion::Version1 => 1,
+ wgt::Gles3MinorVersion::Version2 => 2,
+ });
+ }
}
-
if flags.contains(wgt::InstanceFlags::DEBUG) {
if version >= (1, 5) {
log::debug!("\tEGL context: +debug");
@@ -577,8 +600,6 @@ impl Inner {
// because it's for desktop GL only, not GLES.
log::warn!("\tEGL context: -robust access");
}
-
- //TODO do we need `khronos_egl::CONTEXT_OPENGL_NOTIFICATION_STRATEGY_EXT`?
}
if khr_context_flags != 0 {
context_attributes.push(EGL_CONTEXT_FLAGS_KHR);
@@ -977,6 +998,7 @@ impl crate::Instance for Instance {
srgb_kind: inner.srgb_kind,
})
}
+
unsafe fn destroy_surface(&self, _surface: Surface) {}
unsafe fn enumerate_adapters(&self) -> Vec<crate::ExposedAdapter<super::Api>> {
@@ -993,6 +1015,12 @@ impl crate::Instance for Instance {
})
};
+ // In contrast to OpenGL ES, OpenGL requires explicitly enabling sRGB conversions,
+ // as otherwise the user has to do the sRGB conversion.
+ if !matches!(inner.srgb_kind, SrgbFrameBufferKind::None) {
+ unsafe { gl.enable(glow::FRAMEBUFFER_SRGB) };
+ }
+
if self.flags.contains(wgt::InstanceFlags::DEBUG) && gl.supports_debug() {
log::debug!("Max label length: {}", unsafe {
gl.get_parameter_i32(glow::MAX_LABEL_LENGTH)
@@ -1106,6 +1134,13 @@ impl Surface {
unsafe { gl.bind_framebuffer(glow::DRAW_FRAMEBUFFER, None) };
unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, Some(sc.framebuffer)) };
+
+ if !matches!(self.srgb_kind, SrgbFrameBufferKind::None) {
+ // Disable sRGB conversions for `glBlitFramebuffer` as behavior does diverge between
+ // drivers and formats otherwise and we want to ensure no sRGB conversions happen.
+ unsafe { gl.disable(glow::FRAMEBUFFER_SRGB) };
+ }
+
// Note the Y-flipping here. GL's presentation is not flipped,
// but main rendering is. Therefore, we Y-flip the output positions
// in the shader, and also this blit.
@@ -1123,6 +1158,11 @@ impl Surface {
glow::NEAREST,
)
};
+
+ if !matches!(self.srgb_kind, SrgbFrameBufferKind::None) {
+ unsafe { gl.enable(glow::FRAMEBUFFER_SRGB) };
+ }
+
unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, None) };
self.egl
diff --git a/third_party/rust/wgpu-hal/src/gles/mod.rs b/third_party/rust/wgpu-hal/src/gles/mod.rs
index 6f41f7c000..0fcb09be46 100644
--- a/third_party/rust/wgpu-hal/src/gles/mod.rs
+++ b/third_party/rust/wgpu-hal/src/gles/mod.rs
@@ -602,6 +602,7 @@ struct ProgramStage {
naga_stage: naga::ShaderStage,
shader_id: ShaderId,
entry_point: String,
+ zero_initialize_workgroup_memory: bool,
}
#[derive(PartialEq, Eq, Hash)]
diff --git a/third_party/rust/wgpu-hal/src/gles/queue.rs b/third_party/rust/wgpu-hal/src/gles/queue.rs
index 29dfb79d04..7c728d3978 100644
--- a/third_party/rust/wgpu-hal/src/gles/queue.rs
+++ b/third_party/rust/wgpu-hal/src/gles/queue.rs
@@ -213,12 +213,27 @@ impl super::Queue {
instance_count,
ref first_instance_location,
} => {
- match base_vertex {
- 0 => {
- unsafe {
- gl.uniform_1_u32(first_instance_location.as_ref(), first_instance)
- };
+ let supports_full_instancing = self
+ .shared
+ .private_caps
+ .contains(PrivateCapabilities::FULLY_FEATURED_INSTANCING);
+ if supports_full_instancing {
+ unsafe {
+ gl.draw_elements_instanced_base_vertex_base_instance(
+ topology,
+ index_count as i32,
+ index_type,
+ index_offset as i32,
+ instance_count as i32,
+ base_vertex,
+ first_instance,
+ )
+ }
+ } else {
+ unsafe { gl.uniform_1_u32(first_instance_location.as_ref(), first_instance) };
+
+ if base_vertex == 0 {
unsafe {
// Don't use `gl.draw_elements`/`gl.draw_elements_base_vertex` for `instance_count == 1`.
// Angle has a bug where it doesn't consider the instance divisor when `DYNAMIC_DRAW` is used in `gl.draw_elements`/`gl.draw_elements_base_vertex`.
@@ -231,41 +246,17 @@ impl super::Queue {
instance_count as i32,
)
}
- }
- _ => {
- let supports_full_instancing = self
- .shared
- .private_caps
- .contains(PrivateCapabilities::FULLY_FEATURED_INSTANCING);
-
- if supports_full_instancing {
- unsafe {
- gl.draw_elements_instanced_base_vertex_base_instance(
- topology,
- index_count as i32,
- index_type,
- index_offset as i32,
- instance_count as i32,
- base_vertex,
- first_instance,
- )
- }
- } else {
- unsafe {
- gl.uniform_1_u32(first_instance_location.as_ref(), first_instance)
- };
-
- // If we've gotten here, wgpu-core has already validated that this function exists via the DownlevelFlags::BASE_VERTEX feature.
- unsafe {
- gl.draw_elements_instanced_base_vertex(
- topology,
- index_count as _,
- index_type,
- index_offset as i32,
- instance_count as i32,
- base_vertex,
- )
- }
+ } else {
+ // If we've gotten here, wgpu-core has already validated that this function exists via the DownlevelFlags::BASE_VERTEX feature.
+ unsafe {
+ gl.draw_elements_instanced_base_vertex(
+ topology,
+ index_count as _,
+ index_type,
+ index_offset as i32,
+ instance_count as i32,
+ base_vertex,
+ )
}
}
}
diff --git a/third_party/rust/wgpu-hal/src/gles/wgl.rs b/third_party/rust/wgpu-hal/src/gles/wgl.rs
index 2564892969..aae70478b4 100644
--- a/third_party/rust/wgpu-hal/src/gles/wgl.rs
+++ b/third_party/rust/wgpu-hal/src/gles/wgl.rs
@@ -507,6 +507,8 @@ impl crate::Instance for Instance {
.supported_extensions()
.contains("GL_ARB_framebuffer_sRGB");
+ // In contrast to OpenGL ES, OpenGL requires explicitly enabling sRGB conversions,
+ // as otherwise the user has to do the sRGB conversion.
if srgb_capable {
unsafe { gl.enable(glow::FRAMEBUFFER_SRGB) };
}
diff --git a/third_party/rust/wgpu-hal/src/lib.rs b/third_party/rust/wgpu-hal/src/lib.rs
index 79bd54e66e..d300ca30cc 100644
--- a/third_party/rust/wgpu-hal/src/lib.rs
+++ b/third_party/rust/wgpu-hal/src/lib.rs
@@ -1,17 +1,208 @@
-/*! This library describes the internal unsafe graphics abstraction API.
- * It follows WebGPU for the most part, re-using wgpu-types,
- * with the following deviations:
- * - Fully unsafe: zero overhead, zero validation.
- * - Compile-time backend selection via traits.
- * - Objects are passed by references and returned by value. No IDs.
- * - Mapping is persistent, with explicit synchronization.
- * - Resource transitions are explicit.
- * - All layouts are explicit. Binding model has compatibility.
+/*! A cross-platform unsafe graphics abstraction.
*
- * General design direction is to follow the majority by the following weights:
- * - wgpu-core: 1.5
- * - primary backends (Vulkan/Metal/DX12): 1.0 each
- * - secondary backend (GLES): 0.5
+ * This crate defines a set of traits abstracting over modern graphics APIs,
+ * with implementations ("backends") for Vulkan, Metal, Direct3D, and GL.
+ *
+ * `wgpu-hal` is a spiritual successor to
+ * [gfx-hal](https://github.com/gfx-rs/gfx), but with reduced scope, and
+ * oriented towards WebGPU implementation goals. It has no overhead for
+ * validation or tracking, and the API translation overhead is kept to the bare
+ * minimum by the design of WebGPU. This API can be used for resource-demanding
+ * applications and engines.
+ *
+ * The `wgpu-hal` crate's main design choices:
+ *
+ * - Our traits are meant to be *portable*: proper use
+ * should get equivalent results regardless of the backend.
+ *
+ * - Our traits' contracts are *unsafe*: implementations perform minimal
+ * validation, if any, and incorrect use will often cause undefined behavior.
+ * This allows us to minimize the overhead we impose over the underlying
+ * graphics system. If you need safety, the [`wgpu-core`] crate provides a
+ * safe API for driving `wgpu-hal`, implementing all necessary validation,
+ * resource state tracking, and so on. (Note that `wgpu-core` is designed for
+ * use via FFI; the [`wgpu`] crate provides more idiomatic Rust bindings for
+ * `wgpu-core`.) Or, you can do your own validation.
+ *
+ * - In the same vein, returned errors *only cover cases the user can't
+ * anticipate*, like running out of memory or losing the device. Any errors
+ * that the user could reasonably anticipate are their responsibility to
+ * avoid. For example, `wgpu-hal` returns no error for mapping a buffer that's
+ * not mappable: as the buffer creator, the user should already know if they
+ * can map it.
+ *
+ * - We use *static dispatch*. The traits are not
+ * generally object-safe. You must select a specific backend type
+ * like [`vulkan::Api`] or [`metal::Api`], and then use that
+ * according to the main traits, or call backend-specific methods.
+ *
+ * - We use *idiomatic Rust parameter passing*,
+ * taking objects by reference, returning them by value, and so on,
+ * unlike `wgpu-core`, which refers to objects by ID.
+ *
+ * - We map buffer contents *persistently*. This means that the buffer
+ * can remain mapped on the CPU while the GPU reads or writes to it.
+ * You must explicitly indicate when data might need to be
+ * transferred between CPU and GPU, if `wgpu-hal` indicates that the
+ * mapping is not coherent (that is, automatically synchronized
+ * between the two devices).
+ *
+ * - You must record *explicit barriers* between different usages of a
+ * resource. For example, if a buffer is written to by a compute
+ * shader, and then used as and index buffer to a draw call, you
+ * must use [`CommandEncoder::transition_buffers`] between those two
+ * operations.
+ *
+ * - Pipeline layouts are *explicitly specified* when setting bind
+ * group. Incompatible layouts disturb groups bound at higher indices.
+ *
+ * - The API *accepts collections as iterators*, to avoid forcing the user to
+ * store data in particular containers. The implementation doesn't guarantee
+ * that any of the iterators are drained, unless stated otherwise by the
+ * function documentation. For this reason, we recommend that iterators don't
+ * do any mutating work.
+ *
+ * Unfortunately, `wgpu-hal`'s safety requirements are not fully documented.
+ * Ideally, all trait methods would have doc comments setting out the
+ * requirements users must meet to ensure correct and portable behavior. If you
+ * are aware of a specific requirement that a backend imposes that is not
+ * ensured by the traits' documented rules, please file an issue. Or, if you are
+ * a capable technical writer, please file a pull request!
+ *
+ * [`wgpu-core`]: https://crates.io/crates/wgpu-core
+ * [`wgpu`]: https://crates.io/crates/wgpu
+ * [`vulkan::Api`]: vulkan/struct.Api.html
+ * [`metal::Api`]: metal/struct.Api.html
+ *
+ * ## Primary backends
+ *
+ * The `wgpu-hal` crate has full-featured backends implemented on the following
+ * platform graphics APIs:
+ *
+ * - Vulkan, available on Linux, Android, and Windows, using the [`ash`] crate's
+ * Vulkan bindings. It's also available on macOS, if you install [MoltenVK].
+ *
+ * - Metal on macOS, using the [`metal`] crate's bindings.
+ *
+ * - Direct3D 12 on Windows, using the [`d3d12`] crate's bindings.
+ *
+ * [`ash`]: https://crates.io/crates/ash
+ * [MoltenVK]: https://github.com/KhronosGroup/MoltenVK
+ * [`metal`]: https://crates.io/crates/metal
+ * [`d3d12`]: ahttps://crates.io/crates/d3d12
+ *
+ * ## Secondary backends
+ *
+ * The `wgpu-hal` crate has a partial implementation based on the following
+ * platform graphics API:
+ *
+ * - The GL backend is available anywhere OpenGL, OpenGL ES, or WebGL are
+ * available. See the [`gles`] module documentation for details.
+ *
+ * [`gles`]: gles/index.html
+ *
+ * You can see what capabilities an adapter is missing by checking the
+ * [`DownlevelCapabilities`][tdc] in [`ExposedAdapter::capabilities`], available
+ * from [`Instance::enumerate_adapters`].
+ *
+ * The API is generally designed to fit the primary backends better than the
+ * secondary backends, so the latter may impose more overhead.
+ *
+ * [tdc]: wgt::DownlevelCapabilities
+ *
+ * ## Traits
+ *
+ * The `wgpu-hal` crate defines a handful of traits that together
+ * represent a cross-platform abstraction for modern GPU APIs.
+ *
+ * - The [`Api`] trait represents a `wgpu-hal` backend. It has no methods of its
+ * own, only a collection of associated types.
+ *
+ * - [`Api::Instance`] implements the [`Instance`] trait. [`Instance::init`]
+ * creates an instance value, which you can use to enumerate the adapters
+ * available on the system. For example, [`vulkan::Api::Instance::init`][Ii]
+ * returns an instance that can enumerate the Vulkan physical devices on your
+ * system.
+ *
+ * - [`Api::Adapter`] implements the [`Adapter`] trait, representing a
+ * particular device from a particular backend. For example, a Vulkan instance
+ * might have a Lavapipe software adapter and a GPU-based adapter.
+ *
+ * - [`Api::Device`] implements the [`Device`] trait, representing an active
+ * link to a device. You get a device value by calling [`Adapter::open`], and
+ * then use it to create buffers, textures, shader modules, and so on.
+ *
+ * - [`Api::Queue`] implements the [`Queue`] trait, which you use to submit
+ * command buffers to a given device.
+ *
+ * - [`Api::CommandEncoder`] implements the [`CommandEncoder`] trait, which you
+ * use to build buffers of commands to submit to a queue. This has all the
+ * methods for drawing and running compute shaders, which is presumably what
+ * you're here for.
+ *
+ * - [`Api::Surface`] implements the [`Surface`] trait, which represents a
+ * swapchain for presenting images on the screen, via interaction with the
+ * system's window manager.
+ *
+ * The [`Api`] trait has various other associated types like [`Api::Buffer`] and
+ * [`Api::Texture`] that represent resources the rest of the interface can
+ * operate on, but these generally do not have their own traits.
+ *
+ * [Ii]: Instance::init
+ *
+ * ## Validation is the calling code's responsibility, not `wgpu-hal`'s
+ *
+ * As much as possible, `wgpu-hal` traits place the burden of validation,
+ * resource tracking, and state tracking on the caller, not on the trait
+ * implementations themselves. Anything which can reasonably be handled in
+ * backend-independent code should be. A `wgpu_hal` backend's sole obligation is
+ * to provide portable behavior, and report conditions that the calling code
+ * can't reasonably anticipate, like device loss or running out of memory.
+ *
+ * The `wgpu` crate collection is intended for use in security-sensitive
+ * applications, like web browsers, where the API is available to untrusted
+ * code. This means that `wgpu-core`'s validation is not simply a service to
+ * developers, to be provided opportunistically when the performance costs are
+ * acceptable and the necessary data is ready at hand. Rather, `wgpu-core`'s
+ * validation must be exhaustive, to ensure that even malicious content cannot
+ * provoke and exploit undefined behavior in the platform's graphics API.
+ *
+ * Because graphics APIs' requirements are complex, the only practical way for
+ * `wgpu` to provide exhaustive validation is to comprehensively track the
+ * lifetime and state of all the resources in the system. Implementing this
+ * separately for each backend is infeasible; effort would be better spent
+ * making the cross-platform validation in `wgpu-core` legible and trustworthy.
+ * Fortunately, the requirements are largely similar across the various
+ * platforms, so cross-platform validation is practical.
+ *
+ * Some backends have specific requirements that aren't practical to foist off
+ * on the `wgpu-hal` user. For example, properly managing macOS Objective-C or
+ * Microsoft COM reference counts is best handled by using appropriate pointer
+ * types within the backend.
+ *
+ * A desire for "defense in depth" may suggest performing additional validation
+ * in `wgpu-hal` when the opportunity arises, but this must be done with
+ * caution. Even experienced contributors infer the expectations their changes
+ * must meet by considering not just requirements made explicit in types, tests,
+ * assertions, and comments, but also those implicit in the surrounding code.
+ * When one sees validation or state-tracking code in `wgpu-hal`, it is tempting
+ * to conclude, "Oh, `wgpu-hal` checks for this, so `wgpu-core` needn't worry
+ * about it - that would be redundant!" The responsibility for exhaustive
+ * validation always rests with `wgpu-core`, regardless of what may or may not
+ * be checked in `wgpu-hal`.
+ *
+ * To this end, any "defense in depth" validation that does appear in `wgpu-hal`
+ * for requirements that `wgpu-core` should have enforced should report failure
+ * via the `unreachable!` macro, because problems detected at this stage always
+ * indicate a bug in `wgpu-core`.
+ *
+ * ## Debugging
+ *
+ * Most of the information on the wiki [Debugging wgpu Applications][wiki-debug]
+ * page still applies to this API, with the exception of API tracing/replay
+ * functionality, which is only available in `wgpu-core`.
+ *
+ * [wiki-debug]: https://github.com/gfx-rs/wgpu/wiki/Debugging-wgpu-Applications
*/
#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))]
@@ -198,6 +389,15 @@ pub trait Api: Clone + fmt::Debug + Sized {
type Queue: Queue<A = Self>;
type CommandEncoder: CommandEncoder<A = Self>;
+
+ /// This API's command buffer type.
+ ///
+ /// The only thing you can do with `CommandBuffer`s is build them
+ /// with a [`CommandEncoder`] and then pass them to
+ /// [`Queue::submit`] for execution, or destroy them by passing
+ /// them to [`CommandEncoder::reset_all`].
+ ///
+ /// [`CommandEncoder`]: Api::CommandEncoder
type CommandBuffer: WasmNotSendSync + fmt::Debug;
type Buffer: fmt::Debug + WasmNotSendSync + 'static;
@@ -206,6 +406,24 @@ pub trait Api: Clone + fmt::Debug + Sized {
type TextureView: fmt::Debug + WasmNotSendSync;
type Sampler: fmt::Debug + WasmNotSendSync;
type QuerySet: fmt::Debug + WasmNotSendSync;
+
+ /// A value you can block on to wait for something to finish.
+ ///
+ /// A `Fence` holds a monotonically increasing [`FenceValue`]. You can call
+ /// [`Device::wait`] to block until a fence reaches or passes a value you
+ /// choose. [`Queue::submit`] can take a `Fence` and a [`FenceValue`] to
+ /// store in it when the submitted work is complete.
+ ///
+ /// Attempting to set a fence to a value less than its current value has no
+ /// effect.
+ ///
+ /// Waiting on a fence returns as soon as the fence reaches *or passes* the
+ /// requested value. This implies that, in order to reliably determine when
+ /// an operation has completed, operations must finish in order of
+ /// increasing fence values: if a higher-valued operation were to finish
+ /// before a lower-valued operation, then waiting for the fence to reach the
+ /// lower value could return before the lower-valued operation has actually
+ /// finished.
type Fence: fmt::Debug + WasmNotSendSync;
type BindGroupLayout: fmt::Debug + WasmNotSendSync;
@@ -405,7 +623,25 @@ pub trait Device: WasmNotSendSync {
&self,
fence: &<Self::A as Api>::Fence,
) -> Result<FenceValue, DeviceError>;
- /// Calling wait with a lower value than the current fence value will immediately return.
+
+ /// Wait for `fence` to reach `value`.
+ ///
+ /// Operations like [`Queue::submit`] can accept a [`Fence`] and a
+ /// [`FenceValue`] to store in it, so you can use this `wait` function
+ /// to wait for a given queue submission to finish execution.
+ ///
+ /// The `value` argument must be a value that some actual operation you have
+ /// already presented to the device is going to store in `fence`. You cannot
+ /// wait for values yet to be submitted. (This restriction accommodates
+ /// implementations like the `vulkan` backend's [`FencePool`] that must
+ /// allocate a distinct synchronization object for each fence value one is
+ /// able to wait for.)
+ ///
+ /// Calling `wait` with a lower [`FenceValue`] than `fence`'s current value
+ /// returns immediately.
+ ///
+ /// [`Fence`]: Api::Fence
+ /// [`FencePool`]: vulkan/enum.Fence.html#variant.FencePool
unsafe fn wait(
&self,
fence: &<Self::A as Api>::Fence,
@@ -437,14 +673,48 @@ pub trait Device: WasmNotSendSync {
pub trait Queue: WasmNotSendSync {
type A: Api;
- /// Submits the command buffers for execution on GPU.
+ /// Submit `command_buffers` for execution on GPU.
+ ///
+ /// If `signal_fence` is `Some(fence, value)`, update `fence` to `value`
+ /// when the operation is complete. See [`Fence`] for details.
+ ///
+ /// If two calls to `submit` on a single `Queue` occur in a particular order
+ /// (that is, they happen on the same thread, or on two threads that have
+ /// synchronized to establish an ordering), then the first submission's
+ /// commands all complete execution before any of the second submission's
+ /// commands begin. All results produced by one submission are visible to
+ /// the next.
+ ///
+ /// Within a submission, command buffers execute in the order in which they
+ /// appear in `command_buffers`. All results produced by one buffer are
+ /// visible to the next.
+ ///
+ /// If two calls to `submit` on a single `Queue` from different threads are
+ /// not synchronized to occur in a particular order, they must pass distinct
+ /// [`Fence`]s. As explained in the [`Fence`] documentation, waiting for
+ /// operations to complete is only trustworthy when operations finish in
+ /// order of increasing fence value, but submissions from different threads
+ /// cannot determine how to order the fence values if the submissions
+ /// themselves are unordered. If each thread uses a separate [`Fence`], this
+ /// problem does not arise.
///
/// Valid usage:
- /// - all of the command buffers were created from command pools
- /// that are associated with this queue.
- /// - all of the command buffers had `CommandBuffer::finish()` called.
- /// - all surface textures that the command buffers write to must be
- /// passed to the surface_textures argument.
+ ///
+ /// - All of the [`CommandBuffer`][cb]s were created from
+ /// [`CommandEncoder`][ce]s that are associated with this queue.
+ ///
+ /// - All of those [`CommandBuffer`][cb]s must remain alive until
+ /// the submitted commands have finished execution. (Since
+ /// command buffers must not outlive their encoders, this
+ /// implies that the encoders must remain alive as well.)
+ ///
+ /// - All of the [`SurfaceTexture`][st]s that the command buffers
+ /// write to appear in the `surface_textures` argument.
+ ///
+ /// [`Fence`]: Api::Fence
+ /// [cb]: Api::CommandBuffer
+ /// [ce]: Api::CommandEncoder
+ /// [st]: Api::SurfaceTexture
unsafe fn submit(
&self,
command_buffers: &[&<Self::A as Api>::CommandBuffer],
@@ -459,7 +729,12 @@ pub trait Queue: WasmNotSendSync {
unsafe fn get_timestamp_period(&self) -> f32;
}
-/// Encoder and allocation pool for `CommandBuffer`.
+/// Encoder and allocation pool for `CommandBuffer`s.
+///
+/// A `CommandEncoder` not only constructs `CommandBuffer`s but also
+/// acts as the allocation pool that owns the buffers' underlying
+/// storage. Thus, `CommandBuffer`s must not outlive the
+/// `CommandEncoder` that created them.
///
/// The life cycle of a `CommandBuffer` is as follows:
///
@@ -472,14 +747,17 @@ pub trait Queue: WasmNotSendSync {
///
/// - Call methods like `copy_buffer_to_buffer`, `begin_render_pass`,
/// etc. on a "recording" `CommandEncoder` to add commands to the
-/// list.
+/// list. (If an error occurs, you must call `discard_encoding`; see
+/// below.)
///
/// - Call `end_encoding` on a recording `CommandEncoder` to close the
/// encoder and construct a fresh `CommandBuffer` consisting of the
/// list of commands recorded up to that point.
///
/// - Call `discard_encoding` on a recording `CommandEncoder` to drop
-/// the commands recorded thus far and close the encoder.
+/// the commands recorded thus far and close the encoder. This is
+/// the only safe thing to do on a `CommandEncoder` if an error has
+/// occurred while recording commands.
///
/// - Call `reset_all` on a closed `CommandEncoder`, passing all the
/// live `CommandBuffers` built from it. All the `CommandBuffer`s
@@ -497,6 +775,10 @@ pub trait Queue: WasmNotSendSync {
/// built it.
///
/// - A `CommandEncoder` must not outlive its `Device`.
+///
+/// It is the user's responsibility to meet this requirements. This
+/// allows `CommandEncoder` implementations to keep their state
+/// tracking to a minimum.
pub trait CommandEncoder: WasmNotSendSync + fmt::Debug {
type A: Api;
@@ -509,13 +791,20 @@ pub trait CommandEncoder: WasmNotSendSync + fmt::Debug {
/// This `CommandEncoder` must be in the "closed" state.
unsafe fn begin_encoding(&mut self, label: Label) -> Result<(), DeviceError>;
- /// Discard the command list under construction, if any.
+ /// Discard the command list under construction.
+ ///
+ /// If an error has occurred while recording commands, this
+ /// is the only safe thing to do with the encoder.
///
/// This puts this `CommandEncoder` in the "closed" state.
///
/// # Safety
///
/// This `CommandEncoder` must be in the "recording" state.
+ ///
+ /// Callers must not assume that implementations of this
+ /// function are idempotent, and thus should not call it
+ /// multiple times in a row.
unsafe fn discard_encoding(&mut self);
/// Return a fresh [`CommandBuffer`] holding the recorded commands.
@@ -1318,6 +1607,13 @@ pub struct ProgrammableStage<'a, A: Api> {
/// The name of the entry point in the compiled shader. There must be a function with this name
/// in the shader.
pub entry_point: &'a str,
+ /// Pipeline constants
+ pub constants: &'a naga::back::PipelineConstants,
+ /// Whether workgroup scoped memory will be initialized with zero values for this stage.
+ ///
+ /// This is required by the WebGPU spec, but may have overhead which can be avoided
+ /// for cross-platform applications
+ pub zero_initialize_workgroup_memory: bool,
}
// Rust gets confused about the impl requirements for `A`
@@ -1326,6 +1622,8 @@ impl<A: Api> Clone for ProgrammableStage<'_, A> {
Self {
module: self.module,
entry_point: self.entry_point,
+ constants: self.constants,
+ zero_initialize_workgroup_memory: self.zero_initialize_workgroup_memory,
}
}
}
diff --git a/third_party/rust/wgpu-hal/src/metal/adapter.rs b/third_party/rust/wgpu-hal/src/metal/adapter.rs
index 6211896838..cddba472bd 100644
--- a/third_party/rust/wgpu-hal/src/metal/adapter.rs
+++ b/third_party/rust/wgpu-hal/src/metal/adapter.rs
@@ -562,7 +562,11 @@ impl super::PrivateCapabilities {
Self {
family_check,
- msl_version: if os_is_xr || version.at_least((12, 0), (15, 0), os_is_mac) {
+ msl_version: if os_is_xr || version.at_least((14, 0), (17, 0), os_is_mac) {
+ MTLLanguageVersion::V3_1
+ } else if version.at_least((13, 0), (16, 0), os_is_mac) {
+ MTLLanguageVersion::V3_0
+ } else if version.at_least((12, 0), (15, 0), os_is_mac) {
MTLLanguageVersion::V2_4
} else if version.at_least((11, 0), (14, 0), os_is_mac) {
MTLLanguageVersion::V2_3
@@ -809,6 +813,14 @@ impl super::PrivateCapabilities {
None
},
timestamp_query_support,
+ supports_simd_scoped_operations: family_check
+ && (device.supports_family(MTLGPUFamily::Metal3)
+ || device.supports_family(MTLGPUFamily::Mac2)
+ || device.supports_family(MTLGPUFamily::Apple7)),
+ // https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf#page=5
+ int64: family_check
+ && (device.supports_family(MTLGPUFamily::Apple3)
+ || device.supports_family(MTLGPUFamily::Metal3)),
}
}
@@ -882,7 +894,7 @@ impl super::PrivateCapabilities {
}
features.set(
F::SHADER_INT64,
- self.msl_version >= MTLLanguageVersion::V2_3,
+ self.int64 && self.msl_version >= MTLLanguageVersion::V2_3,
);
features.set(
@@ -894,6 +906,10 @@ impl super::PrivateCapabilities {
features.set(F::RG11B10UFLOAT_RENDERABLE, self.format_rg11b10_all);
features.set(F::SHADER_UNUSED_VERTEX_OUTPUT, true);
+ if self.supports_simd_scoped_operations {
+ features.insert(F::SUBGROUP | F::SUBGROUP_BARRIER);
+ }
+
features
}
@@ -948,6 +964,8 @@ impl super::PrivateCapabilities {
max_vertex_buffers: self.max_vertex_buffers,
max_vertex_attributes: 31,
max_vertex_buffer_array_stride: base.max_vertex_buffer_array_stride,
+ min_subgroup_size: 4,
+ max_subgroup_size: 64,
max_push_constant_size: 0x1000,
min_uniform_buffer_offset_alignment: self.buffer_alignment as u32,
min_storage_buffer_offset_alignment: self.buffer_alignment as u32,
diff --git a/third_party/rust/wgpu-hal/src/metal/conv.rs b/third_party/rust/wgpu-hal/src/metal/conv.rs
index 8f6439b50b..6ebabee1a6 100644
--- a/third_party/rust/wgpu-hal/src/metal/conv.rs
+++ b/third_party/rust/wgpu-hal/src/metal/conv.rs
@@ -222,6 +222,7 @@ pub fn map_vertex_format(format: wgt::VertexFormat) -> metal::MTLVertexFormat {
Vf::Uint32x4 => UInt4,
Vf::Sint32x4 => Int4,
Vf::Float32x4 => Float4,
+ Vf::Unorm10_10_10_2 => UInt1010102Normalized,
Vf::Float64 | Vf::Float64x2 | Vf::Float64x3 | Vf::Float64x4 => unimplemented!(),
}
}
diff --git a/third_party/rust/wgpu-hal/src/metal/device.rs b/third_party/rust/wgpu-hal/src/metal/device.rs
index 179429f5d7..2c8f5a2bfb 100644
--- a/third_party/rust/wgpu-hal/src/metal/device.rs
+++ b/third_party/rust/wgpu-hal/src/metal/device.rs
@@ -69,7 +69,13 @@ impl super::Device {
) -> Result<CompiledShader, crate::PipelineError> {
let stage_bit = map_naga_stage(naga_stage);
- let module = &stage.module.naga.module;
+ let (module, module_info) = naga::back::pipeline_constants::process_overrides(
+ &stage.module.naga.module,
+ &stage.module.naga.info,
+ stage.constants,
+ )
+ .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("MSL: {:?}", e)))?;
+
let ep_resources = &layout.per_stage_map[naga_stage];
let bounds_check_policy = if stage.module.runtime_checks {
@@ -88,6 +94,8 @@ impl super::Device {
metal::MTLLanguageVersion::V2_2 => (2, 2),
metal::MTLLanguageVersion::V2_3 => (2, 3),
metal::MTLLanguageVersion::V2_4 => (2, 4),
+ metal::MTLLanguageVersion::V3_0 => (3, 0),
+ metal::MTLLanguageVersion::V3_1 => (3, 1),
},
inline_samplers: Default::default(),
spirv_cross_compatibility: false,
@@ -104,7 +112,7 @@ impl super::Device {
// TODO: support bounds checks on binding arrays
binding_array: naga::proc::BoundsCheckPolicy::Unchecked,
},
- zero_initialize_workgroup_memory: true,
+ zero_initialize_workgroup_memory: stage.zero_initialize_workgroup_memory,
};
let pipeline_options = naga::back::msl::PipelineOptions {
@@ -114,13 +122,9 @@ impl super::Device {
},
};
- let (source, info) = naga::back::msl::write_string(
- module,
- &stage.module.naga.info,
- &options,
- &pipeline_options,
- )
- .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("MSL: {:?}", e)))?;
+ let (source, info) =
+ naga::back::msl::write_string(&module, &module_info, &options, &pipeline_options)
+ .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("MSL: {:?}", e)))?;
log::debug!(
"Naga generated shader for entry point '{}' and stage {:?}\n{}",
@@ -168,7 +172,7 @@ impl super::Device {
})?;
// collect sizes indices, immutable buffers, and work group memory sizes
- let ep_info = &stage.module.naga.info.get_entry_point(ep_index);
+ let ep_info = &module_info.get_entry_point(ep_index);
let mut wg_memory_sizes = Vec::new();
let mut sized_bindings = Vec::new();
let mut immutable_buffer_mask = 0;
diff --git a/third_party/rust/wgpu-hal/src/metal/mod.rs b/third_party/rust/wgpu-hal/src/metal/mod.rs
index 6aeafb0f86..7d547cfe3c 100644
--- a/third_party/rust/wgpu-hal/src/metal/mod.rs
+++ b/third_party/rust/wgpu-hal/src/metal/mod.rs
@@ -269,6 +269,8 @@ struct PrivateCapabilities {
supports_shader_primitive_index: bool,
has_unified_memory: Option<bool>,
timestamp_query_support: TimestampQuerySupport,
+ supports_simd_scoped_operations: bool,
+ int64: bool,
}
#[derive(Clone, Debug)]
@@ -649,7 +651,7 @@ struct BufferResource {
/// Buffers with the [`wgt::BufferBindingType::Storage`] binding type can
/// hold WGSL runtime-sized arrays. When one does, we must pass its size to
/// shader entry points to implement bounds checks and WGSL's `arrayLength`
- /// function. See [`device::CompiledShader::sized_bindings`] for details.
+ /// function. See `device::CompiledShader::sized_bindings` for details.
///
/// [`Storage`]: wgt::BufferBindingType::Storage
binding_size: Option<wgt::BufferSize>,
@@ -680,12 +682,12 @@ struct PipelineStageInfo {
/// The buffer argument table index at which we pass runtime-sized arrays' buffer sizes.
///
- /// See [`device::CompiledShader::sized_bindings`] for more details.
+ /// See `device::CompiledShader::sized_bindings` for more details.
sizes_slot: Option<naga::back::msl::Slot>,
/// Bindings of all WGSL `storage` globals that contain runtime-sized arrays.
///
- /// See [`device::CompiledShader::sized_bindings`] for more details.
+ /// See `device::CompiledShader::sized_bindings` for more details.
sized_bindings: Vec<naga::ResourceBinding>,
}
@@ -801,7 +803,7 @@ struct CommandState {
///
/// Specifically:
///
- /// - The keys are ['ResourceBinding`] values (that is, the WGSL `@group`
+ /// - The keys are [`ResourceBinding`] values (that is, the WGSL `@group`
/// and `@binding` attributes) for `var<storage>` global variables in the
/// current module that contain runtime-sized arrays.
///
@@ -813,7 +815,7 @@ struct CommandState {
/// of the buffers listed in [`stage_infos.S.sized_bindings`], which we must
/// pass to the entry point.
///
- /// See [`device::CompiledShader::sized_bindings`] for more details.
+ /// See `device::CompiledShader::sized_bindings` for more details.
///
/// [`ResourceBinding`]: naga::ResourceBinding
storage_buffer_length_map: rustc_hash::FxHashMap<naga::ResourceBinding, wgt::BufferSize>,
diff --git a/third_party/rust/wgpu-hal/src/vulkan/adapter.rs b/third_party/rust/wgpu-hal/src/vulkan/adapter.rs
index 2665463792..21219361f4 100644
--- a/third_party/rust/wgpu-hal/src/vulkan/adapter.rs
+++ b/third_party/rust/wgpu-hal/src/vulkan/adapter.rs
@@ -35,6 +35,8 @@ fn indexing_features() -> wgt::Features {
/// [`PhysicalDeviceFeatures::from_extensions_and_requested_features`]
/// constructs an value of this type indicating which Vulkan features to
/// enable, based on the `wgpu_types::Features` requested.
+///
+/// [`Instance::expose_adapter`]: super::Instance::expose_adapter
#[derive(Debug, Default)]
pub struct PhysicalDeviceFeatures {
/// Basic Vulkan 1.0 features.
@@ -86,6 +88,9 @@ pub struct PhysicalDeviceFeatures {
///
/// However, we do populate this when creating a device if
/// [`Features::RAY_TRACING_ACCELERATION_STRUCTURE`] is requested.
+ ///
+ /// [`Instance::expose_adapter`]: super::Instance::expose_adapter
+ /// [`Features::RAY_TRACING_ACCELERATION_STRUCTURE`]: wgt::Features::RAY_TRACING_ACCELERATION_STRUCTURE
buffer_device_address: Option<vk::PhysicalDeviceBufferDeviceAddressFeaturesKHR>,
/// Features provided by `VK_KHR_ray_query`,
@@ -95,12 +100,17 @@ pub struct PhysicalDeviceFeatures {
/// this from `vkGetPhysicalDeviceFeatures2`.
///
/// However, we do populate this when creating a device if ray tracing is requested.
+ ///
+ /// [`Instance::expose_adapter`]: super::Instance::expose_adapter
ray_query: Option<vk::PhysicalDeviceRayQueryFeaturesKHR>,
/// Features provided by `VK_KHR_zero_initialize_workgroup_memory`, promoted
/// to Vulkan 1.3.
zero_initialize_workgroup_memory:
Option<vk::PhysicalDeviceZeroInitializeWorkgroupMemoryFeatures>,
+
+ /// Features provided by `VK_EXT_subgroup_size_control`, promoted to Vulkan 1.3.
+ subgroup_size_control: Option<vk::PhysicalDeviceSubgroupSizeControlFeatures>,
}
// This is safe because the structs have `p_next: *mut c_void`, which we null out/never read.
@@ -148,6 +158,9 @@ impl PhysicalDeviceFeatures {
if let Some(ref mut feature) = self.ray_query {
info = info.push_next(feature);
}
+ if let Some(ref mut feature) = self.subgroup_size_control {
+ info = info.push_next(feature);
+ }
info
}
@@ -175,6 +188,7 @@ impl PhysicalDeviceFeatures {
/// [`Features`]: wgt::Features
/// [`DownlevelFlags`]: wgt::DownlevelFlags
/// [`PrivateCapabilities`]: super::PrivateCapabilities
+ /// [`add_to_device_create_builder`]: PhysicalDeviceFeatures::add_to_device_create_builder
/// [`DeviceCreateInfoBuilder`]: vk::DeviceCreateInfoBuilder
/// [`Adapter::required_device_extensions`]: super::Adapter::required_device_extensions
fn from_extensions_and_requested_features(
@@ -434,6 +448,17 @@ impl PhysicalDeviceFeatures {
} else {
None
},
+ subgroup_size_control: if device_api_version >= vk::API_VERSION_1_3
+ || enabled_extensions.contains(&vk::ExtSubgroupSizeControlFn::name())
+ {
+ Some(
+ vk::PhysicalDeviceSubgroupSizeControlFeatures::builder()
+ .subgroup_size_control(true)
+ .build(),
+ )
+ } else {
+ None
+ },
}
}
@@ -442,6 +467,9 @@ impl PhysicalDeviceFeatures {
/// Given `self`, together with the instance and physical device it was
/// built from, and a `caps` also built from those, determine which wgpu
/// features and downlevel flags the device can support.
+ ///
+ /// [`Features`]: wgt::Features
+ /// [`DownlevelFlags`]: wgt::DownlevelFlags
fn to_wgpu(
&self,
instance: &ash::Instance,
@@ -638,6 +666,34 @@ impl PhysicalDeviceFeatures {
);
}
+ if let Some(ref subgroup) = caps.subgroup {
+ if (caps.device_api_version >= vk::API_VERSION_1_3
+ || caps.supports_extension(vk::ExtSubgroupSizeControlFn::name()))
+ && subgroup.supported_operations.contains(
+ vk::SubgroupFeatureFlags::BASIC
+ | vk::SubgroupFeatureFlags::VOTE
+ | vk::SubgroupFeatureFlags::ARITHMETIC
+ | vk::SubgroupFeatureFlags::BALLOT
+ | vk::SubgroupFeatureFlags::SHUFFLE
+ | vk::SubgroupFeatureFlags::SHUFFLE_RELATIVE,
+ )
+ {
+ features.set(
+ F::SUBGROUP,
+ subgroup
+ .supported_stages
+ .contains(vk::ShaderStageFlags::COMPUTE | vk::ShaderStageFlags::FRAGMENT),
+ );
+ features.set(
+ F::SUBGROUP_VERTEX,
+ subgroup
+ .supported_stages
+ .contains(vk::ShaderStageFlags::VERTEX),
+ );
+ features.insert(F::SUBGROUP_BARRIER);
+ }
+ }
+
let supports_depth_format = |format| {
supports_format(
instance,
@@ -773,6 +829,13 @@ pub struct PhysicalDeviceProperties {
/// `VK_KHR_driver_properties` extension, promoted to Vulkan 1.2.
driver: Option<vk::PhysicalDeviceDriverPropertiesKHR>,
+ /// Additional `vk::PhysicalDevice` properties from Vulkan 1.1.
+ subgroup: Option<vk::PhysicalDeviceSubgroupProperties>,
+
+ /// Additional `vk::PhysicalDevice` properties from the
+ /// `VK_EXT_subgroup_size_control` extension, promoted to Vulkan 1.3.
+ subgroup_size_control: Option<vk::PhysicalDeviceSubgroupSizeControlProperties>,
+
/// The device API version.
///
/// Which is the version of Vulkan supported for device-level functionality.
@@ -888,6 +951,11 @@ impl PhysicalDeviceProperties {
if self.supports_extension(vk::ExtImageRobustnessFn::name()) {
extensions.push(vk::ExtImageRobustnessFn::name());
}
+
+ // Require `VK_EXT_subgroup_size_control` if the associated feature was requested
+ if requested_features.contains(wgt::Features::SUBGROUP) {
+ extensions.push(vk::ExtSubgroupSizeControlFn::name());
+ }
}
// Optional `VK_KHR_swapchain_mutable_format`
@@ -987,6 +1055,14 @@ impl PhysicalDeviceProperties {
.min(crate::MAX_VERTEX_BUFFERS as u32),
max_vertex_attributes: limits.max_vertex_input_attributes,
max_vertex_buffer_array_stride: limits.max_vertex_input_binding_stride,
+ min_subgroup_size: self
+ .subgroup_size_control
+ .map(|subgroup_size| subgroup_size.min_subgroup_size)
+ .unwrap_or(0),
+ max_subgroup_size: self
+ .subgroup_size_control
+ .map(|subgroup_size| subgroup_size.max_subgroup_size)
+ .unwrap_or(0),
max_push_constant_size: limits.max_push_constants_size,
min_uniform_buffer_offset_alignment: limits.min_uniform_buffer_offset_alignment as u32,
min_storage_buffer_offset_alignment: limits.min_storage_buffer_offset_alignment as u32,
@@ -1042,6 +1118,9 @@ impl super::InstanceShared {
let supports_driver_properties = capabilities.device_api_version
>= vk::API_VERSION_1_2
|| capabilities.supports_extension(vk::KhrDriverPropertiesFn::name());
+ let supports_subgroup_size_control = capabilities.device_api_version
+ >= vk::API_VERSION_1_3
+ || capabilities.supports_extension(vk::ExtSubgroupSizeControlFn::name());
let supports_acceleration_structure =
capabilities.supports_extension(vk::KhrAccelerationStructureFn::name());
@@ -1075,6 +1154,20 @@ impl super::InstanceShared {
builder = builder.push_next(next);
}
+ if capabilities.device_api_version >= vk::API_VERSION_1_1 {
+ let next = capabilities
+ .subgroup
+ .insert(vk::PhysicalDeviceSubgroupProperties::default());
+ builder = builder.push_next(next);
+ }
+
+ if supports_subgroup_size_control {
+ let next = capabilities
+ .subgroup_size_control
+ .insert(vk::PhysicalDeviceSubgroupSizeControlProperties::default());
+ builder = builder.push_next(next);
+ }
+
let mut properties2 = builder.build();
unsafe {
get_device_properties.get_physical_device_properties2(phd, &mut properties2);
@@ -1190,6 +1283,16 @@ impl super::InstanceShared {
builder = builder.push_next(next);
}
+ // `VK_EXT_subgroup_size_control` is promoted to 1.3
+ if capabilities.device_api_version >= vk::API_VERSION_1_3
+ || capabilities.supports_extension(vk::ExtSubgroupSizeControlFn::name())
+ {
+ let next = features
+ .subgroup_size_control
+ .insert(vk::PhysicalDeviceSubgroupSizeControlFeatures::default());
+ builder = builder.push_next(next);
+ }
+
let mut features2 = builder.build();
unsafe {
get_device_properties.get_physical_device_features2(phd, &mut features2);
@@ -1382,6 +1485,9 @@ impl super::Instance {
}),
image_format_list: phd_capabilities.device_api_version >= vk::API_VERSION_1_2
|| phd_capabilities.supports_extension(vk::KhrImageFormatListFn::name()),
+ subgroup_size_control: phd_features
+ .subgroup_size_control
+ .map_or(false, |ext| ext.subgroup_size_control == vk::TRUE),
};
let capabilities = crate::Capabilities {
limits: phd_capabilities.to_wgpu_limits(),
@@ -1581,6 +1687,15 @@ impl super::Adapter {
capabilities.push(spv::Capability::Geometry);
}
+ if features.intersects(wgt::Features::SUBGROUP | wgt::Features::SUBGROUP_VERTEX) {
+ capabilities.push(spv::Capability::GroupNonUniform);
+ capabilities.push(spv::Capability::GroupNonUniformVote);
+ capabilities.push(spv::Capability::GroupNonUniformArithmetic);
+ capabilities.push(spv::Capability::GroupNonUniformBallot);
+ capabilities.push(spv::Capability::GroupNonUniformShuffle);
+ capabilities.push(spv::Capability::GroupNonUniformShuffleRelative);
+ }
+
if features.intersects(
wgt::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING
| wgt::Features::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING,
@@ -1616,7 +1731,13 @@ impl super::Adapter {
true, // could check `super::Workarounds::SEPARATE_ENTRY_POINTS`
);
spv::Options {
- lang_version: (1, 0),
+ lang_version: if features
+ .intersects(wgt::Features::SUBGROUP | wgt::Features::SUBGROUP_VERTEX)
+ {
+ (1, 3)
+ } else {
+ (1, 0)
+ },
flags,
capabilities: Some(capabilities.iter().cloned().collect()),
bounds_check_policies: naga::proc::BoundsCheckPolicies {
diff --git a/third_party/rust/wgpu-hal/src/vulkan/command.rs b/third_party/rust/wgpu-hal/src/vulkan/command.rs
index 43a2471954..ceb44dfbe6 100644
--- a/third_party/rust/wgpu-hal/src/vulkan/command.rs
+++ b/third_party/rust/wgpu-hal/src/vulkan/command.rs
@@ -104,6 +104,11 @@ impl crate::CommandEncoder for super::CommandEncoder {
}
unsafe fn discard_encoding(&mut self) {
+ // Safe use requires this is not called in the "closed" state, so the buffer
+ // shouldn't be null. Assert this to make sure we're not pushing null
+ // buffers to the discard pile.
+ assert_ne!(self.active, vk::CommandBuffer::null());
+
self.discarded.push(self.active);
self.active = vk::CommandBuffer::null();
}
diff --git a/third_party/rust/wgpu-hal/src/vulkan/conv.rs b/third_party/rust/wgpu-hal/src/vulkan/conv.rs
index 8202c93aa3..fe284f32a9 100644
--- a/third_party/rust/wgpu-hal/src/vulkan/conv.rs
+++ b/third_party/rust/wgpu-hal/src/vulkan/conv.rs
@@ -399,6 +399,7 @@ pub fn map_vertex_format(vertex_format: wgt::VertexFormat) -> vk::Format {
Vf::Float64x2 => vk::Format::R64G64_SFLOAT,
Vf::Float64x3 => vk::Format::R64G64B64_SFLOAT,
Vf::Float64x4 => vk::Format::R64G64B64A64_SFLOAT,
+ Vf::Unorm10_10_10_2 => vk::Format::A2B10G10R10_UNORM_PACK32,
}
}
diff --git a/third_party/rust/wgpu-hal/src/vulkan/device.rs b/third_party/rust/wgpu-hal/src/vulkan/device.rs
index 70028cc700..ec392533a0 100644
--- a/third_party/rust/wgpu-hal/src/vulkan/device.rs
+++ b/third_party/rust/wgpu-hal/src/vulkan/device.rs
@@ -2,6 +2,7 @@ use super::conv;
use arrayvec::ArrayVec;
use ash::{extensions::khr, vk};
+use naga::back::spv::ZeroInitializeWorkgroupMemoryMode;
use parking_lot::Mutex;
use std::{
@@ -737,7 +738,8 @@ impl super::Device {
};
let needs_temp_options = !runtime_checks
|| !binding_map.is_empty()
- || naga_shader.debug_source.is_some();
+ || naga_shader.debug_source.is_some()
+ || !stage.zero_initialize_workgroup_memory;
let mut temp_options;
let options = if needs_temp_options {
temp_options = self.naga_options.clone();
@@ -760,27 +762,40 @@ impl super::Device {
file_name: debug.file_name.as_ref().as_ref(),
})
}
+ if !stage.zero_initialize_workgroup_memory {
+ temp_options.zero_initialize_workgroup_memory =
+ ZeroInitializeWorkgroupMemoryMode::None;
+ }
&temp_options
} else {
&self.naga_options
};
+
+ let (module, info) = naga::back::pipeline_constants::process_overrides(
+ &naga_shader.module,
+ &naga_shader.info,
+ stage.constants,
+ )
+ .map_err(|e| crate::PipelineError::Linkage(stage_flags, format!("{e}")))?;
+
let spv = {
profiling::scope!("naga::spv::write_vec");
- naga::back::spv::write_vec(
- &naga_shader.module,
- &naga_shader.info,
- options,
- Some(&pipeline_options),
- )
+ naga::back::spv::write_vec(&module, &info, options, Some(&pipeline_options))
}
.map_err(|e| crate::PipelineError::Linkage(stage_flags, format!("{e}")))?;
self.create_shader_module_impl(&spv)?
}
};
+ let mut flags = vk::PipelineShaderStageCreateFlags::empty();
+ if self.shared.private_caps.subgroup_size_control {
+ flags |= vk::PipelineShaderStageCreateFlags::ALLOW_VARYING_SUBGROUP_SIZE
+ }
+
let entry_point = CString::new(stage.entry_point).unwrap();
let create_info = vk::PipelineShaderStageCreateInfo::builder()
+ .flags(flags)
.stage(conv::map_shader_stage(stage_flags))
.module(vk_module)
.name(&entry_point)
@@ -1587,6 +1602,7 @@ impl crate::Device for super::Device {
.shared
.workarounds
.contains(super::Workarounds::SEPARATE_ENTRY_POINTS)
+ || !naga_shader.module.overrides.is_empty()
{
return Ok(super::ShaderModule::Intermediate {
naga_shader,
diff --git a/third_party/rust/wgpu-hal/src/vulkan/mod.rs b/third_party/rust/wgpu-hal/src/vulkan/mod.rs
index 0cd385045c..d1ea82772e 100644
--- a/third_party/rust/wgpu-hal/src/vulkan/mod.rs
+++ b/third_party/rust/wgpu-hal/src/vulkan/mod.rs
@@ -238,6 +238,7 @@ struct PrivateCapabilities {
robust_image_access2: bool,
zero_initialize_workgroup_memory: bool,
image_format_list: bool,
+ subgroup_size_control: bool,
}
bitflags::bitflags!(
@@ -413,6 +414,15 @@ pub struct TextureView {
attachment: FramebufferAttachment,
}
+impl TextureView {
+ /// # Safety
+ ///
+ /// - The image view handle must not be manually destroyed
+ pub unsafe fn raw_handle(&self) -> vk::ImageView {
+ self.raw
+ }
+}
+
#[derive(Debug)]
pub struct Sampler {
raw: vk::Sampler,
@@ -438,6 +448,7 @@ pub struct BindGroup {
set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>,
}
+/// Miscellaneous allocation recycling pool for `CommandAllocator`.
#[derive(Default)]
struct Temp {
marker: Vec<u8>,
@@ -467,11 +478,31 @@ impl Temp {
pub struct CommandEncoder {
raw: vk::CommandPool,
device: Arc<DeviceShared>,
+
+ /// The current command buffer, if `self` is in the ["recording"]
+ /// state.
+ ///
+ /// ["recording"]: crate::CommandEncoder
+ ///
+ /// If non-`null`, the buffer is in the Vulkan "recording" state.
active: vk::CommandBuffer,
+
+ /// What kind of pass we are currently within: compute or render.
bind_point: vk::PipelineBindPoint,
+
+ /// Allocation recycling pool for this encoder.
temp: Temp,
+
+ /// A pool of available command buffers.
+ ///
+ /// These are all in the Vulkan "initial" state.
free: Vec<vk::CommandBuffer>,
+
+ /// A pool of discarded command buffers.
+ ///
+ /// These could be in any Vulkan state except "pending".
discarded: Vec<vk::CommandBuffer>,
+
/// If this is true, the active renderpass enabled a debug span,
/// and needs to be disabled on renderpass close.
rpass_debug_marker_active: bool,
@@ -481,6 +512,15 @@ pub struct CommandEncoder {
end_of_pass_timer_query: Option<(vk::QueryPool, u32)>,
}
+impl CommandEncoder {
+ /// # Safety
+ ///
+ /// - The command buffer handle must not be manually destroyed
+ pub unsafe fn raw_handle(&self) -> vk::CommandBuffer {
+ self.active
+ }
+}
+
impl fmt::Debug for CommandEncoder {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("CommandEncoder")
@@ -519,9 +559,47 @@ pub struct QuerySet {
raw: vk::QueryPool,
}
+/// The [`Api::Fence`] type for [`vulkan::Api`].
+///
+/// This is an `enum` because there are two possible implementations of
+/// `wgpu-hal` fences on Vulkan: Vulkan fences, which work on any version of
+/// Vulkan, and Vulkan timeline semaphores, which are easier and cheaper but
+/// require non-1.0 features.
+///
+/// [`Device::create_fence`] returns a [`TimelineSemaphore`] if
+/// [`VK_KHR_timeline_semaphore`] is available and enabled, and a [`FencePool`]
+/// otherwise.
+///
+/// [`Api::Fence`]: crate::Api::Fence
+/// [`vulkan::Api`]: Api
+/// [`Device::create_fence`]: crate::Device::create_fence
+/// [`TimelineSemaphore`]: Fence::TimelineSemaphore
+/// [`VK_KHR_timeline_semaphore`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VK_KHR_timeline_semaphore
+/// [`FencePool`]: Fence::FencePool
#[derive(Debug)]
pub enum Fence {
+ /// A Vulkan [timeline semaphore].
+ ///
+ /// These are simpler to use than Vulkan fences, since timeline semaphores
+ /// work exactly the way [`wpgu_hal::Api::Fence`] is specified to work.
+ ///
+ /// [timeline semaphore]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-semaphores
+ /// [`wpgu_hal::Api::Fence`]: crate::Api::Fence
TimelineSemaphore(vk::Semaphore),
+
+ /// A collection of Vulkan [fence]s, each associated with a [`FenceValue`].
+ ///
+ /// The effective [`FenceValue`] of this variant is the greater of
+ /// `last_completed` and the maximum value associated with a signalled fence
+ /// in `active`.
+ ///
+ /// Fences are available in all versions of Vulkan, but since they only have
+ /// two states, "signaled" and "unsignaled", we need to use a separate fence
+ /// for each queue submission we might want to wait for, and remember which
+ /// [`FenceValue`] each one represents.
+ ///
+ /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences
+ /// [`FenceValue`]: crate::FenceValue
FencePool {
last_completed: crate::FenceValue,
/// The pending fence values have to be ascending.
@@ -531,21 +609,32 @@ pub enum Fence {
}
impl Fence {
+ /// Return the highest [`FenceValue`] among the signalled fences in `active`.
+ ///
+ /// As an optimization, assume that we already know that the fence has
+ /// reached `last_completed`, and don't bother checking fences whose values
+ /// are less than that: those fences remain in the `active` array only
+ /// because we haven't called `maintain` yet to clean them up.
+ ///
+ /// [`FenceValue`]: crate::FenceValue
fn check_active(
device: &ash::Device,
- mut max_value: crate::FenceValue,
+ mut last_completed: crate::FenceValue,
active: &[(crate::FenceValue, vk::Fence)],
) -> Result<crate::FenceValue, crate::DeviceError> {
for &(value, raw) in active.iter() {
unsafe {
- if value > max_value && device.get_fence_status(raw)? {
- max_value = value;
+ if value > last_completed && device.get_fence_status(raw)? {
+ last_completed = value;
}
}
}
- Ok(max_value)
+ Ok(last_completed)
}
+ /// Return the highest signalled [`FenceValue`] for `self`.
+ ///
+ /// [`FenceValue`]: crate::FenceValue
fn get_latest(
&self,
device: &ash::Device,
@@ -566,6 +655,18 @@ impl Fence {
}
}
+ /// Trim the internal state of this [`Fence`].
+ ///
+ /// This function has no externally visible effect, but you should call it
+ /// periodically to keep this fence's resource consumption under control.
+ ///
+ /// For fences using the [`FencePool`] implementation, this function
+ /// recycles fences that have been signaled. If you don't call this,
+ /// [`Queue::submit`] will just keep allocating a new Vulkan fence every
+ /// time it's called.
+ ///
+ /// [`FencePool`]: Fence::FencePool
+ /// [`Queue::submit`]: crate::Queue::submit
fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> {
match *self {
Self::TimelineSemaphore(_) => {}