summaryrefslogtreecommitdiffstats
path: root/dom/webgpu/tests/cts/checkout/src/stress
diff options
context:
space:
mode:
Diffstat (limited to 'dom/webgpu/tests/cts/checkout/src/stress')
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/README.txt6
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/adapter/README.txt1
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/adapter/device_allocation.spec.ts290
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/compute/README.txt1
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/compute/compute_pass.spec.ts243
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/device/README.txt2
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/device/bind_group_allocation.spec.ts65
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/device/bind_group_layout_allocation.spec.ts20
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/device/buffer_allocation.spec.ts25
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/device/command_encoder_allocation.spec.ts20
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/device/compute_pipeline_allocation.spec.ts20
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/device/pipeline_layout_allocation.spec.ts20
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/device/query_set_allocation.spec.ts27
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/device/render_bundle_allocation.spec.ts20
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/device/render_pipeline_allocation.spec.ts20
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/device/sampler_allocation.spec.ts20
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/device/shader_module_allocation.spec.ts20
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/device/texture_allocation.spec.ts27
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/listing.ts5
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/memory/README.txt1
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/memory/churn.spec.ts17
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/memory/oom.spec.ts45
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/queries/README.txt1
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/queries/occlusion.spec.ts10
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/queries/pipeline_statistics.spec.ts38
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/queries/resolve.spec.ts15
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/queries/timestamps.spec.ts50
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/queue/README.txt1
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/queue/submit.spec.ts102
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/render/README.txt3
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/render/render_pass.spec.ts353
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/render/vertex_buffers.spec.ts130
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/shaders/README.txt1
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/shaders/entry_points.spec.ts78
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/shaders/non_halting.spec.ts194
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/shaders/slow.spec.ts195
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/texture/README.txt1
-rw-r--r--dom/webgpu/tests/cts/checkout/src/stress/texture/large.spec.ts56
38 files changed, 2143 insertions, 0 deletions
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/README.txt b/dom/webgpu/tests/cts/checkout/src/stress/README.txt
new file mode 100644
index 0000000000..5457e8400d
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/README.txt
@@ -0,0 +1,6 @@
+WebGPU stress tests.
+
+These tests are separated from conformance tests because they are more likely to
+cause browser hangs and crashes.
+
+TODO: Look at dEQP (OpenGL ES and Vulkan) and WebGL for inspiration here.
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/adapter/README.txt b/dom/webgpu/tests/cts/checkout/src/stress/adapter/README.txt
new file mode 100644
index 0000000000..3a57f3d87f
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/adapter/README.txt
@@ -0,0 +1 @@
+Stress tests covering use of GPUAdapter.
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/adapter/device_allocation.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/adapter/device_allocation.spec.ts
new file mode 100644
index 0000000000..184b4e8170
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/adapter/device_allocation.spec.ts
@@ -0,0 +1,290 @@
+export const description = `
+Stress tests for GPUAdapter.requestDevice.
+`;
+
+import { Fixture } from '../../common/framework/fixture.js';
+import { makeTestGroup } from '../../common/framework/test_group.js';
+import { attemptGarbageCollection } from '../../common/util/collect_garbage.js';
+import { keysOf } from '../../common/util/data_tables.js';
+import { getGPU } from '../../common/util/navigator_gpu.js';
+import { assert, iterRange } from '../../common/util/util.js';
+import { kLimitInfo } from '../../webgpu/capability_info.js';
+
+export const g = makeTestGroup(Fixture);
+
+/** Adapter preference identifier to option. */
+const kAdapterTypeOptions: {
+ readonly [k in GPUPowerPreference | 'fallback']: GPURequestAdapterOptions;
+} = /* prettier-ignore */ {
+ 'low-power': { powerPreference: 'low-power', forceFallbackAdapter: false },
+ 'high-performance': { powerPreference: 'high-performance', forceFallbackAdapter: false },
+ 'fallback': { powerPreference: undefined, forceFallbackAdapter: true },
+};
+/** List of all adapter hint types. */
+const kAdapterTypes = keysOf(kAdapterTypeOptions);
+
+/**
+ * Creates a device, a valid compute pipeline, valid resources for the pipeline, and
+ * ties them together into a set of compute commands ready to be submitted to the GPU
+ * queue. Does not submit the commands in order to make sure that all resources are
+ * kept alive until the device is destroyed.
+ */
+async function createDeviceAndComputeCommands(adapter: GPUAdapter) {
+ // Constants are computed such that per run, this function should allocate roughly 2G
+ // worth of data. This should be sufficient as we run these creation functions many
+ // times. If the data backing the created objects is not recycled we should OOM.
+ const kNumPipelines = 64;
+ const kNumBindgroups = 128;
+ const kNumBufferElements =
+ kLimitInfo.maxComputeWorkgroupSizeX.default * kLimitInfo.maxComputeWorkgroupSizeY.default;
+ const kBufferSize = kNumBufferElements * 4;
+ const kBufferData = new Uint32Array([...iterRange(kNumBufferElements, x => x)]);
+
+ const device: GPUDevice = await adapter.requestDevice();
+ const commands = [];
+
+ for (let pipelineIndex = 0; pipelineIndex < kNumPipelines; ++pipelineIndex) {
+ const pipeline = device.createComputePipeline({
+ layout: 'auto',
+ compute: {
+ module: device.createShaderModule({
+ code: `
+ struct Buffer { data: array<u32>, };
+
+ @group(0) @binding(0) var<storage, read_write> buffer: Buffer;
+ @compute @workgroup_size(1) fn main(
+ @builtin(global_invocation_id) id: vec3<u32>) {
+ buffer.data[id.x * ${kLimitInfo.maxComputeWorkgroupSizeX.default}u + id.y] =
+ buffer.data[id.x * ${kLimitInfo.maxComputeWorkgroupSizeX.default}u + id.y] +
+ ${pipelineIndex}u;
+ }
+ `,
+ }),
+ entryPoint: 'main',
+ },
+ });
+ for (let bindgroupIndex = 0; bindgroupIndex < kNumBindgroups; ++bindgroupIndex) {
+ const buffer = device.createBuffer({
+ size: kBufferSize,
+ usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
+ });
+ device.queue.writeBuffer(buffer, 0, kBufferData, 0, kBufferData.length);
+ const bindgroup = device.createBindGroup({
+ layout: pipeline.getBindGroupLayout(0),
+ entries: [{ binding: 0, resource: { buffer } }],
+ });
+
+ const encoder = device.createCommandEncoder();
+ const pass = encoder.beginComputePass();
+ pass.setPipeline(pipeline);
+ pass.setBindGroup(0, bindgroup);
+ pass.dispatchWorkgroups(
+ kLimitInfo.maxComputeWorkgroupSizeX.default,
+ kLimitInfo.maxComputeWorkgroupSizeY.default
+ );
+ pass.end();
+ commands.push(encoder.finish());
+ }
+ }
+ return { device, objects: commands };
+}
+
+/**
+ * Creates a device, a valid render pipeline, valid resources for the pipeline, and
+ * ties them together into a set of render commands ready to be submitted to the GPU
+ * queue. Does not submit the commands in order to make sure that all resources are
+ * kept alive until the device is destroyed.
+ */
+async function createDeviceAndRenderCommands(adapter: GPUAdapter) {
+ // Constants are computed such that per run, this function should allocate roughly 2G
+ // worth of data. This should be sufficient as we run these creation functions many
+ // times. If the data backing the created objects is not recycled we should OOM.
+ const kNumPipelines = 128;
+ const kNumBindgroups = 128;
+ const kSize = 128;
+ const kBufferData = new Uint32Array([...iterRange(kSize * kSize, x => x)]);
+
+ const device: GPUDevice = await adapter.requestDevice();
+ const commands = [];
+
+ for (let pipelineIndex = 0; pipelineIndex < kNumPipelines; ++pipelineIndex) {
+ const module = device.createShaderModule({
+ code: `
+ struct Buffer { data: array<vec4<u32>, ${(kSize * kSize) / 4}>, };
+
+ @group(0) @binding(0) var<uniform> buffer: Buffer;
+ @vertex fn vmain(
+ @builtin(vertex_index) vertexIndex: u32
+ ) -> @builtin(position) vec4<f32> {
+ let index = buffer.data[vertexIndex / 4u][vertexIndex % 4u];
+ let position = vec2<f32>(f32(index % ${kSize}u), f32(index / ${kSize}u));
+ let r = vec2<f32>(1.0 / f32(${kSize}));
+ let a = 2.0 * r;
+ let b = r - vec2<f32>(1.0);
+ return vec4<f32>(fma(position, a, b), 0.0, 1.0);
+ }
+
+ @fragment fn fmain() -> @location(0) vec4<f32> {
+ return vec4<f32>(${pipelineIndex}.0 / ${kNumPipelines}.0, 0.0, 0.0, 1.0);
+ }
+ `,
+ });
+ const pipeline = device.createRenderPipeline({
+ layout: device.createPipelineLayout({
+ bindGroupLayouts: [
+ device.createBindGroupLayout({
+ entries: [
+ {
+ binding: 0,
+ visibility: GPUShaderStage.VERTEX,
+ buffer: { type: 'uniform' },
+ },
+ ],
+ }),
+ ],
+ }),
+ vertex: { module, entryPoint: 'vmain', buffers: [] },
+ primitive: { topology: 'point-list' },
+ fragment: {
+ targets: [{ format: 'rgba8unorm' }],
+ module,
+ entryPoint: 'fmain',
+ },
+ });
+ for (let bindgroupIndex = 0; bindgroupIndex < kNumBindgroups; ++bindgroupIndex) {
+ const buffer = device.createBuffer({
+ size: kSize * kSize * 4,
+ usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
+ });
+ device.queue.writeBuffer(buffer, 0, kBufferData, 0, kBufferData.length);
+ const bindgroup = device.createBindGroup({
+ layout: pipeline.getBindGroupLayout(0),
+ entries: [{ binding: 0, resource: { buffer } }],
+ });
+ const texture = device.createTexture({
+ size: [kSize, kSize],
+ usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC,
+ format: 'rgba8unorm',
+ });
+
+ const encoder = device.createCommandEncoder();
+ const pass = encoder.beginRenderPass({
+ colorAttachments: [
+ {
+ view: texture.createView(),
+ loadOp: 'load',
+ storeOp: 'store',
+ },
+ ],
+ });
+ pass.setPipeline(pipeline);
+ pass.setBindGroup(0, bindgroup);
+ pass.draw(kSize * kSize);
+ pass.end();
+ commands.push(encoder.finish());
+ }
+ }
+ return { device, objects: commands };
+}
+
+/**
+ * Creates a device and a large number of buffers which are immediately written to. The
+ * buffers are expected to be kept alive until they or the device are destroyed.
+ */
+async function createDeviceAndBuffers(adapter: GPUAdapter) {
+ // Currently we just allocate 2G of memory using 512MB blocks. We may be able to
+ // increase this to hit OOM instead, but on integrated GPUs on Metal, this can cause
+ // kernel panics at the moment, and it can greatly increase the time needed.
+ const kTotalMemorySize = 2 * 1024 * 1024 * 1024;
+ const kMemoryBlockSize = 512 * 1024 * 1024;
+ const kMemoryBlockData = new Uint8Array(kMemoryBlockSize);
+
+ const device: GPUDevice = await adapter.requestDevice();
+ const buffers = [];
+ for (let memory = 0; memory < kTotalMemorySize; memory += kMemoryBlockSize) {
+ const buffer = device.createBuffer({
+ size: kMemoryBlockSize,
+ usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST,
+ });
+
+ // Write out to the buffer to make sure that it has backing memory.
+ device.queue.writeBuffer(buffer, 0, kMemoryBlockData, 0, kMemoryBlockData.length);
+ buffers.push(buffer);
+ }
+ return { device, objects: buffers };
+}
+
+g.test('coexisting')
+ .desc(`Tests allocation of many coexisting GPUDevice objects.`)
+ .params(u => u.combine('adapterType', kAdapterTypes))
+ .fn(async t => {
+ const { adapterType } = t.params;
+ const adapter = await getGPU().requestAdapter(kAdapterTypeOptions[adapterType]);
+ assert(adapter !== null, 'Failed to get adapter.');
+
+ // Based on Vulkan conformance test requirement to be able to create multiple devices.
+ const kNumDevices = 5;
+
+ const devices = [];
+ for (let i = 0; i < kNumDevices; ++i) {
+ const device: GPUDevice = await adapter.requestDevice();
+ devices.push(device);
+ }
+ });
+
+g.test('continuous,with_destroy')
+ .desc(
+ `Tests allocation and destruction of many GPUDevice objects over time. Device objects
+are sequentially requested with a series of device allocated objects created on each
+device. The devices are then destroyed to verify that the device and the device allocated
+objects are recycled over a very large number of iterations.`
+ )
+ .params(u => u.combine('adapterType', kAdapterTypes))
+ .fn(async t => {
+ const { adapterType } = t.params;
+ const adapter = await getGPU().requestAdapter(kAdapterTypeOptions[adapterType]);
+ assert(adapter !== null, 'Failed to get adapter.');
+
+ // Since devices are being destroyed, we should be able to create many devices.
+ const kNumDevices = 100;
+ const kFunctions = [
+ createDeviceAndBuffers,
+ createDeviceAndComputeCommands,
+ createDeviceAndRenderCommands,
+ ];
+
+ const deviceList = [];
+ const objectLists = [];
+ for (let i = 0; i < kNumDevices; ++i) {
+ const { device, objects } = await kFunctions[i % kFunctions.length](adapter);
+ t.expect(objects.length > 0, 'unable to allocate any objects');
+ deviceList.push(device);
+ objectLists.push(objects);
+ device.destroy();
+ }
+ });
+
+g.test('continuous,no_destroy')
+ .desc(
+ `Tests allocation and implicit GC of many GPUDevice objects over time. Objects are
+sequentially requested and dropped for GC over a very large number of iterations. Note
+that without destroy, we do not create device allocated objects because that will
+implicitly keep the device in scope.`
+ )
+ .params(u => u.combine('adapterType', kAdapterTypes))
+ .fn(async t => {
+ const { adapterType } = t.params;
+ const adapter = await getGPU().requestAdapter(kAdapterTypeOptions[adapterType]);
+ assert(adapter !== null, 'Failed to get adapter.');
+
+ const kNumDevices = 10_000;
+ for (let i = 1; i <= kNumDevices; ++i) {
+ await (async () => {
+ t.expect((await adapter.requestDevice()) !== null, 'unexpected null device');
+ })();
+ if (i % 10 === 0) {
+ // We need to occasionally wait for GC to clear out stale devices.
+ await attemptGarbageCollection();
+ }
+ }
+ });
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/compute/README.txt b/dom/webgpu/tests/cts/checkout/src/stress/compute/README.txt
new file mode 100644
index 0000000000..b41aabc66b
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/compute/README.txt
@@ -0,0 +1 @@
+Stress tests covering operations specific to GPUComputePipeline and GPUComputePass.
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/compute/compute_pass.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/compute/compute_pass.spec.ts
new file mode 100644
index 0000000000..76979f9fbb
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/compute/compute_pass.spec.ts
@@ -0,0 +1,243 @@
+export const description = `
+Stress tests covering GPUComputePassEncoder usage.
+`;
+
+import { makeTestGroup } from '../../common/framework/test_group.js';
+import { assert, iterRange } from '../../common/util/util.js';
+import { GPUTest } from '../../webgpu/gpu_test.js';
+
+export const g = makeTestGroup(GPUTest);
+
+g.test('many')
+ .desc(
+ `Tests execution of a huge number of compute passes using the same
+GPUComputePipeline.`
+ )
+ .fn(async t => {
+ const kNumElements = 64;
+ const data = new Uint32Array([...iterRange(kNumElements, x => x)]);
+ const buffer = t.makeBufferWithContents(data, GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC);
+ const pipeline = t.device.createComputePipeline({
+ layout: 'auto',
+ compute: {
+ module: t.device.createShaderModule({
+ code: `
+ struct Buffer { data: array<u32>, };
+ @group(0) @binding(0) var<storage, read_write> buffer: Buffer;
+ @compute @workgroup_size(1) fn main(
+ @builtin(global_invocation_id) id: vec3<u32>) {
+ buffer.data[id.x] = buffer.data[id.x] + 1u;
+ }
+ `,
+ }),
+ entryPoint: 'main',
+ },
+ });
+ const bindGroup = t.device.createBindGroup({
+ layout: pipeline.getBindGroupLayout(0),
+ entries: [{ binding: 0, resource: { buffer } }],
+ });
+ const kNumIterations = 250_000;
+ for (let i = 0; i < kNumIterations; ++i) {
+ const encoder = t.device.createCommandEncoder();
+ const pass = encoder.beginComputePass();
+ pass.setPipeline(pipeline);
+ pass.setBindGroup(0, bindGroup);
+ pass.dispatchWorkgroups(kNumElements);
+ pass.end();
+ t.device.queue.submit([encoder.finish()]);
+ }
+ t.expectGPUBufferValuesEqual(
+ buffer,
+ new Uint32Array([...iterRange(kNumElements, x => x + kNumIterations)])
+ );
+ });
+
+g.test('pipeline_churn')
+ .desc(
+ `Tests execution of a huge number of compute passes which each use a different
+GPUComputePipeline.`
+ )
+ .fn(async t => {
+ const buffer = t.makeBufferWithContents(
+ new Uint32Array([0]),
+ GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC
+ );
+ const kNumIterations = 10_000;
+ const stages = iterRange(kNumIterations, i => ({
+ module: t.device.createShaderModule({
+ code: `
+ struct Buffer { data: u32, };
+ @group(0) @binding(0) var<storage, read_write> buffer: Buffer;
+ @compute @workgroup_size(1) fn main${i}() {
+ buffer.data = buffer.data + 1u;
+ }
+ `,
+ }),
+ entryPoint: `main${i}`,
+ }));
+ for (const compute of stages) {
+ const encoder = t.device.createCommandEncoder();
+ const pipeline = t.device.createComputePipeline({ layout: 'auto', compute });
+ const bindGroup = t.device.createBindGroup({
+ layout: pipeline.getBindGroupLayout(0),
+ entries: [{ binding: 0, resource: { buffer } }],
+ });
+ const pass = encoder.beginComputePass();
+ pass.setPipeline(pipeline);
+ pass.setBindGroup(0, bindGroup);
+ pass.dispatchWorkgroups(1);
+ pass.end();
+ t.device.queue.submit([encoder.finish()]);
+ }
+ t.expectGPUBufferValuesEqual(buffer, new Uint32Array([kNumIterations]));
+ });
+
+g.test('bind_group_churn')
+ .desc(
+ `Tests execution of compute passes which switch between a huge number of bind
+groups.`
+ )
+ .fn(async t => {
+ const kNumElements = 64;
+ const data = new Uint32Array([...iterRange(kNumElements, x => x)]);
+ const buffer1 = t.makeBufferWithContents(
+ data,
+ GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC
+ );
+ const buffer2 = t.makeBufferWithContents(
+ data,
+ GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC
+ );
+ const module = t.device.createShaderModule({
+ code: `
+ struct Buffer { data: array<u32>, };
+ @group(0) @binding(0) var<storage, read_write> buffer1: Buffer;
+ @group(0) @binding(1) var<storage, read_write> buffer2: Buffer;
+ @compute @workgroup_size(1) fn main(
+ @builtin(global_invocation_id) id: vec3<u32>) {
+ buffer1.data[id.x] = buffer1.data[id.x] + 1u;
+ buffer2.data[id.x] = buffer2.data[id.x] + 2u;
+ }
+ `,
+ });
+ const kNumIterations = 250_000;
+ const pipeline = t.device.createComputePipeline({
+ layout: 'auto',
+ compute: { module, entryPoint: 'main' },
+ });
+ const encoder = t.device.createCommandEncoder();
+ const pass = encoder.beginComputePass();
+ pass.setPipeline(pipeline);
+ for (let i = 0; i < kNumIterations; ++i) {
+ const buffer1Binding = i % 2;
+ const buffer2Binding = buffer1Binding ^ 1;
+ const bindGroup = t.device.createBindGroup({
+ layout: pipeline.getBindGroupLayout(0),
+ entries: [
+ { binding: buffer1Binding, resource: { buffer: buffer1 } },
+ { binding: buffer2Binding, resource: { buffer: buffer2 } },
+ ],
+ });
+ pass.setBindGroup(0, bindGroup);
+ pass.dispatchWorkgroups(kNumElements);
+ }
+ pass.end();
+ t.device.queue.submit([encoder.finish()]);
+ const kTotalAddition = (kNumIterations / 2) * 3;
+ t.expectGPUBufferValuesEqual(
+ buffer1,
+ new Uint32Array([...iterRange(kNumElements, x => x + kTotalAddition)])
+ );
+ t.expectGPUBufferValuesEqual(
+ buffer2,
+ new Uint32Array([...iterRange(kNumElements, x => x + kTotalAddition)])
+ );
+ });
+
+g.test('many_dispatches')
+ .desc(`Tests execution of compute passes with a huge number of dispatch calls`)
+ .fn(async t => {
+ const kNumElements = 64;
+ const data = new Uint32Array([...iterRange(kNumElements, x => x)]);
+ const buffer = t.makeBufferWithContents(data, GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC);
+ const module = t.device.createShaderModule({
+ code: `
+ struct Buffer { data: array<u32>, };
+ @group(0) @binding(0) var<storage, read_write> buffer: Buffer;
+ @compute @workgroup_size(1) fn main(
+ @builtin(global_invocation_id) id: vec3<u32>) {
+ buffer.data[id.x] = buffer.data[id.x] + 1u;
+ }
+ `,
+ });
+ const kNumIterations = 1_000_000;
+ const pipeline = t.device.createComputePipeline({
+ layout: 'auto',
+ compute: { module, entryPoint: 'main' },
+ });
+ const encoder = t.device.createCommandEncoder();
+ const pass = encoder.beginComputePass();
+ pass.setPipeline(pipeline);
+ const bindGroup = t.device.createBindGroup({
+ layout: pipeline.getBindGroupLayout(0),
+ entries: [{ binding: 0, resource: { buffer } }],
+ });
+ pass.setBindGroup(0, bindGroup);
+ for (let i = 0; i < kNumIterations; ++i) {
+ pass.dispatchWorkgroups(kNumElements);
+ }
+ pass.end();
+ t.device.queue.submit([encoder.finish()]);
+ t.expectGPUBufferValuesEqual(
+ buffer,
+ new Uint32Array([...iterRange(kNumElements, x => x + kNumIterations)])
+ );
+ });
+
+g.test('huge_dispatches')
+ .desc(`Tests execution of compute passes with huge dispatch calls`)
+ .fn(async t => {
+ const kDimensions = [512, 512, 128];
+ kDimensions.forEach(x => {
+ assert(x <= t.device.limits.maxComputeWorkgroupsPerDimension);
+ });
+
+ const kNumElements = kDimensions[0] * kDimensions[1] * kDimensions[2];
+ const data = new Uint32Array([...iterRange(kNumElements, x => x)]);
+ const buffer = t.makeBufferWithContents(data, GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC);
+ const module = t.device.createShaderModule({
+ code: `
+ struct Buffer { data: array<u32>, };
+ @group(0) @binding(0) var<storage, read_write> buffer: Buffer;
+ @compute @workgroup_size(1) fn main(
+ @builtin(global_invocation_id) id: vec3<u32>) {
+ let index = (id.z * 512u + id.y) * 512u + id.x;
+ buffer.data[index] = buffer.data[index] + 1u;
+ }
+ `,
+ });
+ const kNumIterations = 16;
+ const pipeline = t.device.createComputePipeline({
+ layout: 'auto',
+ compute: { module, entryPoint: 'main' },
+ });
+ const bindGroup = t.device.createBindGroup({
+ layout: pipeline.getBindGroupLayout(0),
+ entries: [{ binding: 0, resource: { buffer } }],
+ });
+ for (let i = 0; i < kNumIterations; ++i) {
+ const encoder = t.device.createCommandEncoder();
+ const pass = encoder.beginComputePass();
+ pass.setBindGroup(0, bindGroup);
+ pass.setPipeline(pipeline);
+ pass.dispatchWorkgroups(kDimensions[0], kDimensions[1], kDimensions[2]);
+ pass.end();
+ t.device.queue.submit([encoder.finish()]);
+ await t.device.queue.onSubmittedWorkDone();
+ }
+ t.expectGPUBufferValuesEqual(
+ buffer,
+ new Uint32Array([...iterRange(kNumElements, x => x + kNumIterations)])
+ );
+ });
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/device/README.txt b/dom/webgpu/tests/cts/checkout/src/stress/device/README.txt
new file mode 100644
index 0000000000..6ee89fc5fd
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/device/README.txt
@@ -0,0 +1,2 @@
+Stress tests covering GPUDevice usage, primarily focused on stressing allocation
+of various resources.
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/device/bind_group_allocation.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/device/bind_group_allocation.spec.ts
new file mode 100644
index 0000000000..5d428f3edb
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/device/bind_group_allocation.spec.ts
@@ -0,0 +1,65 @@
+export const description = `
+Stress tests for allocation of GPUBindGroup objects through GPUDevice.
+`;
+
+import { makeTestGroup } from '../../common/framework/test_group.js';
+import { GPUTest } from '../../webgpu/gpu_test.js';
+
+export const g = makeTestGroup(GPUTest);
+
+g.test('coexisting')
+ .desc(`Tests allocation of many coexisting GPUBindGroup objects.`)
+ .fn(t => {
+ const kNumGroups = 1_000_000;
+ const buffer = t.device.createBuffer({
+ size: 64,
+ usage: GPUBufferUsage.STORAGE,
+ });
+ const layout = t.device.createBindGroupLayout({
+ entries: [
+ {
+ binding: 0,
+ visibility: GPUShaderStage.COMPUTE,
+ buffer: { type: 'storage' },
+ },
+ ],
+ });
+ const bindGroups = [];
+ for (let i = 0; i < kNumGroups; ++i) {
+ bindGroups.push(
+ t.device.createBindGroup({
+ layout,
+ entries: [{ binding: 0, resource: { buffer } }],
+ })
+ );
+ }
+ });
+
+g.test('continuous')
+ .desc(
+ `Tests allocation and implicit GC of many GPUBindGroup objects over time.
+Objects are sequentially created and dropped for GC over a very large number of
+iterations.`
+ )
+ .fn(t => {
+ const kNumGroups = 5_000_000;
+ const buffer = t.device.createBuffer({
+ size: 64,
+ usage: GPUBufferUsage.STORAGE,
+ });
+ const layout = t.device.createBindGroupLayout({
+ entries: [
+ {
+ binding: 0,
+ visibility: GPUShaderStage.COMPUTE,
+ buffer: { type: 'storage' },
+ },
+ ],
+ });
+ for (let i = 0; i < kNumGroups; ++i) {
+ t.device.createBindGroup({
+ layout,
+ entries: [{ binding: 0, resource: { buffer } }],
+ });
+ }
+ });
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/device/bind_group_layout_allocation.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/device/bind_group_layout_allocation.spec.ts
new file mode 100644
index 0000000000..0933cd1b59
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/device/bind_group_layout_allocation.spec.ts
@@ -0,0 +1,20 @@
+export const description = `
+Stress tests for allocation of GPUBindGroupLayout objects through GPUDevice.
+`;
+
+import { makeTestGroup } from '../../common/framework/test_group.js';
+import { GPUTest } from '../../webgpu/gpu_test.js';
+
+export const g = makeTestGroup(GPUTest);
+
+g.test('coexisting')
+ .desc(`Tests allocation of many coexisting GPUBindGroupLayout objects.`)
+ .unimplemented();
+
+g.test('continuous')
+ .desc(
+ `Tests allocation and implicit GC of many GPUBindGroupLayout objects over time.
+Objects are sequentially created and dropped for GC over a very large number of
+iterations.`
+ )
+ .unimplemented();
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/device/buffer_allocation.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/device/buffer_allocation.spec.ts
new file mode 100644
index 0000000000..f55ec79c44
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/device/buffer_allocation.spec.ts
@@ -0,0 +1,25 @@
+export const description = `
+Stress tests for allocation of GPUBuffer objects through GPUDevice.
+`;
+
+import { makeTestGroup } from '../../common/framework/test_group.js';
+import { GPUTest } from '../../webgpu/gpu_test.js';
+
+export const g = makeTestGroup(GPUTest);
+
+g.test('coexisting').desc(`Tests allocation of many coexisting GPUBuffer objects.`).unimplemented();
+
+g.test('continuous,with_destroy')
+ .desc(
+ `Tests allocation and destruction of many GPUBuffer objects over time. Objects
+are sequentially created and destroyed over a very large number of iterations.`
+ )
+ .unimplemented();
+
+g.test('continuous,no_destroy')
+ .desc(
+ `Tests allocation and implicit GC of many GPUBuffer objects over time. Objects
+are sequentially created and dropped for GC over a very large number of
+iterations.`
+ )
+ .unimplemented();
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/device/command_encoder_allocation.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/device/command_encoder_allocation.spec.ts
new file mode 100644
index 0000000000..e41769ee06
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/device/command_encoder_allocation.spec.ts
@@ -0,0 +1,20 @@
+export const description = `
+Stress tests for allocation of GPUCommandEncoder objects through GPUDevice.
+`;
+
+import { makeTestGroup } from '../../common/framework/test_group.js';
+import { GPUTest } from '../../webgpu/gpu_test.js';
+
+export const g = makeTestGroup(GPUTest);
+
+g.test('coexisting')
+ .desc(`Tests allocation of many coexisting GPUCommandEncoder objects.`)
+ .unimplemented();
+
+g.test('continuous')
+ .desc(
+ `Tests allocation and implicit GC of many GPUCommandEncoder objects over time.
+Objects are sequentially created and dropped for GC over a very large number of
+iterations.`
+ )
+ .unimplemented();
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/device/compute_pipeline_allocation.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/device/compute_pipeline_allocation.spec.ts
new file mode 100644
index 0000000000..5c03bc9674
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/device/compute_pipeline_allocation.spec.ts
@@ -0,0 +1,20 @@
+export const description = `
+Stress tests for allocation of GPUComputePipeline objects through GPUDevice.
+`;
+
+import { makeTestGroup } from '../../common/framework/test_group.js';
+import { GPUTest } from '../../webgpu/gpu_test.js';
+
+export const g = makeTestGroup(GPUTest);
+
+g.test('coexisting')
+ .desc(`Tests allocation of many coexisting GPUComputePipeline objects.`)
+ .unimplemented();
+
+g.test('continuous')
+ .desc(
+ `Tests allocation and implicit GC of many GPUComputePipeline objects over time.
+Objects are sequentially created and dropped for GC over a very large number of
+iterations.`
+ )
+ .unimplemented();
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/device/pipeline_layout_allocation.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/device/pipeline_layout_allocation.spec.ts
new file mode 100644
index 0000000000..15d417fd7e
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/device/pipeline_layout_allocation.spec.ts
@@ -0,0 +1,20 @@
+export const description = `
+Stress tests for allocation of GPUPipelineLayout objects through GPUDevice.
+`;
+
+import { makeTestGroup } from '../../common/framework/test_group.js';
+import { GPUTest } from '../../webgpu/gpu_test.js';
+
+export const g = makeTestGroup(GPUTest);
+
+g.test('coexisting')
+ .desc(`Tests allocation of many coexisting GPUPipelineLayout objects.`)
+ .unimplemented();
+
+g.test('continuous')
+ .desc(
+ `Tests allocation and implicit GC of many GPUPipelineLayout objects over time.
+Objects are sequentially created and dropped for GC over a very large number of
+iterations.`
+ )
+ .unimplemented();
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/device/query_set_allocation.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/device/query_set_allocation.spec.ts
new file mode 100644
index 0000000000..757645cbf6
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/device/query_set_allocation.spec.ts
@@ -0,0 +1,27 @@
+export const description = `
+Stress tests for allocation of GPUQuerySet objects through GPUDevice.
+`;
+
+import { makeTestGroup } from '../../common/framework/test_group.js';
+import { GPUTest } from '../../webgpu/gpu_test.js';
+
+export const g = makeTestGroup(GPUTest);
+
+g.test('coexisting')
+ .desc(`Tests allocation of many coexisting GPUQuerySet objects.`)
+ .unimplemented();
+
+g.test('continuous,with_destroy')
+ .desc(
+ `Tests allocation and destruction of many GPUQuerySet objects over time. Objects
+are sequentially created and destroyed over a very large number of iterations.`
+ )
+ .unimplemented();
+
+g.test('continuous,no_destroy')
+ .desc(
+ `Tests allocation and implicit GC of many GPUQuerySet objects over time. Objects
+are sequentially created and dropped for GC over a very large number of
+iterations.`
+ )
+ .unimplemented();
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/device/render_bundle_allocation.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/device/render_bundle_allocation.spec.ts
new file mode 100644
index 0000000000..d7448412a1
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/device/render_bundle_allocation.spec.ts
@@ -0,0 +1,20 @@
+export const description = `
+Stress tests for allocation of GPURenderBundle objects through GPUDevice.
+`;
+
+import { makeTestGroup } from '../../common/framework/test_group.js';
+import { GPUTest } from '../../webgpu/gpu_test.js';
+
+export const g = makeTestGroup(GPUTest);
+
+g.test('coexisting')
+ .desc(`Tests allocation of many coexisting GPURenderBundle objects.`)
+ .unimplemented();
+
+g.test('continuous')
+ .desc(
+ `Tests allocation and implicit GC of many GPURenderBundle objects over time.
+Objects are sequentially created and dropped for GC over a very large number of
+iterations.`
+ )
+ .unimplemented();
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/device/render_pipeline_allocation.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/device/render_pipeline_allocation.spec.ts
new file mode 100644
index 0000000000..21eb92cf7c
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/device/render_pipeline_allocation.spec.ts
@@ -0,0 +1,20 @@
+export const description = `
+Stress tests for allocation of GPURenderPipeline objects through GPUDevice.
+`;
+
+import { makeTestGroup } from '../../common/framework/test_group.js';
+import { GPUTest } from '../../webgpu/gpu_test.js';
+
+export const g = makeTestGroup(GPUTest);
+
+g.test('coexisting')
+ .desc(`Tests allocation of many coexisting GPURenderPipeline objects.`)
+ .unimplemented();
+
+g.test('continuous')
+ .desc(
+ `Tests allocation and implicit GC of many GPURenderPipeline objects over time.
+Objects are sequentially created and dropped for GC over a very large number of
+iterations.`
+ )
+ .unimplemented();
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/device/sampler_allocation.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/device/sampler_allocation.spec.ts
new file mode 100644
index 0000000000..c34dae3f67
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/device/sampler_allocation.spec.ts
@@ -0,0 +1,20 @@
+export const description = `
+Stress tests for allocation of GPUSampler objects through GPUDevice.
+`;
+
+import { makeTestGroup } from '../../common/framework/test_group.js';
+import { GPUTest } from '../../webgpu/gpu_test.js';
+
+export const g = makeTestGroup(GPUTest);
+
+g.test('coexisting')
+ .desc(`Tests allocation of many coexisting GPUSampler objects.`)
+ .unimplemented();
+
+g.test('continuous')
+ .desc(
+ `Tests allocation and implicit GC of many GPUSampler objects over time. Objects
+are sequentially created and dropped for GC over a very large number of
+iterations.`
+ )
+ .unimplemented();
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/device/shader_module_allocation.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/device/shader_module_allocation.spec.ts
new file mode 100644
index 0000000000..97ef73d2c9
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/device/shader_module_allocation.spec.ts
@@ -0,0 +1,20 @@
+export const description = `
+Stress tests for allocation of GPUShaderModule objects through GPUDevice.
+`;
+
+import { makeTestGroup } from '../../common/framework/test_group.js';
+import { GPUTest } from '../../webgpu/gpu_test.js';
+
+export const g = makeTestGroup(GPUTest);
+
+g.test('coexisting')
+ .desc(`Tests allocation of many coexisting GPUShaderModule objects.`)
+ .unimplemented();
+
+g.test('continuous')
+ .desc(
+ `Tests allocation and implicit GC of many GPUShaderModule objects over time.
+Objects are sequentially created and dropped for GC over a very large number of
+iterations.`
+ )
+ .unimplemented();
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/device/texture_allocation.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/device/texture_allocation.spec.ts
new file mode 100644
index 0000000000..5cef598804
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/device/texture_allocation.spec.ts
@@ -0,0 +1,27 @@
+export const description = `
+Stress tests for allocation of GPUTexture objects through GPUDevice.
+`;
+
+import { makeTestGroup } from '../../common/framework/test_group.js';
+import { GPUTest } from '../../webgpu/gpu_test.js';
+
+export const g = makeTestGroup(GPUTest);
+
+g.test('coexisting')
+ .desc(`Tests allocation of many coexisting GPUTexture objects.`)
+ .unimplemented();
+
+g.test('continuous,with_destroy')
+ .desc(
+ `Tests allocation and destruction of many GPUTexture objects over time. Objects
+are sequentially created and destroyed over a very large number of iterations.`
+ )
+ .unimplemented();
+
+g.test('continuous,no_destroy')
+ .desc(
+ `Tests allocation and implicit GC of many GPUTexture objects over time. Objects
+are sequentially created and dropped for GC over a very large number of
+iterations.`
+ )
+ .unimplemented();
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/listing.ts b/dom/webgpu/tests/cts/checkout/src/stress/listing.ts
new file mode 100644
index 0000000000..823639c692
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/listing.ts
@@ -0,0 +1,5 @@
+/* eslint-disable import/no-restricted-paths */
+import { TestSuiteListing } from '../common/internal/test_suite_listing.js';
+import { makeListing } from '../common/tools/crawl.js';
+
+export const listing: Promise<TestSuiteListing> = makeListing(__filename);
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/memory/README.txt b/dom/webgpu/tests/cts/checkout/src/stress/memory/README.txt
new file mode 100644
index 0000000000..ac0c90bfb7
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/memory/README.txt
@@ -0,0 +1 @@
+Stress tests covering allocation and usage of various types of GPUBuffer objects.
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/memory/churn.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/memory/churn.spec.ts
new file mode 100644
index 0000000000..fcb899eb29
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/memory/churn.spec.ts
@@ -0,0 +1,17 @@
+export const description = `
+Stress tests covering robustness in the presence of heavy buffer and texture
+memory churn.
+`;
+
+import { makeTestGroup } from '../../common/framework/test_group.js';
+import { GPUTest } from '../../webgpu/gpu_test.js';
+
+export const g = makeTestGroup(GPUTest);
+
+g.test('churn')
+ .desc(
+ `Allocates and populates a huge number of buffers and textures over time,
+retaining some while dropping or explicitly destroying others. When finished,
+verifies the expected contents of any remaining buffers and textures.`
+ )
+ .unimplemented();
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/memory/oom.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/memory/oom.spec.ts
new file mode 100644
index 0000000000..3ab542c940
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/memory/oom.spec.ts
@@ -0,0 +1,45 @@
+export const description = `
+Stress tests covering robustness when available VRAM is exhausted.
+`;
+
+import { makeTestGroup } from '../../common/framework/test_group.js';
+import { GPUTest } from '../../webgpu/gpu_test.js';
+import { exhaustVramUntilUnder64MB } from '../../webgpu/util/memory.js';
+
+export const g = makeTestGroup(GPUTest);
+
+g.test('vram_oom')
+ .desc(`Tests that we can allocate buffers until we run out of VRAM.`)
+ .fn(async t => {
+ await exhaustVramUntilUnder64MB(t.device);
+ });
+
+g.test('get_mapped_range')
+ .desc(
+ `Tests getMappedRange on a mappedAtCreation GPUBuffer that failed allocation due
+to OOM. This should throw a RangeError, but below a certain threshold may just
+crash the page.`
+ )
+ .unimplemented();
+
+g.test('map_after_vram_oom')
+ .desc(
+ `Allocates tons of buffers and textures with varying mapping states (unmappable,
+mappable, mapAtCreation, mapAtCreation-then-unmapped) until OOM; then attempts
+to mapAsync all the mappable objects.`
+ )
+ .unimplemented();
+
+g.test('validation_vs_oom')
+ .desc(
+ `Tests that calls affected by both OOM and validation errors expose the
+validation error with precedence.`
+ )
+ .unimplemented();
+
+g.test('recovery')
+ .desc(
+ `Tests that after going VRAM-OOM, destroying allocated resources eventually
+allows new resources to be allocated.`
+ )
+ .unimplemented();
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/queries/README.txt b/dom/webgpu/tests/cts/checkout/src/stress/queries/README.txt
new file mode 100644
index 0000000000..fe466205c4
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/queries/README.txt
@@ -0,0 +1 @@
+Stress tests covering use of GPUQuerySet objects and related operations.
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/queries/occlusion.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/queries/occlusion.spec.ts
new file mode 100644
index 0000000000..056d6bdaea
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/queries/occlusion.spec.ts
@@ -0,0 +1,10 @@
+export const description = `
+Stress tests for occlusion queries.
+`;
+
+import { makeTestGroup } from '../../common/framework/test_group.js';
+import { GPUTest } from '../../webgpu/gpu_test.js';
+
+export const g = makeTestGroup(GPUTest);
+
+g.test('many').desc(`Tests a huge number of occlusion queries in a render pass.`).unimplemented();
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/queries/pipeline_statistics.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/queries/pipeline_statistics.spec.ts
new file mode 100644
index 0000000000..ce8a16f462
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/queries/pipeline_statistics.spec.ts
@@ -0,0 +1,38 @@
+export const description = `
+Stress tests for pipeline statistics queries.
+
+TODO: pipeline statistics queries are removed from core; consider moving tests to another suite.
+`;
+
+import { makeTestGroup } from '../../common/framework/test_group.js';
+import { GPUTest } from '../../webgpu/gpu_test.js';
+
+export const g = makeTestGroup(GPUTest);
+
+g.test('render_pass_one_query_set')
+ .desc(
+ `Tests a huge number of pipeline statistics queries over a single query set in a
+single render pass.`
+ )
+ .unimplemented();
+
+g.test('render_pass_many_query_sets')
+ .desc(
+ `Tests a huge number of pipeline statistics queries over a huge number of query
+sets in a single render pass.`
+ )
+ .unimplemented();
+
+g.test('compute_pass_one_query_set')
+ .desc(
+ `Tests a huge number of pipeline statistics queries over a single query set in a
+single compute pass.`
+ )
+ .unimplemented();
+
+g.test('compute_pass_many_query_sets')
+ .desc(
+ `Tests a huge number of pipeline statistics queries over a huge number of query
+sets in a single compute pass.`
+ )
+ .unimplemented();
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/queries/resolve.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/queries/resolve.spec.ts
new file mode 100644
index 0000000000..da67977395
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/queries/resolve.spec.ts
@@ -0,0 +1,15 @@
+export const description = `
+Stress tests for query resolution.
+`;
+
+import { makeTestGroup } from '../../common/framework/test_group.js';
+import { GPUTest } from '../../webgpu/gpu_test.js';
+
+export const g = makeTestGroup(GPUTest);
+
+g.test('many_large_sets')
+ .desc(
+ `Tests a huge number of resolveQuerySet operations on a huge number of
+query sets between render passes.`
+ )
+ .unimplemented();
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/queries/timestamps.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/queries/timestamps.spec.ts
new file mode 100644
index 0000000000..da3e1eb472
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/queries/timestamps.spec.ts
@@ -0,0 +1,50 @@
+export const description = `
+Stress tests for timestamp queries.
+`;
+
+import { makeTestGroup } from '../../common/framework/test_group.js';
+import { GPUTest } from '../../webgpu/gpu_test.js';
+
+export const g = makeTestGroup(GPUTest);
+
+g.test('command_encoder_one_query_set')
+ .desc(
+ `Tests a huge number of timestamp queries over a single query set between render
+passes on a single command encoder.`
+ )
+ .unimplemented();
+
+g.test('command_encoder_many_query_sets')
+ .desc(
+ `Tests a huge number of timestamp queries over many query sets between render
+passes on a single command encoder.`
+ )
+ .unimplemented();
+
+g.test('render_pass_one_query_set')
+ .desc(
+ `Tests a huge number of timestamp queries over a single query set in a single
+render pass.`
+ )
+ .unimplemented();
+
+g.test('render_pass_many_query_sets')
+ .desc(
+ `Tests a huge number of timestamp queries over a huge number of query sets in a
+single render pass.`
+ )
+ .unimplemented();
+
+g.test('compute_pass_one_query_set')
+ .desc(
+ `Tests a huge number of timestamp queries over a single query set in a single
+compute pass.`
+ )
+ .unimplemented();
+
+g.test('compute_pass_many_query_sets')
+ .desc(
+ `Tests a huge number of timestamp queries over a huge number of query sets in a
+single compute pass.`
+ )
+ .unimplemented();
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/queue/README.txt b/dom/webgpu/tests/cts/checkout/src/stress/queue/README.txt
new file mode 100644
index 0000000000..adb4ec40ce
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/queue/README.txt
@@ -0,0 +1 @@
+Stress tests covering GPUQueue usage.
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/queue/submit.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/queue/submit.spec.ts
new file mode 100644
index 0000000000..e1551727e2
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/queue/submit.spec.ts
@@ -0,0 +1,102 @@
+export const description = `
+Stress tests for command submission to GPUQueue objects.
+`;
+
+import { makeTestGroup } from '../../common/framework/test_group.js';
+import { iterRange } from '../../common/util/util.js';
+import { GPUTest } from '../../webgpu/gpu_test.js';
+
+export const g = makeTestGroup(GPUTest);
+
+g.test('huge_command_buffer')
+ .desc(
+ `Tests submission of huge command buffers to a GPUQueue. Huge buffers are
+encoded by chaining together long sequences of compute passes, with expected
+results verified at the end of the test.`
+ )
+ .fn(async t => {
+ const kNumElements = 64;
+ const data = new Uint32Array([...iterRange(kNumElements, x => x)]);
+ const buffer = t.makeBufferWithContents(data, GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC);
+ const pipeline = t.device.createComputePipeline({
+ layout: 'auto',
+ compute: {
+ module: t.device.createShaderModule({
+ code: `
+ struct Buffer { data: array<u32>, };
+ @group(0) @binding(0) var<storage, read_write> buffer: Buffer;
+ @compute @workgroup_size(1) fn main(
+ @builtin(global_invocation_id) id: vec3<u32>) {
+ buffer.data[id.x] = buffer.data[id.x] + 1u;
+ }
+ `,
+ }),
+ entryPoint: 'main',
+ },
+ });
+ const bindGroup = t.device.createBindGroup({
+ layout: pipeline.getBindGroupLayout(0),
+ entries: [{ binding: 0, resource: { buffer } }],
+ });
+ const encoder = t.device.createCommandEncoder();
+ const kNumIterations = 500_000;
+ for (let i = 0; i < kNumIterations; ++i) {
+ const pass = encoder.beginComputePass();
+ pass.setPipeline(pipeline);
+ pass.setBindGroup(0, bindGroup);
+ pass.dispatchWorkgroups(kNumElements);
+ pass.end();
+ }
+ t.device.queue.submit([encoder.finish()]);
+ t.expectGPUBufferValuesEqual(
+ buffer,
+ new Uint32Array([...iterRange(kNumElements, x => x + kNumIterations)])
+ );
+ });
+
+g.test('many_command_buffers')
+ .desc(
+ `Tests submission of a huge number of command buffers to a GPUQueue by a single
+submit() call.`
+ )
+ .fn(async t => {
+ const kNumElements = 64;
+ const data = new Uint32Array([...iterRange(kNumElements, x => x)]);
+ const buffer = t.makeBufferWithContents(data, GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC);
+ const pipeline = t.device.createComputePipeline({
+ layout: 'auto',
+ compute: {
+ module: t.device.createShaderModule({
+ code: `
+ struct Buffer { data: array<u32>, };
+ @group(0) @binding(0) var<storage, read_write> buffer: Buffer;
+ @compute @workgroup_size(1) fn main(
+ @builtin(global_invocation_id) id: vec3<u32>) {
+ buffer.data[id.x] = buffer.data[id.x] + 1u;
+ }
+ `,
+ }),
+ entryPoint: 'main',
+ },
+ });
+ const bindGroup = t.device.createBindGroup({
+ layout: pipeline.getBindGroupLayout(0),
+ entries: [{ binding: 0, resource: { buffer } }],
+ });
+ const kNumIterations = 500_000;
+ const buffers = [];
+ for (let i = 0; i < kNumIterations; ++i) {
+ const encoder = t.device.createCommandEncoder();
+ const pass = encoder.beginComputePass();
+ pass.setPipeline(pipeline);
+ pass.setBindGroup(0, bindGroup);
+ pass.dispatchWorkgroups(kNumElements);
+ pass.end();
+ buffers.push(encoder.finish());
+ }
+ t.device.queue.submit(buffers);
+ t.expectGPUBufferValuesEqual(
+ buffer,
+ new Uint32Array([...iterRange(kNumElements, x => x + kNumIterations)])
+ );
+ });
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/render/README.txt b/dom/webgpu/tests/cts/checkout/src/stress/render/README.txt
new file mode 100644
index 0000000000..7dcc73fbc3
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/render/README.txt
@@ -0,0 +1,3 @@
+Stress tests covering operations specific to GPURenderPipeline, GPURenderPass, and GPURenderBundle.
+
+- Issuing draw calls with huge counts.
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/render/render_pass.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/render/render_pass.spec.ts
new file mode 100644
index 0000000000..d064e5f95b
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/render/render_pass.spec.ts
@@ -0,0 +1,353 @@
+export const description = `
+Stress tests covering GPURenderPassEncoder usage.
+`;
+
+import { makeTestGroup } from '../../common/framework/test_group.js';
+import { range } from '../../common/util/util.js';
+import { GPUTest } from '../../webgpu/gpu_test.js';
+
+export const g = makeTestGroup(GPUTest);
+
+g.test('many')
+ .desc(
+ `Tests execution of a huge number of render passes using the same GPURenderPipeline. This uses
+a single render pass for every output fragment, with each pass executing a one-vertex draw call.`
+ )
+ .fn(async t => {
+ const kSize = 1024;
+ const module = t.device.createShaderModule({
+ code: `
+ @vertex fn vmain(@builtin(vertex_index) index: u32)
+ -> @builtin(position) vec4<f32> {
+ let position = vec2<f32>(f32(index % ${kSize}u), f32(index / ${kSize}u));
+ let r = vec2<f32>(1.0 / f32(${kSize}));
+ let a = 2.0 * r;
+ let b = r - vec2<f32>(1.0);
+ return vec4<f32>(fma(position, a, b), 0.0, 1.0);
+ }
+ @fragment fn fmain() -> @location(0) vec4<f32> {
+ return vec4<f32>(1.0, 0.0, 1.0, 1.0);
+ }
+ `,
+ });
+ const pipeline = t.device.createRenderPipeline({
+ layout: 'auto',
+ vertex: { module, entryPoint: 'vmain', buffers: [] },
+ primitive: { topology: 'point-list' },
+ fragment: {
+ targets: [{ format: 'rgba8unorm' }],
+ module,
+ entryPoint: 'fmain',
+ },
+ });
+ const renderTarget = t.device.createTexture({
+ size: [kSize, kSize],
+ usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC,
+ format: 'rgba8unorm',
+ });
+ const renderPassDescriptor: GPURenderPassDescriptor = {
+ colorAttachments: [
+ {
+ view: renderTarget.createView(),
+ loadOp: 'load',
+ storeOp: 'store',
+ },
+ ],
+ };
+ const encoder = t.device.createCommandEncoder();
+ range(kSize * kSize, i => {
+ const pass = encoder.beginRenderPass(renderPassDescriptor);
+ pass.setPipeline(pipeline);
+ pass.draw(1, 1, i);
+ pass.end();
+ });
+ t.device.queue.submit([encoder.finish()]);
+ t.expectSingleColor(renderTarget, 'rgba8unorm', {
+ size: [kSize, kSize, 1],
+ exp: { R: 1, G: 0, B: 1, A: 1 },
+ });
+ });
+
+g.test('pipeline_churn')
+ .desc(
+ `Tests execution of a large number of render pipelines, each within its own render pass. Each
+pass does a single draw call, with one pass per output fragment.`
+ )
+ .fn(async t => {
+ const kWidth = 64;
+ const kHeight = 8;
+ const module = t.device.createShaderModule({
+ code: `
+ @vertex fn vmain(@builtin(vertex_index) index: u32)
+ -> @builtin(position) vec4<f32> {
+ let position = vec2<f32>(f32(index % ${kWidth}u), f32(index / ${kWidth}u));
+ let size = vec2<f32>(f32(${kWidth}), f32(${kHeight}));
+ let r = vec2<f32>(1.0) / size;
+ let a = 2.0 * r;
+ let b = r - vec2<f32>(1.0);
+ return vec4<f32>(fma(position, a, b), 0.0, 1.0);
+ }
+ @fragment fn fmain() -> @location(0) vec4<f32> {
+ return vec4<f32>(1.0, 0.0, 1.0, 1.0);
+ }
+ `,
+ });
+ const renderTarget = t.device.createTexture({
+ size: [kWidth, kHeight],
+ usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC,
+ format: 'rgba8unorm',
+ });
+ const depthTarget = t.device.createTexture({
+ size: [kWidth, kHeight],
+ usage: GPUTextureUsage.RENDER_ATTACHMENT,
+ format: 'depth24plus-stencil8',
+ });
+ const renderPassDescriptor: GPURenderPassDescriptor = {
+ colorAttachments: [
+ {
+ view: renderTarget.createView(),
+ loadOp: 'load',
+ storeOp: 'store',
+ },
+ ],
+ depthStencilAttachment: {
+ view: depthTarget.createView(),
+ depthLoadOp: 'load',
+ depthStoreOp: 'store',
+ stencilLoadOp: 'load',
+ stencilStoreOp: 'discard',
+ },
+ };
+ const encoder = t.device.createCommandEncoder();
+ range(kWidth * kHeight, i => {
+ const pipeline = t.device.createRenderPipeline({
+ layout: 'auto',
+ vertex: { module, entryPoint: 'vmain', buffers: [] },
+ primitive: { topology: 'point-list' },
+ depthStencil: {
+ format: 'depth24plus-stencil8',
+
+ // Not really used, but it ensures that each pipeline is unique.
+ depthBias: i,
+ },
+ fragment: {
+ targets: [{ format: 'rgba8unorm' }],
+ module,
+ entryPoint: 'fmain',
+ },
+ });
+ const pass = encoder.beginRenderPass(renderPassDescriptor);
+ pass.setPipeline(pipeline);
+ pass.draw(1, 1, i);
+ pass.end();
+ });
+ t.device.queue.submit([encoder.finish()]);
+ t.expectSingleColor(renderTarget, 'rgba8unorm', {
+ size: [kWidth, kHeight, 1],
+ exp: { R: 1, G: 0, B: 1, A: 1 },
+ });
+ });
+
+g.test('bind_group_churn')
+ .desc(
+ `Tests execution of render passes which switch between a huge number of bind groups. This uses
+a single render pass with a single pipeline, and one draw call per fragment of the output texture.
+Each draw call is made with a unique bind group 0, with binding 0 referencing a unique uniform
+buffer.`
+ )
+ .fn(async t => {
+ const kSize = 128;
+ const module = t.device.createShaderModule({
+ code: `
+ struct Uniforms { index: u32, };
+ @group(0) @binding(0) var<uniform> uniforms: Uniforms;
+ @vertex fn vmain() -> @builtin(position) vec4<f32> {
+ let index = uniforms.index;
+ let position = vec2<f32>(f32(index % ${kSize}u), f32(index / ${kSize}u));
+ let r = vec2<f32>(1.0 / f32(${kSize}));
+ let a = 2.0 * r;
+ let b = r - vec2<f32>(1.0);
+ return vec4<f32>(fma(position, a, b), 0.0, 1.0);
+ }
+ @fragment fn fmain() -> @location(0) vec4<f32> {
+ return vec4<f32>(1.0, 0.0, 1.0, 1.0);
+ }
+ `,
+ });
+ const layout = t.device.createBindGroupLayout({
+ entries: [
+ {
+ binding: 0,
+ visibility: GPUShaderStage.VERTEX,
+ buffer: { type: 'uniform' },
+ },
+ ],
+ });
+ const pipeline = t.device.createRenderPipeline({
+ layout: t.device.createPipelineLayout({ bindGroupLayouts: [layout] }),
+ vertex: { module, entryPoint: 'vmain', buffers: [] },
+ primitive: { topology: 'point-list' },
+ fragment: {
+ targets: [{ format: 'rgba8unorm' }],
+ module,
+ entryPoint: 'fmain',
+ },
+ });
+ const renderTarget = t.device.createTexture({
+ size: [kSize, kSize],
+ usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC,
+ format: 'rgba8unorm',
+ });
+ const renderPassDescriptor: GPURenderPassDescriptor = {
+ colorAttachments: [
+ {
+ view: renderTarget.createView(),
+ loadOp: 'load',
+ storeOp: 'store',
+ },
+ ],
+ };
+ const encoder = t.device.createCommandEncoder();
+ const pass = encoder.beginRenderPass(renderPassDescriptor);
+ pass.setPipeline(pipeline);
+ range(kSize * kSize, i => {
+ const buffer = t.device.createBuffer({
+ size: 4,
+ usage: GPUBufferUsage.UNIFORM,
+ mappedAtCreation: true,
+ });
+ new Uint32Array(buffer.getMappedRange())[0] = i;
+ buffer.unmap();
+ pass.setBindGroup(
+ 0,
+ t.device.createBindGroup({ layout, entries: [{ binding: 0, resource: { buffer } }] })
+ );
+ pass.draw(1, 1);
+ });
+ pass.end();
+ t.device.queue.submit([encoder.finish()]);
+ t.expectSingleColor(renderTarget, 'rgba8unorm', {
+ size: [kSize, kSize, 1],
+ exp: { R: 1, G: 0, B: 1, A: 1 },
+ });
+ });
+
+g.test('many_draws')
+ .desc(
+ `Tests execution of render passes with a huge number of draw calls. This uses a single
+render pass with a single pipeline, and one draw call per fragment of the output texture.`
+ )
+ .fn(async t => {
+ const kSize = 4096;
+ const module = t.device.createShaderModule({
+ code: `
+ @vertex fn vmain(@builtin(vertex_index) index: u32)
+ -> @builtin(position) vec4<f32> {
+ let position = vec2<f32>(f32(index % ${kSize}u), f32(index / ${kSize}u));
+ let r = vec2<f32>(1.0 / f32(${kSize}));
+ let a = 2.0 * r;
+ let b = r - vec2<f32>(1.0);
+ return vec4<f32>(fma(position, a, b), 0.0, 1.0);
+ }
+ @fragment fn fmain() -> @location(0) vec4<f32> {
+ return vec4<f32>(1.0, 0.0, 1.0, 1.0);
+ }
+ `,
+ });
+ const pipeline = t.device.createRenderPipeline({
+ layout: 'auto',
+ vertex: { module, entryPoint: 'vmain', buffers: [] },
+ primitive: { topology: 'point-list' },
+ fragment: {
+ targets: [{ format: 'rgba8unorm' }],
+ module,
+ entryPoint: 'fmain',
+ },
+ });
+ const renderTarget = t.device.createTexture({
+ size: [kSize, kSize],
+ usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC,
+ format: 'rgba8unorm',
+ });
+ const renderPassDescriptor: GPURenderPassDescriptor = {
+ colorAttachments: [
+ {
+ view: renderTarget.createView(),
+ loadOp: 'load',
+ storeOp: 'store',
+ },
+ ],
+ };
+ const encoder = t.device.createCommandEncoder();
+ const pass = encoder.beginRenderPass(renderPassDescriptor);
+ pass.setPipeline(pipeline);
+ range(kSize * kSize, i => pass.draw(1, 1, i));
+ pass.end();
+ t.device.queue.submit([encoder.finish()]);
+ t.expectSingleColor(renderTarget, 'rgba8unorm', {
+ size: [kSize, kSize, 1],
+ exp: { R: 1, G: 0, B: 1, A: 1 },
+ });
+ });
+
+g.test('huge_draws')
+ .desc(
+ `Tests execution of several render passes with huge draw calls. Each pass uses a single draw
+call which draws multiple vertices for each fragment of a large output texture.`
+ )
+ .fn(async t => {
+ const kSize = 32768;
+ const kTextureSize = 4096;
+ const kVertsPerFragment = (kSize * kSize) / (kTextureSize * kTextureSize);
+ const module = t.device.createShaderModule({
+ code: `
+ @vertex fn vmain(@builtin(vertex_index) vert_index: u32)
+ -> @builtin(position) vec4<f32> {
+ let index = vert_index / ${kVertsPerFragment}u;
+ let position = vec2<f32>(f32(index % ${kTextureSize}u), f32(index / ${kTextureSize}u));
+ let r = vec2<f32>(1.0 / f32(${kTextureSize}));
+ let a = 2.0 * r;
+ let b = r - vec2<f32>(1.0);
+ return vec4<f32>(fma(position, a, b), 0.0, 1.0);
+ }
+ @fragment fn fmain() -> @location(0) vec4<f32> {
+ return vec4<f32>(1.0, 0.0, 1.0, 1.0);
+ }
+ `,
+ });
+ const pipeline = t.device.createRenderPipeline({
+ layout: 'auto',
+ vertex: { module, entryPoint: 'vmain', buffers: [] },
+ primitive: { topology: 'point-list' },
+ fragment: {
+ targets: [{ format: 'rgba8unorm' }],
+ module,
+ entryPoint: 'fmain',
+ },
+ });
+ const renderTarget = t.device.createTexture({
+ size: [kTextureSize, kTextureSize],
+ usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC,
+ format: 'rgba8unorm',
+ });
+ const renderPassDescriptor: GPURenderPassDescriptor = {
+ colorAttachments: [
+ {
+ view: renderTarget.createView(),
+ loadOp: 'load',
+ storeOp: 'store',
+ },
+ ],
+ };
+
+ const encoder = t.device.createCommandEncoder();
+ const pass = encoder.beginRenderPass(renderPassDescriptor);
+ pass.setPipeline(pipeline);
+ pass.draw(kSize * kSize);
+ pass.end();
+ t.device.queue.submit([encoder.finish()]);
+ t.expectSingleColor(renderTarget, 'rgba8unorm', {
+ size: [kTextureSize, kTextureSize, 1],
+ exp: { R: 1, G: 0, B: 1, A: 1 },
+ });
+ });
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/render/vertex_buffers.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/render/vertex_buffers.spec.ts
new file mode 100644
index 0000000000..bba129feec
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/render/vertex_buffers.spec.ts
@@ -0,0 +1,130 @@
+export const description = `
+Stress tests covering vertex buffer usage.
+`;
+
+import { makeTestGroup } from '../../common/framework/test_group.js';
+import { GPUTest } from '../../webgpu/gpu_test.js';
+
+export const g = makeTestGroup(GPUTest);
+
+function createHugeVertexBuffer(t: GPUTest, size: number) {
+ const kBufferSize = size * size * 8;
+ const buffer = t.device.createBuffer({
+ size: kBufferSize,
+ usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC,
+ });
+ const pipeline = t.device.createComputePipeline({
+ layout: 'auto',
+ compute: {
+ module: t.device.createShaderModule({
+ code: `
+ struct Buffer { data: array<vec2<u32>>, };
+ @group(0) @binding(0) var<storage, read_write> buffer: Buffer;
+ @compute @workgroup_size(1) fn main(
+ @builtin(global_invocation_id) id: vec3<u32>) {
+ let base = id.x * ${size}u;
+ for (var x: u32 = 0u; x < ${size}u; x = x + 1u) {
+ buffer.data[base + x] = vec2<u32>(x, id.x);
+ }
+ }
+ `,
+ }),
+ entryPoint: 'main',
+ },
+ });
+ const bindGroup = t.device.createBindGroup({
+ layout: pipeline.getBindGroupLayout(0),
+ entries: [
+ {
+ binding: 0,
+ resource: { buffer },
+ },
+ ],
+ });
+ const encoder = t.device.createCommandEncoder();
+ const pass = encoder.beginComputePass();
+ pass.setPipeline(pipeline);
+ pass.setBindGroup(0, bindGroup);
+ pass.dispatchWorkgroups(size);
+ pass.end();
+
+ const vertexBuffer = t.device.createBuffer({
+ size: kBufferSize,
+ usage: GPUBufferUsage.VERTEX | GPUBufferUsage.COPY_DST,
+ });
+ encoder.copyBufferToBuffer(buffer, 0, vertexBuffer, 0, kBufferSize);
+ t.device.queue.submit([encoder.finish()]);
+ return vertexBuffer;
+}
+
+g.test('many')
+ .desc(`Tests execution of draw calls using a huge vertex buffer.`)
+ .fn(async t => {
+ const kSize = 4096;
+ const buffer = createHugeVertexBuffer(t, kSize);
+ const module = t.device.createShaderModule({
+ code: `
+ @vertex fn vmain(@location(0) position: vec2<u32>)
+ -> @builtin(position) vec4<f32> {
+ let r = vec2<f32>(1.0 / f32(${kSize}));
+ let a = 2.0 * r;
+ let b = r - vec2<f32>(1.0);
+ return vec4<f32>(fma(vec2<f32>(position), a, b), 0.0, 1.0);
+ }
+ @fragment fn fmain() -> @location(0) vec4<f32> {
+ return vec4<f32>(1.0, 0.0, 1.0, 1.0);
+ }
+ `,
+ });
+ const pipeline = t.device.createRenderPipeline({
+ layout: 'auto',
+ vertex: {
+ module,
+ entryPoint: 'vmain',
+ buffers: [
+ {
+ arrayStride: 8,
+ attributes: [
+ {
+ format: 'uint32x2',
+ offset: 0,
+ shaderLocation: 0,
+ },
+ ],
+ },
+ ],
+ },
+ primitive: { topology: 'point-list' },
+ fragment: {
+ targets: [{ format: 'rgba8unorm' }],
+ module,
+ entryPoint: 'fmain',
+ },
+ });
+ const renderTarget = t.device.createTexture({
+ size: [kSize, kSize],
+ usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC,
+ format: 'rgba8unorm',
+ });
+ const renderPassDescriptor: GPURenderPassDescriptor = {
+ colorAttachments: [
+ {
+ view: renderTarget.createView(),
+ loadOp: 'load',
+ storeOp: 'store',
+ },
+ ],
+ };
+
+ const encoder = t.device.createCommandEncoder();
+ const pass = encoder.beginRenderPass(renderPassDescriptor);
+ pass.setPipeline(pipeline);
+ pass.setVertexBuffer(0, buffer);
+ pass.draw(kSize * kSize);
+ pass.end();
+ t.device.queue.submit([encoder.finish()]);
+ t.expectSingleColor(renderTarget, 'rgba8unorm', {
+ size: [kSize, kSize, 1],
+ exp: { R: 1, G: 0, B: 1, A: 1 },
+ });
+ });
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/shaders/README.txt b/dom/webgpu/tests/cts/checkout/src/stress/shaders/README.txt
new file mode 100644
index 0000000000..628b4e86fa
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/shaders/README.txt
@@ -0,0 +1 @@
+Stress tests covering very long-running and/or resource-intensive shaders.
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/shaders/entry_points.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/shaders/entry_points.spec.ts
new file mode 100644
index 0000000000..95b647ba73
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/shaders/entry_points.spec.ts
@@ -0,0 +1,78 @@
+export const description = `
+Stress tests covering behavior around shader entry points.
+`;
+
+import { makeTestGroup } from '../../common/framework/test_group.js';
+import { range } from '../../common/util/util.js';
+import { GPUTest } from '../../webgpu/gpu_test.js';
+
+export const g = makeTestGroup(GPUTest);
+
+// Builds a WGSL module containing `numEntryPoints` compute entry points
+// (computeMain0 .. computeMain{N-1}), each of which calls the shared main()
+// helper to increment buffer.data by 1.
+const makeCode = (numEntryPoints: number) => {
+  const kBaseCode = `
+    struct Buffer { data: u32, };
+    @group(0) @binding(0) var<storage, read_write> buffer: Buffer;
+    fn main() { buffer.data = buffer.data + 1u; }
+  `;
+  const makeEntryPoint = (i: number) => `
+    @compute @workgroup_size(1) fn computeMain${i}() { main(); }
+  `;
+  return kBaseCode + range(numEntryPoints, makeEntryPoint).join('');
+};
+
+g.test('many')
+  .desc(
+    `Tests compilation and usage of shaders with a huge number of entry points.
+
+TODO: There may be a normative limit to the number of entry points allowed in
+a shader, in which case this would become a validation test instead.`
+  )
+  .fn(async t => {
+    // Single-u32 storage buffer acting as a counter, starting at 0.
+    const data = new Uint32Array([0]);
+    const buffer = t.makeBufferWithContents(data, GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC);
+
+    // NOTE: Initial shader compilation time seems to scale exponentially with
+    // this value in Chrome.
+    const kNumEntryPoints = 200;
+
+    const shader = t.device.createShaderModule({
+      code: makeCode(kNumEntryPoints),
+    });
+
+    // Use an explicit layout (rather than 'auto') so one bind group can be
+    // shared across all kNumEntryPoints pipelines below.
+    const layout = t.device.createBindGroupLayout({
+      entries: [
+        {
+          binding: 0,
+          visibility: GPUShaderStage.COMPUTE,
+          buffer: { type: 'storage' },
+        },
+      ],
+    });
+    const pipelineLayout = t.device.createPipelineLayout({
+      bindGroupLayouts: [layout],
+    });
+    const bindGroup = t.device.createBindGroup({
+      layout,
+      entries: [{ binding: 0, resource: { buffer } }],
+    });
+
+    // Build one pipeline per entry point and dispatch each exactly once;
+    // every dispatch bumps the counter by 1.
+    const encoder = t.device.createCommandEncoder();
+    range(kNumEntryPoints, i => {
+      const pipeline = t.device.createComputePipeline({
+        layout: pipelineLayout,
+        compute: {
+          module: shader,
+          entryPoint: `computeMain${i}`,
+        },
+      });
+
+      const pass = encoder.beginComputePass();
+      pass.setPipeline(pipeline);
+      pass.setBindGroup(0, bindGroup);
+      pass.dispatchWorkgroups(1);
+      pass.end();
+    });
+
+    // After all dispatches the counter must equal the number of entry points.
+    t.device.queue.submit([encoder.finish()]);
+    t.expectGPUBufferValuesEqual(buffer, new Uint32Array([kNumEntryPoints]));
+  });
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/shaders/non_halting.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/shaders/non_halting.spec.ts
new file mode 100644
index 0000000000..b88aa083b3
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/shaders/non_halting.spec.ts
@@ -0,0 +1,194 @@
+export const description = `
+Stress tests covering robustness in the presence of non-halting shaders.
+`;
+
+import { makeTestGroup } from '../../common/framework/test_group.js';
+import { GPUTest } from '../../webgpu/gpu_test.js';
+
+export const g = makeTestGroup(GPUTest);
+
+g.test('compute')
+  .desc(
+    `Tests execution of compute passes with non-halting dispatch operations.
+
+This is expected to hang for a bit, but it should ultimately result in graceful
+device loss.`
+  )
+  .fn(async t => {
+    const data = new Uint32Array([0]);
+    const buffer = t.makeBufferWithContents(data, GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC);
+    // buffer.data starts at 0 and only ever grows by 2, so it stays even and
+    // the exit condition (data == 1) can never hold: the loop never halts.
+    const module = t.device.createShaderModule({
+      code: `
+        struct Buffer { data: u32, };
+        @group(0) @binding(0) var<storage, read_write> buffer: Buffer;
+        @compute @workgroup_size(1) fn main() {
+          loop {
+            if (buffer.data == 1u) {
+              break;
+            }
+            buffer.data = buffer.data + 2u;
+          }
+        }
+      `,
+    });
+    const pipeline = t.device.createComputePipeline({
+      layout: 'auto',
+      compute: { module, entryPoint: 'main' },
+    });
+    const encoder = t.device.createCommandEncoder();
+    const pass = encoder.beginComputePass();
+    pass.setPipeline(pipeline);
+    const bindGroup = t.device.createBindGroup({
+      layout: pipeline.getBindGroupLayout(0),
+      entries: [{ binding: 0, resource: { buffer } }],
+    });
+    pass.setBindGroup(0, bindGroup);
+    pass.dispatchWorkgroups(1);
+    pass.end();
+    t.device.queue.submit([encoder.finish()]);
+    // The implementation is expected to detect the hang and lose the device
+    // gracefully; the test passes once the `lost` promise resolves.
+    await t.device.lost;
+  });
+
+g.test('vertex')
+  .desc(
+    `Tests execution of render passes with a non-halting vertex stage.
+
+This is expected to hang for a bit, but it should ultimately result in graceful
+device loss.`
+  )
+  .fn(async t => {
+    // counter starts at data.counter (0 below) and grows by data.increment
+    // (2 below), so it stays even and (counter % 2 == 1) never holds: the
+    // vertex stage never halts. The operands come from a uniform, presumably
+    // so the loop cannot be constant-folded away at compile time.
+    const module = t.device.createShaderModule({
+      code: `
+        struct Data { counter: u32, increment: u32, };
+        @group(0) @binding(0) var<uniform> data: Data;
+        @vertex fn vmain() -> @builtin(position) vec4<f32> {
+          var counter: u32 = data.counter;
+          loop {
+            if (counter % 2u == 1u) {
+              break;
+            }
+            counter = counter + data.increment;
+          }
+          return vec4<f32>(1.0, 1.0, 0.0, f32(counter));
+        }
+        @fragment fn fmain() -> @location(0) vec4<f32> {
+          return vec4<f32>(1.0);
+        }
+      `,
+    });
+
+    const pipeline = t.device.createRenderPipeline({
+      layout: 'auto',
+      vertex: { module, entryPoint: 'vmain', buffers: [] },
+      primitive: { topology: 'point-list' },
+      fragment: {
+        targets: [{ format: 'rgba8unorm' }],
+        module,
+        entryPoint: 'fmain',
+      },
+    });
+    // counter = 0, increment = 2: guarantees the shader loop above never exits.
+    const uniforms = t.makeBufferWithContents(new Uint32Array([0, 2]), GPUBufferUsage.UNIFORM);
+    const bindGroup = t.device.createBindGroup({
+      layout: pipeline.getBindGroupLayout(0),
+      entries: [
+        {
+          binding: 0,
+          resource: { buffer: uniforms },
+        },
+      ],
+    });
+    // Minimal 1x1 target; the rendered output is never checked since the
+    // draw is expected to hang rather than complete.
+    const renderTarget = t.device.createTexture({
+      size: [1, 1],
+      usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC,
+      format: 'rgba8unorm',
+    });
+    const encoder = t.device.createCommandEncoder();
+    const pass = encoder.beginRenderPass({
+      colorAttachments: [
+        {
+          view: renderTarget.createView(),
+          clearValue: [0, 0, 0, 0],
+          loadOp: 'clear',
+          storeOp: 'store',
+        },
+      ],
+    });
+    pass.setPipeline(pipeline);
+    pass.setBindGroup(0, bindGroup);
+    pass.draw(1);
+    pass.end();
+    t.device.queue.submit([encoder.finish()]);
+    // The implementation is expected to detect the hang and lose the device
+    // gracefully; the test passes once the `lost` promise resolves.
+    await t.device.lost;
+  });
+
+g.test('fragment')
+  .desc(
+    `Tests execution of render passes with a non-halting fragment stage.
+
+This is expected to hang for a bit, but it should ultimately result in graceful
+device loss.`
+  )
+  .fn(async t => {
+    // Same construction as the vertex test above, but the never-halting loop
+    // lives in the fragment stage: counter starts at data.counter (0 below),
+    // grows by data.increment (2 below), stays even, and (counter % 2 == 1)
+    // never holds. Uniform-sourced operands keep the loop from being
+    // constant-folded away, presumably.
+    const module = t.device.createShaderModule({
+      code: `
+        struct Data { counter: u32, increment: u32, };
+        @group(0) @binding(0) var<uniform> data: Data;
+        @vertex fn vmain() -> @builtin(position) vec4<f32> {
+          return vec4<f32>(0.0, 0.0, 0.0, 1.0);
+        }
+        @fragment fn fmain() -> @location(0) vec4<f32> {
+          var counter: u32 = data.counter;
+          loop {
+            if (counter % 2u == 1u) {
+              break;
+            }
+            counter = counter + data.increment;
+          }
+          return vec4<f32>(1.0 / f32(counter), 0.0, 0.0, 1.0);
+        }
+      `,
+    });
+
+    const pipeline = t.device.createRenderPipeline({
+      layout: 'auto',
+      vertex: { module, entryPoint: 'vmain', buffers: [] },
+      primitive: { topology: 'point-list' },
+      fragment: {
+        targets: [{ format: 'rgba8unorm' }],
+        module,
+        entryPoint: 'fmain',
+      },
+    });
+    // counter = 0, increment = 2: guarantees the fragment loop never exits.
+    const uniforms = t.makeBufferWithContents(new Uint32Array([0, 2]), GPUBufferUsage.UNIFORM);
+    const bindGroup = t.device.createBindGroup({
+      layout: pipeline.getBindGroupLayout(0),
+      entries: [
+        {
+          binding: 0,
+          resource: { buffer: uniforms },
+        },
+      ],
+    });
+    // Minimal 1x1 target; output is never checked since the draw is expected
+    // to hang rather than complete.
+    const renderTarget = t.device.createTexture({
+      size: [1, 1],
+      usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC,
+      format: 'rgba8unorm',
+    });
+    const encoder = t.device.createCommandEncoder();
+    const pass = encoder.beginRenderPass({
+      colorAttachments: [
+        {
+          view: renderTarget.createView(),
+          clearValue: [0, 0, 0, 0],
+          loadOp: 'clear',
+          storeOp: 'store',
+        },
+      ],
+    });
+    pass.setPipeline(pipeline);
+    pass.setBindGroup(0, bindGroup);
+    pass.draw(1);
+    pass.end();
+    t.device.queue.submit([encoder.finish()]);
+    // The implementation is expected to detect the hang and lose the device
+    // gracefully; the test passes once the `lost` promise resolves.
+    await t.device.lost;
+  });
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/shaders/slow.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/shaders/slow.spec.ts
new file mode 100644
index 0000000000..9359b976a3
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/shaders/slow.spec.ts
@@ -0,0 +1,195 @@
+export const description = `
+Stress tests covering robustness in the presence of slow shaders.
+`;
+
+import { makeTestGroup } from '../../common/framework/test_group.js';
+import { GPUTest } from '../../webgpu/gpu_test.js';
+
+export const g = makeTestGroup(GPUTest);
+
+g.test('compute')
+  .desc(`Tests execution of compute passes with very long-running dispatch operations.`)
+  .fn(async t => {
+    // 1000 invocations, each owning one u32 element initialized to 0.
+    const kDispatchSize = 1000;
+    const data = new Uint32Array(kDispatchSize);
+    const buffer = t.makeBufferWithContents(data, GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC);
+    // Each invocation spins its own element from 0 up to 1,000,000 — slow,
+    // but (unlike the non_halting tests) guaranteed to terminate.
+    const module = t.device.createShaderModule({
+      code: `
+        struct Buffer { data: array<u32>, };
+        @group(0) @binding(0) var<storage, read_write> buffer: Buffer;
+        @compute @workgroup_size(1) fn main(
+            @builtin(global_invocation_id) id: vec3<u32>) {
+          loop {
+            if (buffer.data[id.x] == 1000000u) {
+              break;
+            }
+            buffer.data[id.x] = buffer.data[id.x] + 1u;
+          }
+        }
+      `,
+    });
+    const pipeline = t.device.createComputePipeline({
+      layout: 'auto',
+      compute: { module, entryPoint: 'main' },
+    });
+    const encoder = t.device.createCommandEncoder();
+    const pass = encoder.beginComputePass();
+    pass.setPipeline(pipeline);
+    const bindGroup = t.device.createBindGroup({
+      layout: pipeline.getBindGroupLayout(0),
+      entries: [{ binding: 0, resource: { buffer } }],
+    });
+    pass.setBindGroup(0, bindGroup);
+    pass.dispatchWorkgroups(kDispatchSize);
+    pass.end();
+    t.device.queue.submit([encoder.finish()]);
+    // Every element must have reached the loop's terminal value.
+    t.expectGPUBufferValuesEqual(buffer, new Uint32Array(new Array(kDispatchSize).fill(1000000)));
+  });
+
+g.test('vertex')
+  .desc(`Tests execution of render passes with a very long-running vertex stage.`)
+  .fn(async t => {
+    // The vertex loop increments from data.counter (0) by data.increment (1)
+    // until the value is a multiple of 50,000,000 — i.e. 50M iterations.
+    // It terminates, unlike the non_halting variant. The final counter
+    // (50,000,000) becomes clip-space w, so xy/w collapses to roughly the NDC
+    // origin and the point lands on the center texel of the 3x3 target.
+    const module = t.device.createShaderModule({
+      code: `
+        struct Data { counter: u32, increment: u32, };
+        @group(0) @binding(0) var<uniform> data: Data;
+        @vertex fn vmain() -> @builtin(position) vec4<f32> {
+          var counter: u32 = data.counter;
+          loop {
+            counter = counter + data.increment;
+            if (counter % 50000000u == 0u) {
+              break;
+            }
+          }
+          return vec4<f32>(1.0, 1.0, 0.0, f32(counter));
+        }
+        @fragment fn fmain() -> @location(0) vec4<f32> {
+          return vec4<f32>(1.0, 1.0, 0.0, 1.0);
+        }
+      `,
+    });
+
+    const pipeline = t.device.createRenderPipeline({
+      layout: 'auto',
+      vertex: { module, entryPoint: 'vmain', buffers: [] },
+      primitive: { topology: 'point-list' },
+      fragment: {
+        targets: [{ format: 'rgba8unorm' }],
+        module,
+        entryPoint: 'fmain',
+      },
+    });
+    // counter = 0, increment = 1 (read from a uniform, presumably so the
+    // compiler cannot fold the 50M-iteration loop away).
+    const uniforms = t.makeBufferWithContents(new Uint32Array([0, 1]), GPUBufferUsage.UNIFORM);
+    const bindGroup = t.device.createBindGroup({
+      layout: pipeline.getBindGroupLayout(0),
+      entries: [
+        {
+          binding: 0,
+          resource: { buffer: uniforms },
+        },
+      ],
+    });
+    const renderTarget = t.device.createTexture({
+      size: [3, 3],
+      usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC,
+      format: 'rgba8unorm',
+    });
+    const encoder = t.device.createCommandEncoder();
+    const pass = encoder.beginRenderPass({
+      colorAttachments: [
+        {
+          view: renderTarget.createView(),
+          clearValue: [0, 0, 0, 0],
+          loadOp: 'clear',
+          storeOp: 'store',
+        },
+      ],
+    });
+    pass.setPipeline(pipeline);
+    pass.setBindGroup(0, bindGroup);
+    pass.draw(1);
+    pass.end();
+    t.device.queue.submit([encoder.finish()]);
+    // The center texel (1,1) must carry the fragment's opaque yellow,
+    // proving the slow draw actually completed.
+    t.expectSinglePixelIn2DTexture(
+      renderTarget,
+      'rgba8unorm',
+      { x: 1, y: 1 },
+      {
+        exp: new Uint8Array([255, 255, 0, 255]),
+      }
+    );
+  });
+
+g.test('fragment')
+  .desc(`Tests execution of render passes with a very long-running fragment stage.`)
+  .fn(async t => {
+    // The vertex places a single point at the NDC origin (center texel of the
+    // 3x3 target). The fragment loop increments from data.counter (0) by
+    // data.increment (1) until it hits a multiple of 50,000,000 — 50M
+    // iterations, slow but terminating. The blue channel 1/50,000,000
+    // quantizes to 0 in rgba8unorm, so the expected pixel is opaque yellow.
+    const module = t.device.createShaderModule({
+      code: `
+        struct Data { counter: u32, increment: u32, };
+        @group(0) @binding(0) var<uniform> data: Data;
+        @vertex fn vmain() -> @builtin(position) vec4<f32> {
+          return vec4<f32>(0.0, 0.0, 0.0, 1.0);
+        }
+        @fragment fn fmain() -> @location(0) vec4<f32> {
+          var counter: u32 = data.counter;
+          loop {
+            counter = counter + data.increment;
+            if (counter % 50000000u == 0u) {
+              break;
+            }
+          }
+          return vec4<f32>(1.0, 1.0, 1.0 / f32(counter), 1.0);
+        }
+      `,
+    });
+
+    const pipeline = t.device.createRenderPipeline({
+      layout: 'auto',
+      vertex: { module, entryPoint: 'vmain', buffers: [] },
+      primitive: { topology: 'point-list' },
+      fragment: {
+        targets: [{ format: 'rgba8unorm' }],
+        module,
+        entryPoint: 'fmain',
+      },
+    });
+    // counter = 0, increment = 1 (read from a uniform, presumably so the
+    // compiler cannot fold the 50M-iteration loop away).
+    const uniforms = t.makeBufferWithContents(new Uint32Array([0, 1]), GPUBufferUsage.UNIFORM);
+    const bindGroup = t.device.createBindGroup({
+      layout: pipeline.getBindGroupLayout(0),
+      entries: [
+        {
+          binding: 0,
+          resource: { buffer: uniforms },
+        },
+      ],
+    });
+    const renderTarget = t.device.createTexture({
+      size: [3, 3],
+      usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC,
+      format: 'rgba8unorm',
+    });
+    const encoder = t.device.createCommandEncoder();
+    const pass = encoder.beginRenderPass({
+      colorAttachments: [
+        {
+          view: renderTarget.createView(),
+          clearValue: [0, 0, 0, 0],
+          loadOp: 'clear',
+          storeOp: 'store',
+        },
+      ],
+    });
+    pass.setPipeline(pipeline);
+    pass.setBindGroup(0, bindGroup);
+    pass.draw(1);
+    pass.end();
+    t.device.queue.submit([encoder.finish()]);
+    // Center texel must be the fragment's yellow, proving the slow fragment
+    // shader ran to completion.
+    t.expectSinglePixelIn2DTexture(
+      renderTarget,
+      'rgba8unorm',
+      { x: 1, y: 1 },
+      {
+        exp: new Uint8Array([255, 255, 0, 255]),
+      }
+    );
+  });
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/texture/README.txt b/dom/webgpu/tests/cts/checkout/src/stress/texture/README.txt
new file mode 100644
index 0000000000..db40963b2e
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/texture/README.txt
@@ -0,0 +1 @@
+Stress tests covering texture usage.
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/texture/large.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/texture/large.spec.ts
new file mode 100644
index 0000000000..cba2053d38
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/stress/texture/large.spec.ts
@@ -0,0 +1,56 @@
+export const description = `
+Stress tests covering usage of very large textures.
+`;
+
+import { makeTestGroup } from '../../common/framework/test_group.js';
+import { GPUTest } from '../../webgpu/gpu_test.js';
+
+export const g = makeTestGroup(GPUTest);
+
+// NOTE: All tests in this file are currently placeholders (.unimplemented());
+// only their descriptions are specified so far.
+g.test('loading,2d')
+  .desc(
+    `Tests execution of shaders loading values from very large (up to at least
+8192x8192) 2D textures. The texture size is selected according to the limit
+supported by the GPUDevice.`
+  )
+  .unimplemented();
+
+g.test('loading,2d_array')
+  .desc(
+    `Tests execution of shaders loading values from very large (up to at least
+8192x8192x2048) arrays of 2D textures. The texture and array size is selected
+according to limits supported by the GPUDevice.`
+  )
+  .unimplemented();
+
+g.test('loading,3d')
+  .desc(
+    `Tests execution of shaders loading values from very large (up to at least
+2048x2048x2048) textures. The texture size is selected according to the limit
+supported by the GPUDevice.`
+  )
+  .unimplemented();
+
+g.test('sampling,2d')
+  .desc(
+    `Tests execution of shaders sampling values from very large (up to at least
+8192x8192) 2D textures. The texture size is selected according to the limit
+supported by the GPUDevice.`
+  )
+  .unimplemented();
+
+g.test('sampling,2d_array')
+  .desc(
+    `Tests execution of shaders sampling values from very large (up to at least
+8192x8192x2048) arrays of 2D textures. The texture and array size is selected
+according to limits supported by the GPUDevice.`
+  )
+  .unimplemented();
+
+g.test('sampling,3d')
+  .desc(
+    `Tests execution of shaders sampling values from very large (up to at least
+2048x2048x2048) textures. The texture size is selected according to the limit
+supported by the GPUDevice.`
+  )
+  .unimplemented();