Diffstat (limited to 'dom/webgpu/tests/cts/checkout/src/stress')
37 files changed, 2237 insertions, 0 deletions
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/README.txt b/dom/webgpu/tests/cts/checkout/src/stress/README.txt new file mode 100644 index 0000000000..5457e8400d --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/README.txt @@ -0,0 +1,6 @@ +WebGPU stress tests. + +These tests are separated from conformance tests because they are more likely to +cause browser hangs and crashes. + +TODO: Look at dEQP (OpenGL ES and Vulkan) and WebGL for inspiration here. diff --git a/dom/webgpu/tests/cts/checkout/src/stress/adapter/README.txt b/dom/webgpu/tests/cts/checkout/src/stress/adapter/README.txt new file mode 100644 index 0000000000..3a57f3d87f --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/adapter/README.txt @@ -0,0 +1 @@ +Stress tests covering use of GPUAdapter. diff --git a/dom/webgpu/tests/cts/checkout/src/stress/adapter/device_allocation.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/adapter/device_allocation.spec.ts new file mode 100644 index 0000000000..27bb5f6a32 --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/adapter/device_allocation.spec.ts @@ -0,0 +1,292 @@ +export const description = ` +Stress tests for GPUAdapter.requestDevice. +`; + +import { Fixture } from '../../common/framework/fixture.js'; +import { makeTestGroup } from '../../common/framework/test_group.js'; +import { attemptGarbageCollection } from '../../common/util/collect_garbage.js'; +import { keysOf } from '../../common/util/data_tables.js'; +import { getGPU } from '../../common/util/navigator_gpu.js'; +import { assert, iterRange } from '../../common/util/util.js'; +import { getDefaultLimitsForAdapter } from '../../webgpu/capability_info.js'; + +export const g = makeTestGroup(Fixture); + +/** Adapter preference identifier to option. */ +const kAdapterTypeOptions: { + readonly [k in GPUPowerPreference | 'fallback']: GPURequestAdapterOptions; +} = + /* prettier-ignore */ { + 'low-power': { powerPreference: 'low-power', forceFallbackAdapter: false }, + 'high-performance': { powerPreference: 'high-performance', forceFallbackAdapter: false }, + 'fallback': { powerPreference: undefined, forceFallbackAdapter: true }, +}; +/** List of all adapter hint types. */ +const kAdapterTypes = keysOf(kAdapterTypeOptions); + +/** + * Creates a device, a valid compute pipeline, valid resources for the pipeline, and + * ties them together into a set of compute commands ready to be submitted to the GPU + * queue. Does not submit the commands in order to make sure that all resources are + * kept alive until the device is destroyed. + */ +async function createDeviceAndComputeCommands(adapter: GPUAdapter) { + // Constants are computed such that per run, this function should allocate roughly 2G + // worth of data. This should be sufficient as we run these creation functions many + // times. If the data backing the created objects is not recycled we should OOM. 
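  // (For scale, assuming the spec-default limits of maxComputeWorkgroupSizeX/Y = 256: each
  //  storage buffer is 256 * 256 * 4 B = 256 KiB, and 64 pipelines * 128 bind groups gives
  //  8192 such buffers, i.e. roughly 2 GiB in total.)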
+ const limitInfo = getDefaultLimitsForAdapter(adapter); + const kNumPipelines = 64; + const kNumBindgroups = 128; + const kNumBufferElements = + limitInfo.maxComputeWorkgroupSizeX.default * limitInfo.maxComputeWorkgroupSizeY.default; + const kBufferSize = kNumBufferElements * 4; + const kBufferData = new Uint32Array([...iterRange(kNumBufferElements, x => x)]); + + const device: GPUDevice = await adapter.requestDevice(); + const commands = []; + + for (let pipelineIndex = 0; pipelineIndex < kNumPipelines; ++pipelineIndex) { + const pipeline = device.createComputePipeline({ + layout: 'auto', + compute: { + module: device.createShaderModule({ + code: ` + struct Buffer { data: array<u32>, }; + + @group(0) @binding(0) var<storage, read_write> buffer: Buffer; + @compute @workgroup_size(1) fn main( + @builtin(global_invocation_id) id: vec3<u32>) { + buffer.data[id.x * ${limitInfo.maxComputeWorkgroupSizeX.default}u + id.y] = + buffer.data[id.x * ${limitInfo.maxComputeWorkgroupSizeX.default}u + id.y] + + ${pipelineIndex}u; + } + `, + }), + entryPoint: 'main', + }, + }); + for (let bindgroupIndex = 0; bindgroupIndex < kNumBindgroups; ++bindgroupIndex) { + const buffer = device.createBuffer({ + size: kBufferSize, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC, + }); + device.queue.writeBuffer(buffer, 0, kBufferData, 0, kBufferData.length); + const bindgroup = device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [{ binding: 0, resource: { buffer } }], + }); + + const encoder = device.createCommandEncoder(); + const pass = encoder.beginComputePass(); + pass.setPipeline(pipeline); + pass.setBindGroup(0, bindgroup); + pass.dispatchWorkgroups( + limitInfo.maxComputeWorkgroupSizeX.default, + limitInfo.maxComputeWorkgroupSizeY.default + ); + pass.end(); + commands.push(encoder.finish()); + } + } + return { device, objects: commands }; +} + +/** + * Creates a device, a valid render pipeline, valid resources for the pipeline, and + * ties them together into a set of render commands ready to be submitted to the GPU + * queue. Does not submit the commands in order to make sure that all resources are + * kept alive until the device is destroyed. + */ +async function createDeviceAndRenderCommands(adapter: GPUAdapter) { + // Constants are computed such that per run, this function should allocate roughly 2G + // worth of data. This should be sufficient as we run these creation functions many + // times. If the data backing the created objects is not recycled we should OOM. 
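  // (For scale: each bind group allocates a 128 * 128 * 4 B = 64 KiB uniform buffer plus a
  //  64 KiB 128x128 rgba8unorm render target, and 128 pipelines * 128 bind groups gives
  //  16384 of each, i.e. roughly 2 GiB in total.)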
+ const kNumPipelines = 128; + const kNumBindgroups = 128; + const kSize = 128; + const kBufferData = new Uint32Array([...iterRange(kSize * kSize, x => x)]); + + const device: GPUDevice = await adapter.requestDevice(); + const commands = []; + + for (let pipelineIndex = 0; pipelineIndex < kNumPipelines; ++pipelineIndex) { + const module = device.createShaderModule({ + code: ` + struct Buffer { data: array<vec4<u32>, ${(kSize * kSize) / 4}>, }; + + @group(0) @binding(0) var<uniform> buffer: Buffer; + @vertex fn vmain( + @builtin(vertex_index) vertexIndex: u32 + ) -> @builtin(position) vec4<f32> { + let index = buffer.data[vertexIndex / 4u][vertexIndex % 4u]; + let position = vec2<f32>(f32(index % ${kSize}u), f32(index / ${kSize}u)); + let r = vec2<f32>(1.0 / f32(${kSize})); + let a = 2.0 * r; + let b = r - vec2<f32>(1.0); + return vec4<f32>(fma(position, a, b), 0.0, 1.0); + } + + @fragment fn fmain() -> @location(0) vec4<f32> { + return vec4<f32>(${pipelineIndex}.0 / ${kNumPipelines}.0, 0.0, 0.0, 1.0); + } + `, + }); + const pipeline = device.createRenderPipeline({ + layout: device.createPipelineLayout({ + bindGroupLayouts: [ + device.createBindGroupLayout({ + entries: [ + { + binding: 0, + visibility: GPUShaderStage.VERTEX, + buffer: { type: 'uniform' }, + }, + ], + }), + ], + }), + vertex: { module, entryPoint: 'vmain', buffers: [] }, + primitive: { topology: 'point-list' }, + fragment: { + targets: [{ format: 'rgba8unorm' }], + module, + entryPoint: 'fmain', + }, + }); + for (let bindgroupIndex = 0; bindgroupIndex < kNumBindgroups; ++bindgroupIndex) { + const buffer = device.createBuffer({ + size: kSize * kSize * 4, + usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, + }); + device.queue.writeBuffer(buffer, 0, kBufferData, 0, kBufferData.length); + const bindgroup = device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [{ binding: 0, resource: { buffer } }], + }); + const texture = device.createTexture({ + size: [kSize, kSize], + usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC, + format: 'rgba8unorm', + }); + + const encoder = device.createCommandEncoder(); + const pass = encoder.beginRenderPass({ + colorAttachments: [ + { + view: texture.createView(), + loadOp: 'load', + storeOp: 'store', + }, + ], + }); + pass.setPipeline(pipeline); + pass.setBindGroup(0, bindgroup); + pass.draw(kSize * kSize); + pass.end(); + commands.push(encoder.finish()); + } + } + return { device, objects: commands }; +} + +/** + * Creates a device and a large number of buffers which are immediately written to. The + * buffers are expected to be kept alive until they or the device are destroyed. + */ +async function createDeviceAndBuffers(adapter: GPUAdapter) { + // Currently we just allocate 2G of memory using 512MB blocks. We may be able to + // increase this to hit OOM instead, but on integrated GPUs on Metal, this can cause + // kernel panics at the moment, and it can greatly increase the time needed. + const kTotalMemorySize = 2 * 1024 * 1024 * 1024; + const kMemoryBlockSize = 512 * 1024 * 1024; + const kMemoryBlockData = new Uint8Array(kMemoryBlockSize); + + const device: GPUDevice = await adapter.requestDevice(); + const buffers = []; + for (let memory = 0; memory < kTotalMemorySize; memory += kMemoryBlockSize) { + const buffer = device.createBuffer({ + size: kMemoryBlockSize, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST, + }); + + // Write out to the buffer to make sure that it has backing memory. 
+ device.queue.writeBuffer(buffer, 0, kMemoryBlockData, 0, kMemoryBlockData.length); + buffers.push(buffer); + } + return { device, objects: buffers }; +} + +g.test('coexisting') + .desc(`Tests allocation of many coexisting GPUDevice objects.`) + .params(u => u.combine('adapterType', kAdapterTypes)) + .fn(async t => { + const { adapterType } = t.params; + const adapter = await getGPU(t.rec).requestAdapter(kAdapterTypeOptions[adapterType]); + assert(adapter !== null, 'Failed to get adapter.'); + + // Based on Vulkan conformance test requirement to be able to create multiple devices. + const kNumDevices = 5; + + const devices = []; + for (let i = 0; i < kNumDevices; ++i) { + const device: GPUDevice = await adapter.requestDevice(); + devices.push(device); + } + }); + +g.test('continuous,with_destroy') + .desc( + `Tests allocation and destruction of many GPUDevice objects over time. Device objects +are sequentially requested with a series of device allocated objects created on each +device. The devices are then destroyed to verify that the device and the device allocated +objects are recycled over a very large number of iterations.` + ) + .params(u => u.combine('adapterType', kAdapterTypes)) + .fn(async t => { + const { adapterType } = t.params; + const adapter = await getGPU(t.rec).requestAdapter(kAdapterTypeOptions[adapterType]); + assert(adapter !== null, 'Failed to get adapter.'); + + // Since devices are being destroyed, we should be able to create many devices. + const kNumDevices = 100; + const kFunctions = [ + createDeviceAndBuffers, + createDeviceAndComputeCommands, + createDeviceAndRenderCommands, + ]; + + const deviceList = []; + const objectLists = []; + for (let i = 0; i < kNumDevices; ++i) { + const { device, objects } = await kFunctions[i % kFunctions.length](adapter); + t.expect(objects.length > 0, 'unable to allocate any objects'); + deviceList.push(device); + objectLists.push(objects); + device.destroy(); + } + }); + +g.test('continuous,no_destroy') + .desc( + `Tests allocation and implicit GC of many GPUDevice objects over time. Objects are +sequentially requested and dropped for GC over a very large number of iterations. Note +that without destroy, we do not create device allocated objects because that will +implicitly keep the device in scope.` + ) + .params(u => u.combine('adapterType', kAdapterTypes)) + .fn(async t => { + const { adapterType } = t.params; + const adapter = await getGPU(t.rec).requestAdapter(kAdapterTypeOptions[adapterType]); + assert(adapter !== null, 'Failed to get adapter.'); + + const kNumDevices = 10_000; + for (let i = 1; i <= kNumDevices; ++i) { + await (async () => { + t.expect((await adapter.requestDevice()) !== null, 'unexpected null device'); + })(); + if (i % 10 === 0) { + // We need to occasionally wait for GC to clear out stale devices. + await attemptGarbageCollection(); + } + } + }); diff --git a/dom/webgpu/tests/cts/checkout/src/stress/compute/README.txt b/dom/webgpu/tests/cts/checkout/src/stress/compute/README.txt new file mode 100644 index 0000000000..b41aabc66b --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/compute/README.txt @@ -0,0 +1 @@ +Stress tests covering operations specific to GPUComputePipeline and GPUComputePass. 
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/compute/compute_pass.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/compute/compute_pass.spec.ts new file mode 100644 index 0000000000..bd63ca1450 --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/compute/compute_pass.spec.ts @@ -0,0 +1,243 @@ +export const description = ` +Stress tests covering GPUComputePassEncoder usage. +`; + +import { makeTestGroup } from '../../common/framework/test_group.js'; +import { assert, iterRange } from '../../common/util/util.js'; +import { GPUTest } from '../../webgpu/gpu_test.js'; + +export const g = makeTestGroup(GPUTest); + +g.test('many') + .desc( + `Tests execution of a huge number of compute passes using the same +GPUComputePipeline.` + ) + .fn(t => { + const kNumElements = 64; + const data = new Uint32Array([...iterRange(kNumElements, x => x)]); + const buffer = t.makeBufferWithContents(data, GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC); + const pipeline = t.device.createComputePipeline({ + layout: 'auto', + compute: { + module: t.device.createShaderModule({ + code: ` + struct Buffer { data: array<u32>, }; + @group(0) @binding(0) var<storage, read_write> buffer: Buffer; + @compute @workgroup_size(1) fn main( + @builtin(global_invocation_id) id: vec3<u32>) { + buffer.data[id.x] = buffer.data[id.x] + 1u; + } + `, + }), + entryPoint: 'main', + }, + }); + const bindGroup = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [{ binding: 0, resource: { buffer } }], + }); + const kNumIterations = 250_000; + for (let i = 0; i < kNumIterations; ++i) { + const encoder = t.device.createCommandEncoder(); + const pass = encoder.beginComputePass(); + pass.setPipeline(pipeline); + pass.setBindGroup(0, bindGroup); + pass.dispatchWorkgroups(kNumElements); + pass.end(); + t.device.queue.submit([encoder.finish()]); + } + t.expectGPUBufferValuesEqual( + buffer, + new Uint32Array([...iterRange(kNumElements, x => x + kNumIterations)]) + ); + }); + +g.test('pipeline_churn') + .desc( + `Tests execution of a huge number of compute passes which each use a different +GPUComputePipeline.` + ) + .fn(t => { + const buffer = t.makeBufferWithContents( + new Uint32Array([0]), + GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC + ); + const kNumIterations = 10_000; + const stages = iterRange(kNumIterations, i => ({ + module: t.device.createShaderModule({ + code: ` + struct Buffer { data: u32, }; + @group(0) @binding(0) var<storage, read_write> buffer: Buffer; + @compute @workgroup_size(1) fn main${i}() { + buffer.data = buffer.data + 1u; + } + `, + }), + entryPoint: `main${i}`, + })); + for (const compute of stages) { + const encoder = t.device.createCommandEncoder(); + const pipeline = t.device.createComputePipeline({ layout: 'auto', compute }); + const bindGroup = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [{ binding: 0, resource: { buffer } }], + }); + const pass = encoder.beginComputePass(); + pass.setPipeline(pipeline); + pass.setBindGroup(0, bindGroup); + pass.dispatchWorkgroups(1); + pass.end(); + t.device.queue.submit([encoder.finish()]); + } + t.expectGPUBufferValuesEqual(buffer, new Uint32Array([kNumIterations])); + }); + +g.test('bind_group_churn') + .desc( + `Tests execution of compute passes which switch between a huge number of bind +groups.` + ) + .fn(t => { + const kNumElements = 64; + const data = new Uint32Array([...iterRange(kNumElements, x => x)]); + const buffer1 = t.makeBufferWithContents( + data, + GPUBufferUsage.STORAGE | 
GPUBufferUsage.COPY_SRC + ); + const buffer2 = t.makeBufferWithContents( + data, + GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC + ); + const module = t.device.createShaderModule({ + code: ` + struct Buffer { data: array<u32>, }; + @group(0) @binding(0) var<storage, read_write> buffer1: Buffer; + @group(0) @binding(1) var<storage, read_write> buffer2: Buffer; + @compute @workgroup_size(1) fn main( + @builtin(global_invocation_id) id: vec3<u32>) { + buffer1.data[id.x] = buffer1.data[id.x] + 1u; + buffer2.data[id.x] = buffer2.data[id.x] + 2u; + } + `, + }); + const kNumIterations = 250_000; + const pipeline = t.device.createComputePipeline({ + layout: 'auto', + compute: { module, entryPoint: 'main' }, + }); + const encoder = t.device.createCommandEncoder(); + const pass = encoder.beginComputePass(); + pass.setPipeline(pipeline); + for (let i = 0; i < kNumIterations; ++i) { + const buffer1Binding = i % 2; + const buffer2Binding = buffer1Binding ^ 1; + const bindGroup = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [ + { binding: buffer1Binding, resource: { buffer: buffer1 } }, + { binding: buffer2Binding, resource: { buffer: buffer2 } }, + ], + }); + pass.setBindGroup(0, bindGroup); + pass.dispatchWorkgroups(kNumElements); + } + pass.end(); + t.device.queue.submit([encoder.finish()]); + const kTotalAddition = (kNumIterations / 2) * 3; + t.expectGPUBufferValuesEqual( + buffer1, + new Uint32Array([...iterRange(kNumElements, x => x + kTotalAddition)]) + ); + t.expectGPUBufferValuesEqual( + buffer2, + new Uint32Array([...iterRange(kNumElements, x => x + kTotalAddition)]) + ); + }); + +g.test('many_dispatches') + .desc(`Tests execution of compute passes with a huge number of dispatch calls`) + .fn(t => { + const kNumElements = 64; + const data = new Uint32Array([...iterRange(kNumElements, x => x)]); + const buffer = t.makeBufferWithContents(data, GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC); + const module = t.device.createShaderModule({ + code: ` + struct Buffer { data: array<u32>, }; + @group(0) @binding(0) var<storage, read_write> buffer: Buffer; + @compute @workgroup_size(1) fn main( + @builtin(global_invocation_id) id: vec3<u32>) { + buffer.data[id.x] = buffer.data[id.x] + 1u; + } + `, + }); + const kNumIterations = 1_000_000; + const pipeline = t.device.createComputePipeline({ + layout: 'auto', + compute: { module, entryPoint: 'main' }, + }); + const encoder = t.device.createCommandEncoder(); + const pass = encoder.beginComputePass(); + pass.setPipeline(pipeline); + const bindGroup = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [{ binding: 0, resource: { buffer } }], + }); + pass.setBindGroup(0, bindGroup); + for (let i = 0; i < kNumIterations; ++i) { + pass.dispatchWorkgroups(kNumElements); + } + pass.end(); + t.device.queue.submit([encoder.finish()]); + t.expectGPUBufferValuesEqual( + buffer, + new Uint32Array([...iterRange(kNumElements, x => x + kNumIterations)]) + ); + }); + +g.test('huge_dispatches') + .desc(`Tests execution of compute passes with huge dispatch calls`) + .fn(async t => { + const kDimensions = [512, 512, 128]; + kDimensions.forEach(x => { + assert(x <= t.device.limits.maxComputeWorkgroupsPerDimension); + }); + + const kNumElements = kDimensions[0] * kDimensions[1] * kDimensions[2]; + const data = new Uint32Array([...iterRange(kNumElements, x => x)]); + const buffer = t.makeBufferWithContents(data, GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC); + const module = t.device.createShaderModule({ + 
code: ` + struct Buffer { data: array<u32>, }; + @group(0) @binding(0) var<storage, read_write> buffer: Buffer; + @compute @workgroup_size(1) fn main( + @builtin(global_invocation_id) id: vec3<u32>) { + let index = (id.z * 512u + id.y) * 512u + id.x; + buffer.data[index] = buffer.data[index] + 1u; + } + `, + }); + const kNumIterations = 16; + const pipeline = t.device.createComputePipeline({ + layout: 'auto', + compute: { module, entryPoint: 'main' }, + }); + const bindGroup = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [{ binding: 0, resource: { buffer } }], + }); + for (let i = 0; i < kNumIterations; ++i) { + const encoder = t.device.createCommandEncoder(); + const pass = encoder.beginComputePass(); + pass.setBindGroup(0, bindGroup); + pass.setPipeline(pipeline); + pass.dispatchWorkgroups(kDimensions[0], kDimensions[1], kDimensions[2]); + pass.end(); + t.device.queue.submit([encoder.finish()]); + await t.device.queue.onSubmittedWorkDone(); + } + t.expectGPUBufferValuesEqual( + buffer, + new Uint32Array([...iterRange(kNumElements, x => x + kNumIterations)]) + ); + }); diff --git a/dom/webgpu/tests/cts/checkout/src/stress/device/README.txt b/dom/webgpu/tests/cts/checkout/src/stress/device/README.txt new file mode 100644 index 0000000000..6ee89fc5fd --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/device/README.txt @@ -0,0 +1,2 @@ +Stress tests covering GPUDevice usage, primarily focused on stressing allocation +of various resources. diff --git a/dom/webgpu/tests/cts/checkout/src/stress/device/bind_group_allocation.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/device/bind_group_allocation.spec.ts new file mode 100644 index 0000000000..5d428f3edb --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/device/bind_group_allocation.spec.ts @@ -0,0 +1,65 @@ +export const description = ` +Stress tests for allocation of GPUBindGroup objects through GPUDevice. +`; + +import { makeTestGroup } from '../../common/framework/test_group.js'; +import { GPUTest } from '../../webgpu/gpu_test.js'; + +export const g = makeTestGroup(GPUTest); + +g.test('coexisting') + .desc(`Tests allocation of many coexisting GPUBindGroup objects.`) + .fn(t => { + const kNumGroups = 1_000_000; + const buffer = t.device.createBuffer({ + size: 64, + usage: GPUBufferUsage.STORAGE, + }); + const layout = t.device.createBindGroupLayout({ + entries: [ + { + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { type: 'storage' }, + }, + ], + }); + const bindGroups = []; + for (let i = 0; i < kNumGroups; ++i) { + bindGroups.push( + t.device.createBindGroup({ + layout, + entries: [{ binding: 0, resource: { buffer } }], + }) + ); + } + }); + +g.test('continuous') + .desc( + `Tests allocation and implicit GC of many GPUBindGroup objects over time. 
+Objects are sequentially created and dropped for GC over a very large number of +iterations.` + ) + .fn(t => { + const kNumGroups = 5_000_000; + const buffer = t.device.createBuffer({ + size: 64, + usage: GPUBufferUsage.STORAGE, + }); + const layout = t.device.createBindGroupLayout({ + entries: [ + { + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { type: 'storage' }, + }, + ], + }); + for (let i = 0; i < kNumGroups; ++i) { + t.device.createBindGroup({ + layout, + entries: [{ binding: 0, resource: { buffer } }], + }); + } + }); diff --git a/dom/webgpu/tests/cts/checkout/src/stress/device/bind_group_layout_allocation.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/device/bind_group_layout_allocation.spec.ts new file mode 100644 index 0000000000..0933cd1b59 --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/device/bind_group_layout_allocation.spec.ts @@ -0,0 +1,20 @@ +export const description = ` +Stress tests for allocation of GPUBindGroupLayout objects through GPUDevice. +`; + +import { makeTestGroup } from '../../common/framework/test_group.js'; +import { GPUTest } from '../../webgpu/gpu_test.js'; + +export const g = makeTestGroup(GPUTest); + +g.test('coexisting') + .desc(`Tests allocation of many coexisting GPUBindGroupLayout objects.`) + .unimplemented(); + +g.test('continuous') + .desc( + `Tests allocation and implicit GC of many GPUBindGroupLayout objects over time. +Objects are sequentially created and dropped for GC over a very large number of +iterations.` + ) + .unimplemented(); diff --git a/dom/webgpu/tests/cts/checkout/src/stress/device/buffer_allocation.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/device/buffer_allocation.spec.ts new file mode 100644 index 0000000000..f55ec79c44 --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/device/buffer_allocation.spec.ts @@ -0,0 +1,25 @@ +export const description = ` +Stress tests for allocation of GPUBuffer objects through GPUDevice. +`; + +import { makeTestGroup } from '../../common/framework/test_group.js'; +import { GPUTest } from '../../webgpu/gpu_test.js'; + +export const g = makeTestGroup(GPUTest); + +g.test('coexisting').desc(`Tests allocation of many coexisting GPUBuffer objects.`).unimplemented(); + +g.test('continuous,with_destroy') + .desc( + `Tests allocation and destruction of many GPUBuffer objects over time. Objects +are sequentially created and destroyed over a very large number of iterations.` + ) + .unimplemented(); + +g.test('continuous,no_destroy') + .desc( + `Tests allocation and implicit GC of many GPUBuffer objects over time. Objects +are sequentially created and dropped for GC over a very large number of +iterations.` + ) + .unimplemented(); diff --git a/dom/webgpu/tests/cts/checkout/src/stress/device/command_encoder_allocation.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/device/command_encoder_allocation.spec.ts new file mode 100644 index 0000000000..e41769ee06 --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/device/command_encoder_allocation.spec.ts @@ -0,0 +1,20 @@ +export const description = ` +Stress tests for allocation of GPUCommandEncoder objects through GPUDevice. 
+`; + +import { makeTestGroup } from '../../common/framework/test_group.js'; +import { GPUTest } from '../../webgpu/gpu_test.js'; + +export const g = makeTestGroup(GPUTest); + +g.test('coexisting') + .desc(`Tests allocation of many coexisting GPUCommandEncoder objects.`) + .unimplemented(); + +g.test('continuous') + .desc( + `Tests allocation and implicit GC of many GPUCommandEncoder objects over time. +Objects are sequentially created and dropped for GC over a very large number of +iterations.` + ) + .unimplemented(); diff --git a/dom/webgpu/tests/cts/checkout/src/stress/device/compute_pipeline_allocation.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/device/compute_pipeline_allocation.spec.ts new file mode 100644 index 0000000000..5c03bc9674 --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/device/compute_pipeline_allocation.spec.ts @@ -0,0 +1,20 @@ +export const description = ` +Stress tests for allocation of GPUComputePipeline objects through GPUDevice. +`; + +import { makeTestGroup } from '../../common/framework/test_group.js'; +import { GPUTest } from '../../webgpu/gpu_test.js'; + +export const g = makeTestGroup(GPUTest); + +g.test('coexisting') + .desc(`Tests allocation of many coexisting GPUComputePipeline objects.`) + .unimplemented(); + +g.test('continuous') + .desc( + `Tests allocation and implicit GC of many GPUComputePipeline objects over time. +Objects are sequentially created and dropped for GC over a very large number of +iterations.` + ) + .unimplemented(); diff --git a/dom/webgpu/tests/cts/checkout/src/stress/device/pipeline_layout_allocation.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/device/pipeline_layout_allocation.spec.ts new file mode 100644 index 0000000000..15d417fd7e --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/device/pipeline_layout_allocation.spec.ts @@ -0,0 +1,20 @@ +export const description = ` +Stress tests for allocation of GPUPipelineLayout objects through GPUDevice. +`; + +import { makeTestGroup } from '../../common/framework/test_group.js'; +import { GPUTest } from '../../webgpu/gpu_test.js'; + +export const g = makeTestGroup(GPUTest); + +g.test('coexisting') + .desc(`Tests allocation of many coexisting GPUPipelineLayout objects.`) + .unimplemented(); + +g.test('continuous') + .desc( + `Tests allocation and implicit GC of many GPUPipelineLayout objects over time. +Objects are sequentially created and dropped for GC over a very large number of +iterations.` + ) + .unimplemented(); diff --git a/dom/webgpu/tests/cts/checkout/src/stress/device/query_set_allocation.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/device/query_set_allocation.spec.ts new file mode 100644 index 0000000000..757645cbf6 --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/device/query_set_allocation.spec.ts @@ -0,0 +1,27 @@ +export const description = ` +Stress tests for allocation of GPUQuerySet objects through GPUDevice. +`; + +import { makeTestGroup } from '../../common/framework/test_group.js'; +import { GPUTest } from '../../webgpu/gpu_test.js'; + +export const g = makeTestGroup(GPUTest); + +g.test('coexisting') + .desc(`Tests allocation of many coexisting GPUQuerySet objects.`) + .unimplemented(); + +g.test('continuous,with_destroy') + .desc( + `Tests allocation and destruction of many GPUQuerySet objects over time. 
Objects +are sequentially created and destroyed over a very large number of iterations.` + ) + .unimplemented(); + +g.test('continuous,no_destroy') + .desc( + `Tests allocation and implicit GC of many GPUQuerySet objects over time. Objects +are sequentially created and dropped for GC over a very large number of +iterations.` + ) + .unimplemented(); diff --git a/dom/webgpu/tests/cts/checkout/src/stress/device/render_bundle_allocation.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/device/render_bundle_allocation.spec.ts new file mode 100644 index 0000000000..d7448412a1 --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/device/render_bundle_allocation.spec.ts @@ -0,0 +1,20 @@ +export const description = ` +Stress tests for allocation of GPURenderBundle objects through GPUDevice. +`; + +import { makeTestGroup } from '../../common/framework/test_group.js'; +import { GPUTest } from '../../webgpu/gpu_test.js'; + +export const g = makeTestGroup(GPUTest); + +g.test('coexisting') + .desc(`Tests allocation of many coexisting GPURenderBundle objects.`) + .unimplemented(); + +g.test('continuous') + .desc( + `Tests allocation and implicit GC of many GPURenderBundle objects over time. +Objects are sequentially created and dropped for GC over a very large number of +iterations.` + ) + .unimplemented(); diff --git a/dom/webgpu/tests/cts/checkout/src/stress/device/render_pipeline_allocation.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/device/render_pipeline_allocation.spec.ts new file mode 100644 index 0000000000..21eb92cf7c --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/device/render_pipeline_allocation.spec.ts @@ -0,0 +1,20 @@ +export const description = ` +Stress tests for allocation of GPURenderPipeline objects through GPUDevice. +`; + +import { makeTestGroup } from '../../common/framework/test_group.js'; +import { GPUTest } from '../../webgpu/gpu_test.js'; + +export const g = makeTestGroup(GPUTest); + +g.test('coexisting') + .desc(`Tests allocation of many coexisting GPURenderPipeline objects.`) + .unimplemented(); + +g.test('continuous') + .desc( + `Tests allocation and implicit GC of many GPURenderPipeline objects over time. +Objects are sequentially created and dropped for GC over a very large number of +iterations.` + ) + .unimplemented(); diff --git a/dom/webgpu/tests/cts/checkout/src/stress/device/sampler_allocation.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/device/sampler_allocation.spec.ts new file mode 100644 index 0000000000..c34dae3f67 --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/device/sampler_allocation.spec.ts @@ -0,0 +1,20 @@ +export const description = ` +Stress tests for allocation of GPUSampler objects through GPUDevice. +`; + +import { makeTestGroup } from '../../common/framework/test_group.js'; +import { GPUTest } from '../../webgpu/gpu_test.js'; + +export const g = makeTestGroup(GPUTest); + +g.test('coexisting') + .desc(`Tests allocation of many coexisting GPUSampler objects.`) + .unimplemented(); + +g.test('continuous') + .desc( + `Tests allocation and implicit GC of many GPUSampler objects over time. 
Objects +are sequentially created and dropped for GC over a very large number of +iterations.` + ) + .unimplemented(); diff --git a/dom/webgpu/tests/cts/checkout/src/stress/device/shader_module_allocation.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/device/shader_module_allocation.spec.ts new file mode 100644 index 0000000000..97ef73d2c9 --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/device/shader_module_allocation.spec.ts @@ -0,0 +1,20 @@ +export const description = ` +Stress tests for allocation of GPUShaderModule objects through GPUDevice. +`; + +import { makeTestGroup } from '../../common/framework/test_group.js'; +import { GPUTest } from '../../webgpu/gpu_test.js'; + +export const g = makeTestGroup(GPUTest); + +g.test('coexisting') + .desc(`Tests allocation of many coexisting GPUShaderModule objects.`) + .unimplemented(); + +g.test('continuous') + .desc( + `Tests allocation and implicit GC of many GPUShaderModule objects over time. +Objects are sequentially created and dropped for GC over a very large number of +iterations.` + ) + .unimplemented(); diff --git a/dom/webgpu/tests/cts/checkout/src/stress/device/texture_allocation.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/device/texture_allocation.spec.ts new file mode 100644 index 0000000000..5cef598804 --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/device/texture_allocation.spec.ts @@ -0,0 +1,27 @@ +export const description = ` +Stress tests for allocation of GPUTexture objects through GPUDevice. +`; + +import { makeTestGroup } from '../../common/framework/test_group.js'; +import { GPUTest } from '../../webgpu/gpu_test.js'; + +export const g = makeTestGroup(GPUTest); + +g.test('coexisting') + .desc(`Tests allocation of many coexisting GPUTexture objects.`) + .unimplemented(); + +g.test('continuous,with_destroy') + .desc( + `Tests allocation and destruction of many GPUTexture objects over time. Objects +are sequentially created and destroyed over a very large number of iterations.` + ) + .unimplemented(); + +g.test('continuous,no_destroy') + .desc( + `Tests allocation and implicit GC of many GPUTexture objects over time. Objects +are sequentially created and dropped for GC over a very large number of +iterations.` + ) + .unimplemented(); diff --git a/dom/webgpu/tests/cts/checkout/src/stress/listing.ts b/dom/webgpu/tests/cts/checkout/src/stress/listing.ts new file mode 100644 index 0000000000..823639c692 --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/listing.ts @@ -0,0 +1,5 @@ +/* eslint-disable import/no-restricted-paths */ +import { TestSuiteListing } from '../common/internal/test_suite_listing.js'; +import { makeListing } from '../common/tools/crawl.js'; + +export const listing: Promise<TestSuiteListing> = makeListing(__filename); diff --git a/dom/webgpu/tests/cts/checkout/src/stress/memory/README.txt b/dom/webgpu/tests/cts/checkout/src/stress/memory/README.txt new file mode 100644 index 0000000000..ac0c90bfb7 --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/memory/README.txt @@ -0,0 +1 @@ +Stress tests covering allocation and usage of various types of GPUBuffer objects. 
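The 'churn' test declared in memory/churn.spec.ts below is left unimplemented. As a rough sketch only, one shape such a churn loop could take, assuming the same GPUTest fixture and CTS imports used by the implemented stress tests in this patch (the iteration count and block size are illustrative, sized so that the retained buffers total roughly 2 GiB like the other tests):

import { makeTestGroup } from '../../common/framework/test_group.js';
import { GPUTest } from '../../webgpu/gpu_test.js';

export const g = makeTestGroup(GPUTest);

g.test('churn_sketch')
  .desc(`Illustrative sketch: alternates retained, destroyed, and dropped buffers.`)
  .fn(t => {
    const kIterations = 1_500; // illustrative count
    const kBlockSize = 4 * 1024 * 1024; // illustrative 4 MiB block
    const data = new Uint8Array(kBlockSize);
    const retained: GPUBuffer[] = [];
    for (let i = 0; i < kIterations; ++i) {
      const buffer = t.device.createBuffer({
        size: kBlockSize,
        usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
      });
      // Write to the buffer so that it is actually backed by memory.
      t.device.queue.writeBuffer(buffer, 0, data);
      switch (i % 3) {
        case 0:
          retained.push(buffer); // keep alive; contents would be verified at the end
          break;
        case 1:
          buffer.destroy(); // explicit destruction
          break;
        default:
          break; // drop the reference and rely on GC
      }
    }
    // A complete test would also churn textures and verify the contents of `retained` here.
  });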
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/memory/churn.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/memory/churn.spec.ts new file mode 100644 index 0000000000..fcb899eb29 --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/memory/churn.spec.ts @@ -0,0 +1,17 @@ +export const description = ` +Stress tests covering robustness in the presence of heavy buffer and texture +memory churn. +`; + +import { makeTestGroup } from '../../common/framework/test_group.js'; +import { GPUTest } from '../../webgpu/gpu_test.js'; + +export const g = makeTestGroup(GPUTest); + +g.test('churn') + .desc( + `Allocates and populates a huge number of buffers and textures over time, +retaining some while dropping or explicitly destroying others. When finished, +verifies the expected contents of any remaining buffers and textures.` + ) + .unimplemented(); diff --git a/dom/webgpu/tests/cts/checkout/src/stress/memory/oom.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/memory/oom.spec.ts new file mode 100644 index 0000000000..d14486ecee --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/memory/oom.spec.ts @@ -0,0 +1,178 @@ +export const description = ` +Stress tests covering robustness when available VRAM is exhausted. +`; + +import { makeTestGroup } from '../../common/framework/test_group.js'; +import { unreachable } from '../../common/util/util.js'; +import { GPUConst } from '../../webgpu/constants.js'; +import { GPUTest } from '../../webgpu/gpu_test.js'; +import { exhaustVramUntilUnder64MB } from '../../webgpu/util/memory.js'; + +export const g = makeTestGroup(GPUTest); + +function createBufferWithMapState( + device: GPUDevice, + size: number, + mapState: GPUBufferMapState, + mode: GPUMapModeFlags, + mappedAtCreation: boolean +) { + const mappable = mapState === 'unmapped'; + if (!mappable && !mappedAtCreation) { + return device.createBuffer({ + size, + usage: GPUBufferUsage.UNIFORM, + mappedAtCreation, + }); + } + let buffer: GPUBuffer; + switch (mode) { + case GPUMapMode.READ: + buffer = device.createBuffer({ + size, + usage: GPUBufferUsage.MAP_READ, + mappedAtCreation, + }); + break; + case GPUMapMode.WRITE: + buffer = device.createBuffer({ + size, + usage: GPUBufferUsage.MAP_WRITE, + mappedAtCreation, + }); + break; + default: + unreachable(); + } + // If we want the buffer to be mappable and also mappedAtCreation, we call unmap on it now. + if (mappable && mappedAtCreation) { + buffer.unmap(); + } + return buffer; +} + +g.test('vram_oom') + .desc(`Tests that we can allocate buffers until we run out of VRAM.`) + .fn(async t => { + await exhaustVramUntilUnder64MB(t.device); + }); + +g.test('map_after_vram_oom') + .desc( + `Allocates tons of buffers and textures with varying mapping states (unmappable, +mappable, mapAtCreation, mapAtCreation-then-unmapped) until OOM; then attempts +to mapAsync all the mappable objects. The last buffer should be an error buffer so +mapAsync on it should reject and produce a validation error. ` + ) + .params(u => + u + .combine('mapState', ['mapped', 'unmapped'] as GPUBufferMapState[]) + .combine('mode', [GPUConst.MapMode.READ, GPUConst.MapMode.WRITE]) + .combine('mappedAtCreation', [true, false]) + .combine('unmapBeforeResolve', [true, false]) + ) + .fn(async t => { + // Use a relatively large size to quickly hit OOM. 
+ const kSize = 512 * 1024 * 1024; + + const { mapState, mode, mappedAtCreation, unmapBeforeResolve } = t.params; + const mappable = mapState === 'unmapped'; + const buffers: GPUBuffer[] = []; + // Closure to call map and verify results on all of the buffers. + const finish = async () => { + if (mappable) { + await Promise.all(buffers.map(value => value.mapAsync(mode))); + } else { + buffers.forEach(value => { + t.expectValidationError(() => { + void value.mapAsync(mode); + }); + }); + } + // Finally, destroy all the buffers to free the resources. + buffers.forEach(buffer => buffer.destroy()); + }; + + let errorBuffer: GPUBuffer; + for (;;) { + if (mappedAtCreation) { + // When mappedAtCreation is true, OOM can happen on the client which throws a RangeError. In + // this case, we don't do any validations on the OOM buffer. + try { + t.device.pushErrorScope('out-of-memory'); + const buffer = t.trackForCleanup( + createBufferWithMapState(t.device, kSize, mapState, mode, mappedAtCreation) + ); + if (await t.device.popErrorScope()) { + errorBuffer = buffer; + break; + } + buffers.push(buffer); + } catch (ex) { + t.expect(ex instanceof RangeError); + await finish(); + return; + } + } else { + t.device.pushErrorScope('out-of-memory'); + const buffer = t.trackForCleanup( + createBufferWithMapState(t.device, kSize, mapState, mode, mappedAtCreation) + ); + if (await t.device.popErrorScope()) { + errorBuffer = buffer; + break; + } + buffers.push(buffer); + } + } + + // Do some validation on the OOM buffer. + let promise: Promise<void>; + t.expectValidationError(() => { + promise = errorBuffer.mapAsync(mode); + }); + if (unmapBeforeResolve) { + // Should reject with abort error because buffer will be unmapped + // before validation check finishes. + t.shouldReject('AbortError', promise!); + } else { + // Should also reject in addition to the validation error. + t.shouldReject('OperationError', promise!); + + // Wait for validation error before unmap to ensure validation check + // ends before unmap. + try { + await promise!; + throw new Error('The promise should be rejected.'); + } catch { + // Should cause an exception because the promise should be rejected. + } + } + + // Should throw an OperationError because the buffer is not mapped. + // Note: not a RangeError because the state of the buffer is checked first. + t.shouldThrow('OperationError', () => { + errorBuffer.getMappedRange(); + }); + + // Should't be a validation error even if the buffer failed to be mapped. + errorBuffer.unmap(); + errorBuffer.destroy(); + + // Finish the rest of the test w.r.t the mappable buffers. + await finish(); + }); + +g.test('validation_vs_oom') + .desc( + `Tests that calls affected by both OOM and validation errors expose the +validation error with precedence.` + ) + .unimplemented(); + +g.test('recovery') + .desc( + `Tests that after going VRAM-OOM, destroying allocated resources eventually +allows new resources to be allocated.` + ) + .unimplemented(); diff --git a/dom/webgpu/tests/cts/checkout/src/stress/queries/README.txt b/dom/webgpu/tests/cts/checkout/src/stress/queries/README.txt new file mode 100644 index 0000000000..fe466205c4 --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/queries/README.txt @@ -0,0 +1 @@ +Stress tests covering use of GPUQuerySet objects and related operations. 
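The query stress tests in this directory (occlusion, resolve, timestamps) are all declared but unimplemented. As an illustration only, the occlusion 'many' case could be structured roughly as follows, assuming the GPUTest fixture used elsewhere in this patch and the spec's 4096-entry limit for a single occlusion GPUQuerySet; the query count and render-target size are placeholder values:

import { makeTestGroup } from '../../common/framework/test_group.js';
import { GPUTest } from '../../webgpu/gpu_test.js';

export const g = makeTestGroup(GPUTest);

g.test('many_sketch')
  .desc(`Illustrative sketch: issues many occlusion queries in a single render pass.`)
  .fn(t => {
    const kQueryCount = 4096; // spec maximum for one GPUQuerySet
    const querySet = t.device.createQuerySet({ type: 'occlusion', count: kQueryCount });
    const resolveBuffer = t.device.createBuffer({
      size: kQueryCount * 8, // one 64-bit result per query
      usage: GPUBufferUsage.QUERY_RESOLVE | GPUBufferUsage.COPY_SRC,
    });
    const target = t.device.createTexture({
      size: [4, 4],
      format: 'rgba8unorm',
      usage: GPUTextureUsage.RENDER_ATTACHMENT,
    });
    const module = t.device.createShaderModule({
      code: `
        @vertex fn vmain() -> @builtin(position) vec4<f32> {
          return vec4<f32>(0.0, 0.0, 0.0, 1.0);
        }
        @fragment fn fmain() -> @location(0) vec4<f32> {
          return vec4<f32>(1.0, 0.0, 1.0, 1.0);
        }
      `,
    });
    const pipeline = t.device.createRenderPipeline({
      layout: 'auto',
      vertex: { module, entryPoint: 'vmain' },
      primitive: { topology: 'point-list' },
      fragment: { module, entryPoint: 'fmain', targets: [{ format: 'rgba8unorm' }] },
    });
    const encoder = t.device.createCommandEncoder();
    const pass = encoder.beginRenderPass({
      colorAttachments: [{ view: target.createView(), loadOp: 'clear', storeOp: 'store' }],
      occlusionQuerySet: querySet,
    });
    pass.setPipeline(pipeline);
    for (let i = 0; i < kQueryCount; ++i) {
      // Wrap each single-point draw in its own occlusion query.
      pass.beginOcclusionQuery(i);
      pass.draw(1);
      pass.endOcclusionQuery();
    }
    pass.end();
    // Resolve all query results into a buffer; a real test would read them back and
    // check that each drawn point produced a non-zero sample count.
    encoder.resolveQuerySet(querySet, 0, kQueryCount, resolveBuffer, 0);
    t.device.queue.submit([encoder.finish()]);
  });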
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/queries/occlusion.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/queries/occlusion.spec.ts new file mode 100644 index 0000000000..056d6bdaea --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/queries/occlusion.spec.ts @@ -0,0 +1,10 @@ +export const description = ` +Stress tests for occlusion queries. +`; + +import { makeTestGroup } from '../../common/framework/test_group.js'; +import { GPUTest } from '../../webgpu/gpu_test.js'; + +export const g = makeTestGroup(GPUTest); + +g.test('many').desc(`Tests a huge number of occlusion queries in a render pass.`).unimplemented(); diff --git a/dom/webgpu/tests/cts/checkout/src/stress/queries/resolve.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/queries/resolve.spec.ts new file mode 100644 index 0000000000..da67977395 --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/queries/resolve.spec.ts @@ -0,0 +1,15 @@ +export const description = ` +Stress tests for query resolution. +`; + +import { makeTestGroup } from '../../common/framework/test_group.js'; +import { GPUTest } from '../../webgpu/gpu_test.js'; + +export const g = makeTestGroup(GPUTest); + +g.test('many_large_sets') + .desc( + `Tests a huge number of resolveQuerySet operations on a huge number of +query sets between render passes.` + ) + .unimplemented(); diff --git a/dom/webgpu/tests/cts/checkout/src/stress/queries/timestamps.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/queries/timestamps.spec.ts new file mode 100644 index 0000000000..da3e1eb472 --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/queries/timestamps.spec.ts @@ -0,0 +1,50 @@ +export const description = ` +Stress tests for timestamp queries. +`; + +import { makeTestGroup } from '../../common/framework/test_group.js'; +import { GPUTest } from '../../webgpu/gpu_test.js'; + +export const g = makeTestGroup(GPUTest); + +g.test('command_encoder_one_query_set') + .desc( + `Tests a huge number of timestamp queries over a single query set between render +passes on a single command encoder.` + ) + .unimplemented(); + +g.test('command_encoder_many_query_sets') + .desc( + `Tests a huge number of timestamp queries over many query sets between render +passes on a single command encoder.` + ) + .unimplemented(); + +g.test('render_pass_one_query_set') + .desc( + `Tests a huge number of timestamp queries over a single query set in a single +render pass.` + ) + .unimplemented(); + +g.test('render_pass_many_query_sets') + .desc( + `Tests a huge number of timestamp queries over a huge number of query sets in a +single render pass.` + ) + .unimplemented(); + +g.test('compute_pass_one_query_set') + .desc( + `Tests a huge number of timestamp queries over a single query set in a single +compute pass.` + ) + .unimplemented(); + +g.test('compute_pass_many_query_sets') + .desc( + `Tests a huge number of timestamp queries over a huge number of query sets in a +single compute pass.` + ) + .unimplemented(); diff --git a/dom/webgpu/tests/cts/checkout/src/stress/queue/README.txt b/dom/webgpu/tests/cts/checkout/src/stress/queue/README.txt new file mode 100644 index 0000000000..adb4ec40ce --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/queue/README.txt @@ -0,0 +1 @@ +Stress tests covering GPUQueue usage. 
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/queue/submit.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/queue/submit.spec.ts new file mode 100644 index 0000000000..fcce353272 --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/queue/submit.spec.ts @@ -0,0 +1,102 @@ +export const description = ` +Stress tests for command submission to GPUQueue objects. +`; + +import { makeTestGroup } from '../../common/framework/test_group.js'; +import { iterRange } from '../../common/util/util.js'; +import { GPUTest } from '../../webgpu/gpu_test.js'; + +export const g = makeTestGroup(GPUTest); + +g.test('huge_command_buffer') + .desc( + `Tests submission of huge command buffers to a GPUQueue. Huge buffers are +encoded by chaining together long sequences of compute passes, with expected +results verified at the end of the test.` + ) + .fn(t => { + const kNumElements = 64; + const data = new Uint32Array([...iterRange(kNumElements, x => x)]); + const buffer = t.makeBufferWithContents(data, GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC); + const pipeline = t.device.createComputePipeline({ + layout: 'auto', + compute: { + module: t.device.createShaderModule({ + code: ` + struct Buffer { data: array<u32>, }; + @group(0) @binding(0) var<storage, read_write> buffer: Buffer; + @compute @workgroup_size(1) fn main( + @builtin(global_invocation_id) id: vec3<u32>) { + buffer.data[id.x] = buffer.data[id.x] + 1u; + } + `, + }), + entryPoint: 'main', + }, + }); + const bindGroup = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [{ binding: 0, resource: { buffer } }], + }); + const encoder = t.device.createCommandEncoder(); + const kNumIterations = 500_000; + for (let i = 0; i < kNumIterations; ++i) { + const pass = encoder.beginComputePass(); + pass.setPipeline(pipeline); + pass.setBindGroup(0, bindGroup); + pass.dispatchWorkgroups(kNumElements); + pass.end(); + } + t.device.queue.submit([encoder.finish()]); + t.expectGPUBufferValuesEqual( + buffer, + new Uint32Array([...iterRange(kNumElements, x => x + kNumIterations)]) + ); + }); + +g.test('many_command_buffers') + .desc( + `Tests submission of a huge number of command buffers to a GPUQueue by a single +submit() call.` + ) + .fn(t => { + const kNumElements = 64; + const data = new Uint32Array([...iterRange(kNumElements, x => x)]); + const buffer = t.makeBufferWithContents(data, GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC); + const pipeline = t.device.createComputePipeline({ + layout: 'auto', + compute: { + module: t.device.createShaderModule({ + code: ` + struct Buffer { data: array<u32>, }; + @group(0) @binding(0) var<storage, read_write> buffer: Buffer; + @compute @workgroup_size(1) fn main( + @builtin(global_invocation_id) id: vec3<u32>) { + buffer.data[id.x] = buffer.data[id.x] + 1u; + } + `, + }), + entryPoint: 'main', + }, + }); + const bindGroup = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [{ binding: 0, resource: { buffer } }], + }); + const kNumIterations = 500_000; + const buffers = []; + for (let i = 0; i < kNumIterations; ++i) { + const encoder = t.device.createCommandEncoder(); + const pass = encoder.beginComputePass(); + pass.setPipeline(pipeline); + pass.setBindGroup(0, bindGroup); + pass.dispatchWorkgroups(kNumElements); + pass.end(); + buffers.push(encoder.finish()); + } + t.device.queue.submit(buffers); + t.expectGPUBufferValuesEqual( + buffer, + new Uint32Array([...iterRange(kNumElements, x => x + kNumIterations)]) + ); + }); diff --git 
a/dom/webgpu/tests/cts/checkout/src/stress/render/README.txt b/dom/webgpu/tests/cts/checkout/src/stress/render/README.txt new file mode 100644 index 0000000000..7dcc73fbc3 --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/render/README.txt @@ -0,0 +1,3 @@ +Stress tests covering operations specific to GPURenderPipeline, GPURenderPass, and GPURenderBundle. + +- Issuing draw calls with huge counts. diff --git a/dom/webgpu/tests/cts/checkout/src/stress/render/render_pass.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/render/render_pass.spec.ts new file mode 100644 index 0000000000..6d2917a090 --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/render/render_pass.spec.ts @@ -0,0 +1,354 @@ +export const description = ` +Stress tests covering GPURenderPassEncoder usage. +`; + +import { makeTestGroup } from '../../common/framework/test_group.js'; +import { range } from '../../common/util/util.js'; +import { GPUTest } from '../../webgpu/gpu_test.js'; + +export const g = makeTestGroup(GPUTest); + +g.test('many') + .desc( + `Tests execution of a huge number of render passes using the same GPURenderPipeline. This uses +a single render pass for every output fragment, with each pass executing a one-vertex draw call.` + ) + .fn(t => { + const kSize = 1024; + const module = t.device.createShaderModule({ + code: ` + @vertex fn vmain(@builtin(vertex_index) index: u32) + -> @builtin(position) vec4<f32> { + let position = vec2<f32>(f32(index % ${kSize}u), f32(index / ${kSize}u)); + let r = vec2<f32>(1.0 / f32(${kSize})); + let a = 2.0 * r; + let b = r - vec2<f32>(1.0); + return vec4<f32>(fma(position, a, b), 0.0, 1.0); + } + @fragment fn fmain() -> @location(0) vec4<f32> { + return vec4<f32>(1.0, 0.0, 1.0, 1.0); + } + `, + }); + const pipeline = t.device.createRenderPipeline({ + layout: 'auto', + vertex: { module, entryPoint: 'vmain', buffers: [] }, + primitive: { topology: 'point-list' }, + fragment: { + targets: [{ format: 'rgba8unorm' }], + module, + entryPoint: 'fmain', + }, + }); + const renderTarget = t.device.createTexture({ + size: [kSize, kSize], + usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC, + format: 'rgba8unorm', + }); + const renderPassDescriptor: GPURenderPassDescriptor = { + colorAttachments: [ + { + view: renderTarget.createView(), + loadOp: 'load', + storeOp: 'store', + }, + ], + }; + const encoder = t.device.createCommandEncoder(); + range(kSize * kSize, i => { + const pass = encoder.beginRenderPass(renderPassDescriptor); + pass.setPipeline(pipeline); + pass.draw(1, 1, i); + pass.end(); + }); + t.device.queue.submit([encoder.finish()]); + t.expectSingleColor(renderTarget, 'rgba8unorm', { + size: [kSize, kSize, 1], + exp: { R: 1, G: 0, B: 1, A: 1 }, + }); + }); + +g.test('pipeline_churn') + .desc( + `Tests execution of a large number of render pipelines, each within its own render pass. 
Each +pass does a single draw call, with one pass per output fragment.` + ) + .fn(t => { + const kWidth = 64; + const kHeight = 8; + const module = t.device.createShaderModule({ + code: ` + @vertex fn vmain(@builtin(vertex_index) index: u32) + -> @builtin(position) vec4<f32> { + let position = vec2<f32>(f32(index % ${kWidth}u), f32(index / ${kWidth}u)); + let size = vec2<f32>(f32(${kWidth}), f32(${kHeight})); + let r = vec2<f32>(1.0) / size; + let a = 2.0 * r; + let b = r - vec2<f32>(1.0); + return vec4<f32>(fma(position, a, b), 0.0, 1.0); + } + @fragment fn fmain() -> @location(0) vec4<f32> { + return vec4<f32>(1.0, 0.0, 1.0, 1.0); + } + `, + }); + const renderTarget = t.device.createTexture({ + size: [kWidth, kHeight], + usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC, + format: 'rgba8unorm', + }); + const depthTarget = t.device.createTexture({ + size: [kWidth, kHeight], + usage: GPUTextureUsage.RENDER_ATTACHMENT, + format: 'depth24plus-stencil8', + }); + const renderPassDescriptor: GPURenderPassDescriptor = { + colorAttachments: [ + { + view: renderTarget.createView(), + loadOp: 'load', + storeOp: 'store', + }, + ], + depthStencilAttachment: { + view: depthTarget.createView(), + depthLoadOp: 'load', + depthStoreOp: 'store', + stencilLoadOp: 'load', + stencilStoreOp: 'discard', + }, + }; + const encoder = t.device.createCommandEncoder(); + range(kWidth * kHeight, i => { + const pipeline = t.device.createRenderPipeline({ + layout: 'auto', + vertex: { module, entryPoint: 'vmain', buffers: [] }, + primitive: { topology: 'point-list' }, + depthStencil: { + format: 'depth24plus-stencil8', + depthCompare: 'always', + depthWriteEnabled: false, + // Not really used, but it ensures that each pipeline is unique. + depthBias: i, + }, + fragment: { + targets: [{ format: 'rgba8unorm' }], + module, + entryPoint: 'fmain', + }, + }); + const pass = encoder.beginRenderPass(renderPassDescriptor); + pass.setPipeline(pipeline); + pass.draw(1, 1, i); + pass.end(); + }); + t.device.queue.submit([encoder.finish()]); + t.expectSingleColor(renderTarget, 'rgba8unorm', { + size: [kWidth, kHeight, 1], + exp: { R: 1, G: 0, B: 1, A: 1 }, + }); + }); + +g.test('bind_group_churn') + .desc( + `Tests execution of render passes which switch between a huge number of bind groups. This uses +a single render pass with a single pipeline, and one draw call per fragment of the output texture. 
+Each draw call is made with a unique bind group 0, with binding 0 referencing a unique uniform +buffer.` + ) + .fn(t => { + const kSize = 128; + const module = t.device.createShaderModule({ + code: ` + struct Uniforms { index: u32, }; + @group(0) @binding(0) var<uniform> uniforms: Uniforms; + @vertex fn vmain() -> @builtin(position) vec4<f32> { + let index = uniforms.index; + let position = vec2<f32>(f32(index % ${kSize}u), f32(index / ${kSize}u)); + let r = vec2<f32>(1.0 / f32(${kSize})); + let a = 2.0 * r; + let b = r - vec2<f32>(1.0); + return vec4<f32>(fma(position, a, b), 0.0, 1.0); + } + @fragment fn fmain() -> @location(0) vec4<f32> { + return vec4<f32>(1.0, 0.0, 1.0, 1.0); + } + `, + }); + const layout = t.device.createBindGroupLayout({ + entries: [ + { + binding: 0, + visibility: GPUShaderStage.VERTEX, + buffer: { type: 'uniform' }, + }, + ], + }); + const pipeline = t.device.createRenderPipeline({ + layout: t.device.createPipelineLayout({ bindGroupLayouts: [layout] }), + vertex: { module, entryPoint: 'vmain', buffers: [] }, + primitive: { topology: 'point-list' }, + fragment: { + targets: [{ format: 'rgba8unorm' }], + module, + entryPoint: 'fmain', + }, + }); + const renderTarget = t.device.createTexture({ + size: [kSize, kSize], + usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC, + format: 'rgba8unorm', + }); + const renderPassDescriptor: GPURenderPassDescriptor = { + colorAttachments: [ + { + view: renderTarget.createView(), + loadOp: 'load', + storeOp: 'store', + }, + ], + }; + const encoder = t.device.createCommandEncoder(); + const pass = encoder.beginRenderPass(renderPassDescriptor); + pass.setPipeline(pipeline); + range(kSize * kSize, i => { + const buffer = t.device.createBuffer({ + size: 4, + usage: GPUBufferUsage.UNIFORM, + mappedAtCreation: true, + }); + new Uint32Array(buffer.getMappedRange())[0] = i; + buffer.unmap(); + pass.setBindGroup( + 0, + t.device.createBindGroup({ layout, entries: [{ binding: 0, resource: { buffer } }] }) + ); + pass.draw(1, 1); + }); + pass.end(); + t.device.queue.submit([encoder.finish()]); + t.expectSingleColor(renderTarget, 'rgba8unorm', { + size: [kSize, kSize, 1], + exp: { R: 1, G: 0, B: 1, A: 1 }, + }); + }); + +g.test('many_draws') + .desc( + `Tests execution of render passes with a huge number of draw calls. 
This uses a single +render pass with a single pipeline, and one draw call per fragment of the output texture.` + ) + .fn(t => { + const kSize = 4096; + const module = t.device.createShaderModule({ + code: ` + @vertex fn vmain(@builtin(vertex_index) index: u32) + -> @builtin(position) vec4<f32> { + let position = vec2<f32>(f32(index % ${kSize}u), f32(index / ${kSize}u)); + let r = vec2<f32>(1.0 / f32(${kSize})); + let a = 2.0 * r; + let b = r - vec2<f32>(1.0); + return vec4<f32>(fma(position, a, b), 0.0, 1.0); + } + @fragment fn fmain() -> @location(0) vec4<f32> { + return vec4<f32>(1.0, 0.0, 1.0, 1.0); + } + `, + }); + const pipeline = t.device.createRenderPipeline({ + layout: 'auto', + vertex: { module, entryPoint: 'vmain', buffers: [] }, + primitive: { topology: 'point-list' }, + fragment: { + targets: [{ format: 'rgba8unorm' }], + module, + entryPoint: 'fmain', + }, + }); + const renderTarget = t.device.createTexture({ + size: [kSize, kSize], + usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC, + format: 'rgba8unorm', + }); + const renderPassDescriptor: GPURenderPassDescriptor = { + colorAttachments: [ + { + view: renderTarget.createView(), + loadOp: 'load', + storeOp: 'store', + }, + ], + }; + const encoder = t.device.createCommandEncoder(); + const pass = encoder.beginRenderPass(renderPassDescriptor); + pass.setPipeline(pipeline); + range(kSize * kSize, i => pass.draw(1, 1, i)); + pass.end(); + t.device.queue.submit([encoder.finish()]); + t.expectSingleColor(renderTarget, 'rgba8unorm', { + size: [kSize, kSize, 1], + exp: { R: 1, G: 0, B: 1, A: 1 }, + }); + }); + +g.test('huge_draws') + .desc( + `Tests execution of several render passes with huge draw calls. Each pass uses a single draw +call which draws multiple vertices for each fragment of a large output texture.` + ) + .fn(t => { + const kSize = 32768; + const kTextureSize = 4096; + const kVertsPerFragment = (kSize * kSize) / (kTextureSize * kTextureSize); + const module = t.device.createShaderModule({ + code: ` + @vertex fn vmain(@builtin(vertex_index) vert_index: u32) + -> @builtin(position) vec4<f32> { + let index = vert_index / ${kVertsPerFragment}u; + let position = vec2<f32>(f32(index % ${kTextureSize}u), f32(index / ${kTextureSize}u)); + let r = vec2<f32>(1.0 / f32(${kTextureSize})); + let a = 2.0 * r; + let b = r - vec2<f32>(1.0); + return vec4<f32>(fma(position, a, b), 0.0, 1.0); + } + @fragment fn fmain() -> @location(0) vec4<f32> { + return vec4<f32>(1.0, 0.0, 1.0, 1.0); + } + `, + }); + const pipeline = t.device.createRenderPipeline({ + layout: 'auto', + vertex: { module, entryPoint: 'vmain', buffers: [] }, + primitive: { topology: 'point-list' }, + fragment: { + targets: [{ format: 'rgba8unorm' }], + module, + entryPoint: 'fmain', + }, + }); + const renderTarget = t.device.createTexture({ + size: [kTextureSize, kTextureSize], + usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC, + format: 'rgba8unorm', + }); + const renderPassDescriptor: GPURenderPassDescriptor = { + colorAttachments: [ + { + view: renderTarget.createView(), + loadOp: 'load', + storeOp: 'store', + }, + ], + }; + + const encoder = t.device.createCommandEncoder(); + const pass = encoder.beginRenderPass(renderPassDescriptor); + pass.setPipeline(pipeline); + pass.draw(kSize * kSize); + pass.end(); + t.device.queue.submit([encoder.finish()]); + t.expectSingleColor(renderTarget, 'rgba8unorm', { + size: [kTextureSize, kTextureSize, 1], + exp: { R: 1, G: 0, B: 1, A: 1 }, + }); + }); diff --git 
a/dom/webgpu/tests/cts/checkout/src/stress/render/vertex_buffers.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/render/vertex_buffers.spec.ts new file mode 100644 index 0000000000..e055f96fc4 --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/render/vertex_buffers.spec.ts @@ -0,0 +1,130 @@ +export const description = ` +Stress tests covering vertex buffer usage. +`; + +import { makeTestGroup } from '../../common/framework/test_group.js'; +import { GPUTest } from '../../webgpu/gpu_test.js'; + +export const g = makeTestGroup(GPUTest); + +function createHugeVertexBuffer(t: GPUTest, size: number) { + const kBufferSize = size * size * 8; + const buffer = t.device.createBuffer({ + size: kBufferSize, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, + }); + const pipeline = t.device.createComputePipeline({ + layout: 'auto', + compute: { + module: t.device.createShaderModule({ + code: ` + struct Buffer { data: array<vec2<u32>>, }; + @group(0) @binding(0) var<storage, read_write> buffer: Buffer; + @compute @workgroup_size(1) fn main( + @builtin(global_invocation_id) id: vec3<u32>) { + let base = id.x * ${size}u; + for (var x: u32 = 0u; x < ${size}u; x = x + 1u) { + buffer.data[base + x] = vec2<u32>(x, id.x); + } + } + `, + }), + entryPoint: 'main', + }, + }); + const bindGroup = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [ + { + binding: 0, + resource: { buffer }, + }, + ], + }); + const encoder = t.device.createCommandEncoder(); + const pass = encoder.beginComputePass(); + pass.setPipeline(pipeline); + pass.setBindGroup(0, bindGroup); + pass.dispatchWorkgroups(size); + pass.end(); + + const vertexBuffer = t.device.createBuffer({ + size: kBufferSize, + usage: GPUBufferUsage.VERTEX | GPUBufferUsage.COPY_DST, + }); + encoder.copyBufferToBuffer(buffer, 0, vertexBuffer, 0, kBufferSize); + t.device.queue.submit([encoder.finish()]); + return vertexBuffer; +} + +g.test('many') + .desc(`Tests execution of draw calls using a huge vertex buffer.`) + .fn(t => { + const kSize = 4096; + const buffer = createHugeVertexBuffer(t, kSize); + const module = t.device.createShaderModule({ + code: ` + @vertex fn vmain(@location(0) position: vec2<u32>) + -> @builtin(position) vec4<f32> { + let r = vec2<f32>(1.0 / f32(${kSize})); + let a = 2.0 * r; + let b = r - vec2<f32>(1.0); + return vec4<f32>(fma(vec2<f32>(position), a, b), 0.0, 1.0); + } + @fragment fn fmain() -> @location(0) vec4<f32> { + return vec4<f32>(1.0, 0.0, 1.0, 1.0); + } + `, + }); + const pipeline = t.device.createRenderPipeline({ + layout: 'auto', + vertex: { + module, + entryPoint: 'vmain', + buffers: [ + { + arrayStride: 8, + attributes: [ + { + format: 'uint32x2', + offset: 0, + shaderLocation: 0, + }, + ], + }, + ], + }, + primitive: { topology: 'point-list' }, + fragment: { + targets: [{ format: 'rgba8unorm' }], + module, + entryPoint: 'fmain', + }, + }); + const renderTarget = t.device.createTexture({ + size: [kSize, kSize], + usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC, + format: 'rgba8unorm', + }); + const renderPassDescriptor: GPURenderPassDescriptor = { + colorAttachments: [ + { + view: renderTarget.createView(), + loadOp: 'load', + storeOp: 'store', + }, + ], + }; + + const encoder = t.device.createCommandEncoder(); + const pass = encoder.beginRenderPass(renderPassDescriptor); + pass.setPipeline(pipeline); + pass.setVertexBuffer(0, buffer); + pass.draw(kSize * kSize); + pass.end(); + t.device.queue.submit([encoder.finish()]); + t.expectSingleColor(renderTarget, 
'rgba8unorm', { + size: [kSize, kSize, 1], + exp: { R: 1, G: 0, B: 1, A: 1 }, + }); + }); diff --git a/dom/webgpu/tests/cts/checkout/src/stress/shaders/README.txt b/dom/webgpu/tests/cts/checkout/src/stress/shaders/README.txt new file mode 100644 index 0000000000..628b4e86fa --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/shaders/README.txt @@ -0,0 +1 @@ +Stress tests covering very long-running and/or resource-intensive shaders. diff --git a/dom/webgpu/tests/cts/checkout/src/stress/shaders/entry_points.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/shaders/entry_points.spec.ts new file mode 100644 index 0000000000..313c79a8c3 --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/shaders/entry_points.spec.ts @@ -0,0 +1,78 @@ +export const description = ` +Stress tests covering behavior around shader entry points. +`; + +import { makeTestGroup } from '../../common/framework/test_group.js'; +import { range } from '../../common/util/util.js'; +import { GPUTest } from '../../webgpu/gpu_test.js'; + +export const g = makeTestGroup(GPUTest); + +const makeCode = (numEntryPoints: number) => { + const kBaseCode = ` + struct Buffer { data: u32, }; + @group(0) @binding(0) var<storage, read_write> buffer: Buffer; + fn main() { buffer.data = buffer.data + 1u; } + `; + const makeEntryPoint = (i: number) => ` + @compute @workgroup_size(1) fn computeMain${i}() { main(); } + `; + return kBaseCode + range(numEntryPoints, makeEntryPoint).join(''); +}; + +g.test('many') + .desc( + `Tests compilation and usage of shaders with a huge number of entry points. + +TODO: There may be a normative limit to the number of entry points allowed in +a shader, in which case this would become a validation test instead.` + ) + .fn(t => { + const data = new Uint32Array([0]); + const buffer = t.makeBufferWithContents(data, GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC); + + // NOTE: Initial shader compilation time seems to scale exponentially with + // this value in Chrome. + const kNumEntryPoints = 200; + + const shader = t.device.createShaderModule({ + code: makeCode(kNumEntryPoints), + }); + + const layout = t.device.createBindGroupLayout({ + entries: [ + { + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { type: 'storage' }, + }, + ], + }); + const pipelineLayout = t.device.createPipelineLayout({ + bindGroupLayouts: [layout], + }); + const bindGroup = t.device.createBindGroup({ + layout, + entries: [{ binding: 0, resource: { buffer } }], + }); + + const encoder = t.device.createCommandEncoder(); + range(kNumEntryPoints, i => { + const pipeline = t.device.createComputePipeline({ + layout: pipelineLayout, + compute: { + module: shader, + entryPoint: `computeMain${i}`, + }, + }); + + const pass = encoder.beginComputePass(); + pass.setPipeline(pipeline); + pass.setBindGroup(0, bindGroup); + pass.dispatchWorkgroups(1); + pass.end(); + }); + + t.device.queue.submit([encoder.finish()]); + t.expectGPUBufferValuesEqual(buffer, new Uint32Array([kNumEntryPoints])); + }); diff --git a/dom/webgpu/tests/cts/checkout/src/stress/shaders/non_halting.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/shaders/non_halting.spec.ts new file mode 100644 index 0000000000..b88aa083b3 --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/shaders/non_halting.spec.ts @@ -0,0 +1,194 @@ +export const description = ` +Stress tests covering robustness in the presence of non-halting shaders. 
+`; + +import { makeTestGroup } from '../../common/framework/test_group.js'; +import { GPUTest } from '../../webgpu/gpu_test.js'; + +export const g = makeTestGroup(GPUTest); + +g.test('compute') + .desc( + `Tests execution of compute passes with non-halting dispatch operations. + +This is expected to hang for a bit, but it should ultimately result in graceful +device loss.` + ) + .fn(async t => { + const data = new Uint32Array([0]); + const buffer = t.makeBufferWithContents(data, GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC); + const module = t.device.createShaderModule({ + code: ` + struct Buffer { data: u32, }; + @group(0) @binding(0) var<storage, read_write> buffer: Buffer; + @compute @workgroup_size(1) fn main() { + loop { + if (buffer.data == 1u) { + break; + } + buffer.data = buffer.data + 2u; + } + } + `, + }); + const pipeline = t.device.createComputePipeline({ + layout: 'auto', + compute: { module, entryPoint: 'main' }, + }); + const encoder = t.device.createCommandEncoder(); + const pass = encoder.beginComputePass(); + pass.setPipeline(pipeline); + const bindGroup = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [{ binding: 0, resource: { buffer } }], + }); + pass.setBindGroup(0, bindGroup); + pass.dispatchWorkgroups(1); + pass.end(); + t.device.queue.submit([encoder.finish()]); + await t.device.lost; + }); + +g.test('vertex') + .desc( + `Tests execution of render passes with a non-halting vertex stage. + +This is expected to hang for a bit, but it should ultimately result in graceful +device loss.` + ) + .fn(async t => { + const module = t.device.createShaderModule({ + code: ` + struct Data { counter: u32, increment: u32, }; + @group(0) @binding(0) var<uniform> data: Data; + @vertex fn vmain() -> @builtin(position) vec4<f32> { + var counter: u32 = data.counter; + loop { + if (counter % 2u == 1u) { + break; + } + counter = counter + data.increment; + } + return vec4<f32>(1.0, 1.0, 0.0, f32(counter)); + } + @fragment fn fmain() -> @location(0) vec4<f32> { + return vec4<f32>(1.0); + } + `, + }); + + const pipeline = t.device.createRenderPipeline({ + layout: 'auto', + vertex: { module, entryPoint: 'vmain', buffers: [] }, + primitive: { topology: 'point-list' }, + fragment: { + targets: [{ format: 'rgba8unorm' }], + module, + entryPoint: 'fmain', + }, + }); + const uniforms = t.makeBufferWithContents(new Uint32Array([0, 2]), GPUBufferUsage.UNIFORM); + const bindGroup = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [ + { + binding: 0, + resource: { buffer: uniforms }, + }, + ], + }); + const renderTarget = t.device.createTexture({ + size: [1, 1], + usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC, + format: 'rgba8unorm', + }); + const encoder = t.device.createCommandEncoder(); + const pass = encoder.beginRenderPass({ + colorAttachments: [ + { + view: renderTarget.createView(), + clearValue: [0, 0, 0, 0], + loadOp: 'clear', + storeOp: 'store', + }, + ], + }); + pass.setPipeline(pipeline); + pass.setBindGroup(0, bindGroup); + pass.draw(1); + pass.end(); + t.device.queue.submit([encoder.finish()]); + await t.device.lost; + }); + +g.test('fragment') + .desc( + `Tests execution of render passes with a non-halting fragment stage. 
+ +This is expected to hang for a bit, but it should ultimately result in graceful +device loss.` + ) + .fn(async t => { + const module = t.device.createShaderModule({ + code: ` + struct Data { counter: u32, increment: u32, }; + @group(0) @binding(0) var<uniform> data: Data; + @vertex fn vmain() -> @builtin(position) vec4<f32> { + return vec4<f32>(0.0, 0.0, 0.0, 1.0); + } + @fragment fn fmain() -> @location(0) vec4<f32> { + var counter: u32 = data.counter; + loop { + if (counter % 2u == 1u) { + break; + } + counter = counter + data.increment; + } + return vec4<f32>(1.0 / f32(counter), 0.0, 0.0, 1.0); + } + `, + }); + + const pipeline = t.device.createRenderPipeline({ + layout: 'auto', + vertex: { module, entryPoint: 'vmain', buffers: [] }, + primitive: { topology: 'point-list' }, + fragment: { + targets: [{ format: 'rgba8unorm' }], + module, + entryPoint: 'fmain', + }, + }); + const uniforms = t.makeBufferWithContents(new Uint32Array([0, 2]), GPUBufferUsage.UNIFORM); + const bindGroup = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [ + { + binding: 0, + resource: { buffer: uniforms }, + }, + ], + }); + const renderTarget = t.device.createTexture({ + size: [1, 1], + usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC, + format: 'rgba8unorm', + }); + const encoder = t.device.createCommandEncoder(); + const pass = encoder.beginRenderPass({ + colorAttachments: [ + { + view: renderTarget.createView(), + clearValue: [0, 0, 0, 0], + loadOp: 'clear', + storeOp: 'store', + }, + ], + }); + pass.setPipeline(pipeline); + pass.setBindGroup(0, bindGroup); + pass.draw(1); + pass.end(); + t.device.queue.submit([encoder.finish()]); + await t.device.lost; + }); diff --git a/dom/webgpu/tests/cts/checkout/src/stress/shaders/slow.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/shaders/slow.spec.ts new file mode 100644 index 0000000000..8e354105b6 --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/shaders/slow.spec.ts @@ -0,0 +1,191 @@ +export const description = ` +Stress tests covering robustness in the presence of slow shaders. 
+`; + +import { makeTestGroup } from '../../common/framework/test_group.js'; +import { GPUTest, TextureTestMixin } from '../../webgpu/gpu_test.js'; + +export const g = makeTestGroup(TextureTestMixin(GPUTest)); + +g.test('compute') + .desc(`Tests execution of compute passes with very long-running dispatch operations.`) + .fn(t => { + const kDispatchSize = 1000; + const data = new Uint32Array(kDispatchSize); + const buffer = t.makeBufferWithContents(data, GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC); + const module = t.device.createShaderModule({ + code: ` + struct Buffer { data: array<u32>, }; + @group(0) @binding(0) var<storage, read_write> buffer: Buffer; + @compute @workgroup_size(1) fn main( + @builtin(global_invocation_id) id: vec3<u32>) { + loop { + if (buffer.data[id.x] == 1000000u) { + break; + } + buffer.data[id.x] = buffer.data[id.x] + 1u; + } + } + `, + }); + const pipeline = t.device.createComputePipeline({ + layout: 'auto', + compute: { module, entryPoint: 'main' }, + }); + const encoder = t.device.createCommandEncoder(); + const pass = encoder.beginComputePass(); + pass.setPipeline(pipeline); + const bindGroup = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [{ binding: 0, resource: { buffer } }], + }); + pass.setBindGroup(0, bindGroup); + pass.dispatchWorkgroups(kDispatchSize); + pass.end(); + t.device.queue.submit([encoder.finish()]); + t.expectGPUBufferValuesEqual(buffer, new Uint32Array(new Array(kDispatchSize).fill(1000000))); + }); + +g.test('vertex') + .desc(`Tests execution of render passes with a very long-running vertex stage.`) + .fn(t => { + const module = t.device.createShaderModule({ + code: ` + struct Data { counter: u32, increment: u32, }; + @group(0) @binding(0) var<uniform> data: Data; + @vertex fn vmain() -> @builtin(position) vec4<f32> { + var counter: u32 = data.counter; + loop { + counter = counter + data.increment; + if (counter % 50000000u == 0u) { + break; + } + } + return vec4<f32>(1.0, 1.0, 0.0, f32(counter)); + } + @fragment fn fmain() -> @location(0) vec4<f32> { + return vec4<f32>(1.0, 1.0, 0.0, 1.0); + } + `, + }); + + const pipeline = t.device.createRenderPipeline({ + layout: 'auto', + vertex: { module, entryPoint: 'vmain', buffers: [] }, + primitive: { topology: 'point-list' }, + fragment: { + targets: [{ format: 'rgba8unorm' }], + module, + entryPoint: 'fmain', + }, + }); + const uniforms = t.makeBufferWithContents(new Uint32Array([0, 1]), GPUBufferUsage.UNIFORM); + const bindGroup = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [ + { + binding: 0, + resource: { buffer: uniforms }, + }, + ], + }); + const renderTarget = t.device.createTexture({ + size: [3, 3], + usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC, + format: 'rgba8unorm', + }); + const encoder = t.device.createCommandEncoder(); + const pass = encoder.beginRenderPass({ + colorAttachments: [ + { + view: renderTarget.createView(), + clearValue: [0, 0, 0, 0], + loadOp: 'clear', + storeOp: 'store', + }, + ], + }); + pass.setPipeline(pipeline); + pass.setBindGroup(0, bindGroup); + pass.draw(1); + pass.end(); + t.device.queue.submit([encoder.finish()]); + t.expectSinglePixelComparisonsAreOkInTexture({ texture: renderTarget }, [ + { + coord: { x: 1, y: 1 }, + exp: new Uint8Array([255, 255, 0, 255]), + }, + ]); + }); + +g.test('fragment') + .desc(`Tests execution of render passes with a very long-running fragment stage.`) + .fn(t => { + const module = t.device.createShaderModule({ + code: ` + struct 
Data { counter: u32, increment: u32, }; + @group(0) @binding(0) var<uniform> data: Data; + @vertex fn vmain() -> @builtin(position) vec4<f32> { + return vec4<f32>(0.0, 0.0, 0.0, 1.0); + } + @fragment fn fmain() -> @location(0) vec4<f32> { + var counter: u32 = data.counter; + loop { + counter = counter + data.increment; + if (counter % 50000000u == 0u) { + break; + } + } + return vec4<f32>(1.0, 1.0, 1.0 / f32(counter), 1.0); + } + `, + }); + + const pipeline = t.device.createRenderPipeline({ + layout: 'auto', + vertex: { module, entryPoint: 'vmain', buffers: [] }, + primitive: { topology: 'point-list' }, + fragment: { + targets: [{ format: 'rgba8unorm' }], + module, + entryPoint: 'fmain', + }, + }); + const uniforms = t.makeBufferWithContents(new Uint32Array([0, 1]), GPUBufferUsage.UNIFORM); + const bindGroup = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [ + { + binding: 0, + resource: { buffer: uniforms }, + }, + ], + }); + const renderTarget = t.device.createTexture({ + size: [3, 3], + usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC, + format: 'rgba8unorm', + }); + const encoder = t.device.createCommandEncoder(); + const pass = encoder.beginRenderPass({ + colorAttachments: [ + { + view: renderTarget.createView(), + clearValue: [0, 0, 0, 0], + loadOp: 'clear', + storeOp: 'store', + }, + ], + }); + pass.setPipeline(pipeline); + pass.setBindGroup(0, bindGroup); + pass.draw(1); + pass.end(); + t.device.queue.submit([encoder.finish()]); + t.expectSinglePixelComparisonsAreOkInTexture({ texture: renderTarget }, [ + { + coord: { x: 1, y: 1 }, + exp: new Uint8Array([255, 255, 0, 255]), + }, + ]); + }); diff --git a/dom/webgpu/tests/cts/checkout/src/stress/texture/README.txt b/dom/webgpu/tests/cts/checkout/src/stress/texture/README.txt new file mode 100644 index 0000000000..db40963b2e --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/texture/README.txt @@ -0,0 +1 @@ +Stress tests covering texture usage. diff --git a/dom/webgpu/tests/cts/checkout/src/stress/texture/large.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/texture/large.spec.ts new file mode 100644 index 0000000000..cba2053d38 --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/texture/large.spec.ts @@ -0,0 +1,56 @@ +export const description = ` +Stress tests covering usage of very large textures. +`; + +import { makeTestGroup } from '../../common/framework/test_group.js'; +import { GPUTest } from '../../webgpu/gpu_test.js'; + +export const g = makeTestGroup(GPUTest); + +g.test('loading,2d') + .desc( + `Tests execution of shaders loading values from very large (up to at least +8192x8192) 2D textures. The texture size is selected according to the limit +supported by the GPUDevice.` + ) + .unimplemented(); + +g.test('loading,2d_array') + .desc( + `Tests execution of shaders loading values from very large (up to at least +8192x8192x2048) arrays of 2D textures. The texture and array size is selected +according to limits supported by the GPUDevice.` + ) + .unimplemented(); + +g.test('loading,3d') + .desc( + `Tests execution of shaders loading values from very large (up to at least +2048x2048x2048) textures. The texture size is selected according to the limit +supported by the GPUDevice.` + ) + .unimplemented(); + +g.test('sampling,2d') + .desc( + `Tests execution of shaders sampling values from very large (up to at least +8192x8192) 2D textures. 
The texture size is selected according to the limit
+supported by the GPUDevice.`
+  )
+  .unimplemented();
+
+g.test('sampling,2d_array')
+  .desc(
+    `Tests execution of shaders sampling values from very large (up to at least
+8192x8192x2048) arrays of 2D textures. The texture and array size is selected
+according to limits supported by the GPUDevice.`
+  )
+  .unimplemented();
+
+g.test('sampling,3d')
+  .desc(
+    `Tests execution of shaders sampling values from very large (up to at least
+2048x2048x2048) textures. The texture size is selected according to the limit
+supported by the GPUDevice.`
+  )
+  .unimplemented();
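
Note: every test in large.spec.ts above lands as an .unimplemented() stub. As a rough, hypothetical sketch only (not part of the change above), one of them might size its texture from the device's reported limits roughly as follows; the test name, sizing policy, and omitted verification step are assumptions by the editor.

import { makeTestGroup } from '../../common/framework/test_group.js';
import { GPUTest } from '../../webgpu/gpu_test.js';

export const g = makeTestGroup(GPUTest);

g.test('loading,2d_sketch')
  .desc(`Hypothetical sketch only: touch a texel of a texture sized at the device's 2D limit.`)
  .fn(t => {
    // GPUSupportedLimits.maxTextureDimension2D defaults to 8192 in the WebGPU spec,
    // so this is at least the 8192x8192 size called out in the descriptions above.
    const kSize = t.device.limits.maxTextureDimension2D;
    const texture = t.device.createTexture({
      size: [kSize, kSize],
      format: 'rgba8unorm',
      usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.COPY_DST,
    });
    // Write a sentinel into the far corner; a real test would textureLoad() it back
    // in a shader and compare the result, which this sketch omits.
    t.device.queue.writeTexture(
      { texture, origin: [kSize - 1, kSize - 1] },
      new Uint8Array([255, 0, 255, 255]),
      {},
      [1, 1]
    );
  });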