diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
commit | 36d22d82aa202bb199967e9512281e9a53db42c9 (patch) | |
tree | 105e8c98ddea1c1e4784a60a5a6410fa416be2de /dom/webgpu/tests/cts/checkout/src/stress/adapter | |
parent | Initial commit. (diff) | |
download | firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip |
Adding upstream version 115.7.0esr.upstream/115.7.0esrupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'dom/webgpu/tests/cts/checkout/src/stress/adapter')
-rw-r--r-- | dom/webgpu/tests/cts/checkout/src/stress/adapter/README.txt | 1 | ||||
-rw-r--r-- | dom/webgpu/tests/cts/checkout/src/stress/adapter/device_allocation.spec.ts | 290 |
2 files changed, 291 insertions, 0 deletions
diff --git a/dom/webgpu/tests/cts/checkout/src/stress/adapter/README.txt b/dom/webgpu/tests/cts/checkout/src/stress/adapter/README.txt new file mode 100644 index 0000000000..3a57f3d87f --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/adapter/README.txt @@ -0,0 +1 @@ +Stress tests covering use of GPUAdapter. diff --git a/dom/webgpu/tests/cts/checkout/src/stress/adapter/device_allocation.spec.ts b/dom/webgpu/tests/cts/checkout/src/stress/adapter/device_allocation.spec.ts new file mode 100644 index 0000000000..184b4e8170 --- /dev/null +++ b/dom/webgpu/tests/cts/checkout/src/stress/adapter/device_allocation.spec.ts @@ -0,0 +1,290 @@ +export const description = ` +Stress tests for GPUAdapter.requestDevice. +`; + +import { Fixture } from '../../common/framework/fixture.js'; +import { makeTestGroup } from '../../common/framework/test_group.js'; +import { attemptGarbageCollection } from '../../common/util/collect_garbage.js'; +import { keysOf } from '../../common/util/data_tables.js'; +import { getGPU } from '../../common/util/navigator_gpu.js'; +import { assert, iterRange } from '../../common/util/util.js'; +import { kLimitInfo } from '../../webgpu/capability_info.js'; + +export const g = makeTestGroup(Fixture); + +/** Adapter preference identifier to option. */ +const kAdapterTypeOptions: { + readonly [k in GPUPowerPreference | 'fallback']: GPURequestAdapterOptions; +} = /* prettier-ignore */ { + 'low-power': { powerPreference: 'low-power', forceFallbackAdapter: false }, + 'high-performance': { powerPreference: 'high-performance', forceFallbackAdapter: false }, + 'fallback': { powerPreference: undefined, forceFallbackAdapter: true }, +}; +/** List of all adapter hint types. */ +const kAdapterTypes = keysOf(kAdapterTypeOptions); + +/** + * Creates a device, a valid compute pipeline, valid resources for the pipeline, and + * ties them together into a set of compute commands ready to be submitted to the GPU + * queue. Does not submit the commands in order to make sure that all resources are + * kept alive until the device is destroyed. + */ +async function createDeviceAndComputeCommands(adapter: GPUAdapter) { + // Constants are computed such that per run, this function should allocate roughly 2G + // worth of data. This should be sufficient as we run these creation functions many + // times. If the data backing the created objects is not recycled we should OOM. + const kNumPipelines = 64; + const kNumBindgroups = 128; + const kNumBufferElements = + kLimitInfo.maxComputeWorkgroupSizeX.default * kLimitInfo.maxComputeWorkgroupSizeY.default; + const kBufferSize = kNumBufferElements * 4; + const kBufferData = new Uint32Array([...iterRange(kNumBufferElements, x => x)]); + + const device: GPUDevice = await adapter.requestDevice(); + const commands = []; + + for (let pipelineIndex = 0; pipelineIndex < kNumPipelines; ++pipelineIndex) { + const pipeline = device.createComputePipeline({ + layout: 'auto', + compute: { + module: device.createShaderModule({ + code: ` + struct Buffer { data: array<u32>, }; + + @group(0) @binding(0) var<storage, read_write> buffer: Buffer; + @compute @workgroup_size(1) fn main( + @builtin(global_invocation_id) id: vec3<u32>) { + buffer.data[id.x * ${kLimitInfo.maxComputeWorkgroupSizeX.default}u + id.y] = + buffer.data[id.x * ${kLimitInfo.maxComputeWorkgroupSizeX.default}u + id.y] + + ${pipelineIndex}u; + } + `, + }), + entryPoint: 'main', + }, + }); + for (let bindgroupIndex = 0; bindgroupIndex < kNumBindgroups; ++bindgroupIndex) { + const buffer = device.createBuffer({ + size: kBufferSize, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC, + }); + device.queue.writeBuffer(buffer, 0, kBufferData, 0, kBufferData.length); + const bindgroup = device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [{ binding: 0, resource: { buffer } }], + }); + + const encoder = device.createCommandEncoder(); + const pass = encoder.beginComputePass(); + pass.setPipeline(pipeline); + pass.setBindGroup(0, bindgroup); + pass.dispatchWorkgroups( + kLimitInfo.maxComputeWorkgroupSizeX.default, + kLimitInfo.maxComputeWorkgroupSizeY.default + ); + pass.end(); + commands.push(encoder.finish()); + } + } + return { device, objects: commands }; +} + +/** + * Creates a device, a valid render pipeline, valid resources for the pipeline, and + * ties them together into a set of render commands ready to be submitted to the GPU + * queue. Does not submit the commands in order to make sure that all resources are + * kept alive until the device is destroyed. + */ +async function createDeviceAndRenderCommands(adapter: GPUAdapter) { + // Constants are computed such that per run, this function should allocate roughly 2G + // worth of data. This should be sufficient as we run these creation functions many + // times. If the data backing the created objects is not recycled we should OOM. + const kNumPipelines = 128; + const kNumBindgroups = 128; + const kSize = 128; + const kBufferData = new Uint32Array([...iterRange(kSize * kSize, x => x)]); + + const device: GPUDevice = await adapter.requestDevice(); + const commands = []; + + for (let pipelineIndex = 0; pipelineIndex < kNumPipelines; ++pipelineIndex) { + const module = device.createShaderModule({ + code: ` + struct Buffer { data: array<vec4<u32>, ${(kSize * kSize) / 4}>, }; + + @group(0) @binding(0) var<uniform> buffer: Buffer; + @vertex fn vmain( + @builtin(vertex_index) vertexIndex: u32 + ) -> @builtin(position) vec4<f32> { + let index = buffer.data[vertexIndex / 4u][vertexIndex % 4u]; + let position = vec2<f32>(f32(index % ${kSize}u), f32(index / ${kSize}u)); + let r = vec2<f32>(1.0 / f32(${kSize})); + let a = 2.0 * r; + let b = r - vec2<f32>(1.0); + return vec4<f32>(fma(position, a, b), 0.0, 1.0); + } + + @fragment fn fmain() -> @location(0) vec4<f32> { + return vec4<f32>(${pipelineIndex}.0 / ${kNumPipelines}.0, 0.0, 0.0, 1.0); + } + `, + }); + const pipeline = device.createRenderPipeline({ + layout: device.createPipelineLayout({ + bindGroupLayouts: [ + device.createBindGroupLayout({ + entries: [ + { + binding: 0, + visibility: GPUShaderStage.VERTEX, + buffer: { type: 'uniform' }, + }, + ], + }), + ], + }), + vertex: { module, entryPoint: 'vmain', buffers: [] }, + primitive: { topology: 'point-list' }, + fragment: { + targets: [{ format: 'rgba8unorm' }], + module, + entryPoint: 'fmain', + }, + }); + for (let bindgroupIndex = 0; bindgroupIndex < kNumBindgroups; ++bindgroupIndex) { + const buffer = device.createBuffer({ + size: kSize * kSize * 4, + usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, + }); + device.queue.writeBuffer(buffer, 0, kBufferData, 0, kBufferData.length); + const bindgroup = device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [{ binding: 0, resource: { buffer } }], + }); + const texture = device.createTexture({ + size: [kSize, kSize], + usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC, + format: 'rgba8unorm', + }); + + const encoder = device.createCommandEncoder(); + const pass = encoder.beginRenderPass({ + colorAttachments: [ + { + view: texture.createView(), + loadOp: 'load', + storeOp: 'store', + }, + ], + }); + pass.setPipeline(pipeline); + pass.setBindGroup(0, bindgroup); + pass.draw(kSize * kSize); + pass.end(); + commands.push(encoder.finish()); + } + } + return { device, objects: commands }; +} + +/** + * Creates a device and a large number of buffers which are immediately written to. The + * buffers are expected to be kept alive until they or the device are destroyed. + */ +async function createDeviceAndBuffers(adapter: GPUAdapter) { + // Currently we just allocate 2G of memory using 512MB blocks. We may be able to + // increase this to hit OOM instead, but on integrated GPUs on Metal, this can cause + // kernel panics at the moment, and it can greatly increase the time needed. + const kTotalMemorySize = 2 * 1024 * 1024 * 1024; + const kMemoryBlockSize = 512 * 1024 * 1024; + const kMemoryBlockData = new Uint8Array(kMemoryBlockSize); + + const device: GPUDevice = await adapter.requestDevice(); + const buffers = []; + for (let memory = 0; memory < kTotalMemorySize; memory += kMemoryBlockSize) { + const buffer = device.createBuffer({ + size: kMemoryBlockSize, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST, + }); + + // Write out to the buffer to make sure that it has backing memory. + device.queue.writeBuffer(buffer, 0, kMemoryBlockData, 0, kMemoryBlockData.length); + buffers.push(buffer); + } + return { device, objects: buffers }; +} + +g.test('coexisting') + .desc(`Tests allocation of many coexisting GPUDevice objects.`) + .params(u => u.combine('adapterType', kAdapterTypes)) + .fn(async t => { + const { adapterType } = t.params; + const adapter = await getGPU().requestAdapter(kAdapterTypeOptions[adapterType]); + assert(adapter !== null, 'Failed to get adapter.'); + + // Based on Vulkan conformance test requirement to be able to create multiple devices. + const kNumDevices = 5; + + const devices = []; + for (let i = 0; i < kNumDevices; ++i) { + const device: GPUDevice = await adapter.requestDevice(); + devices.push(device); + } + }); + +g.test('continuous,with_destroy') + .desc( + `Tests allocation and destruction of many GPUDevice objects over time. Device objects +are sequentially requested with a series of device allocated objects created on each +device. The devices are then destroyed to verify that the device and the device allocated +objects are recycled over a very large number of iterations.` + ) + .params(u => u.combine('adapterType', kAdapterTypes)) + .fn(async t => { + const { adapterType } = t.params; + const adapter = await getGPU().requestAdapter(kAdapterTypeOptions[adapterType]); + assert(adapter !== null, 'Failed to get adapter.'); + + // Since devices are being destroyed, we should be able to create many devices. + const kNumDevices = 100; + const kFunctions = [ + createDeviceAndBuffers, + createDeviceAndComputeCommands, + createDeviceAndRenderCommands, + ]; + + const deviceList = []; + const objectLists = []; + for (let i = 0; i < kNumDevices; ++i) { + const { device, objects } = await kFunctions[i % kFunctions.length](adapter); + t.expect(objects.length > 0, 'unable to allocate any objects'); + deviceList.push(device); + objectLists.push(objects); + device.destroy(); + } + }); + +g.test('continuous,no_destroy') + .desc( + `Tests allocation and implicit GC of many GPUDevice objects over time. Objects are +sequentially requested and dropped for GC over a very large number of iterations. Note +that without destroy, we do not create device allocated objects because that will +implicitly keep the device in scope.` + ) + .params(u => u.combine('adapterType', kAdapterTypes)) + .fn(async t => { + const { adapterType } = t.params; + const adapter = await getGPU().requestAdapter(kAdapterTypeOptions[adapterType]); + assert(adapter !== null, 'Failed to get adapter.'); + + const kNumDevices = 10_000; + for (let i = 1; i <= kNumDevices; ++i) { + await (async () => { + t.expect((await adapter.requestDevice()) !== null, 'unexpected null device'); + })(); + if (i % 10 === 0) { + // We need to occasionally wait for GC to clear out stale devices. + await attemptGarbageCollection(); + } + } + }); |