dom/webgpu/tests/cts/checkout/src/stress/adapter/device_allocation.spec.ts


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290

/** Human-readable summary of this spec file, exported under the name `description`. */
export const description = `
Stress tests for GPUAdapter.requestDevice.
`;

import { Fixture } from '../../common/framework/fixture.js';
import { makeTestGroup } from '../../common/framework/test_group.js';
import { attemptGarbageCollection } from '../../common/util/collect_garbage.js';
import { keysOf } from '../../common/util/data_tables.js';
import { getGPU } from '../../common/util/navigator_gpu.js';
import { assert, iterRange } from '../../common/util/util.js';
import { kLimitInfo } from '../../webgpu/capability_info.js';

/** Test group for this file, built from the base `Fixture`; tests are registered on it via `g.test(...)`. */
export const g = makeTestGroup(Fixture);

/**
 * Maps each adapter flavor name to the options object passed to `requestAdapter`.
 * The two power preferences never force the fallback adapter; `'fallback'` forces it
 * and leaves the power preference unspecified.
 */
const kAdapterTypeOptions: Readonly<
  Record<GPUPowerPreference | 'fallback', GPURequestAdapterOptions>
> = {
  'low-power': {
    powerPreference: 'low-power',
    forceFallbackAdapter: false,
  },
  'high-performance': {
    powerPreference: 'high-performance',
    forceFallbackAdapter: false,
  },
  'fallback': {
    powerPreference: undefined,
    forceFallbackAdapter: true,
  },
};
/** Every key of `kAdapterTypeOptions`; used as the `adapterType` test parameter. */
const kAdapterTypes = keysOf(kAdapterTypeOptions);

/**
 * Creates a device, valid compute pipelines, valid resources for the pipelines, and
 * ties them together into a set of compute commands ready to be submitted to the GPU
 * queue. Does not submit the commands in order to make sure that all resources are
 * kept alive until the device is destroyed.
 *
 * @param adapter Adapter the device is requested from (with default limits).
 * @returns The new device and, as `objects`, one finished command buffer per
 *   (pipeline, bind group) pair.
 */
async function createDeviceAndComputeCommands(adapter: GPUAdapter) {
  // Constants are computed such that per run, this function should allocate roughly 2G
  // worth of data. This should be sufficient as we run these creation functions many
  // times. If the data backing the created objects is not recycled we should OOM.
  const kNumPipelines = 64;
  const kNumBindgroups = 128;
  // One workgroup of size 1 is dispatched per buffer element, in a
  // kDispatchX x kDispatchY grid.
  const kDispatchX = kLimitInfo.maxComputeWorkgroupSizeX.default;
  const kDispatchY = kLimitInfo.maxComputeWorkgroupSizeY.default;
  const kNumBufferElements = kDispatchX * kDispatchY;
  const kBufferSize = kNumBufferElements * 4;
  const kBufferData = new Uint32Array([...iterRange(kNumBufferElements, x => x)]);

  const device: GPUDevice = await adapter.requestDevice();
  const commands = [];

  for (let pipelineIndex = 0; pipelineIndex < kNumPipelines; ++pipelineIndex) {
    // The element index is id.x * kDispatchY + id.y: id.y ranges over [0, kDispatchY),
    // so the row stride must be the Y extent for every invocation to address a
    // distinct, in-bounds element.
    const pipeline = device.createComputePipeline({
      layout: 'auto',
      compute: {
        module: device.createShaderModule({
          code: `
              struct Buffer { data: array<u32>, };

              @group(0) @binding(0) var<storage, read_write> buffer: Buffer;
              @compute @workgroup_size(1) fn main(
                  @builtin(global_invocation_id) id: vec3<u32>) {
                buffer.data[id.x * ${kDispatchY}u + id.y] =
                  buffer.data[id.x * ${kDispatchY}u + id.y] +
                    ${pipelineIndex}u;
              }
            `,
        }),
        entryPoint: 'main',
      },
    });
    for (let bindgroupIndex = 0; bindgroupIndex < kNumBindgroups; ++bindgroupIndex) {
      const buffer = device.createBuffer({
        size: kBufferSize,
        usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
      });
      // For typed-array sources the last argument is an element count, not a byte count.
      device.queue.writeBuffer(buffer, 0, kBufferData, 0, kBufferData.length);
      const bindgroup = device.createBindGroup({
        layout: pipeline.getBindGroupLayout(0),
        entries: [{ binding: 0, resource: { buffer } }],
      });

      const encoder = device.createCommandEncoder();
      const pass = encoder.beginComputePass();
      pass.setPipeline(pipeline);
      pass.setBindGroup(0, bindgroup);
      pass.dispatchWorkgroups(kDispatchX, kDispatchY);
      pass.end();
      commands.push(encoder.finish());
    }
  }
  return { device, objects: commands };
}

/**
 * Requests a device from `adapter` and records one render pass per (pipeline, bind group)
 * pair. Every pass draws a point list into its own texture and reads its data from its
 * own uniform buffer. The finished command buffers are deliberately not submitted, with
 * the intent that everything they reference stays alive until the device is destroyed.
 *
 * @param adapter Adapter the device is requested from.
 * @returns The new device and, as `objects`, the recorded command buffers.
 */
async function createDeviceAndRenderCommands(adapter: GPUAdapter) {
  // The counts below are chosen so that a single call allocates roughly 2G of data.
  // Callers run this many times, so an implementation that does not recycle the memory
  // behind these objects is expected to run out of memory.
  const kPipelineCount = 128;
  const kBindGroupsPerPipeline = 128;
  const kSize = 128;
  const kElementCount = kSize * kSize;
  const kBufferByteSize = kElementCount * 4;
  const kBufferData = Uint32Array.from(iterRange(kElementCount, x => x));

  const device: GPUDevice = await adapter.requestDevice();
  const recorded = [];

  for (let p = 0; p < kPipelineCount; p++) {
    const module = device.createShaderModule({
      code: `
          struct Buffer { data: array<vec4<u32>, ${kElementCount / 4}>, };

          @group(0) @binding(0) var<uniform> buffer: Buffer;
          @vertex fn vmain(
            @builtin(vertex_index) vertexIndex: u32
          ) -> @builtin(position) vec4<f32> {
            let index = buffer.data[vertexIndex / 4u][vertexIndex % 4u];
            let position = vec2<f32>(f32(index % ${kSize}u), f32(index / ${kSize}u));
            let r = vec2<f32>(1.0 / f32(${kSize}));
            let a = 2.0 * r;
            let b = r - vec2<f32>(1.0);
            return vec4<f32>(fma(position, a, b), 0.0, 1.0);
          }

          @fragment fn fmain() -> @location(0) vec4<f32> {
            return vec4<f32>(${p}.0 / ${kPipelineCount}.0, 0.0, 0.0, 1.0);
          }
        `,
    });

    // Each pipeline gets its own explicit layout with a single vertex-visible uniform buffer.
    const uniformLayout = device.createBindGroupLayout({
      entries: [
        {
          binding: 0,
          visibility: GPUShaderStage.VERTEX,
          buffer: { type: 'uniform' },
        },
      ],
    });
    const pipelineLayout = device.createPipelineLayout({
      bindGroupLayouts: [uniformLayout],
    });
    const pipeline = device.createRenderPipeline({
      layout: pipelineLayout,
      vertex: { module, entryPoint: 'vmain', buffers: [] },
      primitive: { topology: 'point-list' },
      fragment: {
        targets: [{ format: 'rgba8unorm' }],
        module,
        entryPoint: 'fmain',
      },
    });

    for (let b = 0; b < kBindGroupsPerPipeline; b++) {
      // Uniform data source for this pass, filled right away.
      const buffer = device.createBuffer({
        size: kBufferByteSize,
        usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
      });
      device.queue.writeBuffer(buffer, 0, kBufferData, 0, kBufferData.length);
      const bindgroup = device.createBindGroup({
        layout: pipeline.getBindGroupLayout(0),
        entries: [{ binding: 0, resource: { buffer } }],
      });

      // Dedicated render target for this pass.
      const texture = device.createTexture({
        size: [kSize, kSize],
        usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC,
        format: 'rgba8unorm',
      });

      const encoder = device.createCommandEncoder();
      const view = texture.createView();
      const pass = encoder.beginRenderPass({
        colorAttachments: [{ view, loadOp: 'load', storeOp: 'store' }],
      });
      pass.setPipeline(pipeline);
      pass.setBindGroup(0, bindgroup);
      pass.draw(kElementCount);
      pass.end();
      recorded.push(encoder.finish());
    }
  }
  return { device, objects: recorded };
}

/**
 * Creates a device and a number of large buffers which are immediately written to. The
 * buffers are expected to be kept alive until they or the device are destroyed.
 *
 * @param adapter Adapter the device is requested from (with default limits).
 * @returns The new device and, as `objects`, the created buffers.
 * @throws Error if the device reports a `maxBufferSize` too small to allocate from.
 */
async function createDeviceAndBuffers(adapter: GPUAdapter) {
  // Currently we just allocate 2G of memory using blocks of at most 512MB. We may be
  // able to increase this to hit OOM instead, but on integrated GPUs on Metal, this can
  // cause kernel panics at the moment, and it can greatly increase the time needed.
  const kTotalMemorySize = 2 * 1024 * 1024 * 1024;
  const kMaxMemoryBlockSize = 512 * 1024 * 1024;

  const device: GPUDevice = await adapter.requestDevice();

  // The device was requested without raising any limits, so a single buffer may not
  // exceed device.limits.maxBufferSize. A larger createBuffer call only produces an
  // invalid buffer with no backing memory, which would defeat the purpose of this
  // function. Fall back to the historical block size if the limit is not exposed.
  const maxBufferSize = device.limits.maxBufferSize ?? kMaxMemoryBlockSize;
  const clampedSize = Math.min(kMaxMemoryBlockSize, maxBufferSize);
  // writeBuffer requires the written byte size to be a multiple of 4.
  const blockSize = clampedSize - (clampedSize % 4);
  if (!(blockSize > 0)) {
    throw new Error(`unusable maxBufferSize limit: ${maxBufferSize}`);
  }
  const blockData = new Uint8Array(blockSize);

  const buffers = [];
  for (let allocated = 0; allocated < kTotalMemorySize; allocated += blockSize) {
    // The final block shrinks if blockSize does not evenly divide the total.
    const size = Math.min(blockSize, kTotalMemorySize - allocated);
    const buffer = device.createBuffer({
      size,
      usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST,
    });

    // Write out to the buffer to make sure that it has backing memory.
    device.queue.writeBuffer(buffer, 0, blockData, 0, size);
    buffers.push(buffer);
  }
  return { device, objects: buffers };
}

// Requests several devices and keeps all of them alive at the same time, then releases
// them so that later tests do not start with these devices still allocated.
g.test('coexisting')
  .desc(`Tests allocation of many coexisting GPUDevice objects.`)
  .params(u => u.combine('adapterType', kAdapterTypes))
  .fn(async t => {
    const { adapterType } = t.params;

    // Based on Vulkan conformance test requirement to be able to create multiple devices.
    const kNumDevices = 5;

    const devices: GPUDevice[] = [];
    try {
      for (let i = 0; i < kNumDevices; ++i) {
        // An adapter is consumed by its first successful requestDevice call, so every
        // device is requested from a freshly acquired adapter.
        const adapter = await getGPU().requestAdapter(kAdapterTypeOptions[adapterType]);
        assert(adapter !== null, 'Failed to get adapter.');
        const device: GPUDevice = await adapter.requestDevice();
        devices.push(device);
      }
    } finally {
      // All devices coexisted up to this point; release them explicitly rather than
      // relying on garbage collection.
      for (const device of devices) {
        device.destroy();
      }
    }
  });

// Repeatedly creates a device plus a large set of device-allocated objects, then
// destroys the device, cycling through the three creation helpers.
g.test('continuous,with_destroy')
  .desc(
    `Tests allocation and destruction of many GPUDevice objects over time. Device objects
are sequentially requested with a series of device allocated objects created on each
device. The devices are then destroyed to verify that the device and the device allocated
objects are recycled over a very large number of iterations.`
  )
  .params(u => u.combine('adapterType', kAdapterTypes))
  .fn(async t => {
    const { adapterType } = t.params;

    // Since devices are being destroyed, we should be able to create many devices.
    const kNumDevices = 100;
    const kFunctions = [
      createDeviceAndBuffers,
      createDeviceAndComputeCommands,
      createDeviceAndRenderCommands,
    ];

    // NOTE(review): these lists are never read. They presumably exist to keep the JS
    // objects reachable, so that any reclamation is due to destroy() and not GC - confirm.
    const deviceList = [];
    const objectLists = [];
    for (let i = 0; i < kNumDevices; ++i) {
      // An adapter is consumed by its first successful requestDevice call, so every
      // iteration acquires a fresh adapter for the helper to request its device from.
      const adapter = await getGPU().requestAdapter(kAdapterTypeOptions[adapterType]);
      assert(adapter !== null, 'Failed to get adapter.');

      const { device, objects } = await kFunctions[i % kFunctions.length](adapter);
      t.expect(objects.length > 0, 'unable to allocate any objects');
      deviceList.push(device);
      objectLists.push(objects);
      device.destroy();
    }
  });

// Repeatedly requests devices and drops every reference to them without calling
// destroy(), periodically attempting garbage collection.
g.test('continuous,no_destroy')
  .desc(
    `Tests allocation and implicit GC of many GPUDevice objects over time. Objects are
sequentially requested and dropped for GC over a very large number of iterations. Note
that without destroy, we do not create device allocated objects because that will
implicitly keep the device in scope.`
  )
  .params(u => u.combine('adapterType', kAdapterTypes))
  .fn(async t => {
    const { adapterType } = t.params;

    const kNumDevices = 10_000;
    for (let i = 1; i <= kNumDevices; ++i) {
      // The adapter and device only live inside this closure, so no reference to either
      // survives the iteration.
      await (async () => {
        // An adapter is consumed by its first successful requestDevice call, so every
        // device is requested from a freshly acquired adapter.
        const adapter = await getGPU().requestAdapter(kAdapterTypeOptions[adapterType]);
        assert(adapter !== null, 'Failed to get adapter.');
        t.expect((await adapter.requestDevice()) !== null, 'unexpected null device');
      })();
      if (i % 10 === 0) {
        // We need to occasionally wait for GC to clear out stale devices.
        await attemptGarbageCollection();
      }
    }
  });