summaryrefslogtreecommitdiffstats
path: root/dom/webgpu/tests/cts/checkout/src/webgpu/util/device_pool.ts
diff options
context:
space:
mode:
Diffstat (limited to 'dom/webgpu/tests/cts/checkout/src/webgpu/util/device_pool.ts')
-rw-r--r--dom/webgpu/tests/cts/checkout/src/webgpu/util/device_pool.ts391
1 files changed, 391 insertions, 0 deletions
diff --git a/dom/webgpu/tests/cts/checkout/src/webgpu/util/device_pool.ts b/dom/webgpu/tests/cts/checkout/src/webgpu/util/device_pool.ts
new file mode 100644
index 0000000000..fbcbade771
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/webgpu/util/device_pool.ts
@@ -0,0 +1,391 @@
+import { SkipTestCase } from '../../common/framework/fixture.js';
+import { attemptGarbageCollection } from '../../common/util/collect_garbage.js';
+import { getGPU } from '../../common/util/navigator_gpu.js';
+import {
+ assert,
+ raceWithRejectOnTimeout,
+ assertReject,
+ unreachable,
+} from '../../common/util/util.js';
+import { kLimitInfo, kLimits } from '../capability_info.js';
+
+export interface DeviceProvider {
+ readonly device: GPUDevice;
+ expectDeviceLost(reason: GPUDeviceLostReason): void;
+}
+
+class TestFailedButDeviceReusable extends Error {}
+class FeaturesNotSupported extends Error {}
+export class TestOOMedShouldAttemptGC extends Error {}
+
+export class DevicePool {
+ private holders: 'uninitialized' | 'failed' | DescriptorToHolderMap = 'uninitialized';
+
+ /** Acquire a device from the pool and begin the error scopes. */
+ async acquire(descriptor?: UncanonicalizedDeviceDescriptor): Promise<DeviceProvider> {
+ let errorMessage = '';
+ if (this.holders === 'uninitialized') {
+ this.holders = new DescriptorToHolderMap();
+ try {
+ await this.holders.getOrCreate(undefined);
+ } catch (ex) {
+ this.holders = 'failed';
+ if (ex instanceof Error) {
+ errorMessage = ` with ${ex.name} "${ex.message}"`;
+ }
+ }
+ }
+
+ assert(
+ this.holders !== 'failed',
+ `WebGPU device failed to initialize${errorMessage}; not retrying`
+ );
+
+ const holder = await this.holders.getOrCreate(descriptor);
+
+ assert(holder.state === 'free', 'Device was in use on DevicePool.acquire');
+ holder.state = 'acquired';
+ holder.beginTestScope();
+ return holder;
+ }
+
+ /**
+ * End the error scopes and check for errors.
+ * Then, if the device seems reusable, release it back into the pool. Otherwise, drop it.
+ */
+ async release(holder: DeviceProvider): Promise<void> {
+ assert(this.holders instanceof DescriptorToHolderMap, 'DevicePool got into a bad state');
+ assert(holder instanceof DeviceHolder, 'DeviceProvider should always be a DeviceHolder');
+
+ assert(holder.state === 'acquired', 'trying to release a device while already released');
+ try {
+ await holder.endTestScope();
+
+ // (Hopefully if the device was lost, it has been reported by the time endErrorScopes()
+ // has finished (or timed out). If not, it could cause a finite number of extra test
+ // failures following this one (but should recover eventually).)
+ assert(
+ holder.lostInfo === undefined,
+ `Device was unexpectedly lost. Reason: ${holder.lostInfo?.reason}, Message: ${holder.lostInfo?.message}`
+ );
+ } catch (ex) {
+ // Any error that isn't explicitly TestFailedButDeviceReusable forces a new device to be
+ // created for the next test.
+ if (!(ex instanceof TestFailedButDeviceReusable)) {
+ this.holders.delete(holder);
+ if ('destroy' in holder.device) {
+ holder.device.destroy();
+ }
+
+ // Release the (hopefully only) ref to the GPUDevice.
+ holder.releaseGPUDevice();
+
+ // Try to clean up, in case there are stray GPU resources in need of collection.
+ if (ex instanceof TestOOMedShouldAttemptGC) {
+ await attemptGarbageCollection();
+ }
+ }
+ // In the try block, we may throw an error if the device is lost in order to force device
+ // reinitialization, however, if the device lost was expected we want to suppress the error
+ // The device lost is expected when `holder.expectedLostReason` is equal to
+ // `holder.lostInfo.reason`.
+ const expectedDeviceLost =
+ holder.expectedLostReason !== undefined &&
+ holder.lostInfo !== undefined &&
+ holder.expectedLostReason === holder.lostInfo.reason;
+ if (!expectedDeviceLost) {
+ throw ex;
+ }
+ } finally {
+ // Mark the holder as free so the device can be reused (if it's still in this.devices).
+ holder.state = 'free';
+ }
+ }
+}
+
+/**
+ * Map from GPUDeviceDescriptor to DeviceHolder.
+ */
+class DescriptorToHolderMap {
+ /** Map keys that are known to be unsupported and can be rejected quickly. */
+ private unsupported: Set<string> = new Set();
+ private holders: Map<string, DeviceHolder> = new Map();
+
+ /** Deletes an item from the map by DeviceHolder value. */
+ delete(holder: DeviceHolder): void {
+ for (const [k, v] of this.holders) {
+ if (v === holder) {
+ this.holders.delete(k);
+ return;
+ }
+ }
+ unreachable("internal error: couldn't find DeviceHolder to delete");
+ }
+
+ /**
+ * Gets a DeviceHolder from the map if it exists; otherwise, calls create() to create one,
+ * inserts it, and returns it.
+ *
+ * If an `uncanonicalizedDescriptor` is provided, it is canonicalized and used as the map key.
+ * If one is not provided, the map key is `""` (empty string).
+ *
+ * Throws SkipTestCase if devices with this descriptor are unsupported.
+ */
+ async getOrCreate(
+ uncanonicalizedDescriptor: UncanonicalizedDeviceDescriptor | undefined
+ ): Promise<DeviceHolder> {
+ const [descriptor, key] = canonicalizeDescriptor(uncanonicalizedDescriptor);
+ // Quick-reject descriptors that are known to be unsupported already.
+ if (this.unsupported.has(key)) {
+ throw new SkipTestCase(
+ `GPUDeviceDescriptor previously failed: ${JSON.stringify(descriptor)}`
+ );
+ }
+
+ // Search for an existing device with the same descriptor.
+ {
+ const value = this.holders.get(key);
+ if (value) {
+ // Move it to the end of the Map (most-recently-used).
+ this.holders.delete(key);
+ this.holders.set(key, value);
+ return value;
+ }
+ }
+
+ // No existing item was found; add a new one.
+ let value;
+ try {
+ value = await DeviceHolder.create(descriptor);
+ } catch (ex) {
+ if (ex instanceof FeaturesNotSupported) {
+ this.unsupported.add(key);
+ throw new SkipTestCase(
+ `GPUDeviceDescriptor not supported: ${JSON.stringify(descriptor)}\n${ex?.message ?? ''}`
+ );
+ }
+
+ throw ex;
+ }
+ this.insertAndCleanUp(key, value);
+ return value;
+ }
+
+ /** Insert an entry, then remove the least-recently-used items if there are too many. */
+ private insertAndCleanUp(key: string, value: DeviceHolder) {
+ this.holders.set(key, value);
+
+ const kMaxEntries = 5;
+ if (this.holders.size > kMaxEntries) {
+ // Delete the first (least recently used) item in the set.
+ for (const [key] of this.holders) {
+ this.holders.delete(key);
+ return;
+ }
+ }
+ }
+}
+
+export type UncanonicalizedDeviceDescriptor = {
+ requiredFeatures?: Iterable<GPUFeatureName>;
+ requiredLimits?: Record<string, GPUSize32>;
+ /** @deprecated this field cannot be used */
+ nonGuaranteedFeatures?: undefined;
+ /** @deprecated this field cannot be used */
+ nonGuaranteedLimits?: undefined;
+ /** @deprecated this field cannot be used */
+ extensions?: undefined;
+ /** @deprecated this field cannot be used */
+ features?: undefined;
+};
+type CanonicalDeviceDescriptor = Omit<
+ Required<GPUDeviceDescriptor>,
+ 'label' | 'nonGuaranteedFeatures' | 'nonGuaranteedLimits'
+>;
+/**
+ * Make a stringified map-key from a GPUDeviceDescriptor.
+ * Tries to make sure all defaults are resolved, first - but it's okay if some are missed
+ * (it just means some GPUDevice objects won't get deduplicated).
+ *
+ * This does **not** canonicalize `undefined` (the "default" descriptor) into a fully-qualified
+ * GPUDeviceDescriptor. This is just because `undefined` is a common case and we want to use it
+ * as a sanity check that WebGPU is working.
+ */
+function canonicalizeDescriptor(
+ desc: UncanonicalizedDeviceDescriptor | undefined
+): [CanonicalDeviceDescriptor | undefined, string] {
+ if (desc === undefined) {
+ return [undefined, ''];
+ }
+
+ const featuresCanonicalized = desc.requiredFeatures
+ ? Array.from(new Set(desc.requiredFeatures)).sort()
+ : [];
+
+ /** Canonicalized version of the requested limits: in canonical order, with only values which are
+ * specified _and_ non-default. */
+ const limitsCanonicalized: Record<string, number> = {};
+ if (desc.requiredLimits) {
+ for (const limit of kLimits) {
+ const requestedValue = desc.requiredLimits[limit];
+ const defaultValue = kLimitInfo[limit].default;
+ // Skip adding a limit to limitsCanonicalized if it is the same as the default.
+ if (requestedValue !== undefined && requestedValue !== defaultValue) {
+ limitsCanonicalized[limit] = requestedValue;
+ }
+ }
+ }
+
+ // Type ensures every field is carried through.
+ const descriptorCanonicalized: CanonicalDeviceDescriptor = {
+ requiredFeatures: featuresCanonicalized,
+ requiredLimits: limitsCanonicalized,
+ defaultQueue: {},
+ };
+ return [descriptorCanonicalized, JSON.stringify(descriptorCanonicalized)];
+}
+
+function supportsFeature(
+ adapter: GPUAdapter,
+ descriptor: CanonicalDeviceDescriptor | undefined
+): boolean {
+ if (descriptor === undefined) {
+ return true;
+ }
+
+ for (const feature of descriptor.requiredFeatures) {
+ if (!adapter.features.has(feature)) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/**
+ * DeviceHolder has three states:
+ * - 'free': Free to be used for a new test.
+ * - 'acquired': In use by a running test.
+ */
+type DeviceHolderState = 'free' | 'acquired';
+
+/**
+ * Holds a GPUDevice and tracks its state (free/acquired) and handles device loss.
+ */
+class DeviceHolder implements DeviceProvider {
+ /** The device. Will be cleared during cleanup if there were unexpected errors. */
+ private _device: GPUDevice | undefined;
+ /** Whether the device is in use by a test or not. */
+ state: DeviceHolderState = 'free';
+ /** initially undefined; becomes set when the device is lost */
+ lostInfo?: GPUDeviceLostInfo;
+ /** Set if the device is expected to be lost. */
+ expectedLostReason?: GPUDeviceLostReason;
+
+ // Gets a device and creates a DeviceHolder.
+ // If the device is lost, DeviceHolder.lost gets set.
+ static async create(descriptor: CanonicalDeviceDescriptor | undefined): Promise<DeviceHolder> {
+ const gpu = getGPU();
+ const adapter = await gpu.requestAdapter();
+ assert(adapter !== null, 'requestAdapter returned null');
+ if (!supportsFeature(adapter, descriptor)) {
+ throw new FeaturesNotSupported('One or more features are not supported');
+ }
+ const device = await adapter.requestDevice(descriptor);
+ assert(device !== null, 'requestDevice returned null');
+
+ return new DeviceHolder(device);
+ }
+
+ private constructor(device: GPUDevice) {
+ this._device = device;
+ void this._device.lost.then(ev => {
+ this.lostInfo = ev;
+ });
+ }
+
+ get device() {
+ assert(this._device !== undefined);
+ return this._device;
+ }
+
+ /** Push error scopes that surround test execution. */
+ beginTestScope(): void {
+ assert(this.state === 'acquired');
+ this.device.pushErrorScope('out-of-memory');
+ this.device.pushErrorScope('validation');
+ }
+
+ /** Mark the DeviceHolder as expecting a device loss when the test scope ends. */
+ expectDeviceLost(reason: GPUDeviceLostReason) {
+ assert(this.state === 'acquired');
+ this.expectedLostReason = reason;
+ }
+
+ /**
+ * Attempt to end test scopes: Check that there are no extra error scopes, and that no
+ * otherwise-uncaptured errors occurred during the test. Time out if it takes too long.
+ */
+ endTestScope(): Promise<void> {
+ assert(this.state === 'acquired');
+ const kTimeout = 5000;
+
+ // Time out if attemptEndTestScope (popErrorScope or onSubmittedWorkDone) never completes. If
+ // this rejects, the device won't be reused, so it's OK that popErrorScope calls may not have
+ // finished.
+ //
+ // This could happen due to a browser bug - e.g.,
+ // as of this writing, on Chrome GPU process crash, popErrorScope just hangs.
+ return raceWithRejectOnTimeout(this.attemptEndTestScope(), kTimeout, 'endTestScope timed out');
+ }
+
+ private async attemptEndTestScope(): Promise<void> {
+ let gpuValidationError: GPUError | null;
+ let gpuOutOfMemoryError: GPUError | null;
+
+ // Submit to the queue to attempt to force a GPU flush.
+ this.device.queue.submit([]);
+
+ try {
+ // May reject if the device was lost.
+ [gpuValidationError, gpuOutOfMemoryError] = await Promise.all([
+ this.device.popErrorScope(),
+ this.device.popErrorScope(),
+ ]);
+ } catch (ex) {
+ assert(this.lostInfo !== undefined, 'popErrorScope failed; did beginTestScope get missed?');
+ throw ex;
+ }
+
+ // Attempt to wait for the queue to be idle.
+ if (this.device.queue.onSubmittedWorkDone) {
+ await this.device.queue.onSubmittedWorkDone();
+ }
+
+ await assertReject(
+ this.device.popErrorScope(),
+ 'There was an extra error scope on the stack after a test'
+ );
+
+ if (gpuValidationError !== null) {
+ assert(gpuValidationError instanceof GPUValidationError);
+ // Allow the device to be reused.
+ throw new TestFailedButDeviceReusable(
+ `Unexpected validation error occurred: ${gpuValidationError.message}`
+ );
+ }
+ if (gpuOutOfMemoryError !== null) {
+ assert(gpuOutOfMemoryError instanceof GPUOutOfMemoryError);
+ // Don't allow the device to be reused; unexpected OOM could break the device.
+ throw new TestOOMedShouldAttemptGC('Unexpected out-of-memory error occurred');
+ }
+ }
+
+ /**
+ * Release the ref to the GPUDevice. This should be the only ref held by the DevicePool or
+ * GPUTest, so in theory it can get garbage collected.
+ */
+ releaseGPUDevice(): void {
+ this._device = undefined;
+ }
+}