1 files changed, 1049 insertions, 0 deletions
diff --git a/dom/webgpu/tests/cts/checkout/src/webgpu/shader/execution/memory_model/memory_model_setup.ts b/dom/webgpu/tests/cts/checkout/src/webgpu/shader/execution/memory_model/memory_model_setup.ts
new file mode 100644
index 0000000000..c26f8fde29
--- /dev/null
+++ b/dom/webgpu/tests/cts/checkout/src/webgpu/shader/execution/memory_model/memory_model_setup.ts
@@ -0,0 +1,1049 @@
+import { GPUTest } from '../../../gpu_test';
+import { checkElementsPassPredicate } from '../../../util/check_contents.js';
+
+/* All buffer sizes are counted in units of 4-byte words. */
+
+/* Parameter values are set heuristically, typically by a time-intensive search. */
+export type MemoryModelTestParams = {
+  /* Number of invocations per workgroup. The workgroups are 1-dimensional. */
+  workgroupSize: number;
+  /** The number of workgroups to assign to running the test. */
+  testingWorkgroups: number;
+  /**
+   * Run no more than this many workgroups. Must be >= the number of testing workgroups. Non-testing workgroups are used
+   * to stress other memory locations.
+   */
+  maxWorkgroups: number;
+  /** The percentage of iterations to shuffle the workgroup ids. */
+  shufflePct: number;
+  /** The percentage of iterations to run the bounded spin-loop barrier. */
+  barrierPct: number;
+  /** The percentage of iterations to run memory stress using non-testing workgroups. */
+  memStressPct: number;
+  /** The number of iterations to run the memory stress pattern. */
+  memStressIterations: number;
+  /** The percentage of iterations the first instruction in the stress pattern should be a store. */
+  memStressStoreFirstPct: number;
+  /** The percentage of iterations the second instruction in the stress pattern should be a store. */
+  memStressStoreSecondPct: number;
+  /** The percentage of iterations for testing threads to run stress before running the test. */
+  preStressPct: number;
+  /** Same as for memStressIterations. */
+  preStressIterations: number;
+  /** The percentage of iterations the first instruction in the pre-stress pattern should be a store. */
+  preStressStoreFirstPct: number;
+  /** The percentage of iterations the second instruction in the pre-stress pattern should be a store. */
+  preStressStoreSecondPct: number;
+  /** The size of the scratch memory region, used for stressing threads. */
+  scratchMemorySize: number;
+  /** The size of each block of memory stressing threads access. */
+  stressLineSize: number;
+  /** The number of blocks of memory to assign stressing threads to. */
+  stressTargetLines: number;
+  /** How non-testing threads are assigned to stressing locations. 100 means all iterations use a round robin approach, 0 means all use a chunking approach. */
+  stressStrategyBalancePct: number;
+  /** Used to permute thread ids within a workgroup, so more random pairings are created between threads coordinating on a test. */
+  permuteFirst: number;
+  /** Used to create distance between memory locations used in a test. Set this to 1 for memory that should be aliased. */
+  permuteSecond: number;
+  /** The distance (in number of 4 byte intervals) between any two memory locations used for testing. */
+  memStride: number;
+  /** For tests that access one memory location, but use dynamic addresses to avoid compiler optimization, aliased memory should be set to true. */
+  aliasedMemory: boolean;
+  /** The number of possible behaviors that a test can have. */
+  numBehaviors: number;
+};
+
+/** The number of memory locations accessed by a test. Currently, only tests with up to 2 memory locations are supported. */
+const numMemLocations = 2;
+
+/** The number of read outputs per test that need to be analyzed in the result aggregation shader. Currently, only tests with up to 2 read outputs are supported. */
+const numReadOutputs = 2;
+
+/** Represents a device buffer and a utility buffer for resetting memory and copying parameters. */
+type BufferWithSource = {
+  /** Buffer used by shader code. */
+  deviceBuf: GPUBuffer;
+  /** Buffer populated from the host size, data is copied to device buffer for use by shader. */
+  srcBuf: GPUBuffer;
+  /** Size in bytes of the buffer. */
+  size: number;
+};
+
+/** Specifies the buffers used during a memory model test. */
+type MemoryModelBuffers = {
+  /** This is the memory region that testing threads read from and write to. */
+  testLocations: BufferWithSource;
+  /** This buffer collects the results of reads for analysis in the result aggregation shader. */
+  readResults: BufferWithSource;
+  /** This buffer is the aggregated results of every testing thread, and is used to check for test success/failure. */
+  testResults: BufferWithSource;
+  /** This buffer stores the shuffled workgroup ids for use during testing. Read-only in the shader. */
+  shuffledWorkgroups: BufferWithSource;
+  /** This is the bounded spin-loop barrier, used to temporally align testing threads. */
+  barrier: BufferWithSource;
+  /** Memory region for stressing threads to read to and write from. */
+  scratchpad: BufferWithSource;
+  /** The memory locations in the scratch region that stressing threads access. */
+  scratchMemoryLocations: BufferWithSource;
+  /** Parameters that are used by the shader to calculate memory locations and perform stress. */
+  stressParams: BufferWithSource;
+};
+
+/** The number of stress params to add to the stress params buffer. */
+const numStressParams = 12;
+const barrierParamIndex = 0;
+const memStressIndex = 1;
+const memStressIterationsIndex = 2;
+const memStressPatternIndex = 3;
+const preStressIndex = 4;
+const preStressIterationsIndex = 5;
+const preStressPatternIndex = 6;
+const permuteFirstIndex = 7;
+const permuteSecondIndex = 8;
+const testingWorkgroupsIndex = 9;
+const memStrideIndex = 10;
+const memLocationOffsetIndex = 11;
+
+/**
+ * All memory used in these consists of a four byte word, so this value is used to correctly set the byte size of buffers that
+ * are read to/written from during tests and for storing test results.
+ */
+const bytesPerWord = 4;
+
+/**
+ * Implements setup code necessary to run a memory model test. A test consists of two parts:
+ *  1.) A test shader that runs a specified memory model litmus test and attempts to reveal a weak (disallowed) behavior.
+ *      At a high level, a test shader consists of a set of testing workgroups where every invocation executes the litmus test
+ *      on a set of test locations, and a set of stressing workgroups where every invocation accesses a specified memory location
+ *      in a random pattern.
+ *  2.) A result shader that takes the output of the test shader, which consists of the memory locations accessed during the test
+ *      and the results of any reads made during the test, and aggregate the results based on the possible behaviors of the test.
+ */
+export class MemoryModelTester {
+  protected test: GPUTest;
+  protected params: MemoryModelTestParams;
+  protected buffers: MemoryModelBuffers;
+  protected testPipeline: GPUComputePipeline;
+  protected testBindGroup: GPUBindGroup;
+  protected resultPipeline: GPUComputePipeline;
+  protected resultBindGroup: GPUBindGroup;
+
+  /** Sets up a memory model test by initializing buffers and pipeline layouts. */
+  constructor(t: GPUTest, params: MemoryModelTestParams, testShader: string, resultShader: string) {
+    this.test = t;
+    this.params = params;
+
+    // set up buffers
+    const testingThreads = this.params.workgroupSize * this.params.testingWorkgroups;
+    const testLocationsSize =
+      testingThreads * numMemLocations * this.params.memStride * bytesPerWord;
+    const testLocationsBuffer: BufferWithSource = {
+      deviceBuf: this.test.device.createBuffer({
+        size: testLocationsSize,
+        usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE,
+      }),
+      srcBuf: this.test.makeBufferWithContents(
+        new Uint32Array(testLocationsSize).fill(0),
+        GPUBufferUsage.COPY_SRC
+      ),
+      size: testLocationsSize,
+    };
+
+    const readResultsSize = testingThreads * numReadOutputs * bytesPerWord;
+    const readResultsBuffer: BufferWithSource = {
+      deviceBuf: this.test.device.createBuffer({
+        size: readResultsSize,
+        usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE,
+      }),
+      srcBuf: this.test.makeBufferWithContents(
+        new Uint32Array(readResultsSize).fill(0),
+        GPUBufferUsage.COPY_SRC
+      ),
+      size: readResultsSize,
+    };
+
+    const testResultsSize = this.params.numBehaviors * bytesPerWord;
+    const testResultsBuffer: BufferWithSource = {
+      deviceBuf: this.test.device.createBuffer({
+        size: testResultsSize,
+        usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC,
+      }),
+      srcBuf: this.test.makeBufferWithContents(
+        new Uint32Array(testResultsSize).fill(0),
+        GPUBufferUsage.COPY_SRC
+      ),
+      size: testResultsSize,
+    };
+
+    const shuffledWorkgroupsSize = this.params.maxWorkgroups * bytesPerWord;
+    const shuffledWorkgroupsBuffer: BufferWithSource = {
+      deviceBuf: this.test.device.createBuffer({
+        size: shuffledWorkgroupsSize,
+        usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE,
+      }),
+      srcBuf: this.test.device.createBuffer({
+        size: shuffledWorkgroupsSize,
+        usage: GPUBufferUsage.COPY_SRC | GPUBufferUsage.MAP_WRITE,
+      }),
+      size: shuffledWorkgroupsSize,
+    };
+
+    const barrierSize = bytesPerWord;
+    const barrierBuffer: BufferWithSource = {
+      deviceBuf: this.test.device.createBuffer({
+        size: barrierSize,
+        usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE,
+      }),
+      srcBuf: this.test.makeBufferWithContents(
+        new Uint32Array(barrierSize).fill(0),
+        GPUBufferUsage.COPY_SRC
+      ),
+      size: barrierSize,
+    };
+
+    const scratchpadSize = this.params.scratchMemorySize * bytesPerWord;
+    const scratchpadBuffer: BufferWithSource = {
+      deviceBuf: this.test.device.createBuffer({
+        size: scratchpadSize,
+        usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE,
+      }),
+      srcBuf: this.test.makeBufferWithContents(
+        new Uint32Array(scratchpadSize).fill(0),
+        GPUBufferUsage.COPY_SRC
+      ),
+      size: scratchpadSize,
+    };
+
+    const scratchMemoryLocationsSize = this.params.maxWorkgroups * bytesPerWord;
+    const scratchMemoryLocationsBuffer: BufferWithSource = {
+      deviceBuf: this.test.device.createBuffer({
+        size: scratchMemoryLocationsSize,
+        usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE,
+      }),
+      srcBuf: this.test.device.createBuffer({
+        size: scratchMemoryLocationsSize,
+        usage: GPUBufferUsage.COPY_SRC | GPUBufferUsage.MAP_WRITE,
+      }),
+      size: scratchMemoryLocationsSize,
+    };
+
+    const stressParamsSize = numStressParams * bytesPerWord;
+    const stressParamsBuffer: BufferWithSource = {
+      deviceBuf: this.test.device.createBuffer({
+        size: stressParamsSize,
+        usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.UNIFORM,
+      }),
+      srcBuf: this.test.device.createBuffer({
+        size: stressParamsSize,
+        usage: GPUBufferUsage.COPY_SRC | GPUBufferUsage.MAP_WRITE,
+      }),
+      size: stressParamsSize,
+    };
+
+    this.buffers = {
+      testLocations: testLocationsBuffer,
+      readResults: readResultsBuffer,
+      testResults: testResultsBuffer,
+      shuffledWorkgroups: shuffledWorkgroupsBuffer,
+      barrier: barrierBuffer,
+      scratchpad: scratchpadBuffer,
+      scratchMemoryLocations: scratchMemoryLocationsBuffer,
+      stressParams: stressParamsBuffer,
+    };
+
+    // set up pipeline layouts
+    const testLayout = this.test.device.createBindGroupLayout({
+      entries: [
+        { binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: { type: 'storage' } },
+        { binding: 1, visibility: GPUShaderStage.COMPUTE, buffer: { type: 'storage' } },
+        { binding: 2, visibility: GPUShaderStage.COMPUTE, buffer: { type: 'read-only-storage' } },
+        { binding: 3, visibility: GPUShaderStage.COMPUTE, buffer: { type: 'storage' } },
+        { binding: 4, visibility: GPUShaderStage.COMPUTE, buffer: { type: 'storage' } },
+        { binding: 5, visibility: GPUShaderStage.COMPUTE, buffer: { type: 'storage' } },
+        { binding: 6, visibility: GPUShaderStage.COMPUTE, buffer: { type: 'uniform' } },
+      ],
+    });
+    this.testPipeline = this.test.device.createComputePipeline({
+      layout: this.test.device.createPipelineLayout({
+        bindGroupLayouts: [testLayout],
+      }),
+      compute: {
+        module: this.test.device.createShaderModule({
+          code: testShader,
+        }),
+        entryPoint: 'main',
+      },
+    });
+    this.testBindGroup = this.test.device.createBindGroup({
+      entries: [
+        { binding: 0, resource: { buffer: this.buffers.testLocations.deviceBuf } },
+        { binding: 1, resource: { buffer: this.buffers.readResults.deviceBuf } },
+        { binding: 2, resource: { buffer: this.buffers.shuffledWorkgroups.deviceBuf } },
+        { binding: 3, resource: { buffer: this.buffers.barrier.deviceBuf } },
+        { binding: 4, resource: { buffer: this.buffers.scratchpad.deviceBuf } },
+        { binding: 5, resource: { buffer: this.buffers.scratchMemoryLocations.deviceBuf } },
+        { binding: 6, resource: { buffer: this.buffers.stressParams.deviceBuf } },
+      ],
+      layout: testLayout,
+    });
+
+    const resultLayout = this.test.device.createBindGroupLayout({
+      entries: [
+        { binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: { type: 'storage' } },
+        { binding: 1, visibility: GPUShaderStage.COMPUTE, buffer: { type: 'storage' } },
+        { binding: 2, visibility: GPUShaderStage.COMPUTE, buffer: { type: 'storage' } },
+        { binding: 3, visibility: GPUShaderStage.COMPUTE, buffer: { type: 'uniform' } },
+      ],
+    });
+    this.resultPipeline = this.test.device.createComputePipeline({
+      layout: this.test.device.createPipelineLayout({
+        bindGroupLayouts: [resultLayout],
+      }),
+      compute: {
+        module: this.test.device.createShaderModule({
+          code: resultShader,
+        }),
+        entryPoint: 'main',
+      },
+    });
+    this.resultBindGroup = this.test.device.createBindGroup({
+      entries: [
+        { binding: 0, resource: { buffer: this.buffers.testLocations.deviceBuf } },
+        { binding: 1, resource: { buffer: this.buffers.readResults.deviceBuf } },
+        { binding: 2, resource: { buffer: this.buffers.testResults.deviceBuf } },
+        { binding: 3, resource: { buffer: this.buffers.stressParams.deviceBuf } },
+      ],
+      layout: resultLayout,
+    });
+  }
+
+  /**
+   * Run the test for the specified number of iterations. Checks the testResults buffer on the weakIndex; if
+   * this value is not 0 then the test has failed. The number of iterations is chosen per test so that the
+   * full set of tests meets some time budget while still being reasonably effective at uncovering issues.
+   * Currently, we aim for each test to complete in under one second.
+   */
+  async run(iterations: number, weakIndex: number): Promise<void> {
+    for (let i = 0; i < iterations; i++) {
+      const numWorkgroups = this.getRandomInRange(
+        this.params.testingWorkgroups,
+        this.params.maxWorkgroups
+      );
+      await this.setShuffledWorkgroups(numWorkgroups);
+      await this.setScratchLocations(numWorkgroups);
+      await this.setStressParams();
+      const encoder = this.test.device.createCommandEncoder();
+      this.copyBufferToBuffer(encoder, this.buffers.testLocations);
+      this.copyBufferToBuffer(encoder, this.buffers.readResults);
+      this.copyBufferToBuffer(encoder, this.buffers.testResults);
+      this.copyBufferToBuffer(encoder, this.buffers.barrier);
+      this.copyBufferToBuffer(encoder, this.buffers.shuffledWorkgroups);
+      this.copyBufferToBuffer(encoder, this.buffers.scratchpad);
+      this.copyBufferToBuffer(encoder, this.buffers.scratchMemoryLocations);
+      this.copyBufferToBuffer(encoder, this.buffers.stressParams);
+
+      const testPass = encoder.beginComputePass();
+      testPass.setPipeline(this.testPipeline);
+      testPass.setBindGroup(0, this.testBindGroup);
+      testPass.dispatchWorkgroups(numWorkgroups);
+      testPass.end();
+
+      const resultPass = encoder.beginComputePass();
+      resultPass.setPipeline(this.resultPipeline);
+      resultPass.setBindGroup(0, this.resultBindGroup);
+      resultPass.dispatchWorkgroups(this.params.testingWorkgroups);
+      resultPass.end();
+
+      this.test.device.queue.submit([encoder.finish()]);
+      this.test.expectGPUBufferValuesPassCheck(
+        this.buffers.testResults.deviceBuf,
+        this.checkWeakIndex(weakIndex),
+        {
+          type: Uint32Array,
+          typedLength: this.params.numBehaviors,
+        }
+      );
+    }
+  }
+
+  /** Returns a function that checks whether the test passes, given a weak index and the test results buffer. */
+  protected checkWeakIndex(weakIndex: number): (a: Uint32Array) => Error | undefined {
+    const checkResult = this.checkResult(weakIndex);
+    const resultPrinter = this.resultPrinter(weakIndex);
+    return function (a: Uint32Array): Error | undefined {
+      return checkElementsPassPredicate(a, checkResult, {
+        predicatePrinter: [{ leftHeader: 'expected ==', getValueForCell: resultPrinter }],
+      });
+    };
+  }
+
+  /**
+   * Returns a function that checks whether the specified weak index's value is not equal to 0.
+   * If the weak index's value is not 0, it means the test has observed a behavior disallowed by the memory model and
+   * is considered a test failure.
+   */
+  protected checkResult(weakIndex: number): (i: number, v: number) => boolean {
+    return function (i: number, v: number): boolean {
+      if (i === weakIndex && v > 0) {
+        return false;
+      }
+      return true;
+    };
+  }
+
+  /** Returns a printer function that visualizes the results of checking the test results. */
+  protected resultPrinter(weakIndex: number): (i: number) => string | number {
+    return function (i: number): string | number {
+      if (i === weakIndex) {
+        return 0;
+      } else {
+        return 'any value';
+      }
+    };
+  }
+
+  /** Utility method that simplifies copying source buffers to device buffers. */
+  protected copyBufferToBuffer(encoder: GPUCommandEncoder, buffer: BufferWithSource): void {
+    encoder.copyBufferToBuffer(buffer.srcBuf, 0, buffer.deviceBuf, 0, buffer.size);
+  }
+
+  /** Returns a random integer between 0 and the max. */
+  protected getRandomInt(max: number): number {
+    return Math.floor(Math.random() * max);
+  }
+
+  /** Returns a random number in between the min and max values. */
+  protected getRandomInRange(min: number, max: number): number {
+    if (min === max) {
+      return min;
+    } else {
+      const offset = this.getRandomInt(max - min);
+      return min + offset;
+    }
+  }
+
+  /** Returns a permuted array using a simple Fisher-Yates shuffle algorithm. */
+  protected shuffleArray(a: number[]): void {
+    for (let i = a.length - 1; i >= 0; i--) {
+      const toSwap = this.getRandomInt(i + 1);
+      const temp = a[toSwap];
+      a[toSwap] = a[i];
+      a[i] = temp;
+    }
+  }
+
+  /**
+   * Shuffles the order of workgroup ids, so that threads operating on the same memory location are not always in
+   * consecutive workgroups.
+   */
+  protected async setShuffledWorkgroups(numWorkgroups: number): Promise<void> {
+    await this.buffers.shuffledWorkgroups.srcBuf.mapAsync(GPUMapMode.WRITE);
+    const shuffledWorkgroupsBuffer = this.buffers.shuffledWorkgroups.srcBuf.getMappedRange();
+    const shuffledWorkgroupsArray = new Uint32Array(shuffledWorkgroupsBuffer);
+    for (let i = 0; i < numWorkgroups; i++) {
+      shuffledWorkgroupsArray[i] = i;
+    }
+    if (this.getRandomInt(100) < this.params.shufflePct) {
+      for (let i = numWorkgroups - 1; i > 0; i--) {
+        const x = this.getRandomInt(i + 1);
+        const temp = shuffledWorkgroupsArray[i];
+        shuffledWorkgroupsArray[i] = shuffledWorkgroupsArray[x];
+        shuffledWorkgroupsArray[x] = temp;
+      }
+    }
+    this.buffers.shuffledWorkgroups.srcBuf.unmap();
+  }
+
+  /** Sets the memory locations that stressing workgroups will access. Uses either a chunking or round robin assignment strategy. */
+  protected async setScratchLocations(numWorkgroups: number): Promise<void> {
+    await this.buffers.scratchMemoryLocations.srcBuf.mapAsync(GPUMapMode.WRITE);
+    const scratchLocationsArrayBuffer = this.buffers.scratchMemoryLocations.srcBuf.getMappedRange();
+    const scratchLocationsArray = new Uint32Array(scratchLocationsArrayBuffer);
+    const scratchNumRegions = this.params.scratchMemorySize / this.params.stressLineSize;
+    const scratchRegions = [...Array(scratchNumRegions).keys()];
+    this.shuffleArray(scratchRegions);
+    for (let i = 0; i < this.params.stressTargetLines; i++) {
+      const region = scratchRegions[i];
+      const locInRegion = this.getRandomInt(this.params.stressLineSize);
+      if (this.getRandomInt(100) < this.params.stressStrategyBalancePct) {
+        // In the round-robin case, the current scratch location is striped across all workgroups.
+        for (let j = i; j < numWorkgroups; j += this.params.stressTargetLines) {
+          scratchLocationsArray[j] = region * this.params.stressLineSize + locInRegion;
+        }
+      } else {
+        // In the chunking case, the current scratch location is assigned to a block of workgroups. The final scratch
+        // location may be assigned to more workgroups, if the number of scratch locations does not cleanly divide the
+        // number of workgroups.
+        const workgroupsPerLocation = numWorkgroups / this.params.stressTargetLines;
+        for (let j = 0; j < workgroupsPerLocation; j++) {
+          scratchLocationsArray[i * workgroupsPerLocation + j] =
+            region * this.params.stressLineSize + locInRegion;
+        }
+        if (
+          i === this.params.stressTargetLines - 1 &&
+          numWorkgroups % this.params.stressTargetLines !== 0
+        ) {
+          for (let j = 0; j < numWorkgroups % this.params.stressTargetLines; j++) {
+            scratchLocationsArray[numWorkgroups - j - 1] =
+              region * this.params.stressLineSize + locInRegion;
+          }
+        }
+      }
+    }
+    this.buffers.scratchMemoryLocations.srcBuf.unmap();
+  }
+
+  /** Sets the parameters that are used by the shader to calculate memory locations and perform stress. */
+  protected async setStressParams(): Promise<void> {
+    await this.buffers.stressParams.srcBuf.mapAsync(GPUMapMode.WRITE);
+    const stressParamsArrayBuffer = this.buffers.stressParams.srcBuf.getMappedRange();
+    const stressParamsArray = new Uint32Array(stressParamsArrayBuffer);
+    if (this.getRandomInt(100) < this.params.barrierPct) {
+      stressParamsArray[barrierParamIndex] = 1;
+    } else {
+      stressParamsArray[barrierParamIndex] = 0;
+    }
+    if (this.getRandomInt(100) < this.params.memStressPct) {
+      stressParamsArray[memStressIndex] = 1;
+    } else {
+      stressParamsArray[memStressIndex] = 0;
+    }
+    stressParamsArray[memStressIterationsIndex] = this.params.memStressIterations;
+    const memStressStoreFirst = this.getRandomInt(100) < this.params.memStressStoreFirstPct;
+    const memStressStoreSecond = this.getRandomInt(100) < this.params.memStressStoreSecondPct;
+    let memStressPattern;
+    if (memStressStoreFirst && memStressStoreSecond) {
+      memStressPattern = 0;
+    } else if (memStressStoreFirst && !memStressStoreSecond) {
+      memStressPattern = 1;
+    } else if (!memStressStoreFirst && memStressStoreSecond) {
+      memStressPattern = 2;
+    } else {
+      memStressPattern = 3;
+    }
+    stressParamsArray[memStressPatternIndex] = memStressPattern;
+    if (this.getRandomInt(100) < this.params.preStressPct) {
+      stressParamsArray[preStressIndex] = 1;
+    } else {
+      stressParamsArray[preStressIndex] = 0;
+    }
+    stressParamsArray[preStressIterationsIndex] = this.params.preStressIterations;
+    const preStressStoreFirst = this.getRandomInt(100) < this.params.preStressStoreFirstPct;
+    const preStressStoreSecond = this.getRandomInt(100) < this.params.preStressStoreSecondPct;
+    let preStressPattern;
+    if (preStressStoreFirst && preStressStoreSecond) {
+      preStressPattern = 0;
+    } else if (preStressStoreFirst && !preStressStoreSecond) {
+      preStressPattern = 1;
+    } else if (!preStressStoreFirst && preStressStoreSecond) {
+      preStressPattern = 2;
+    } else {
+      preStressPattern = 3;
+    }
+    stressParamsArray[preStressPatternIndex] = preStressPattern;
+    stressParamsArray[permuteFirstIndex] = this.params.permuteFirst;
+    stressParamsArray[permuteSecondIndex] = this.params.permuteSecond;
+    stressParamsArray[testingWorkgroupsIndex] = this.params.testingWorkgroups;
+    stressParamsArray[memStrideIndex] = this.params.memStride;
+    if (this.params.aliasedMemory) {
+      stressParamsArray[memLocationOffsetIndex] = 0;
+    } else {
+      stressParamsArray[memLocationOffsetIndex] = this.params.memStride;
+    }
+    this.buffers.stressParams.srcBuf.unmap();
+  }
+}
+
+/** Defines common data structures used in memory model test shaders. */
+const shaderMemStructures = `
+  struct Memory {
+    value: array<u32>
+  };
+
+  struct AtomicMemory {
+    value: array<atomic<u32>>
+  };
+
+  struct ReadResult {
+    r0: atomic<u32>,
+    r1: atomic<u32>,
+  };
+
+  struct ReadResults {
+    value: array<ReadResult>
+  };
+
+  struct StressParamsMemory {
+    do_barrier: u32,
+    mem_stress: u32,
+    mem_stress_iterations: u32,
+    mem_stress_pattern: u32,
+    pre_stress: u32,
+    pre_stress_iterations: u32,
+    pre_stress_pattern: u32,
+    permute_first: u32,
+    permute_second: u32,
+    testing_workgroups: u32,
+    mem_stride: u32,
+    location_offset: u32,
+  };
+`;
+
+/**
+ * Structure to hold the counts of occurrences of the possible behaviors of a two-thread, four-instruction test.
+ * "seq0" means the first invocation's instructions are observed to have occurred before the second invocation's instructions.
+ * "seq1" means the second invocation's instructions are observed to have occurred before the first invocation's instructions.
+ * "interleaved" means there was an observation of some interleaving of instructions between the two invocations.
+ * "weak" means there was an observation of some ordering of instructions that is inconsistent with the WebGPU memory model.
+ */
+const fourBehaviorTestResultStructure = `
+  struct TestResults {
+    seq0: atomic<u32>,
+    seq1: atomic<u32>,
+    interleaved: atomic<u32>,
+    weak: atomic<u32>,
+  };
+`;
+
+/**
+ * Defines the possible behaviors of a two instruction test. Used to test the behavior of non-atomic memory with barriers and
+ * one-thread coherence tests.
+ * "seq" means that the expected, sequential behavior occurred.
+ * "weak" means that an unexpected, inconsistent behavior occurred.
+ */
+const twoBehaviorTestResultStructure = `
+  struct TestResults {
+    seq: atomic<u32>,
+    weak: atomic<u32>,
+  };
+`;
+
+/** Common bindings used in the test shader phase of a test. */
+const commonTestShaderBindings = `
+  @group(0) @binding(1) var<storage, read_write> results : ReadResults;
+  @group(0) @binding(2) var<storage, read> shuffled_workgroups : Memory;
+  @group(0) @binding(3) var<storage, read_write> barrier : AtomicMemory;
+  @group(0) @binding(4) var<storage, read_write> scratchpad : Memory;
+  @group(0) @binding(5) var<storage, read_write> scratch_locations : Memory;
+  @group(0) @binding(6) var<uniform> stress_params : StressParamsMemory;
+`;
+
+/** The combined bindings for a test on atomic memory. */
+const atomicTestShaderBindings = [
+  `
+  @group(0) @binding(0) var<storage, read_write> test_locations : AtomicMemory;
+`,
+  commonTestShaderBindings,
+].join('\n');
+
+/** The combined bindings for a test on non-atomic memory. */
+const nonAtomicTestShaderBindings = [
+  `
+  @group(0) @binding(0) var<storage, read_write> test_locations : Memory;
+`,
+  commonTestShaderBindings,
+].join('\n');
+
+/** Bindings used in the result aggregation phase of the test. */
+const resultShaderBindings = `
+  @group(0) @binding(0) var<storage, read_write> test_locations : AtomicMemory;
+  @group(0) @binding(1) var<storage, read_write> read_results : ReadResults;
+  @group(0) @binding(2) var<storage, read_write> test_results : TestResults;
+  @group(0) @binding(3) var<uniform> stress_params : StressParamsMemory;
+`;
+
+/**
+ * For tests that operate on workgroup memory, include this definition. 3584 memory locations is
+ * large enough to accommodate the maximum memory size needed per workgroup for testing, which is
+ * 256 invocations per workgroup x 2 memory locations x 7 (memStride, or max stride between successive memory locations).
+ * Should change to a pipeline overridable constant when possible.
+ */
+const atomicWorkgroupMemory = `
+  var<workgroup> wg_test_locations: array<atomic<u32>, 3584>;
+`;
+
+/**
+ * For tests that operate on non-atomic workgroup memory, include this definition. 3584 memory locations
+ * is large enough to accommodate the maximum memory size needed per workgroup for testing.
+ */
+const nonAtomicWorkgroupMemory = `
+  var<workgroup> wg_test_locations: array<u32, 3584>;
+`;
+
+/**
+ * Functions used to calculate memory locations for each invocation, for both testing and result aggregation.
+ * The permute function ensures a random permutation based on multiplying and modding by coprime numbers. The stripe
+ * workgroup function ensures that invocations coordinating on a test are spread out across different workgroups.
+ */
+const memoryLocationFunctions = `
+  fn permute_id(id: u32, factor: u32, mask: u32) -> u32 {
+    return (id * factor) % mask;
+  }
+
+  fn stripe_workgroup(workgroup_id: u32, local_id: u32) -> u32 {
+    return (workgroup_id + 1u + local_id % (stress_params.testing_workgroups - 1u)) % stress_params.testing_workgroups;
+  }
+`;
+
+/** Functions that help add stress to the test. */
+const testShaderFunctions = `
+  //Force the invocations in the workgroup to wait for each other, but without the general memory ordering
+  // effects of a control barrier. The barrier spins until either all invocations have incremented the atomic
+  // variable or 1024 loops have occurred. 1024 was chosen because it gives more time for invocations to enter
+  // the barrier but does not overly reduce testing throughput.
+  fn spin(limit: u32) {
+    var i : u32 = 0u;
+    var bar_val : u32 = atomicAdd(&barrier.value[0], 1u);
+    loop {
+      if (i == 1024u || bar_val >= limit) {
+        break;
+      }
+      bar_val = atomicAdd(&barrier.value[0], 0u);
+      i = i + 1u;
+    }
+  }
+
+  // Perform iterations of stress, depending on the specified pattern. Pattern 0 is store-store, pattern 1 is store-load,
+  // pattern 2 is load-store, and pattern 3 is load-load. The extra if condition (if tmpX > 100000u), is used to avoid
+  // the compiler optimizing out unused loads, where 100,000 is larger than the maximum number of stress iterations used
+  // in any test.
+  fn do_stress(iterations: u32, pattern: u32, workgroup_id: u32) {
+    let addr = scratch_locations.value[workgroup_id];
+    switch(pattern) {
+      case 0u: {
+        for(var i: u32 = 0u; i < iterations; i = i + 1u) {
+          scratchpad.value[addr] = i;
+          scratchpad.value[addr] = i + 1u;
+        }
+      }
+      case 1u: {
+        for(var i: u32 = 0u; i < iterations; i = i + 1u) {
+          scratchpad.value[addr] = i;
+          let tmp1: u32 = scratchpad.value[addr];
+          if (tmp1 > 100000u) {
+            scratchpad.value[addr] = i;
+            break;
+          }
+        }
+      }
+      case 2u: {
+        for(var i: u32 = 0u; i < iterations; i = i + 1u) {
+          let tmp1: u32 = scratchpad.value[addr];
+          if (tmp1 > 100000u) {
+            scratchpad.value[addr] = i;
+            break;
+          }
+          scratchpad.value[addr] = i;
+        }
+      }
+      case 3u: {
+        for(var i: u32 = 0u; i < iterations; i = i + 1u) {
+          let tmp1: u32 = scratchpad.value[addr];
+          if (tmp1 > 100000u) {
+            scratchpad.value[addr] = i;
+            break;
+          }
+          let tmp2: u32 = scratchpad.value[addr];
+          if (tmp2 > 100000u) {
+            scratchpad.value[addr] = i;
+            break;
+          }
+        }
+      }
+      default: {
+      }
+    }
+  }
+`;
+
+/**
+ * Entry point to both test and result shaders. One-dimensional workgroup size is hardcoded to 256, until
+ * pipeline overridable constants are supported.
+ */
+const shaderEntryPoint = `
+  // Change to pipeline overridable constant when possible.
+  const workgroupXSize = 256u;
+  @compute @workgroup_size(workgroupXSize) fn main(
+    @builtin(local_invocation_id) local_invocation_id : vec3<u32>,
+    @builtin(workgroup_id) workgroup_id : vec3<u32>) {
+`;
+
+/** All test shaders first calculate the shuffled workgroup. */
+const testShaderCommonHeader = `
+    let shuffled_workgroup = shuffled_workgroups.value[workgroup_id[0]];
+    if (shuffled_workgroup < stress_params.testing_workgroups) {
+`;
+
+/**
+ * All test shaders must calculate addresses for memory locations used in the test. Not all these addresses are
+ * used in every test, but no test uses more than these addresses.
+ */
+const testShaderCommonCalculations = `
+  let x_0 = id_0 * stress_params.mem_stride * 2u;
+  let y_0 = permute_id(id_0, stress_params.permute_second, total_ids) * stress_params.mem_stride * 2u + stress_params.location_offset;
+  let x_1 = id_1 * stress_params.mem_stride * 2u;
+  let y_1 = permute_id(id_1, stress_params.permute_second, total_ids) * stress_params.mem_stride * 2u + stress_params.location_offset;
+  if (stress_params.pre_stress == 1u) {
+    do_stress(stress_params.pre_stress_iterations, stress_params.pre_stress_pattern, shuffled_workgroup);
+  }
+`;
+
+/**
+ * An inter-workgroup test calculates two sets of memory locations that are guaranteed to be in separate workgroups.
+ * If the bounded spin-loop barrier is called, it attempts to wait for all invocations in all workgroups.
+ */
+const interWorkgroupTestShaderCode = [
+  `
+  let total_ids = workgroupXSize * stress_params.testing_workgroups;
+  let id_0 = shuffled_workgroup * workgroupXSize + local_invocation_id[0];
+  let new_workgroup = stripe_workgroup(shuffled_workgroup, local_invocation_id[0]);
+  let id_1 = new_workgroup * workgroupXSize + permute_id(local_invocation_id[0], stress_params.permute_first, workgroupXSize);
+`,
+  testShaderCommonCalculations,
+  `
+  if (stress_params.do_barrier == 1u) {
+    spin(workgroupXSize * stress_params.testing_workgroups);
+  }
+`,
+].join('\n');
+
+/**
+ * An intra-workgroup test calculates two set of memory locations that are guaranteed to be in the same workgroup.
+ * If the bounded spin-loop barrier is called, it attempts to wait for all invocations in the same workgroup.
+ */
+const intraWorkgroupTestShaderCode = [
+  `
+  let total_ids = workgroupXSize;
+  let id_0 = local_invocation_id[0];
+  let id_1 = permute_id(local_invocation_id[0], stress_params.permute_first, workgroupXSize);
+`,
+  testShaderCommonCalculations,
+  `
+  if (stress_params.do_barrier == 1u) {
+    spin(workgroupXSize);
+  }
+`,
+].join('\n');
+
+/**
+ * Tests that operate on storage memory and communicate with invocations in the same workgroup must offset their locations
+ * relative to global memory.
+ */
+const storageIntraWorkgroupTestShaderCode = `
+  let total_ids = workgroupXSize;
+  let id_0 = local_invocation_id[0];
+  let id_1 = permute_id(local_invocation_id[0], stress_params.permute_first, workgroupXSize);
+  let x_0 = (shuffled_workgroup * workgroupXSize + id_0) * stress_params.mem_stride * 2u;
+  let y_0 = (shuffled_workgroup * workgroupXSize + permute_id(id_0, stress_params.permute_second, total_ids)) * stress_params.mem_stride * 2u + stress_params.location_offset;
+  let x_1 = (shuffled_workgroup * workgroupXSize + id_1) * stress_params.mem_stride * 2u;
+  let y_1 = (shuffled_workgroup * workgroupXSize + permute_id(id_1, stress_params.permute_second, total_ids)) * stress_params.mem_stride * 2u + stress_params.location_offset;
+  if (stress_params.pre_stress == 1u) {
+    do_stress(stress_params.pre_stress_iterations, stress_params.pre_stress_pattern, shuffled_workgroup);
+  }
+  if (stress_params.do_barrier == 1u) {
+    spin(workgroupXSize);
+  }
+`;
+
+/** All test shaders may perform stress with non-testing threads. */
+const testShaderCommonFooter = `
+    } else if (stress_params.mem_stress == 1u) {
+      do_stress(stress_params.mem_stress_iterations, stress_params.mem_stress_pattern, shuffled_workgroup);
+    }
+  }
+`;
+
+/**
+ * All result shaders must calculate memory locations used in the test. Not all these locations are
+ * used in every result shader, but no result shader uses more than these locations.
+ */
+const resultShaderCommonCalculations = `
+  let id_0 = workgroup_id[0] * workgroupXSize + local_invocation_id[0];
+  let x_0 = id_0 * stress_params.mem_stride * 2u;
+  let mem_x_0 = atomicLoad(&test_locations.value[x_0]);
+  let r0 = atomicLoad(&read_results.value[id_0].r0);
+  let r1 = atomicLoad(&read_results.value[id_0].r1);
+`;
+
+/** Common result shader code for an inter-workgroup test. */
+const interWorkgroupResultShaderCode = [
+  resultShaderCommonCalculations,
+  `
+  let total_ids = workgroupXSize * stress_params.testing_workgroups;
+  let y_0 = permute_id(id_0, stress_params.permute_second, total_ids) * stress_params.mem_stride * 2u + stress_params.location_offset;
+  let mem_y_0 = atomicLoad(&test_locations.value[y_0]);
+`,
+].join('\n');
+
+/** Common result shader code for an intra-workgroup test. */
+const intraWorkgroupResultShaderCode = [
+  resultShaderCommonCalculations,
+  `
+  let total_ids = workgroupXSize;
+  let y_0 = (workgroup_id[0] * workgroupXSize + permute_id(local_invocation_id[0], stress_params.permute_second, total_ids)) * stress_params.mem_stride * 2u + stress_params.location_offset;
+  let mem_y_0 = atomicLoad(&test_locations.value[y_0]);
+`,
+].join('\n');
+
+/** Ending bracket for result shaders. */
+const resultShaderCommonFooter = `
+}
+`;
+
+/** The common shader code for test shaders that perform atomic storage class memory litmus tests. */
+const storageMemoryAtomicTestShaderCode = [
+  shaderMemStructures,
+  atomicTestShaderBindings,
+  memoryLocationFunctions,
+  testShaderFunctions,
+  shaderEntryPoint,
+  testShaderCommonHeader,
+].join('\n');
+
+/** The common shader code for test shaders that perform non-atomic storage class memory litmus tests. */
+const storageMemoryNonAtomicTestShaderCode = [
+  shaderMemStructures,
+  nonAtomicTestShaderBindings,
+  memoryLocationFunctions,
+  testShaderFunctions,
+  shaderEntryPoint,
+  testShaderCommonHeader,
+].join('\n');
+
+/** The common shader code for test shaders that perform atomic workgroup class memory litmus tests. */
+const workgroupMemoryAtomicTestShaderCode = [
+  shaderMemStructures,
+  atomicTestShaderBindings,
+  atomicWorkgroupMemory,
+  memoryLocationFunctions,
+  testShaderFunctions,
+  shaderEntryPoint,
+  testShaderCommonHeader,
+].join('\n');
+
+/** The common shader code for test shaders that perform non-atomic workgroup class memory litmus tests. */
+const workgroupMemoryNonAtomicTestShaderCode = [
+  shaderMemStructures,
+  nonAtomicTestShaderBindings,
+  nonAtomicWorkgroupMemory,
+  memoryLocationFunctions,
+  testShaderFunctions,
+  shaderEntryPoint,
+  testShaderCommonHeader,
+].join('\n');
+
+/** The common shader code for all result shaders. */
+const resultShaderCommonCode = [
+  shaderMemStructures,
+  resultShaderBindings,
+  memoryLocationFunctions,
+  shaderEntryPoint,
+].join('\n');
+
+/**
+ * Defines the types of possible memory a test is operating on. Used as part of the process of building shader code from
+ * its composite parts.
+ */
+export enum MemoryType {
+  /** Atomic memory in the storage address space. */
+  AtomicStorageClass = 'atomic_storage',
+  /** Non-atomic memory in the storage address space. */
+  NonAtomicStorageClass = 'non_atomic_storage',
+  /** Atomic memory in the workgroup address space. */
+  AtomicWorkgroupClass = 'atomic_workgroup',
+  /** Non-atomic memory in the workgroup address space. */
+  NonAtomicWorkgroupClass = 'non_atomic_workgroup',
+}
+
+/**
+ * Defines the relative positions of two invocations coordinating on a test. Used as part of the process of building shader
+ * code from its composite parts.
+ */
+export enum TestType {
+  /** A test consists of two invocations in different workgroups. */
+  InterWorkgroup = 'inter_workgroup',
+  /** A test consists of two invocations in the same workgroup. */
+  IntraWorkgroup = 'intra_workgroup',
+}
+
+/** Defines the number of behaviors a test may have. */
+export enum ResultType {
+  TwoBehavior,
+  FourBehavior,
+}
+
+/**
+ * Given test code that performs the actual sequence of loads and stores, as well as a memory type and test type, returns
+ * a complete test shader.
+ */
+export function buildTestShader(
+  testCode: string,
+  memoryType: MemoryType,
+  testType: TestType
+): string {
+  let memoryTypeCode;
+  let isStorageAS = false;
+  switch (memoryType) {
+    case MemoryType.AtomicStorageClass:
+      memoryTypeCode = storageMemoryAtomicTestShaderCode;
+      isStorageAS = true;
+      break;
+    case MemoryType.NonAtomicStorageClass:
+      memoryTypeCode = storageMemoryNonAtomicTestShaderCode;
+      isStorageAS = true;
+      break;
+    case MemoryType.AtomicWorkgroupClass:
+      memoryTypeCode = workgroupMemoryAtomicTestShaderCode;
+      break;
+    case MemoryType.NonAtomicWorkgroupClass:
+      memoryTypeCode = workgroupMemoryNonAtomicTestShaderCode;
+  }
+  let testTypeCode;
+  switch (testType) {
+    case TestType.InterWorkgroup:
+      testTypeCode = interWorkgroupTestShaderCode;
+      break;
+    case TestType.IntraWorkgroup:
+      if (isStorageAS) {
+        testTypeCode = storageIntraWorkgroupTestShaderCode;
+      } else {
+        testTypeCode = intraWorkgroupTestShaderCode;
+      }
+  }
+  return [memoryTypeCode, testTypeCode, testCode, testShaderCommonFooter].join('\n');
+}
+
+/**
+ * Given result code that aggregates the possible behaviors of a test across all instances, as well as a test type and
+ * number of behaviors, returns a complete result shader.
+ */
+export function buildResultShader(
+  resultCode: string,
+  testType: TestType,
+  resultType: ResultType
+): string {
+  let resultStructure;
+  switch (resultType) {
+    case ResultType.TwoBehavior:
+      resultStructure = twoBehaviorTestResultStructure;
+      break;
+    case ResultType.FourBehavior:
+      resultStructure = fourBehaviorTestResultStructure;
+  }
+  let testTypeCode;
+  switch (testType) {
+    case TestType.InterWorkgroup:
+      testTypeCode = interWorkgroupResultShaderCode;
+      break;
+    case TestType.IntraWorkgroup:
+      testTypeCode = intraWorkgroupResultShaderCode;
+  }
+  return [
+    resultStructure,
+    resultShaderCommonCode,
+    testTypeCode,
+    resultCode,
+    resultShaderCommonFooter,
+  ].join('\n');
+}