/* Any copyright is dedicated to the Public Domain.
   http://creativecommons.org/publicdomain/zero/1.0/ */

/// <reference path="../../../../../toolkit/components/translations/tests/browser/shared-head.js" />

// Load the shared-head file first.
Services.scriptloader.loadSubScript(
  "chrome://mochitests/content/browser/toolkit/components/ml/tests/browser/shared-head.js",
  this
);

/**
 * @type {import("../../actors/MLEngineParent.sys.mjs")}
 */
const { MLEngineParent, MLEngine } = ChromeUtils.importESModule(
  "resource://gre/actors/MLEngineParent.sys.mjs"
);

const { ModelHub, TestIndexedDBCache } = ChromeUtils.importESModule(
  "chrome://global/content/ml/ModelHub.sys.mjs"
);

const { getInferenceProcessInfo } = ChromeUtils.importESModule(
  "chrome://global/content/ml/Utils.sys.mjs"
);

const MS_PER_SEC = 1000;
const IndexedDBCache = TestIndexedDBCache;

const {
  createEngine,
  PipelineOptions,
  QuantizationLevel,
  ExecutionPriority,
  InferenceDevice,
  LogLevel,
} = ChromeUtils.importESModule(
  "chrome://global/content/ml/EngineProcess.sys.mjs"
);

// This test suite shares some utility functions with translations as they work in a very
// similar fashion. Eventually, the plan is to unify these two components.
Services.scriptloader.loadSubScript(
  "chrome://mochitests/content/browser/toolkit/components/translations/tests/browser/shared-head.js",
  this
);

/**
 * Sets up the stage for a test.
 *
 * @param {object} [options]
 * @param {boolean} [options.disabled] - Set to true to disable the ML engine pref.
 * @param {Array} [options.prefs] - Additional prefs to push for the test.
 * @param {?Array<object>} [options.records] - Remote Settings records to use instead of the defaults.
 * @param {string} [options.backend] - The inference backend used for the WASM runtime record.
 * @returns {Promise<{remoteClients: object, cleanup: Function}>}
 */
async function setup({
  disabled = false,
  prefs = [],
  records = null,
  backend,
} = {}) {
  const { removeMocks, remoteClients } = await createAndMockMLRemoteSettings({
    autoDownloadFromRemoteSettings: false,
    records,
    backend,
  });

  await SpecialPowers.pushPrefEnv({
    set: [
      // Enabled by default.
      ["browser.ml.enable", !disabled],
      ["browser.ml.logLevel", "All"],
      ["browser.ml.modelCacheTimeout", 1000],
      ["browser.ml.checkForMemory", false],
      ["browser.ml.queueWaitTimeout", 2],
      ["javascript.options.wasm_lazy_tiering", true],
      ...prefs,
    ],
  });

  return {
    remoteClients,
    async cleanup() {
      await removeMocks();
      await waitForCondition(
        () => EngineProcess.areAllEnginesTerminated(),
        "Waiting for all of the engines to be terminated.",
        100,
        200
      );
      await SpecialPowers.popPrefEnv();
    },
  };
}
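
// Illustrative usage only (not used by the helpers below): a test would
// typically call setup(), exercise the engine, then run the returned cleanup:
//   const { cleanup, remoteClients } = await setup();
//   // ... create an engine and run inference ...
//   await cleanup();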
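
/**
 * Builds the default Remote Settings records describing the inference WASM
 * runtime file for the given backend (falling back to the default backend).
 *
 * @param {string} [backend] - The inference backend name.
 * @returns {Array<{name: string, version: string}>}
 */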
function getDefaultWasmRecords(backend) {
  return [
    {
      name: MLEngineParent.WASM_FILENAME[
        backend || MLEngineParent.DEFAULT_BACKEND
      ],
      version:
        MLEngineParent.WASM_MAJOR_VERSION[
          backend || MLEngineParent.DEFAULT_BACKEND
        ] + ".0",
    },
  ];
}
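
/**
 * Creates mocked Remote Settings clients for the WASM runtime, the inference
 * options, and the allow/deny list, and installs them on MLEngineParent.
 *
 * @param {object} [options]
 * @param {boolean} [options.autoDownloadFromRemoteSettings] - Whether attachments download automatically.
 * @param {?Array<object>} [options.records] - Inference-options records to use instead of the default moz-echo record.
 * @param {string} [options.backend] - The inference backend for the WASM runtime record.
 * @returns {Promise<{removeMocks: Function, remoteClients: object}>}
 */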
async function createAndMockMLRemoteSettings({
  autoDownloadFromRemoteSettings = false,
  records = null,
  backend,
} = {}) {
  const wasmRecords = getDefaultWasmRecords(backend).map(
    ({ name, version }) => ({
      id: crypto.randomUUID(),
      name,
      version,
      last_modified: Date.now(),
      schema: Date.now(),
    })
  );
  const runtime = await createRemoteClient({
    collectionName: "test-translation-wasm",
    records: wasmRecords,
    attachmentMock: true,
    autoDownloadFromRemoteSettings,
  });

  const options = await createRemoteClient({
    records: records || [
      {
        taskName: "moz-echo",
        modelId: "mozilla/distilvit",
        processorId: "mozilla/distilvit",
        tokenizerId: "mozilla/distilvit",
        modelRevision: "main",
        processorRevision: "main",
        tokenizerRevision: "main",
        dtype: "q8",
        id: "74a71cfd-1734-44e6-85c0-69cf3e874138",
      },
    ],
    collectionName: "test-ml-inference-options",
  });

  const allowDeny = await createRemoteClient({
    records: [
      {
        filter: "ALLOW",
        urlPrefix: "https://",
        id: "74a71cfd-1734-44e6-85c0-69cf3e874138",
      },
    ],
    collectionName: "test-ml-allow-deny-list",
  });

  const remoteClients = {
    "ml-onnx-runtime": runtime,
    "ml-inference-options": options,
    "ml-model-allow-deny-list": allowDeny,
  };

  MLEngineParent.mockRemoteSettings({
    "ml-onnx-runtime": runtime.client,
    "ml-inference-options": options,
    "ml-model-allow-deny-list": allowDeny,
  });

  return {
    async removeMocks() {
      await runtime.client.attachments.deleteAll();
      await runtime.client.db.clear();
      await options.db.clear();
      await allowDeny.db.clear();
      MLEngineParent.removeMocks();
    },
    remoteClients,
  };
}

/**
 * Creates a local RemoteSettingsClient for use within tests.
 *
 * @returns {RemoteSettings|AttachmentMock}
 */
async function createRemoteClient({
  records,
  collectionName,
  attachmentMock = false,
  autoDownloadFromRemoteSettings = false,
}) {
  const { RemoteSettings } = ChromeUtils.importESModule(
    "resource://services-settings/remote-settings.sys.mjs"
  );
  const client = RemoteSettings(`${collectionName}-${_remoteSettingsMockId++}`);
  await client.db.clear();
  await client.db.importChanges({}, Date.now(), records);

  if (attachmentMock) {
    return createAttachmentMock(
      client,
      collectionName,
      autoDownloadFromRemoteSettings
    );
  }
  return client;
}

/*
 * Perftest related
 */
const ONE_MIB = 1024 * 1024;
const INIT_START = "initializationStart";
const INIT_END = "initializationEnd";
const RUN_START = "runStart";
const RUN_END = "runEnd";
const PIPELINE_READY_START = "ensurePipelineIsReadyStart";
const PIPELINE_READY_END = "ensurePipelineIsReadyEnd";
const PIPELINE_READY_LATENCY = "pipeline-ready-latency";
const INITIALIZATION_LATENCY = "initialization-latency";
const MODEL_RUN_LATENCY = "model-run-latency";
const TOTAL_MEMORY_USAGE = "total-memory-usage";
const COLD_START_PREFIX = "cold-start-";
const PEAK_MEMORY_USAGE = "peak-memory-usage";
const ITERATIONS = 10;
const WHEN = "when";
const MEMORY = "memory";
const E2E_INIT_LATENCY = "e2e-init-latency";
const FIRST_TOKEN_LATENCY = "1st-token-latency";
const DECODING_LATENCY = "decoding-latency";
// Token speeds are appropriate for comparing the speed of the same model.
const DECODING_TOKEN_SPEED = "decoding-tokenSpeed";
const PROMPT_TOKEN_SPEED = "prompt-tokenSpeed";
// Character speeds are appropriate for comparing the speed of two different models.
const DECODING_CHARACTERS_SPEED = "decoding-charactersSpeed";
const PROMPT_CHARACTERS_SPEED = "prompt-charactersSpeed";

const formatNumber = new Intl.NumberFormat("en-US", {
  maximumSignificantDigits: 4,
}).format;
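
/**
 * Returns the median of an array of numbers (the mean of the two middle
 * values when the length is even), e.g. median([3, 1, 2]) === 2 and
 * median([1, 2, 3, 4]) === 2.5.
 */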
function median(arr) {
  arr = [...arr].sort((a, b) => a - b);
  const mid = Math.floor(arr.length / 2);

  if (arr.length % 2) {
    return arr[mid];
  }

  return (arr[mid - 1] + arr[mid]) / 2;
}

// Formats an array of numbers into right-aligned, space-separated columns.
function stringify(arr) {
  function pad(str) {
    str = str.padStart(7, " ");
    if (str[0] != " ") {
      str = " " + str;
    }
    return str;
  }

  return arr.reduce((acc, elem) => acc + pad(formatNumber(elem)), "");
}
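
/**
 * Prints a journal of perf metrics (metric name -> array of values) to stdout
 * and logs a `perfMetrics` JSON entry with the median of each metric so it can
 * be picked up for perfherder reporting.
 */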
function reportMetrics(journal) {
  let text = "\nResults (ms)\n";
  const names = Object.keys(journal);
  const prefixLen = 1 + Math.max(...names.map(str => str.length));
  for (const name in journal) {
    const med = median(journal[name]);
    text += (name + ":").padEnd(prefixLen, " ") + stringify(journal[name]);
    text += " median " + formatNumber(med) + "\n";
  }
  const reportedMetrics = [];
  for (const [name, values] of Object.entries(journal)) {
    reportedMetrics.push({
      name,
      values,
      value: median(values),
    });
  }
  dump(text);
  info(`perfMetrics | ${JSON.stringify(reportedMetrics)}`);
}

/**
 * Fetches the latest metric entry with the specified name and retrieves its value for the given key.
 * If multiple metrics share the same name, the function returns the key from the most recent one.
 *
 * @param {Array<object>} metrics - The array of metric objects to search through.
 * @param {string} name - The name of the metric to find.
 * @param {string} key - The key within the metric object whose value should be returned.
 * @returns {*} - The value of the specified key in the latest metric with the given name, or undefined if no matching metric is found.
 */
function fetchMLMetric(metrics, name, key) {
  const matchingMetrics = metrics.filter(metric => metric.name === name);
  if (matchingMetrics.length === 0) {
    // Return undefined if no match found.
    return undefined;
  }
  const latestMetric = matchingMetrics[matchingMetrics.length - 1];
  return latestMetric[key];
}
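
/**
 * Computes the pipeline-ready, initialization, and model-run latencies from
 * the raw engine metrics, prefixing each metric name with "cold-start-" when
 * isFirstRun is true.
 *
 * @param {Array<object>} metrics - Raw metrics reported by the engine.
 * @param {boolean} isFirstRun - Whether this is the cold-start iteration.
 * @returns {object} A map of latency metric names to values in milliseconds.
 */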
function fetchLatencyMetrics(metrics, isFirstRun) {
  const pipelineLatency =
    fetchMLMetric(metrics, PIPELINE_READY_END, WHEN) -
    fetchMLMetric(metrics, PIPELINE_READY_START, WHEN);
  const initLatency =
    fetchMLMetric(metrics, INIT_END, WHEN) -
    fetchMLMetric(metrics, INIT_START, WHEN);
  const runLatency =
    fetchMLMetric(metrics, RUN_END, WHEN) -
    fetchMLMetric(metrics, RUN_START, WHEN);
  return {
    [`${isFirstRun ? COLD_START_PREFIX : ""}${PIPELINE_READY_LATENCY}`]:
      pipelineLatency,
    [`${isFirstRun ? COLD_START_PREFIX : ""}${INITIALIZATION_LATENCY}`]:
      initLatency,
    [`${isFirstRun ? COLD_START_PREFIX : ""}${MODEL_RUN_LATENCY}`]: runLatency,
  };
}

function fetchMetrics(metrics, isFirstRun) {
  return {
    ...fetchLatencyMetrics(metrics, isFirstRun),
  };
}
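
/**
 * Creates the ML engine for the given pipeline options, pointing the model hub
 * at the local MOZ_MODELS_HUB location, and measures the end-to-end
 * initialization time.
 *
 * @param {object} pipelineOptions - Options used to build the PipelineOptions.
 * @param {?Array} prefs - Extra prefs to set for the run.
 * @returns {Promise<{cleanup: Function, engine: object, e2eInitTime: number}>}
 */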
async function initializeEngine(pipelineOptions, prefs = null) {
  const modelDirectory = normalizePathForOS(
    `${Services.env.get("MOZ_FETCHES_DIR")}/onnx-models`
  );
  info(`Model Directory: ${modelDirectory}`);

  const modelHubRootUrl = Services.env.get("MOZ_MODELS_HUB");
  if (!modelHubRootUrl) {
    throw new Error(
      "MOZ_MODELS_HUB is not set, you need to run with --hooks toolkit/components/ml/tests/tools/hook_local_hub.py"
    );
  }

  info(`ModelHubRootUrl: ${modelHubRootUrl}`);
  let browserPrefs = [["browser.ml.modelHubRootUrl", modelHubRootUrl]];
  if (prefs) {
    browserPrefs = browserPrefs.concat(prefs);
  }

  const { cleanup } = await perfSetup({
    prefs: browserPrefs,
    backend: pipelineOptions.backend,
  });
  info("Get the engine process");
  const startTime = performance.now();
  const mlEngineParent = await EngineProcess.getMLEngineParent();
  const engine = await mlEngineParent.getEngine(
    new PipelineOptions(pipelineOptions)
  );
  const e2eInitTime = performance.now() - startTime;

  info("Get Pipeline Options");
  info("Run the inference");
  return {
    cleanup,
    engine,
    e2eInitTime,
  };
}
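
/**
 * Normalizes a path's separators for the current OS: backslashes on Windows,
 * forward slashes elsewhere.
 *
 * @param {string} path
 * @returns {string}
 */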
function normalizePathForOS(path) {
  if (Services.appinfo.OS === "WINNT") {
    // On Windows, replace forward slashes with backslashes
    return path.replace(/\//g, "\\");
  }

  // On Unix-like systems, replace backslashes with forward slashes
  return path.replace(/\\/g, "/");
}
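
/**
 * Sets up prefs and mocked Remote Settings clients for a perf test, wiring the
 * WASM runtime attachment download to read the artifacts from MOZ_FETCHES_DIR
 * (or MOZ_ML_LOCAL_DIR when running locally).
 *
 * @param {object} [options]
 * @param {boolean} [options.disabled] - Set to true to disable the ML engine pref.
 * @param {Array} [options.prefs] - Additional prefs to push.
 * @param {string} [options.backend] - The inference backend for the WASM runtime record.
 * @returns {Promise<{remoteClients: object, cleanup: Function}>}
 */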
async function perfSetup({ disabled = false, prefs = [], backend } = {}) {
  const { removeMocks, remoteClients } = await createAndMockMLRemoteSettings({
    autoDownloadFromRemoteSettings: false,
    backend,
  });

  const finalPrefs = [
    // Enabled by default.
    ["browser.ml.enable", !disabled],
    ["browser.ml.logLevel", "Error"],
    ["browser.ml.modelCacheTimeout", 1000],
    ["browser.ml.checkForMemory", false],
    ["javascript.options.wasm_lazy_tiering", true],
    ...prefs,
  ];

  await SpecialPowers.pushPrefEnv({
    set: finalPrefs,
  });

  let artifactDirectory = normalizePathForOS(
    `${Services.env.get("MOZ_FETCHES_DIR")}`
  );

  async function pathExists(path) {
    try {
      return await IOUtils.exists(path);
    } catch (e) {
      return false;
    }
  }

  // Fall back to MOZ_ML_LOCAL_DIR when MOZ_FETCHES_DIR is not set, which is
  // usually the case when running locally.
  if (!artifactDirectory) {
    artifactDirectory = normalizePathForOS(
      `${Services.env.get("MOZ_ML_LOCAL_DIR")}`
    );
  }

  if (!artifactDirectory) {
    throw new Error(
      "The wasm artifact directory is not set. This usually happens when running locally. " +
        "Please download all the files from taskcluster/kinds/fetch/onnxruntime-web-fetch.yml. " +
        "Place them in a directory and rerun the test with the environment variable 'MOZ_ML_LOCAL_DIR' " +
        "set such that all the files are directly inside 'MOZ_ML_LOCAL_DIR'."
    );
  }

  if (!PathUtils.isAbsolute(artifactDirectory)) {
    throw new Error(
      "Please provide an absolute path for 'MOZ_ML_LOCAL_DIR' and not a relative path."
    );
  }

  async function download(record) {
    info(`Downloading record: ${record.name}`);
    const recordPath = normalizePathForOS(
      `${artifactDirectory}/${record.name}`
    );

    // Stop immediately if this fails.
    if (!(await pathExists(recordPath))) {
      throw new Error(
        `The wasm file <${recordPath}> does not exist. This usually happens when running locally. ` +
          "Please download all the files from taskcluster/kinds/fetch/onnxruntime-web-fetch.yml. " +
          `Place them in the directory <${artifactDirectory}> ` +
          `such that <${recordPath}> exists.`
      );
    }

    return {
      buffer: (await IOUtils.read(recordPath)).buffer,
    };
  }

  remoteClients["ml-onnx-runtime"].client.attachments.download = download;

  return {
    remoteClients,
    async cleanup() {
      await removeMocks();
      await waitForCondition(
        () => EngineProcess.areAllEnginesTerminated(),
        "Waiting for all of the engines to be terminated.",
        100,
        200
      );
      await SpecialPowers.popPrefEnv();
    },
  };
}

/**
 * Returns the current total physical memory usage in MiB for the inference process.
 */
async function getTotalMemoryUsage() {
  const procInfo = await getInferenceProcessInfo();
  return Math.round(procInfo.memory / ONE_MIB);
}

/**
 * Runs an inference with the given pipeline options and request, collecting
 * latency, throughput, and memory metrics for the run.
 *
 * @param {object} options
 * @param {PipelineOptions} options.pipelineOptions - Options for the engine pipeline.
 * @param {object} options.request - The inference request, including args and streamerOptions.
 * @param {boolean} [options.isFirstRun] - Whether this is the cold-start iteration; prefixes metric names.
 * @param {?Array} [options.browserPrefs] - Extra prefs to set for the run.
 * @returns {Promise<object>} The collected metrics.
 */
async function runInference({
  pipelineOptions,
  request,
  isFirstRun = false,
  browserPrefs = null,
}) {
  info(
    `runInference is request null | ${request === null || request === undefined}`
  );
  const { cleanup, engine, e2eInitTime } = await initializeEngine(
    pipelineOptions,
    browserPrefs
  );

  const streamerOptions = {
    perTokens: true,
    skipPrompt: pipelineOptions.taskName !== "text-generation",
    returnTokens: true,
    ...(request.streamerOptions || {}),
  };
  request = { ...request, streamerOptions };

  let metrics = {};
  let timeToFirstToken;
  let startTime;
  let numGeneratedCharacters = 0;
  let numGeneratedTokens = 0;
  let numPromptCharacters = 0;
  if (streamerOptions.skipPrompt && Array.isArray(request?.args)) {
    numPromptCharacters += request.args
      .flat()
      .reduce((sum, item) => sum + (item?.length || 0), 0);
  }
  let numPromptTokens = 0;
  const run = async () => {
    let isFirstTokenReceived = false;
    let result;
    let currentTokenLen = 0;
    let currentCharLen = 0;
    startTime = performance.now();
    const generator = engine.runWithGenerator(request);

    do {
      result = await generator.next();

      currentTokenLen = result.value?.tokens?.flat()?.length || 0;
      currentCharLen = result.value?.text?.length || 0;

      if (result.value?.isPrompt) {
        numPromptCharacters += currentCharLen;
        numPromptTokens += currentTokenLen;
      } else {
        numGeneratedCharacters += currentCharLen;
        numGeneratedTokens += currentTokenLen;
        if (!isFirstTokenReceived) {
          timeToFirstToken = performance.now() - startTime;
          isFirstTokenReceived = true;
          startTime = performance.now();
        }
      }
    } while (!result.done);

    return result.value;
  };

  try {
    const res = await run();
    const decodingTime = performance.now() - startTime;
    metrics = fetchMetrics(res.metrics || [], isFirstRun);
    metrics[`${isFirstRun ? COLD_START_PREFIX : ""}${TOTAL_MEMORY_USAGE}`] =
      await getTotalMemoryUsage();

    metrics[`${isFirstRun ? COLD_START_PREFIX : ""}${E2E_INIT_LATENCY}`] =
      e2eInitTime;
    metrics[`${isFirstRun ? COLD_START_PREFIX : ""}${FIRST_TOKEN_LATENCY}`] =
      timeToFirstToken;
    metrics[`${isFirstRun ? COLD_START_PREFIX : ""}${DECODING_LATENCY}`] =
      decodingTime;
    metrics[
      `${isFirstRun ? COLD_START_PREFIX : ""}${DECODING_CHARACTERS_SPEED}`
    ] = numGeneratedCharacters / (decodingTime / MS_PER_SEC);
    metrics[`${isFirstRun ? COLD_START_PREFIX : ""}${DECODING_TOKEN_SPEED}`] =
      numGeneratedTokens / (decodingTime / MS_PER_SEC);
    metrics[
      `${isFirstRun ? COLD_START_PREFIX : ""}${PROMPT_CHARACTERS_SPEED}`
    ] = numPromptCharacters / (timeToFirstToken / MS_PER_SEC);
    metrics[`${isFirstRun ? COLD_START_PREFIX : ""}${PROMPT_TOKEN_SPEED}`] =
      numPromptTokens / (timeToFirstToken / MS_PER_SEC);
  } finally {
    await engine.terminate();
    await EngineProcess.destroyMLEngine();
    await cleanup();
  }
  return metrics;
}

/**
 * Polls the inference process at a fixed interval and records its peak
 * physical memory usage.
 */
class PeakMemoryTracker {
  constructor(interval = 500) {
    this._memory = 0;
    this._intervalId = null;
    this._interval = interval;
  }

  async collectPeakMemory() {
    const procInfo = await getInferenceProcessInfo();
    if (procInfo.memory && procInfo.memory > this._memory) {
      this._memory = procInfo.memory;
    }
  }

  start() {
    if (this._intervalId !== null) {
      // Prevent multiple intervals.
      return;
    }
    this._intervalId = setInterval(() => {
      this.collectPeakMemory().catch(console.error);
    }, this._interval);
  }

  stop() {
    if (this._intervalId !== null) {
      clearInterval(this._intervalId);
      this._intervalId = null;
    }

    // Report the peak in MiB and reset for the next run.
    try {
      return Math.round(this._memory / ONE_MIB);
    } finally {
      this._memory = 0;
    }
  }
}
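
// Illustrative usage only: perfTest() below drives this when trackPeakMemory
// is set, roughly as follows:
//   const tracker = new PeakMemoryTracker(500);
//   tracker.start();
//   // ... run the inference ...
//   const peakMiB = tracker.stop();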

/**
 * Runs a performance test for the given name, options, and arguments and
 * reports the results for perfherder.
 *
 * @param {object} config
 * @param {string} config.name - Name used to prefix the reported metrics.
 * @param {object} config.options - Pipeline options for the engine.
 * @param {object} config.request - The inference request to run.
 * @param {number} [config.iterations] - Number of measured iterations.
 * @param {boolean} [config.addColdStart] - Also report a cold-start iteration.
 * @param {boolean} [config.trackPeakMemory] - Only report peak memory usage.
 * @param {number} [config.peakMemoryInterval] - Polling interval in ms for peak memory.
 * @param {?Array} [config.browserPrefs] - Extra prefs to set for each run.
 */
async function perfTest({
  name,
  options,
  request,
  iterations = ITERATIONS,
  addColdStart = false,
  trackPeakMemory = false,
  peakMemoryInterval = 500,
  browserPrefs = null,
}) {
  info(`is request null | ${request === null || request === undefined}`);
  name = name.toUpperCase();

  let METRICS;

  // When tracking peak memory, only report that metric: polling the process
  // every few hundred milliseconds stresses the system, so the other metrics
  // would be skewed.
  if (trackPeakMemory) {
    METRICS = [`${name}-${PEAK_MEMORY_USAGE}`];
  } else {
    METRICS = [
      `${name}-${PIPELINE_READY_LATENCY}`,
      `${name}-${INITIALIZATION_LATENCY}`,
      `${name}-${MODEL_RUN_LATENCY}`,
      `${name}-${TOTAL_MEMORY_USAGE}`,
      `${name}-${E2E_INIT_LATENCY}`,
      `${name}-${FIRST_TOKEN_LATENCY}`,
      `${name}-${DECODING_LATENCY}`,
      `${name}-${DECODING_CHARACTERS_SPEED}`,
      `${name}-${DECODING_TOKEN_SPEED}`,
      `${name}-${PROMPT_CHARACTERS_SPEED}`,
      `${name}-${PROMPT_TOKEN_SPEED}`,
      ...(addColdStart
        ? [
            `${name}-${COLD_START_PREFIX}${PIPELINE_READY_LATENCY}`,
            `${name}-${COLD_START_PREFIX}${INITIALIZATION_LATENCY}`,
            `${name}-${COLD_START_PREFIX}${MODEL_RUN_LATENCY}`,
            `${name}-${COLD_START_PREFIX}${TOTAL_MEMORY_USAGE}`,
          ]
        : []),
    ];
  }

  const journal = {};
  for (let metric of METRICS) {
    journal[metric] = [];
  }

  const pipelineOptions = new PipelineOptions(options);
  let tracker;

  let nIterations = addColdStart ? iterations + 1 : iterations;
  for (let i = 0; i < nIterations; i++) {
    if (trackPeakMemory) {
      tracker = new PeakMemoryTracker(peakMemoryInterval);
      tracker.start();
    }
    const shouldAddColdStart = addColdStart && i === 0;
    let metrics = await runInference({
      pipelineOptions,
      request,
      isFirstRun: shouldAddColdStart,
      browserPrefs,
    });
    if (trackPeakMemory) {
      journal[`${name}-${PEAK_MEMORY_USAGE}`].push(tracker.stop());
    } else {
      for (let [metricName, metricVal] of Object.entries(metrics)) {
        if (!Number.isFinite(metricVal) || metricVal < 0) {
          metricVal = 0;
        }
        // Add the metric if it wasn't there yet.
        if (journal[`${name}-${metricName}`] === undefined) {
          journal[`${name}-${metricName}`] = [];
        }
        journal[`${name}-${metricName}`].push(metricVal);
      }
    }
  }
  Assert.ok(true);
  reportMetrics(journal);
}
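
// Illustrative usage only (option values are hypothetical): a perf test would
// typically be driven from an add_task, e.g.
//   add_task(async function test_model_perf() {
//     await perfTest({
//       name: "example",
//       options: { taskName: "moz-echo" },
//       request: { args: ["The quick brown fox"] },
//     });
//   });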

/**
 * Checks whether two floating point values are equal (by magnitude) within an
 * epsilon tolerance.
 */
function isEqualWithTolerance(A, B, epsilon = 0.000001) {
  return Math.abs(Math.abs(A) - Math.abs(B)) < epsilon;
}
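
// e.g. isEqualWithTolerance(0.1 + 0.2, 0.3) returns true, even though
// 0.1 + 0.2 !== 0.3 exactly.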