summaryrefslogtreecommitdiffstats
path: root/toolkit/components/ml/actors/MLEngineChild.sys.mjs
blob: 17a8b3511a521cefd1e915b3290d1931792221f6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

import { XPCOMUtils } from "resource://gre/modules/XPCOMUtils.sys.mjs";

/**
 * @typedef {import("../../promiseworker/PromiseWorker.sys.mjs").BasePromiseWorker} BasePromiseWorker
 */

/**
 * Lazily-resolved module exports and preference values used by this actor.
 * Note: this typedef also gains `ModelHub`, `PipelineOptions`, `console`,
 * and the pref getters defined below at runtime.
 *
 * @typedef {object} Lazy
 * @property {typeof import("../../promiseworker/PromiseWorker.sys.mjs").BasePromiseWorker} BasePromiseWorker
 * @property {typeof setTimeout} setTimeout
 * @property {typeof clearTimeout} clearTimeout
 */

/** @type {Lazy} */
const lazy = {};
// Each property is resolved (and its module loaded) on first access.
ChromeUtils.defineESModuleGetters(lazy, {
  BasePromiseWorker: "resource://gre/modules/PromiseWorker.sys.mjs",
  setTimeout: "resource://gre/modules/Timer.sys.mjs",
  clearTimeout: "resource://gre/modules/Timer.sys.mjs",
  ModelHub: "chrome://global/content/ml/ModelHub.sys.mjs",
  PipelineOptions: "chrome://global/content/ml/EngineProcess.sys.mjs",
});

// A console scoped to this module, gated by the "browser.ml.logLevel" pref.
ChromeUtils.defineLazyGetter(lazy, "console", () => {
  return console.createInstance({
    maxLogLevelPref: "browser.ml.logLevel",
    prefix: "ML",
  });
});

// Live-updating preference mirrors; reading these always reflects the
// current pref value.
XPCOMUtils.defineLazyPreferenceGetter(
  lazy,
  "CACHE_TIMEOUT_MS",
  "browser.ml.modelCacheTimeout"
);
XPCOMUtils.defineLazyPreferenceGetter(
  lazy,
  "MODEL_HUB_ROOT_URL",
  "browser.ml.modelHubRootUrl"
);
XPCOMUtils.defineLazyPreferenceGetter(
  lazy,
  "MODEL_HUB_URL_TEMPLATE",
  "browser.ml.modelHubUrlTemplate"
);
XPCOMUtils.defineLazyPreferenceGetter(lazy, "LOG_LEVEL", "browser.ml.logLevel");

/**
 * The engine child is responsible for the life cycle and instantiation of the local
 * machine learning inference engine.
 */
export class MLEngineChild extends JSWindowActorChild {
  /**
   * The cached engine dispatchers, keyed by task name.
   *
   * @type {Map<string, EngineDispatcher>}
   */
  #engineDispatchers = new Map();

  /**
   * Handles messages from the parent actor.
   *
   * @param {{name: string, data: object}} message
   * @returns {Promise<void> | undefined}
   */
  // eslint-disable-next-line consistent-return
  async receiveMessage({ name, data }) {
    switch (name) {
      case "MLEngine:NewPort": {
        const { port, pipelineOptions } = data;

        // Override some options using prefs
        let options = new lazy.PipelineOptions(pipelineOptions);

        options.updateOptions({
          modelHubRootUrl: lazy.MODEL_HUB_ROOT_URL,
          modelHubUrlTemplate: lazy.MODEL_HUB_URL_TEMPLATE,
          timeoutMS: lazy.CACHE_TIMEOUT_MS,
          logLevel: lazy.LOG_LEVEL,
        });

        this.#engineDispatchers.set(
          options.taskName,
          new EngineDispatcher(this, port, options)
        );
        break;
      }
      case "MLEngine:ForceShutdown": {
        // Fix: the previous implementation returned from inside the loop,
        // so only the first dispatcher was ever terminated and the map was
        // never cleared. Terminate every dispatcher, then drop the map, and
        // return an aggregate promise so the parent can still await shutdown.
        const terminations = [];
        for (const engineDispatcher of this.#engineDispatchers.values()) {
          terminations.push(engineDispatcher.terminate());
        }
        this.#engineDispatchers = null;
        return Promise.all(terminations);
      }
    }
  }

  /**
   * Notifies the parent actor once the document is ready.
   *
   * @param {Event} event
   */
  handleEvent(event) {
    switch (event.type) {
      case "DOMContentLoaded":
        this.sendAsyncMessage("MLEngine:Ready");
        break;
    }
  }

  /**
   * Gets the wasm array buffer from RemoteSettings.
   *
   * @returns {Promise<ArrayBuffer>}
   */
  getWasmArrayBuffer() {
    return this.sendQuery("MLEngine:GetWasmArrayBuffer");
  }

  /**
   * Gets the inference options from RemoteSettings.
   *
   * @param {string} taskName - The task whose options to fetch.
   * @returns {Promise<object>}
   */
  getInferenceOptions(taskName) {
    return this.sendQuery(`MLEngine:GetInferenceOptions:${taskName}`);
  }

  /**
   * Removes a dispatcher from the cache; when the last one is gone, asks the
   * parent to destroy the engine process.
   *
   * @param {string} engineName
   */
  removeEngine(engineName) {
    this.#engineDispatchers.delete(engineName);
    if (this.#engineDispatchers.size === 0) {
      this.sendQuery("MLEngine:DestroyEngineProcess");
    }
  }
}

/**
 * This classes manages the lifecycle of an ML Engine, and handles dispatching messages
 * to it.
 */
class EngineDispatcher {
  /**
   * The ports currently connected to this dispatcher.
   *
   * @type {Set<MessagePort>}
   */
  #ports = new Set();

  /** @type {TimeoutID | null} */
  #keepAliveTimeout = null;

  /** @type {PromiseWithResolvers} */
  #modelRequest;

  /** @type {Promise<Engine> | null} */
  #engine = null;

  /** @type {string} */
  #taskName;

  /** Creates the inference engine given the wasm runtime and the run options.
   *
   * The initialization is done in three steps:
   * 1. The wasm runtime is fetched from RS
   * 2. The inference options are fetched from RS and augmented with the pipeline options.
   * 3. The inference engine is created with the wasm runtime and the options.
   *
   * Any exception here will be bubbled up for the constructor to log.
   *
   * @param {PipelineOptions} pipelineOptions
   * @returns {Promise<Engine>}
   */
  async initializeInferenceEngine(pipelineOptions) {
    // Create the inference engine given the wasm runtime and the options.
    const wasm = await this.mlEngineChild.getWasmArrayBuffer();
    const inferenceOptions = await this.mlEngineChild.getInferenceOptions(
      this.#taskName
    );
    lazy.console.debug("Inference engine options:", inferenceOptions);
    pipelineOptions.updateOptions(inferenceOptions);

    return InferenceEngine.create(wasm, pipelineOptions);
  }

  /**
   * @param {MLEngineChild} mlEngineChild
   * @param {MessagePort} port
   * @param {PipelineOptions} pipelineOptions
   */
  constructor(mlEngineChild, port, pipelineOptions) {
    this.mlEngineChild = mlEngineChild;
    this.#taskName = pipelineOptions.taskName;
    this.timeoutMS = pipelineOptions.timeoutMS;

    this.#engine = this.initializeInferenceEngine(pipelineOptions);

    // Trigger the keep alive timer.
    this.#engine
      .then(() => void this.keepAlive())
      .catch(error => {
        if (
          // Ignore errors from tests intentionally causing errors.
          !error?.message?.startsWith("Intentionally")
        ) {
          // Fixed typo in the log message ("initalize" -> "initialize").
          lazy.console.error("Could not initialize the engine", error);
        }
      });

    this.setupMessageHandler(port);
  }

  /**
   * The worker needs to be shutdown after some amount of time of not being used.
   */
  keepAlive() {
    if (this.#keepAliveTimeout) {
      // Clear any previous timeout.
      lazy.clearTimeout(this.#keepAliveTimeout);
    }
    // In automated tests, the engine is manually destroyed.
    if (!Cu.isInAutomation) {
      // Fix: the callback must be bound — passing the bare method reference
      // (`this.terminate`) loses `this` when the timer fires.
      this.#keepAliveTimeout = lazy.setTimeout(
        () => this.terminate(),
        this.timeoutMS
      );
    }
  }

  /**
   * Requests the model from the content process via the port. Concurrent
   * callers share the first in-flight request.
   *
   * @param {MessagePort} port
   * @returns {Promise}
   */
  getModel(port) {
    if (this.#modelRequest) {
      // There could be a race to get a model, use the first request.
      return this.#modelRequest.promise;
    }
    this.#modelRequest = Promise.withResolvers();
    port.postMessage({ type: "EnginePort:ModelRequest" });
    return this.#modelRequest.promise;
  }

  /**
   * Wires up the message handling for a port and starts tracking it.
   *
   * @param {MessagePort} port
   */
  setupMessageHandler(port) {
    // Fix: the port was never added to #ports, so "EnginePort:Discard"
    // deletions and the terminate() notification loop were no-ops.
    this.#ports.add(port);

    port.onmessage = async ({ data }) => {
      switch (data.type) {
        case "EnginePort:Discard": {
          port.close();
          this.#ports.delete(port);
          break;
        }
        case "EnginePort:Terminate": {
          this.terminate();
          break;
        }
        case "EnginePort:ModelResponse": {
          if (this.#modelRequest) {
            const { model, error } = data;
            if (model) {
              this.#modelRequest.resolve(model);
            } else {
              this.#modelRequest.reject(error);
            }
            this.#modelRequest = null;
          } else {
            lazy.console.error(
              "Got a EnginePort:ModelResponse but no model resolvers"
            );
          }
          break;
        }
        case "EnginePort:Run": {
          const { requestId, request } = data;
          let engine;
          try {
            engine = await this.#engine;
          } catch (error) {
            port.postMessage({
              type: "EnginePort:RunResponse",
              requestId,
              response: null,
              error,
            });
            // The engine failed to load. Terminate the entire dispatcher.
            this.terminate();
            return;
          }

          // Do not run the keepAlive timer until we are certain that the engine loaded,
          // as the engine shouldn't be killed while it is initializing.
          this.keepAlive();

          try {
            port.postMessage({
              type: "EnginePort:RunResponse",
              requestId,
              response: await engine.run(request),
              error: null,
            });
          } catch (error) {
            port.postMessage({
              type: "EnginePort:RunResponse",
              requestId,
              response: null,
              error,
            });
          }
          break;
        }
        default:
          lazy.console.error("Unknown port message to engine: ", data);
          break;
      }
    };
  }

  /**
   * Terminates the engine and its worker after a timeout.
   */
  async terminate() {
    if (this.#keepAliveTimeout) {
      lazy.clearTimeout(this.#keepAliveTimeout);
      this.#keepAliveTimeout = null;
    }
    for (const port of this.#ports) {
      port.postMessage({ type: "EnginePort:EngineTerminated" });
      port.close();
    }
    this.#ports = new Set();
    this.mlEngineChild.removeEngine(this.#taskName);
    try {
      const engine = await this.#engine;
      engine.terminate();
    } catch (error) {
      lazy.console.error("Failed to get the engine", error);
    }
  }
}

// Shared ModelHub instance, created on first use and reused afterwards.
let modelHub = null;

/**
 * Retrieves a model file as an ArrayBuffer from the specified URL.
 *
 * The URL is first normalized: an absolute URL under the configured model hub
 * root is rewritten into a hub-relative path with a leading slash. The file is
 * then fetched through the shared ModelHub instance, which is constructed on
 * the first call and reused after that.
 *
 * @param {string} url - The URL of the model file to fetch. May be a path
 * relative to the model hub root, or an absolute URL.
 * @returns {Promise} A promise resolving to a Meta object holding the URL, the
 * response headers, and the data as an ArrayBuffer marked for transfer so it
 * is moved to the worker instead of cloned.
 */
async function getModelFile(url) {
  // Lazily build the hub with the pref-configured root and template.
  if (modelHub === null) {
    lazy.console.debug("Creating model hub instance");
    modelHub = new lazy.ModelHub({
      rootUrl: lazy.MODEL_HUB_ROOT_URL,
      urlTemplate: lazy.MODEL_HUB_URL_TEMPLATE,
    });
  }

  const rootUrl = lazy.MODEL_HUB_ROOT_URL;
  if (url.startsWith(rootUrl)) {
    // Strip the root and guarantee a leading slash on the remainder.
    const relativePath = url.slice(rootUrl.length);
    url = relativePath.startsWith("/") ? relativePath : `/${relativePath}`;
  }

  // Parse out the organization, model name, and file path. Any parsing error
  // propagates and is caught in the worker.
  const parsedUrl = modelHub.parseUrl(url);

  const [data, headers] = await modelHub.getModelFileAsArrayBuffer(parsedUrl);
  return new lazy.BasePromiseWorker.Meta([url, headers, data], {
    transfers: [data],
  });
}

/**
 * Wrapper around the ChromeWorker that runs the inference.
 */
/**
 * Wrapper around the ChromeWorker that runs the inference.
 */
class InferenceEngine {
  /** @type {BasePromiseWorker} */
  #worker;

  /**
   * Spins up the worker and initializes the engine inside it.
   *
   * @param {ArrayBuffer} wasm - The wasm runtime; transferred to the worker.
   * @param {PipelineOptions} pipelineOptions
   * @returns {Promise<InferenceEngine>}
   */
  static async create(wasm, pipelineOptions) {
    /** @type {BasePromiseWorker} */
    const worker = new lazy.BasePromiseWorker(
      "chrome://global/content/ml/MLEngine.worker.mjs",
      { type: "module" },
      { getModelFile }
    );

    // Transfer (rather than copy) the wasm buffer into the worker.
    await worker.post(
      "initializeEngine",
      [wasm, pipelineOptions],
      {},
      [wasm]
    );
    return new InferenceEngine(worker);
  }

  /**
   * @param {BasePromiseWorker} worker
   */
  constructor(worker) {
    this.#worker = worker;
  }

  /**
   * Runs a single inference request on the worker.
   *
   * @param {string} request
   * @returns {Promise<string>}
   */
  run(request) {
    return this.#worker.post("run", [request]);
  }

  /** Shuts down the worker and drops the reference to it. */
  terminate() {
    this.#worker.terminate();
    this.#worker = null;
  }
}