/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

// This import does not use ChromeUtils because the next version of the library
// will require an async import, which is not supported by importESModule,
// so we'll just add await here.
import {
  env,
  RawImage,
  AutoProcessor,
  AutoTokenizer,
  AutoModelForVision2Seq,
} from "chrome://global/content/ml/transformers-dev.js";

/**
 * Lazy initialization container.
 *
 * @type {object}
 */
const lazy = {};

ChromeUtils.defineESModuleGetters(
  lazy,
  {
    arrayBufferToBlobURL: "chrome://global/content/ml/Utils.sys.mjs",
  },
  { global: "current" }
);

// Using a custom console, see https://bugzilla.mozilla.org/show_bug.cgi?id=1891789
let _logLevel = "Error";

function debug(...args) {
  if (["Debug", "Trace", "All"].includes(_logLevel)) {
    console.log("ML:", ...args); // eslint-disable-line no-console
  }
}

/**
 * Echo inference for testing purposes.
 *
 * @async
 * @param {object} request - The request object containing image data.
 * @param {object} _model - The model used for inference.
 * @param {object} _tokenizer - The tokenizer used for decoding.
 * @param {object} _processor - The processor used for preparing image data.
 * @returns {Promise<object>} The result object containing the processed text.
 */
async function echo(request, _model, _tokenizer, _processor) {
  return {
    metrics: {
      tokenizingTime: 0,
    },
    output: request.data,
  };
}

/**
 * Converts an image to text using a machine learning model.
 *
 * @async
 * @param {object} request - The request object containing image data.
 * @param {string} [request.imageUrl] - The URL of the image to process.
 *   Either `imageUrl` or `data` must be provided, but not both.
 * @param {ArrayBuffer} [request.data] - The raw image data to process.
 *   Either `data` or `imageUrl` must be provided, but not both.
 * @param {string} request.mimeType - The MIME type of the image data.
 * @param {object} model - The model used for inference.
 * @param {object} tokenizer - The tokenizer used for decoding.
 * @param {object} processor - The processor used for preparing image data.
 * @returns {Promise<object>} The result object containing the processed text.
 */
async function imageToText(request, model, tokenizer, processor) {
  let result = {
    metrics: {
      inferenceTime: 0,
      tokenizingTime: 0,
    },
  };
  let start = Date.now();
  let rawImage;
  if ("imageUrl" in request) {
    rawImage = await RawImage.fromURL(request.imageUrl);
  } else {
    const blob = new Blob([request.data], { type: request.mimeType });
    rawImage = await RawImage.fromBlob(blob);
  }
  debug("Image loaded in ", Date.now() - start);

  const { pixel_values } = await processor(rawImage);
  result.metrics.tokenizingTime += Date.now() - start;

  const toReturn = [];
  for (const batch of pixel_values) {
    // Prefix a batch dimension of 1 so the model receives a single-image batch.
    batch.dims = [1, ...batch.dims];
    start = Date.now();
    const output = await model.generate(batch);
    result.metrics.inferenceTime += Date.now() - start;
    start = Date.now();
    const decoded = tokenizer
      .batch_decode(output, {
        skip_special_tokens: true,
      })
      .map(x => ({ generated_text: x.trim() }));
    result.metrics.tokenizingTime += Date.now() - start;
    toReturn.push(decoded);
  }
  debug("Inference done in ", Date.now() - start);
  result.output = toReturn[0][0].generated_text;
  return result;
}
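// A minimal usage sketch for imageToText (illustrative only, not part of the
// module): it assumes `model`, `tokenizer`, and `processor` were already
// resolved the way Pipeline.run() resolves them below, and the URL is a
// placeholder.
//
//   const request = { imageUrl: "https://example.com/photo.jpg" };
//   const { output, metrics } = await imageToText(
//     request,
//     model,
//     tokenizer,
//     processor
//   );
//   // `output` is the generated caption string; `metrics` reports
//   // inferenceTime and tokenizingTime in milliseconds.
//
// Alternatively, raw bytes can be passed instead of a URL:
//
//   const res = await imageToText(
//     { data: arrayBuffer, mimeType: "image/png" },
//     model,
//     tokenizer,
//     processor
//   );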
/**
 * Configuration for the engine. Each task has a configuration object that
 * gets merged at runtime with the options from PipelineOptions.
 *
 * When a key exists in both the default configuration and the options,
 * the value from the options is used.
 *
 * The configuration keys that are not exposed as options are all the
 * callables that are used in the pipeline:
 *
 * - modelClass
 * - tokenizerClass
 * - processorClass
 * - pipelineFunction
 *
 * @type {object}
 */
const ENGINE_CONFIGURATION = {
  "image-to-text": {
    modelId: "mozilla/distilvit",
    modelClass: AutoModelForVision2Seq,
    tokenizerId: "mozilla/distilvit",
    tokenizerClass: AutoTokenizer,
    processorId: "mozilla/distilvit",
    processorClass: AutoProcessor,
    pipelineFunction: imageToText,
  },
  echo: {
    modelId: null,
    modelClass: null,
    tokenizerId: null,
    tokenizerClass: null,
    processorId: null,
    processorClass: null,
    pipelineFunction: echo,
  },
};

/**
 * Represents a pipeline for processing machine learning tasks.
 */
export class Pipeline {
  #modelCache = null;
  #model = null;
  #tokenizer = null;
  #processor = null;
  #pipelineFunction = null;
  #taskName = null;
  #initTime = 0;
  #isReady = false;

  /**
   * Creates an instance of a Pipeline.
   *
   * @param {object} modelCache - Implements the Cache interface and is used to fetch model files.
   * @param {object} config - The configuration options.
   */
  constructor(modelCache, config) {
    let start = Date.now();
    this.#modelCache = modelCache;
    _logLevel = config.logLevel || "Error";

    // Setting up the Transformers.js environment
    // See https://huggingface.co/docs/transformers.js/api/env

    // Caching strategy.
    // Here we make sure that every time transformers.js requires a file, it uses
    // modelCache, which transfers the request to the main thread and uses the
    // ModelHub that caches files into IndexedDB.
    env.useBrowserCache = false;
    env.allowLocalModels = false;
    env.remoteHost = config.modelHubRootUrl;
    env.remotePathTemplate = config.modelHubUrlTemplate;
    env.useCustomCache = true;
    env.customCache = this.#modelCache;
    env.localModelPath = "/";

    // ONNX runtime - we set up the wasm runtime we got from RS (Remote Settings)
    // for the ONNX backend to pick.
    debug("Setting up ONNX backend");
    env.backends.onnx.wasm.wasmPaths = {};
    env.backends.onnx.wasm.wasmPaths[config.runtimeFilename] =
      lazy.arrayBufferToBlobURL(config.runtime);

    // from_pretrained() returns promises; they are awaited lazily on the first
    // run() call, so construction itself stays synchronous.
    if (config.modelClass && config.modelId) {
      debug(`Loading model ${config.modelId} with class ${config.modelClass}`);
      this.#model = config.modelClass.from_pretrained(config.modelId);
    }
    if (config.tokenizerClass && config.tokenizerId) {
      debug(
        `Loading tokenizer ${config.tokenizerId} with class ${config.tokenizerClass}`
      );
      this.#tokenizer = config.tokenizerClass.from_pretrained(
        config.tokenizerId
      );
    }
    if (config.processorClass && config.processorId) {
      debug(
        `Loading processor ${config.processorId} with class ${config.processorClass}`
      );
      this.#processor = config.processorClass.from_pretrained(
        config.processorId
      );
    }
    this.#taskName = config.taskName;
    this.#pipelineFunction = config.pipelineFunction.bind(this);
    this.#initTime = Date.now() - start;
    debug("Pipeline initialized, took ", this.#initTime);
  }
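  // A minimal sketch of what the `modelCache` argument could look like. The
  // only contract the constructor relies on is the Cache interface (match/put),
  // which is what transformers.js invokes on `env.customCache`; this in-memory
  // Map version is a stand-in for the real ModelHub-backed cache and is only
  // meant to illustrate the shape.
  //
  //   const inMemoryModelCache = {
  //     _store: new Map(),
  //     async match(key) {
  //       // Returns a previously stored Response, or undefined on a miss.
  //       return this._store.get(key);
  //     },
  //     async put(key, response) {
  //       this._store.set(key, response);
  //     },
  //   };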
  /**
   * Initializes the pipeline with given options.
   *
   * @static
   * @async
   * @param {object} modelCache - Implements the Cache interface and is used to fetch model files.
   * @param {ArrayBuffer} runtime - The runtime wasm file.
   * @param {PipelineOptions} options - The options for initialization.
   * @returns {Promise<Pipeline>} The initialized pipeline instance.
   */
  static async initialize(modelCache, runtime, options) {
    const taskName = options.taskName;
    debug(`Initializing Pipeline for task ${taskName}`);
    if (!ENGINE_CONFIGURATION[taskName]) {
      throw new Error(`Task ${taskName} is not supported`);
    }
    // Loading the config defaults for the task
    let config = { ...ENGINE_CONFIGURATION[taskName] };
    config.runtime = runtime;
    // Overriding the defaults with the options
    options.applyToConfig(config);
    if (!config.pipelineFunction) {
      throw new Error("pipelineFunction is required for the pipeline");
    }
    return new Pipeline(modelCache, config);
  }

  /**
   * Runs the pipeline with the given request.
   *
   * @async
   * @param {T} request - The request object to be processed. The fields it may
   *   contain depend on the task. See each pipeline function for more details.
   * @returns {Promise<object>} The result object from the pipeline execution.
   */
  async run(request) {
    debug("Running task: ", this.#taskName);

    // Await all pending promises so the model, tokenizer, and processor are
    // fully resolved before the first pipeline run.
    if (!this.#isReady) {
      let start = Date.now();
      debug("Initializing model, tokenizer and processor");

      // Deactivate console.warn, see https://bugzilla.mozilla.org/show_bug.cgi?id=1891003
      const originalWarn = console.warn;
      console.warn = () => {};
      try {
        this.#model = await this.#model;
        this.#tokenizer = await this.#tokenizer;
        this.#processor = await this.#processor;
        this.#isReady = true;
      } catch (error) {
        debug("Error initializing pipeline", error);
        throw error;
      } finally {
        console.warn = originalWarn;
      }

      this.#initTime += Date.now() - start;
      debug("Pipeline is fully initialized, took ", this.#initTime);
    }

    let result = await this.#pipelineFunction(
      request,
      this.#model,
      this.#tokenizer,
      this.#processor
    );
    result.metrics.initTime = this.#initTime;
    return result;
  }
}
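// A hedged end-to-end sketch of how this class is typically driven
// (illustrative only; `modelCache`, `wasmBuffer`, and `pipelineOptions` are
// placeholders, and `pipelineOptions` is assumed to be a PipelineOptions
// instance whose taskName is "image-to-text"):
//
//   const pipeline = await Pipeline.initialize(
//     modelCache,
//     wasmBuffer,
//     pipelineOptions
//   );
//   const result = await pipeline.run({
//     imageUrl: "https://example.com/photo.jpg",
//   });
//   // result.output holds the caption; result.metrics includes initTime,
//   // inferenceTime, and tokenizingTime.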