summaryrefslogtreecommitdiffstats
path: root/toolkit/components/translations/bergamot-translator/build-bergamot.py
diff options
context:
space:
mode:
Diffstat (limited to 'toolkit/components/translations/bergamot-translator/build-bergamot.py')
-rwxr-xr-xtoolkit/components/translations/bergamot-translator/build-bergamot.py271
1 files changed, 271 insertions, 0 deletions
diff --git a/toolkit/components/translations/bergamot-translator/build-bergamot.py b/toolkit/components/translations/bergamot-translator/build-bergamot.py
new file mode 100755
index 0000000000..6c55237f22
--- /dev/null
+++ b/toolkit/components/translations/bergamot-translator/build-bergamot.py
@@ -0,0 +1,271 @@
+#!/usr/bin/env python3
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""
+Builds the Bergamot translations engine for integration with Firefox.
+
+If you wish to test the Bergamot engine locally, then uncomment the .wasm line in
+the toolkit/components/translations/jar.mn after building the file. Just make sure
+not to check the code change in.
+"""
+
+import argparse
+import multiprocessing
+import os
+import shutil
+import subprocess
+from collections import namedtuple
+
+import yaml
+
+DIR_PATH = os.path.realpath(os.path.dirname(__file__))
+THIRD_PARTY_PATH = os.path.join(DIR_PATH, "thirdparty")
+MOZ_YAML_PATH = os.path.join(DIR_PATH, "moz.yaml")
+PATCHES_PATH = os.path.join(DIR_PATH, "patches")
+BERGAMOT_PATH = os.path.join(THIRD_PARTY_PATH, "bergamot-translator")
+MARIAN_PATH = os.path.join(BERGAMOT_PATH, "3rd_party/marian-dev")
+GEMM_SCRIPT = os.path.join(BERGAMOT_PATH, "wasm/patch-artifacts-import-gemm-module.sh")
+BUILD_PATH = os.path.join(THIRD_PARTY_PATH, "build-wasm")
+EMSDK_PATH = os.path.join(THIRD_PARTY_PATH, "emsdk")
+EMSDK_ENV_PATH = os.path.join(EMSDK_PATH, "emsdk_env.sh")
+WASM_PATH = os.path.join(BUILD_PATH, "bergamot-translator-worker.wasm")
+JS_PATH = os.path.join(BUILD_PATH, "bergamot-translator-worker.js")
+FINAL_JS_PATH = os.path.join(DIR_PATH, "bergamot-translator.js")
+ROOT_PATH = os.path.join(DIR_PATH, "../../../..")
+
+# 3.1.47 had an error compiling sentencepiece.
+EMSDK_VERSION = "3.1.8"
+EMSDK_REVISION = "2346baa7bb44a4a0571cc75f1986ab9aaa35aa03"
+
+patches = [
+ (BERGAMOT_PATH, os.path.join(PATCHES_PATH, "allocation-bergamot.patch")),
+ (MARIAN_PATH, os.path.join(PATCHES_PATH, "allocation-marian.patch")),
+]
+
+parser = argparse.ArgumentParser(
+ description=__doc__,
+ # Preserves whitespace in the help text.
+ formatter_class=argparse.RawTextHelpFormatter,
+)
+parser.add_argument(
+ "--clobber", action="store_true", help="Clobber the build artifacts"
+)
+parser.add_argument(
+ "--debug",
+ action="store_true",
+ help="Build with debug symbols, useful for profiling",
+)
+
+ArgNamespace = namedtuple("ArgNamespace", ["clobber", "debug"])
+
+
+def git_clone_update(name: str, repo_path: str, repo_url: str, revision: str):
+ if not os.path.exists(repo_path):
+ print(f"\nā¬‡ļø Clone the {name} repo into {repo_path}\n")
+ subprocess.check_call(
+ ["git", "clone", repo_url],
+ cwd=THIRD_PARTY_PATH,
+ )
+
+ local_head = subprocess.check_output(
+ ["git", "rev-parse", "HEAD"],
+ cwd=repo_path,
+ text=True,
+ ).strip()
+
+ def run(command):
+ return subprocess.check_call(command, cwd=repo_path)
+
+ if local_head != revision:
+ print(f"The head ({local_head}) and revision ({revision}) don't match.")
+ print(f"\nšŸ”Ž Fetching the latest from {name}.\n")
+ run(["git", "fetch", "--recurse-submodules"])
+
+ print(f"šŸ›’ Checking out the revision {revision}")
+ run(["git", "checkout", revision])
+ run(["git", "submodule", "update", "--init", "--recursive"])
+
+
+def install_and_activate_emscripten(args: ArgNamespace):
+ git_clone_update(
+ name="emsdk",
+ repo_path=EMSDK_PATH,
+ repo_url="https://github.com/emscripten-core/emsdk.git",
+ revision=EMSDK_REVISION,
+ )
+
+ # Run these commands in the shell so that the configuration is saved.
+ def run_shell(command):
+ return subprocess.run(command, cwd=EMSDK_PATH, shell=True, check=True)
+
+ print(f"\nšŸ› ļø Installing EMSDK version {EMSDK_VERSION}\n")
+ run_shell("./emsdk install " + EMSDK_VERSION)
+
+ print("\nšŸ› ļø Activating emsdk\n")
+ run_shell("./emsdk activate " + EMSDK_VERSION)
+
+
+def install_bergamot():
+ with open(MOZ_YAML_PATH, "r", encoding="utf8") as file:
+ text = file.read()
+
+ moz_yaml = yaml.safe_load(text)
+
+ git_clone_update(
+ name="bergamot",
+ repo_path=BERGAMOT_PATH,
+ repo_url=moz_yaml["origin"]["url"],
+ revision=moz_yaml["origin"]["revision"],
+ )
+
+
+def to_human_readable(size):
+ """Convert sizes to human-readable format"""
+ size_in_mb = size / 1048576
+ return f"{size_in_mb:.2f}M ({size} bytes)"
+
+
+def apply_git_patch(repo_path, patch_path):
+ print(f"Applying patch {patch_path} to {os.path.basename(repo_path)}")
+ subprocess.check_call(["git", "apply", "--reject", patch_path], cwd=repo_path)
+
+
+def revert_git_patch(repo_path, patch_path):
+ print(f"Reverting patch {patch_path} from {os.path.basename(repo_path)}")
+ subprocess.check_call(["git", "apply", "-R", "--reject", patch_path], cwd=repo_path)
+
+
+def build_bergamot(args: ArgNamespace):
+ if args.clobber and os.path.exists(BUILD_PATH):
+ shutil.rmtree(BUILD_PATH)
+
+ if not os.path.exists(BUILD_PATH):
+ os.mkdir(BUILD_PATH)
+
+ print("\n šŸ–Œļø Applying source code patches\n")
+ for repo_path, patch_path in patches:
+ apply_git_patch(repo_path, patch_path)
+
+ # These commands require the emsdk environment variables to be set up.
+ def run_shell(command):
+ if '"' in command or "'" in command:
+ raise Exception("This run_shell utility does not support quotes.")
+
+ return subprocess.run(
+ # "source" is not available in all shells so explicitly
+ f"bash -c 'source {EMSDK_ENV_PATH} && {command}'",
+ cwd=BUILD_PATH,
+ shell=True,
+ check=True,
+ )
+
+ try:
+ flags = ""
+ if args.debug:
+ flags = "-DCMAKE_BUILD_TYPE=RelWithDebInfo"
+
+ print("\n šŸƒ Running CMake for Bergamot\n")
+ run_shell(
+ f"emcmake cmake -DCOMPILE_WASM=on -DWORMHOLE=off {flags} {BERGAMOT_PATH}"
+ )
+
+ print("\n šŸƒ Building Bergamot with emmake\n")
+ run_shell(f"emmake make -j {multiprocessing.cpu_count()}")
+
+ print("\n šŸŖš Patching Bergamot for gemm support\n")
+ subprocess.check_call(["bash", GEMM_SCRIPT, BUILD_PATH])
+
+ print("\nāœ… Build complete\n")
+ print(" " + JS_PATH)
+ print(" " + WASM_PATH)
+
+ # Get the sizes of the build artifacts.
+ wasm_size = os.path.getsize(WASM_PATH)
+ gzip_size = int(
+ subprocess.run(
+ f"gzip -c {WASM_PATH} | wc -c",
+ check=True,
+ shell=True,
+ capture_output=True,
+ ).stdout.strip()
+ )
+ print(f" Uncompressed wasm size: {to_human_readable(wasm_size)}")
+ print(f" Compressed wasm size: {to_human_readable(gzip_size)}")
+ finally:
+ print("\nšŸ–Œļø Reverting the source code patches\n")
+ for repo_path, patch_path in patches[::-1]:
+ revert_git_patch(repo_path, patch_path)
+
+
+def write_final_bergamot_js_file():
+ """
+ The generated JS file requires some light patching for integration.
+ """
+
+ source = "\n".join(
+ [
+ "/* This Source Code Form is subject to the terms of the Mozilla Public",
+ " * License, v. 2.0. If a copy of the MPL was not distributed with this",
+ " * file, You can obtain one at http://mozilla.org/MPL/2.0/. */",
+ "",
+ "function loadBergamot(Module) {",
+ "",
+ ]
+ )
+
+ with open(JS_PATH, "r", encoding="utf8") as file:
+ for line in file.readlines():
+ source += " " + line
+
+ source += " return Module;\n}"
+
+ # Use the Module's printing.
+ source = source.replace("console.log(", "Module.print(")
+
+ # Add some instrumentation to the module's memory size.
+ source = source.replace(
+ "function updateGlobalBufferAndViews(buf) {",
+ """
+ function updateGlobalBufferAndViews(buf) {
+ const mb = (buf.byteLength / 1_000_000).toFixed();
+ Module.print(
+ `Growing wasm buffer to ${mb}MB (${buf.byteLength} bytes).`
+ );
+ """,
+ )
+
+ print("\n Formatting the final bergamot file")
+ # Create the file outside of this directory so it's not ignored by eslint.
+ temp_path = os.path.join(DIR_PATH, "../temp-bergamot.js")
+ with open(temp_path, "w", encoding="utf8") as file:
+ file.write(source)
+
+ subprocess.run(
+ f"./mach eslint --fix {temp_path}",
+ cwd=ROOT_PATH,
+ check=True,
+ shell=True,
+ capture_output=True,
+ )
+
+ print(f"\n Writing out final bergamot file: {FINAL_JS_PATH}")
+ shutil.move(temp_path, FINAL_JS_PATH)
+
+
+def main():
+ args: ArgNamespace = parser.parse_args()
+
+ if not os.path.exists(THIRD_PARTY_PATH):
+ os.mkdir(THIRD_PARTY_PATH)
+
+ install_and_activate_emscripten(args)
+ install_bergamot()
+ build_bergamot(args)
+ write_final_bergamot_js_file()
+
+
+if __name__ == "__main__":
+ main()