summaryrefslogtreecommitdiffstats
path: root/taskcluster/scripts/misc/fetch-talos-pdfs.py
diff options
context:
space:
mode:
Diffstat (limited to '')
-rwxr-xr-xtaskcluster/scripts/misc/fetch-talos-pdfs.py118
1 files changed, 118 insertions, 0 deletions
diff --git a/taskcluster/scripts/misc/fetch-talos-pdfs.py b/taskcluster/scripts/misc/fetch-talos-pdfs.py
new file mode 100755
index 0000000000..059af062e7
--- /dev/null
+++ b/taskcluster/scripts/misc/fetch-talos-pdfs.py
@@ -0,0 +1,118 @@
+#!/usr/bin/env python3
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""
+This script downloads all the required PDFs from the test_manifest.json
+file found in the mozilla pdf.js repo.
+"""
+
+import json
+import os
+import pathlib
+import shutil
+
+import requests
+from redo import retriable
+
+
+def log(msg):
+    """Print a message to stdout, prefixed with this script's name.
+
+    :param msg: The message to print.
+    """
+    # An f-string is used instead of ``"..." % msg`` so that a tuple
+    # argument is printed as a tuple rather than being unpacked as the
+    # arguments of the %-format (which raises TypeError for len != 1).
+    print(f"fetch-talos-pdf: {msg}")
+
+
+@retriable(attempts=7, sleeptime=5, sleepscale=2)
+def fetch_file(url, filepath, timeout=60):
+    """Download a file from the given url to a given file.
+
+    The body is streamed into a temporary ``<name>.part`` file next to the
+    destination and only moved into place once the download has completed,
+    so a failed or interrupted download never leaves a truncated file at
+    ``filepath``.
+
+    The function is wrapped with redo's ``retriable`` decorator (configured
+    above with ``attempts=7``).
+
+    :param str url: URL to download file from.
+    :param Path filepath: Location to output the downloaded file
+        (includes the name of the file).
+    :param timeout: Timeout in seconds handed to ``requests.get``. Without
+        one, requests waits indefinitely on an unresponsive server.
+    :raises requests.HTTPError: If the server answers with an error status.
+    """
+    chunk_size = 4096
+    partial_path = filepath.with_name(filepath.name + ".part")
+
+    try:
+        # Using the response as a context manager guarantees the streamed
+        # connection is released even if writing the file fails part-way.
+        with requests.get(url, stream=True, timeout=timeout) as response:
+            response.raise_for_status()
+
+            with partial_path.open("wb") as fd:
+                for chunk in response.iter_content(chunk_size):
+                    fd.write(chunk)
+
+        # Only expose the file under its final name once it is complete.
+        partial_path.replace(filepath)
+    finally:
+        # After a successful replace() the partial file no longer exists;
+        # on failure, remove whatever was written so far.
+        if partial_path.exists():
+            partial_path.unlink()
+
+
+def fetch_talos_pdf_link(pdf_path, output_file):
+    """Download the PDF whose URL is stored in a link file.
+
+    The file at ``pdf_path`` is read as text, surrounding whitespace is
+    stripped, and the result is used as the URL to download.
+
+    :param Path pdf_path: Path to the file holding the URL of the PDF.
+    :param Path output_file: Destination of the download, file name included.
+    """
+    link_contents = pdf_path.read_text()
+    url = link_contents.strip()
+
+    log(f"Downloading from PDF link: {url}")
+    fetch_file(url, output_file)
+
+
+def gather_talos_pdf(test_folder, pdf_info, output_dir):
+    """Place a single PDF from the manifest into the output directory.
+
+    Nothing is done if a file with the PDF's name is already present in
+    ``output_dir``. Otherwise the PDF is downloaded when its manifest entry
+    has a truthy ``"link"`` value, or copied from ``test_folder`` when not.
+
+    :param Path test_folder: Folder that the manifest's file paths are
+        relative to.
+    :param dict pdf_info: One entry of the manifest; its ``"file"`` key is
+        required and its ``"link"`` key is optional.
+    :param Path output_dir: The directory to move/download the PDF to.
+    """
+    pdf_file = pdf_info["file"]
+    destination = pathlib.Path(output_dir) / pathlib.Path(pdf_file).name
+
+    log(f"Gathering PDF {pdf_file}...")
+
+    # Already gathered: leave the existing file untouched.
+    if destination.exists():
+        log(f"{pdf_file} already exists in output location")
+        return
+
+    # Linked PDFs are not in the repo; a sibling ".link" file holds the URL.
+    if pdf_info.get("link", False):
+        link_file = pathlib.Path(test_folder, pdf_file + ".link")
+        fetch_talos_pdf_link(link_file, destination)
+        return
+
+    log(f"Copying PDF to output location {destination}")
+    shutil.copy(pathlib.Path(test_folder, pdf_file), destination)
+
+
+def gather_talos_pdfs(pdf_js_repo, output_dir):
+    """Collect every PDF listed in the pdf.js test manifest.
+
+    Reads ``test/test_manifest.json`` from the given repo, gathers each
+    entry into ``output_dir`` (some entries are links that have to be
+    downloaded), and finally copies the manifest itself alongside them.
+
+    :param Path pdf_js_repo: Path to the Mozilla Github pdf.js repo.
+    :param Path output_dir: Output directory for the PDFs.
+    """
+    manifest_path = (
+        pathlib.Path(pdf_js_repo) / "test" / "test_manifest.json"
+    ).resolve()
+    # File paths in the manifest are resolved against the manifest's folder.
+    pdfs_root = manifest_path.parent
+
+    manifest_entries = json.loads(manifest_path.read_text())
+    for entry in manifest_entries:
+        gather_talos_pdf(pdfs_root, entry, output_dir)
+
+    # The manifest carries the names of the tests, so ship it with
+    # the PDFs.
+    manifest_copy = pathlib.Path(output_dir) / manifest_path.name
+    shutil.copy(manifest_path, manifest_copy)
+
+
+if __name__ == "__main__":
+    # The pdf.js checkout is expected under $MOZ_FETCHES_DIR/pdf.js.
+    fetches_root = os.environ.get("MOZ_FETCHES_DIR")
+    if not fetches_root:
+        raise Exception(
+            "MOZ_FETCHES_DIR is not set to the path containing the pdf.js repo"
+        )
+
+    repo_path = pathlib.Path(fetches_root) / "pdf.js"
+    if not repo_path.exists():
+        raise Exception("Can't find the pdf.js repository in MOZ_FETCHES_DIR")
+
+    # $OUTPUT_DIR is where the PDFs and the manifest end up.
+    output_location = os.environ.get("OUTPUT_DIR")
+    if not output_location:
+        raise Exception("OUTPUT_DIR is not set for the file output")
+
+    destination = pathlib.Path(output_location)
+    # Create the output directory (and any missing parents) before use.
+    destination.mkdir(parents=True, exist_ok=True)
+    gather_talos_pdfs(repo_path, destination)