summaryrefslogtreecommitdiffstats
path: root/src/python/orcus/tools/bugzilla.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/python/orcus/tools/bugzilla.py')
-rw-r--r--src/python/orcus/tools/bugzilla.py219
1 files changed, 219 insertions, 0 deletions
diff --git a/src/python/orcus/tools/bugzilla.py b/src/python/orcus/tools/bugzilla.py
new file mode 100644
index 0000000..f23b4d5
--- /dev/null
+++ b/src/python/orcus/tools/bugzilla.py
@@ -0,0 +1,219 @@
+########################################################################
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+########################################################################
+
+import argparse
+import requests
+import json
+import os
+import base64
+import traceback
+import concurrent.futures as cf
+from pathlib import Path
+from urllib.parse import urlparse
+
+
class BugzillaAccess:
    """Encapsulates access to a bugzilla server by using its REST API.

    Args:
        bzurl (str): URL to the bugzilla server.
        cache_dir (:obj:`pathlib.Path`): path to the cache directory.
    """

    def __init__(self, bzurl, cache_dir):
        self._bzurl = bzurl
        self._cache_dir = cache_dir
        os.makedirs(self._cache_dir, exist_ok=True)

    def _get_cache_content(self, cache_file, func_fetch):
        """Return the content of the cache file, fetching it first via
        ``func_fetch`` when the cache file does not yet exist.

        Args:
            cache_file (:obj:`pathlib.Path`): path to the cache file.
            func_fetch (callable): zero-argument callable that fetches the
                content from the remote server as a string.

        Returns (str): cached or freshly-fetched content.
        """
        if os.path.isfile(cache_file):
            # Cache hit - skip the remote query entirely.
            with open(cache_file, "r", encoding="utf-8") as f:
                return f.read()

        s = func_fetch()
        with open(cache_file, "w", encoding="utf-8") as f:
            f.write(s)

        return s

    def get_bug_ids(self, bz_params):
        """Get all bug ID's for specified bugzilla query parameters.

        Args:
            bz_params (dict):
                dictionary containing all search parameters. Each search term
                must form a single key-value pair.

        Returns (:obj:`list` of :obj:`str`):
            list of bug ID strings.
        """

        def _fetch():
            r = requests.get(
                f"{self._bzurl}/rest/bug",
                params=bz_params
            )

            if r.status_code != 200:
                raise RuntimeError(f"failed to query bug ids from the TDF bugzilla! (status:{r.status_code})")
            return r.text

        # Derive a filesystem-safe cache file name by joining each key and
        # value with '-', replacing characters unsafe in file names.
        unsafe_chars = str.maketrans({" ": "-", "/": "-"})
        buf = []
        for key, value in bz_params.items():
            buf.append(key)
            buf.append(str(value).translate(unsafe_chars))

        cache_file = self._cache_dir / ('-'.join(buf) + ".json")
        s = self._get_cache_content(cache_file, _fetch)

        content = json.loads(s)
        bugs = content.get("bugs")
        if not bugs:
            return []

        # Drop entries with a missing or empty "id" value.
        return [bug["id"] for bug in bugs if bug.get("id")]

    def get_attachments(self, bug_id):
        """Fetch all attachments for specified bug.

        Args:
            bug_id (int or str): ID of the bug to fetch the attachments for.

        Returns (:obj:`list` of :obj:`dict`):
            one dict per attachment, with the keys ``content_type``,
            ``filename`` and ``data`` (base64-decoded binary content).
        """

        def _fetch():
            r = requests.get(f"{self._bzurl}/rest/bug/{bug_id}/attachment")
            if r.status_code != 200:
                raise RuntimeError(
                    f"failed to fetch the attachments for bug {bug_id}! (status:{r.status_code})")
            return r.text

        cache_file = self._cache_dir / f"attachments-{bug_id}.json"
        s = self._get_cache_content(cache_file, _fetch)
        content = json.loads(s)
        attachments = list()
        for d in content["bugs"][str(bug_id)]:
            # Use .get() so an entry without a "data" key is skipped rather
            # than raising KeyError.
            data = d.get("data")
            if not data:
                continue
            # Renamed from "bytes", which shadowed the builtin type.
            decoded = base64.b64decode(data)
            attachments.append({
                "content_type": d["content_type"],
                "filename": d["file_name"],
                "data": decoded
            })
        return attachments
+
+
def parse_query_params(queries):
    """Parse command-line query terms into a bugzilla parameter dict.

    Args:
        queries (:obj:`list` of :obj:`str`): query terms of the form
            ``key=value``. The value may be wrapped in single or double
            quotes, which get stripped.

    Returns (dict): mapping of query keys to their values.

    Raises:
        ValueError: when a term contains no '=' or its quotes are mis-matched.
    """
    bz_params = dict()
    for query in queries:
        # Split on the first '=' only so the value itself may contain '='.
        # The original str.split('=') raised ValueError on such input, and
        # the mis-matched-quote branch called argparse.ArgumentError with a
        # single argument, which is a TypeError at runtime.
        k, sep, v = query.partition('=')
        if not sep:
            raise ValueError(f"malformed query term '{query}'; expected key=value")
        if v and v[0] in ('"', "'"):
            if v[0] != v[-1]:
                raise ValueError(f"mis-matched quotes in {query}")
            v = v[1:-1]
        bz_params[k] = v
    return bz_params
+
+
+def _create_argparser():
+ parser = argparse.ArgumentParser(
+ description="""This command allows you to download attachments from a
+bugzilla server that supports REST API.""")
+ parser.add_argument(
+ "--outdir", "-o", type=str, required=True,
+ help="""output directory for downloaded files. Downloaded files are
+grouped by their respective bug ID's.""")
+ parser.add_argument(
+ "--limit", type=int, default=50,
+ help="number of bugs to include in a single set of search results.")
+ parser.add_argument(
+ "--offset", type=int, default=0,
+ help="number of bugs to skip in the search results.")
+ parser.add_argument(
+ "--cont", action="store_true", default=False,
+ help="""when specified, the search continues after the initial batch
+is returned, by retrieving the next batch of results until the entire search
+results are returned. The number specified by the ``--limit`` option is used
+as the batch size.""")
+ parser.add_argument(
+ "--worker", type=int, default=8,
+ help="number of worker threads to use for parallel downloads of files.")
+ parser.add_argument(
+ "--cache-dir", type=Path, default=Path(".bugzilla"),
+ help="""directory to keep downloaded bugzilla search results. The
+command will not send the query request to the remote server when the results
+are cached. You may want to delete the cache directory after you are finished.""")
+ parser.add_argument(
+ "--url", type=str, required=True,
+ help="""base URL for bugzilla service. It must begin with the
+``http(s)://`` prefix.""")
+ parser.add_argument(
+ "query", type=str, nargs='*',
+ help="""One or more query term to use to limit your search. Each query
+term must be in the form key=value. You need to quote the value string when the
+value string contains whitespace character i.e. key="value with space".""")
+ return parser
+
+
def main():
    """Entry point: query bug ID's matching the command-line search terms and
    download all of their attachments into ``--outdir``, grouped by bug ID."""
    parser = _create_argparser()
    args = parser.parse_args()

    bz_params = parse_query_params(args.query)

    # Echo the effective search terms back to the user.
    for k, v in bz_params.items():
        print(f"{k}: {v}")

    bz_params["limit"] = args.limit
    bz_params["offset"] = args.offset

    # Keep per-server caches and downloads separate via the host name.
    url = urlparse(args.url)
    cache_dir = Path(args.cache_dir) / url.netloc
    bz = BugzillaAccess(args.url, cache_dir)

    def _run(bug_id, index, totals):
        """Top-level function for each worker thread."""
        # Right-align the index so progress lines stay column-aligned.
        width = len(str(totals))
        index_s = str(index + 1).rjust(width)
        print(f"({index_s}/{totals}) fetching attachments for bug {bug_id} ...", flush=True)

        try:
            attachments = bz.get_attachments(bug_id)
            for attachment in attachments:
                # Use only the base name of the server-supplied file name so
                # that a malicious name such as "../../evil" cannot escape
                # the output directory (the name is untrusted remote input).
                safe_name = Path(attachment["filename"]).name
                filepath = Path(args.outdir) / url.netloc / str(bug_id) / safe_name
                os.makedirs(os.path.dirname(filepath), exist_ok=True)
                with open(filepath, "wb") as f:
                    f.write(attachment["data"])
        except Exception as e:
            # Never let one failed bug kill the whole run; report and move on.
            traceback.print_exc()
            print(e)

    iter_count = 0
    while True:
        bug_ids = bz.get_bug_ids(bz_params)
        if not bug_ids:
            # Empty batch means the search results are exhausted.
            return
        print(f"-- iteration {iter_count+1}", flush=True)
        with cf.ThreadPoolExecutor(max_workers=args.worker) as executor:
            for i, bug_id in enumerate(bug_ids):
                executor.submit(_run, bug_id, i, len(bug_ids))

        if not args.cont:
            return

        # Advance to the next batch of search results.
        bz_params["offset"] += bz_params["limit"]
        iter_count += 1


if __name__ == "__main__":
    main()