# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from collections import defaultdict
from pathlib import Path

from mozperftest.metrics.exceptions import (
    MetricsMissingResultsError,
    MetricsMultipleTransformsError,
)
from mozperftest.metrics.notebook import PerftestETL
from mozperftest.metrics.utils import metric_fields, validate_intermediate_results

# Option specifications shared by the metrics layers. Each key is an option
# name and each value is a mapping of option settings ("type", "nargs",
# "default", "action", "help").
# NOTE(review): the value keys look like argparse `add_argument` keyword
# arguments -- confirm against the code that consumes COMMON_ARGS.
COMMON_ARGS = {
    # Zero or more metric selectors; each raw value is converted by
    # `metric_fields` (imported from mozperftest.metrics.utils).
    "metrics": {
        "type": metric_fields,
        "nargs": "*",
        "default": [],
        "help": "The metrics that should be retrieved from the data.",
    },
    # String prepended to output file names; empty by default.
    "prefix": {"type": str, "default": "", "help": "Prefix used by the output files."},
    # Optional metric name whose values are used to partition the data.
    "split-by": {
        "type": str,
        "default": None,
        "help": "A metric name to use for splitting the data. For instance, "
        "using browserScripts.pageinfo.url will split the data by the unique "
        "URLs that are found.",
    },
    # Boolean flag: reduce dotted metric names to their last component.
    "simplify-names": {
        "action": "store_true",
        "default": False,
        "help": "If set, metric names will be simplified to a single word. The PerftestETL "
        "combines dictionary keys by `.`, and the final key contains that value of the data. "
        "That final key becomes the new name of the metric.",
    },
    # Substrings that protect a metric name from being simplified.
    "simplify-exclude": {
        "nargs": "*",
        "default": ["statistics"],
        "help": "When renaming/simplifying metric names, entries with these strings "
        "will be ignored and won't get simplified. These options are only used when "
        "--simplify-names is set.",
    },
    # Optional custom transformer, given as a file path or `module:ClassName`.
    "transformer": {
        "type": str,
        "default": None,
        "help": "The path to the file containing the custom transformer, "
        "or the module to import along with the class name, "
        "e.g. mozperftest.test.xpcshell:XpcShellTransformer",
    },
}


class MetricsStorage(object):
    """Holds data that is commonly used across all metrics layers.

    An instance of this class represents data for a given output
    path and prefix.
    """

    def __init__(self, output_path, prefix, logger):
        """Creates the storage and makes sure the output directory exists.

        :param output_path str/Path: Directory that is created (including
            parents) if it does not exist yet.
        :param prefix str: Prefix combined with each data name when
            building the notebook configuration.
        :param logger: Object providing a `warning` method.
        """
        self.prefix = prefix
        self.output_path = output_path
        self.stddata = {}
        self.ptnb_config = {}
        # Both attributes are replaced by `set_results`; they start as empty
        # mappings so the other methods can be called safely before that.
        self.results = {}
        self.settings = {}
        self.logger = logger

        Path(output_path).mkdir(parents=True, exist_ok=True)

    def _parse_results(self, results):
        """Flattens `results` into a list of file paths (or raw dicts).

        :param results dict/str/Path/list/tuple: A dict is returned as-is in
            a one-element list. A path to a file yields its posix string, a
            path to a directory yields every `*.json` file found below it,
            and a list/tuple is flattened recursively. A path that does not
            exist only triggers a warning.
        :return list: The flattened entries.
        """
        if isinstance(results, dict):
            return [results]
        res = []
        # XXX we need to embrace pathlib everywhere.
        if isinstance(results, (str, Path)):
            # Expecting a single path or a directory
            p = Path(results)
            if not p.exists():
                self.logger.warning("Given path does not exist: {}".format(results))
            elif p.is_dir():
                files = [f for f in p.glob("**/*.json") if not f.is_dir()]
                res.extend(self._parse_results(files))
            else:
                res.append(p.as_posix())
        elif isinstance(results, (list, tuple)):
            # Expecting a sequence of paths
            for path in results:
                res.extend(self._parse_results(path))
        return res

    def set_results(self, results):
        """Processes and sets results provided by the metadata.

        Every entry of `results` is a mapping with at least a `name` and a
        `results` key. `results` can be a path to a file or a directory
        (or a list of them); every file is scanned and we build a list.
        Entries sharing the same name are merged together.

        :param results list: Intermediate results entries to process.
        :raises NotImplementedError: If an entry holds its results in a dict.
        :raises MetricsMultipleTransformsError: If entries with the same
            name request different transformers.
        :raises MetricsMissingResultsError: If no entry was provided.
        """
        # Parse the results into files (for now) and the settings
        self.results = defaultdict(lambda: defaultdict(list))
        self.settings = defaultdict(dict)
        for res in results:
            # Ensure that the results are valid before continuing
            validate_intermediate_results(res)

            name = res["name"]
            if isinstance(res["results"], dict):
                # XXX Implement subtest based parsing
                raise NotImplementedError(
                    "Subtest-based processing is not implemented yet"
                )

            # Merge all entries with the same name into one
            # result, if separation is needed use unique names
            entry = self.results[name]
            entry["files"].extend(self._parse_results(res["results"]))

            # Everything except the raw results is kept as a setting
            self.settings[name].update(
                {key: val for key, val in res.items() if key != "results"}
            )

            # Check the transform definitions. The requested transformer is
            # resolved once so the error message below cannot fail with a
            # KeyError when the entry relies on the default transformer.
            requested = res.get("transformer", "SingleJsonRetriever")
            currtrfm = entry["transformer"]
            if not currtrfm:
                entry["transformer"] = requested
            elif currtrfm != requested:
                raise MetricsMultipleTransformsError(
                    f"Only one transformer allowed per data name! Found multiple for {name}: "
                    f"{[currtrfm, requested]}"
                )

            # Get the transform options if available
            entry["options"] = res.get("transformer-options", {})

        if not self.results:
            self.return_code = 1
            raise MetricsMissingResultsError("Could not find any results to process.")

    def get_standardized_data(self, group_name="firefox", transformer=None):
        """Returns a parsed, standardized results data set.

        The dataset is computed once then cached: as soon as some data has
        been produced, later calls return it unchanged, whatever arguments
        are given.

        :param group_name str: The name for this results group (currently
            not used by this method).
        :param transformer str: The transformer to use when parsing the
            data. When None, the transformer recorded by `set_results` for
            each data name is used.
        :return dict: Standardized notebook data, keyed by data name.
        """
        if self.stddata:
            return self.stddata

        for data_type, data_info in self.results.items():
            tfm = transformer if transformer is not None else data_info["transformer"]
            prefix = data_type
            if self.prefix:
                prefix = "{}-{}".format(self.prefix, data_type)

            # Primarily used to store the transformer used on the data
            # so that it can also be used for generating things
            # like summary values for suites, and subtests.
            self.ptnb_config[data_type] = {
                "output": self.output_path,
                "prefix": prefix,
                "custom_transformer": tfm,
                "file_groups": {data_type: data_info["files"]},
            }

            ptnb = PerftestETL(
                file_groups=self.ptnb_config[data_type]["file_groups"],
                config=self.ptnb_config[data_type],
                prefix=self.prefix,
                logger=self.logger,
                custom_transform=tfm,
            )
            r = ptnb.process(**data_info["options"])
            self.stddata[data_type] = r["data"]

        return self.stddata

    def filtered_metrics(
        self,
        group_name="firefox",
        transformer=None,
        metrics=None,
        exclude=None,
        split_by=None,
        simplify_names=False,
        simplify_exclude=["statistics"],
    ):
        """Filters the metrics to only those that were requested by `metrics`.

        If metrics is Falsey (None, empty list, etc.) then no metrics
        will be filtered and the standardized data is returned as-is. The
        entries in metrics are pattern matched with the subtests in the
        standardized data (not a regular expression). For example, if
        "firstPaint" is in metrics, then all subtests which contain this
        string in their name will be kept.

        Note that the kept entries are the cached standardized entries
        themselves: the `transformer` key and simplified names are written
        onto them in place.

        :param group_name str: Forwarded to `get_standardized_data`.
        :param transformer str: Forwarded to `get_standardized_data`.
        :param metrics list: List of metrics to keep; each entry is a
            mapping whose `name` is matched against the subtest names.
        :param exclude list: List of string matchers to exclude from the metrics
            gathered/reported.
        :param split_by str: The name of a metric to use to split up data by.
            If no subtest matches it, a warning is logged and the results
            are returned without being split.
        :param simplify_names bool: If True, subtest names are reduced to
            the part following their last `.`.
        :param simplify_exclude list: List of string matchers to exclude
            from the naming simplification process. The default list is
            never mutated.
        :return dict: Standardized notebook data containing the
            requested metrics.
        """
        results = self.get_standardized_data(
            group_name=group_name, transformer=transformer
        )
        if not metrics:
            return results
        if not exclude:
            exclude = []
        if not simplify_exclude:
            simplify_exclude = []

        # Get the field to split the results by (if any). `split_indices`
        # maps each distinct value of the splitting metric to the positions
        # (iterations) where that value was observed.
        split_indices = None
        if split_by is not None:
            splitting_entry = None
            for data_info in results.values():
                for res in data_info:
                    if split_by in res["subtest"]:
                        splitting_entry = res
                        # Only leaves the inner loop: a match found in a
                        # later data type replaces this one.
                        break
            if splitting_entry is None:
                self.logger.warning(
                    f"Could not find the split-by metric `{split_by}` in the data, "
                    "the results will not be split."
                )
            else:
                split_indices = defaultdict(list)
                for c, entry in enumerate(splitting_entry["data"]):
                    split_indices[entry["value"]].append(c)

        # Filter metrics
        filtered = {}
        for data_type, data_info in results.items():
            newresults = []
            for res in data_info:
                subtest = res["subtest"]
                if not any(met["name"] in subtest for met in metrics):
                    continue
                if any(met in subtest for met in exclude):
                    continue
                res["transformer"] = self.ptnb_config[data_type]["custom_transformer"]
                newresults.append(res)
            filtered[data_type] = newresults

        # Simplify the filtered metric names
        if simplify_names:

            def _simplify(name):
                if any(met in name for met in simplify_exclude):
                    return None
                return name.split(".")[-1]

            # `_alter_name` ignores its second argument, so nothing from the
            # loops above needs to be passed along.
            self._alter_name(filtered, None, filter=_simplify)

        # Split the filtered results
        if split_indices is not None:
            newfilt = {}
            total_iterations = sum(len(inds) for inds in split_indices.values())
            for data_type, entries in filtered.items():
                if not entries:
                    # Ignore empty data types
                    continue

                newresults = []
                newfilt[data_type] = newresults
                for split, indices in split_indices.items():
                    for res in entries:
                        if len(res["data"]) != total_iterations:
                            # Skip data that cannot be split
                            continue
                        splitres = dict(res)
                        # The split value is not necessarily a string
                        splitres["subtest"] = f"{res['subtest']} {split}"
                        splitres["data"] = [res["data"][i] for i in indices]
                        splitres["transformer"] = self.ptnb_config[data_type][
                            "custom_transformer"
                        ]

                        newresults.append(splitres)

            filtered = newfilt

        return filtered

    def _alter_name(self, filtered, res, filter):
        """Renames the subtests of `filtered` in place using `filter`.

        :param filtered dict: Mapping of data names to lists of entries
            holding a `subtest` key.
        :param res: Unused, only kept so existing callers keep working.
        :param filter callable: Receives the current subtest name and
            returns the new name, or None to leave the entry untouched.
        """
        # Names that were already handed out; a name is only used once so
        # two different metrics never end up with the same simplified name.
        previous = set()
        for data_info in filtered.values():
            for entry in data_info:
                new = filter(entry["subtest"])
                if new is None:
                    continue
                if new in previous:
                    self.logger.warning(
                        f"Another metric which ends with `{new}` was already found. "
                        f"{entry['subtest']} will not be simplified."
                    )
                    continue
                entry["subtest"] = new
                previous.add(new)


# Cache of MetricsStorage instances, keyed by (output path, prefix).
_metrics = {}


def filtered_metrics(
    metadata,
    path,
    prefix,
    group_name="firefox",
    transformer=None,
    metrics=None,
    settings=False,
    exclude=None,
    split_by=None,
    simplify_names=False,
    simplify_exclude=["statistics"],
):
    """Returns standardized data extracted from the metadata instance.

    We're caching an instance of MetricsStorage per path/prefix
    combination, so the results are only set once per combination. The
    instance is only cached once its results were set successfully, so a
    failure is raised again on the next call instead of leaving an empty
    storage behind.

    :param metadata: Object providing `get_results()`; it is also handed to
        MetricsStorage as its logger.
    :param path str/Path: Output path given to MetricsStorage.
    :param prefix str: Prefix given to MetricsStorage.
    :param settings bool: If True, a `(results, settings)` tuple is
        returned instead of the results alone.
    :return dict/tuple: The filtered results, along with the storage
        settings when `settings` is True.

    The remaining arguments are forwarded unchanged to
    `MetricsStorage.filtered_metrics`.
    """
    key = path, prefix
    storage = _metrics.get(key)
    if storage is None:
        storage = MetricsStorage(path, prefix, metadata)
        storage.set_results(metadata.get_results())
        # Only reached when `set_results` did not raise
        _metrics[key] = storage

    results = storage.filtered_metrics(
        group_name=group_name,
        transformer=transformer,
        metrics=metrics,
        exclude=exclude,
        split_by=split_by,
        simplify_names=simplify_names,
        simplify_exclude=simplify_exclude,
    )

    # XXX returning two different types is a problem
    # in case settings is false, we should return None for it
    # and always return a 2-tuple
    if settings:
        return results, storage.settings
    return results