import functools
import itertools
import os
import re
import subprocess
from typing import Iterable, FrozenSet, Tuple, Optional, List

from debian.debian_support import Release
from lsprotocol.types import Diagnostic, Range, Position, DiagnosticSeverity

from debputy.lsp.quickfixes import propose_correct_text_quick_fix
from debputy.lsp.text_util import LintCapablePositionCodec
from debputy.util import _info, _warn

# Path of the hunspell en_US dictionary (word list). Its presence is probed
# before hunspell is enabled and it is handed to HunSpell when the checker
# is created.
_SPELL_CHECKER_DICT = "/usr/share/hunspell/en_US.dic"
# Path of the affix file that is passed to HunSpell together with the
# dictionary above.
_SPELL_CHECKER_AFF = "/usr/share/hunspell/en_US.aff"
# A "word part" is a maximal run of non-whitespace characters.
_WORD_PARTS = re.compile(r"(\S+)")
# Extracts the actual word(s) from a word part: a run of word characters that
# may be followed by exactly one "-suffix" or "'suffix". Surrounding
# punctuation is not part of the match.
_PRUNE_SYMBOLS_RE = re.compile(r"(\w+(?:-\w+|'\w+)?)")
# Characters that open (and close) a quoted region: double quote or backtick.
# The single quote is not listed here.
_FIND_QUOTE_CHAR = re.compile(r'["`]')
# Heuristic for word parts that look like a path or a file name. It is applied
# with `.match()` in `_split_line_to_words`, so it only has to match at the
# start of the word part. The alternatives are, in order:
#  * up to three leading dots, a slash and two or more slash-separated segments
#  * a relative path of two or more slash-separated segments
#  * three or more slash-separated segments that may contain underscores
#  * optional leading text followed by a dot and one to three lowercase letters
_LOOKS_LIKE_FILENAME = re.compile(
    r"""
      [.]{0,3}/[a-z0-9]+(/[a-z0-9]+)+/*
    | [a-z0-9-_]+(/[a-z0-9]+)+/*
    | [a-z0-9_]+(/[a-z0-9_]+){2,}/*
    | (?:\S+)?[.][a-z]{1,3}

""",
    re.VERBOSE,
)
# Heuristic for word parts that look like identifiers, options, versions or
# similar technical tokens rather than prose. It is applied with `.match()` in
# `_split_line_to_words`, so a word part is skipped as soon as one of the
# alternatives matches at its start. Each alternative is described by the
# comment that precedes it inside the (verbose) pattern.
_LOOKS_LIKE_PROGRAMMING_TERM = re.compile(
    r"""
    (
        # Java identifier Camel Case
          [a-z][a-z0-9]*(?:[A-Z]{1,3}[a-z0-9]+)+
        # Type name Camel Case
        | [A-Z]{1,3}[a-z0-9]+(?:[A-Z]{1,3}[a-z0-9]+)+
        # Type name Camel Case with underscore (seen in Dh_Lib.pm among other
        | [A-Z]{1,3}[a-z0-9]+(?:_[A-Z]{1,3}[a-z0-9]+)+
        # Perl module
        | [A-Z]{1,3}[a-z0-9]+(?:_[A-Z]{1,3}[a-z0-9]+)*(::[A-Z]{1,3}[a-z0-9]+(?:_[A-Z]{1,3}[a-z0-9]+)*)+
        # Probably an abbreviation
        | [A-Z]{3,}
        # Perl/Python identifiers or Jinja templates
        | [$%&@_]?[{]?[{]?[a-z][a-z0-9]*(?:_[a-z0-9]+)+(?:(?:->)?[\[{]\S+|}}?)?
        # SCREAMING_SNAKE_CASE (environment variables plus -DVAR=B or $FOO)
        | [-$%&*_]{0,2}[A-Z][A-Z0-9]*(_[A-Z0-9]+)+(?:=\S+)?
        | \#[A-Z][A-Z0-9]*(_[A-Z0-9]+)+\#
        # Subcommand names. Require at least two "-" to avoid skipping hyphenated words
        | [a-z][a-z0-9]*(-[a-z0-9]+){2,}
        # Short args
        | -[a-z0-9]+
        # Things like 32bit
        | \d{2,}-?[a-z]+
        # Source package (we do not have a package without prefix/suffix because it covers 95% of all lowercase words)
        | src:[a-z0-9][-+.a-z0-9]+
        | [a-z0-9][-+.a-z0-9]+:(?:any|native)
        # Version
        | v\d+(?:[.]\S+)?
        # chmod symbolic mode or math
        | \S*=\S+
    )
""",
    re.VERBOSE,
)
# Heuristic for an email address wrapped in angle brackets (`<local@domain>`).
# Neither side of the "@" may contain whitespace, ">" or another "@". It is
# applied with `.match()`, so the word part must start with "<".
_LOOKS_LIKE_EMAIL = re.compile(
    r"""
    <[^>@\s]+@[^>@\s]+>
""",
    re.VERBOSE,
)
# Shared empty result returned whenever a word needs no corrections.
_NO_CORRECTIONS = tuple()
# Word list files that are read relative to the directory of this module; each
# listed word is added to the spell checker as-is.
_WORDLISTS = [
    "debian-wordlist.dic",
]
# Like _WORDLISTS, but these are read in "namelist" mode, which additionally
# yields a "<name>'s" variant for every entry.
_NAMELISTS = [
    "logins-and-people.dic",
]
# Candidate personal dictionaries. A leading "${VAR}" is replaced with the
# value of that environment variable when the dictionaries are loaded; entries
# whose variable is unset or whose file does not exist are skipped.
_PERSONAL_DICTS = [
    "${HOME}/.hunspell_default",
    "${HOME}/.hunspell_en_US",
]


# Spell checking is only enabled when both hunspell data files are present
# and the `hunspell` Python module can be imported. In every other case
# `_HAS_HUNSPELL` stays False and `HunSpell` is left undefined.
_HAS_HUNSPELL = False
if os.path.lexists(_SPELL_CHECKER_DICT) and os.path.lexists(_SPELL_CHECKER_AFF):
    try:
        from hunspell import HunSpell
    except ImportError:
        pass
    else:
        _HAS_HUNSPELL = True


def _read_wordlist(
    base_dir: str, wordlist_name: str, *, namelist: bool = False
) -> Iterable[str]:
    """Yield the entries of a word list file (one entry per line).

    Surrounding whitespace is stripped from every line and blank lines are
    ignored, so an empty string (or a bare "'s" in namelist mode) is never
    produced.

    :param base_dir: Directory that contains the word list.
    :param wordlist_name: File name of the word list inside `base_dir`.
    :param namelist: When true, every entry is followed (after all plain
      entries have been yielded) by its possessive form, `<entry>'s`.
    :return: The entries in file order, optionally followed by the possessive
      forms in the same order.
    :raises OSError: If the file cannot be opened (raised on first iteration).
    """
    path = os.path.join(base_dir, wordlist_name)
    # Decode explicitly as UTF-8 rather than with the locale's preferred
    # encoding, so the result does not depend on the environment the server
    # happens to be started in.
    # NOTE(review): assumes the bundled word lists are UTF-8 - confirm.
    with open(path, encoding="utf-8") as fd:
        # Read everything up front so the file is closed before the first
        # word is handed to the consumer.
        words = [word for word in (raw.strip() for raw in fd) if word]
    yield from words
    if namelist:
        yield from (f"{name}'s" for name in words)


def _all_debian_archs() -> Iterable[str]:
    """Return the names printed by `dpkg-architecture -L`, one per output line.

    If the command is missing or exits with a non-zero status, a warning is
    emitted and an empty tuple is returned instead.
    """
    command = ["dpkg-architecture", "-L"]
    try:
        raw_listing = subprocess.check_output(command)
    except (FileNotFoundError, subprocess.CalledProcessError) as e:
        _warn(f"dpkg-architecture -L failed: {e}")
        return ()

    listed_lines = raw_listing.decode("utf-8").splitlines()
    return (entry.strip() for entry in listed_lines)


@functools.lru_cache
def _builtin_exception_words() -> FrozenSet[str]:
    """Collect the words that are accepted in addition to the dictionary.

    The set is the union of the bundled word lists, the bundled name lists
    (read in namelist mode), the entries of `Release.releases` and the output
    of `_all_debian_archs()`. The result is memoized after the first
    successful call.
    """
    module_dir = os.path.dirname(__file__)
    # Resolve these two sources before any word list is opened, so the order
    # in which the sources are touched is unchanged.
    release_names = iter(Release.releases)
    architectures = _all_debian_archs()

    collected = set()
    for wordlist in _WORDLISTS:
        collected.update(_read_wordlist(module_dir, wordlist))
    for name_file in _NAMELISTS:
        collected.update(_read_wordlist(module_dir, name_file, namelist=True))
    collected.update(release_names)
    collected.update(architectures)
    return frozenset(collected)


# Module-wide spell checker instance. It starts out unset, is created on
# demand by `default_spellchecker()` and is replaced with the do-nothing
# checker by `disable_spellchecking()`.
_DEFAULT_SPELL_CHECKER: Optional["Spellchecker"] = None


def spellcheck_line(
    lines: List[str],
    position_codec: LintCapablePositionCodec,
    line_no: int,
    line: str,
) -> Iterable[Diagnostic]:
    """Yield one hint-level diagnostic per word in `line` that has corrections.

    :param lines: Passed to `position_codec` when the word range is converted
      to client units.
    :param position_codec: Codec used to convert the range to client units.
    :param line_no: Line number used for both ends of every reported range.
    :param line: The text to spell check.
    :return: Diagnostics whose `data` holds one quick fix per correction.
    """
    checker = default_spellchecker()
    for word, start, end in checker.iter_words(line):
        suggestions = checker.provide_corrections_for(word)
        if suggestions:
            server_range = Range(
                Position(line_no, start),
                Position(line_no, end),
            )
            client_range = position_codec.range_to_client_units(
                lines,
                server_range,
            )
            message = f'Spelling "{word}"'
            yield Diagnostic(
                client_range,
                message,
                severity=DiagnosticSeverity.Hint,
                source="debputy",
                data=[propose_correct_text_quick_fix(s) for s in suggestions],
            )


def default_spellchecker() -> "Spellchecker":
    """Return the module-wide spell checker, creating it on first use.

    The hunspell-backed checker is used when hunspell was detected at import
    time; otherwise the do-nothing checker is used.
    """
    global _DEFAULT_SPELL_CHECKER
    current = _DEFAULT_SPELL_CHECKER
    if current is not None:
        return current
    current = HunspellSpellchecker() if _HAS_HUNSPELL else _do_nothing_spellchecker()
    _DEFAULT_SPELL_CHECKER = current
    return current


@functools.lru_cache()
def _do_nothing_spellchecker() -> "Spellchecker":
    """Return the memoized spell checker that never reports anything."""
    noop_checker = EverythingIsCorrectSpellchecker()
    return noop_checker


def disable_spellchecking() -> None:
    """Make the do-nothing checker the module-wide default spell checker."""
    global _DEFAULT_SPELL_CHECKER
    noop_checker = _do_nothing_spellchecker()
    _DEFAULT_SPELL_CHECKER = noop_checker


def _skip_quoted_parts(line: str) -> Iterable[Tuple[str, int]]:
    """Yield the parts of `line` that are outside of quoted regions.

    A quoted region starts at a character matched by `_FIND_QUOTE_CHAR` and
    ends at the next occurrence of that same character. Every yielded item is
    a `(text, offset)` pair where `offset` is the index of `text` in `line`.

    A part in front of a quoted region is dropped when it consists solely of
    whitespace. When an opening quote has no closing counterpart, the
    remainder of the line (starting at the current part) is yielded as-is.
    """
    cursor = 0
    while True:
        opening = _FIND_QUOTE_CHAR.search(line, cursor)
        if opening is None:
            # No (further) quoted region; the rest of the line is plain text.
            yield line[cursor:], cursor
            return

        quote_start = opening.start()
        quote_end = line.find(opening.group(), quote_start + 1)
        if quote_end == -1:
            # Unterminated quote; treat the remainder as plain text.
            yield line[cursor:], cursor
            return

        unquoted = line[cursor:quote_start]
        if not unquoted.isspace():
            yield unquoted, cursor
        # Resume right after the closing quote character.
        cursor = quote_end + 1


def _split_line_to_words(line: str) -> Iterable[Tuple[str, int, int]]:
    """Yield `(word, start, end)` for every word in `line` worth checking.

    Quoted regions are skipped first. The remaining text is split on
    whitespace, and a resulting token is ignored entirely when it starts with
    "--" or when the programming term, file name or email heuristic matches at
    its start. The words inside the surviving tokens are extracted with
    `_PRUNE_SYMBOLS_RE`; `start` and `end` are indices into `line`.
    """
    skip_heuristics = (
        _LOOKS_LIKE_PROGRAMMING_TERM,
        _LOOKS_LIKE_FILENAME,
        _LOOKS_LIKE_EMAIL,
    )
    for segment, segment_start in _skip_quoted_parts(line):
        for token_match in _WORD_PARTS.finditer(segment):
            token = token_match.group(1)
            # Long command line options are never prose.
            if token.startswith("--"):
                continue
            if any(heuristic.match(token) for heuristic in skip_heuristics):
                continue
            # Index of the token within the full line.
            token_start = segment_start + token_match.start(1)
            for word_match in _PRUNE_SYMBOLS_RE.finditer(token):
                rel_start, rel_end = word_match.span(1)
                yield (
                    word_match.group(1),
                    rel_start + token_start,
                    rel_end + token_start,
                )


class Spellchecker:
    """Interface of a spell checker; subclasses supply the actual checking."""

    @staticmethod
    def do_nothing_spellchecker() -> "Spellchecker":
        """Return a new spell checker that accepts every word."""
        return EverythingIsCorrectSpellchecker()

    def iter_words(self, line: str) -> Iterable[Tuple[str, int, int]]:
        """Yield `(word, start, end)` for each word of `line` to be checked."""
        for word_span in _split_line_to_words(line):
            yield word_span

    def provide_corrections_for(self, word: str) -> Iterable[str]:
        """Return the corrections for `word`; must be overridden."""
        raise NotImplementedError()

    def ignore_word(self, word: str) -> None:
        """Mark `word` as correctly spelled; must be overridden."""
        raise NotImplementedError()


class EverythingIsCorrectSpellchecker(Spellchecker):
    """Spell checker that considers every word to be spelled correctly."""

    def provide_corrections_for(self, word: str) -> Iterable[str]:
        """Return the shared empty result; no word ever needs a correction."""
        return _NO_CORRECTIONS

    def ignore_word(self, word: str) -> None:
        """Do nothing; words are never checked, so there is nothing to ignore."""
        return None


class HunspellSpellchecker(Spellchecker):
    """Spell checker backed by hunspell.

    On creation, the built-in exception words and any personal dictionaries
    that exist are added on top of the hunspell dictionary. Lookup results
    are memoized per instance, and the memo is dropped whenever a word is
    ignored so that the change takes effect immediately.
    """

    def __init__(self) -> None:
        self._checker = HunSpell(_SPELL_CHECKER_DICT, _SPELL_CHECKER_AFF)
        # The cache lives on the instance rather than on the class: it is then
        # not keyed on (and does not keep alive) other instances, and it can
        # be invalidated for this checker alone.
        self._cached_lookup = functools.lru_cache(maxsize=128)(self._lookup_uncached)
        for w in _builtin_exception_words():
            self._checker.add(w)
        self._load_personal_exclusions()

    def provide_corrections_for(self, word: str) -> Iterable[str]:
        """Return the suggested corrections for `word` (empty if acceptable).

        Words with certain Debian tooling prefixes and deliberately coined
        forms are accepted without consulting hunspell.
        """
        if word.startswith(
            (
                "dpkg-",
                "dh-",
                "dh_",
                "debian-",
                "debconf-",
                "update-",
                "DEB_",
                "DPKG_",
            )
        ):
            return _NO_CORRECTIONS
        # 'ing is deliberately forcing a word into another word-class
        if word.endswith(("'ing", "-nss")):
            return _NO_CORRECTIONS
        return self._lookup(word)

    def _lookup(self, word: str) -> Iterable[str]:
        """Return the (memoized) hunspell verdict for `word`."""
        return self._cached_lookup(word)

    def _lookup_uncached(self, word: str) -> Iterable[str]:
        """Ask hunspell about `word` without consulting the memo."""
        if self._checker.spell(word):
            return _NO_CORRECTIONS
        # Hand out an immutable copy; the value is shared between all callers
        # that hit the cache.
        return tuple(self._checker.suggest(word))

    def ignore_word(self, word: str) -> None:
        """Accept `word` as correctly spelled from now on."""
        self._checker.add(word)
        # Without this, a word that was looked up before being ignored would
        # keep returning its stale suggestions from the cache.
        self._cached_lookup.cache_clear()

    def _load_personal_exclusions(self) -> None:
        """Add every existing personal dictionary to the checker.

        A leading `${VAR}` in a configured path is replaced by the value of
        the environment variable `VAR`; the entry is skipped when the variable
        is unset or the resulting path is not a file.
        """
        for filename in _PERSONAL_DICTS:
            if filename.startswith("${"):
                end_index = filename.index("}")
                varname = filename[2:end_index]
                value = os.environ.get(varname)
                if value is None:
                    continue
                filename = value + filename[end_index + 1 :]
            if os.path.isfile(filename):
                _info(f"Loading personal spelling dictionary from {filename}")
                self._checker.add_dic(filename)