summaryrefslogtreecommitdiffstats
path: root/scripts
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 11:33:32 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 11:33:32 +0000
commit1f403ad2197fc7442409f434ee574f3e6b46fb73 (patch)
tree0299c6dd11d5edfa918a29b6456bc1875f1d288c /scripts
parentInitial commit. (diff)
downloadpygments-1f403ad2197fc7442409f434ee574f3e6b46fb73.tar.xz
pygments-1f403ad2197fc7442409f434ee574f3e6b46fb73.zip
Adding upstream version 2.14.0+dfsg.upstream/2.14.0+dfsgupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'scripts')
-rw-r--r--scripts/check_crlf.py32
-rwxr-xr-xscripts/check_repeated_token.py77
-rwxr-xr-xscripts/check_sources.py201
-rw-r--r--scripts/check_whitespace_token.py52
-rwxr-xr-xscripts/count_token_references.py270
-rwxr-xr-xscripts/debug_lexer.py306
-rw-r--r--scripts/detect_missing_analyse_text.py48
-rw-r--r--scripts/gen_mapfiles.py53
-rw-r--r--scripts/get_css_properties.py33
-rw-r--r--scripts/get_vimkw.py72
-rw-r--r--scripts/pylintrc301
-rw-r--r--scripts/release-checklist24
-rwxr-xr-xscripts/update_contrasts.py21
-rw-r--r--scripts/utility.py69
-rwxr-xr-xscripts/vim2pygments.py932
15 files changed, 2491 insertions, 0 deletions
diff --git a/scripts/check_crlf.py b/scripts/check_crlf.py
new file mode 100644
index 0000000..c03b68d
--- /dev/null
+++ b/scripts/check_crlf.py
@@ -0,0 +1,32 @@
+#!/usr/bin/env python
+"""
+ Checker for line endings
+ ~~~~~~~~~~~~~~~~~~~~~~~~
+
+ Make sure Python (.py) and Bash completion (.bashcomp) files do not
+ contain CR/LF newlines.
+
+ :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+import sys
+import os
+
if __name__ == '__main__':
    # Walk every directory given on the command line and fail (exit 1)
    # as soon as any checked file contains a CR/LF line ending.
    for directory in sys.argv[1:]:
        if not os.path.exists(directory):
            continue

        for root, dirs, files in os.walk(directory):
            for filename in files:
                # Only Python sources and bash completion files are checked.
                if not filename.endswith(('.py', '.bashcomp')):
                    continue

                full_path = os.path.join(root, filename)
                with open(full_path, 'rb') as f:
                    if b'\r\n' in f.read():
                        print('CR/LF found in', full_path)
                        sys.exit(1)

    sys.exit(0)
diff --git a/scripts/check_repeated_token.py b/scripts/check_repeated_token.py
new file mode 100755
index 0000000..1636281
--- /dev/null
+++ b/scripts/check_repeated_token.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+"""
+ Checker for repeated tokens
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ Helper script to find suspicious lexers which produce the same token
+ repeatedly, i.e. for example:
+
+ .. code::
+
+ 'd' Text
+ 'a' Text
+ 't' Text
+ 'a' Text
+ 'b' Text
+ 'a' Text
+ 's' Text
+ 'e' Text
+
+ This script has two test modes: Check for tokens repeating more often than
+ a given threshold, and exclude anything but single-character tokens.
+ Repeated single-character tokens are quite problematic as they result in
+ bloated output and are usually an indication that someone is missing
+ a + or * in the regex.
+
+ :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+import argparse
+import sys
+
+from utility import unpack_output_file, process_output_files
+
+
def check_file(path, threshold, single_only):
    """Scan one lexer test output file for runs of identical tokens.

    path: path of the ``*.output`` file, read via ``unpack_output_file``.
    threshold: maximum allowed number of consecutive identical tokens.
    single_only: if true, only single-character token values are counted;
        longer values interrupt (reset) any run being tracked.

    Returns True when the file is clean; prints ``path:linenumber`` and
    returns False at the first over-threshold run.
    """
    current_token = ''
    current_token_repeat_count = 1

    for value, token, linenumber in unpack_output_file(path):
        if single_only and len(value) > 1:
            # Bug fix: the original assigned to the loop variable
            # ``token`` here, which had no effect -- the tracked state
            # lives in ``current_token`` and must be reset instead.
            current_token = ''
            current_token_repeat_count = 1
            continue

        if token != current_token:
            current_token = token
            current_token_repeat_count = 1
        else:
            current_token_repeat_count += 1

        if current_token_repeat_count > threshold:
            print(f'{path}:{linenumber}')
            return False

    return True
+
+
def main(args):
    """Return 1 if any output file under ``args.TEST_ROOT`` fails the check."""
    def run_check(path):
        # Bind the command-line options into the per-file checker.
        return check_file(path, args.threshold, args.single)

    failed = process_output_files(args.TEST_ROOT, run_check)
    return 1 if failed > 0 else 0
+
+
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('TEST_ROOT',
                        help='Root directory containing the tests')
    # Fixed typo in the help text: "more often then" -> "more often than".
    parser.add_argument('-t', '--threshold', type=int, default=5,
                        help='Warn if a token repeats itself more often than '
                             'this number.')
    parser.add_argument('-s', '--single', action='store_true', default=False,
                        help='Only look at tokens matching a single character')
    args = parser.parse_args()
    sys.exit(main(args))
diff --git a/scripts/check_sources.py b/scripts/check_sources.py
new file mode 100755
index 0000000..1feb1a3
--- /dev/null
+++ b/scripts/check_sources.py
@@ -0,0 +1,201 @@
+#!/usr/bin/env python
+"""
+ Checker for file headers
+ ~~~~~~~~~~~~~~~~~~~~~~~~
+
+ Make sure each Python file has a correct file header
+ including copyright and license information.
+
+ :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+import io
+import os
+import re
+import sys
+import getopt
+from os.path import join, splitext, abspath
+
+
# Registry mapping a file suffix (e.g. '.py') to the list of checker
# functions that should run on files with that suffix.
checkers = {}


def checker(*suffixes, **kwds):
    """Decorator: register the decorated function as a checker.

    The function is appended to ``checkers`` under every given suffix.
    The keyword ``only_pkg`` (default False) is stored on the function
    itself and marks checkers that apply only to package modules.
    """
    only_pkg = kwds.pop('only_pkg', False)

    def register(func):
        func.only_pkg = only_pkg
        for suffix in suffixes:
            checkers.setdefault(suffix, []).append(func)
        return func

    return register
+
+
+name_mail_re = r'[\w ]+(<.*?>)?'
+copyright_re = re.compile(r'^ :copyright: Copyright 2006-2022 by '
+ r'the Pygments team, see AUTHORS\.$')
+copyright_2_re = re.compile(r'^ %s(, %s)*[,.]$' %
+ (name_mail_re, name_mail_re))
+is_const_re = re.compile(r'if.*?==\s+(None|False|True)\b')
+
+misspellings = ["developement", "adress", "verificate", # ALLOW-MISSPELLING
+ "informations", "unlexer"] # ALLOW-MISSPELLING
+
+
@checker('.py')
def check_syntax(fn, lines):
    """Ensure the file is non-empty and compiles as Python source.

    Yields ``(lineno, message)`` pairs for any problem found.
    """
    if not lines:
        yield 0, "empty file"
        return
    # Drop a shebang line first; it is not valid Python for compile().
    source_lines = lines[1:] if '#!/' in lines[0] else lines
    try:
        compile('\n'.join(source_lines), fn, "exec")
    except SyntaxError as err:
        yield 0, "not compilable: %s" % err
+
+
@checker('.py')
def check_style_and_encoding(fn, lines):
    """Flag comparisons against None/True/False written with ``==``."""
    for lineno, line in enumerate(lines, start=1):
        if is_const_re.search(line):
            yield lineno, 'using == None/True/False'
+
+
+@checker('.py', only_pkg=True)
+def check_fileheader(fn, lines):
+ # line number correction
+ c = 1
+ if lines[0:1] == ['#!/usr/bin/env python']:
+ lines = lines[1:]
+ c = 2
+
+ llist = []
+ docopen = False
+ for lno, line in enumerate(lines):
+ llist.append(line)
+ if lno == 0:
+ if line != '"""' and line != 'r"""':
+ yield 2, f'missing docstring begin ("""), found {line!r}'
+ else:
+ docopen = True
+ elif docopen:
+ if line == '"""':
+ # end of docstring
+ if lno <= 3:
+ yield lno+c, "missing module name in docstring"
+ break
+
+ if line != "" and line[:4] != ' ' and docopen:
+ yield lno+c, "missing correct docstring indentation"
+
+ if lno == 1:
+ # if not in package, don't check the module name
+ modname = fn[:-3].replace('/', '.').replace('.__init__', '')
+ while modname:
+ if line.lower()[4:] == modname:
+ break
+ modname = '.'.join(modname.split('.')[1:])
+ else:
+ yield 3, "wrong module name in docstring heading"
+ modnamelen = len(line.strip())
+ elif lno == 2:
+ if line.strip() != modnamelen * "~":
+ yield 4, "wrong module name underline, should be ~~~...~"
+
+ else:
+ yield 0, "missing end and/or start of docstring..."
+
+ # check for copyright and license fields
+ license = llist[-2:-1]
+ if license != [" :license: BSD, see LICENSE for details."]:
+ yield 0, "no correct license info"
+
+ ci = -3
+ copyright = llist[ci:ci+1]
+ while copyright and copyright_2_re.match(copyright[0]):
+ ci -= 1
+ copyright = llist[ci:ci+1]
+ if not copyright or not copyright_re.match(copyright[0]):
+ yield 0, "no correct copyright info"
+
+
+def main(argv):
+ try:
+ gopts, args = getopt.getopt(argv[1:], "vi:")
+ except getopt.GetoptError:
+ print("Usage: %s [-v] [-i ignorepath]* [path]" % argv[0])
+ return 2
+ opts = {}
+ for opt, val in gopts:
+ if opt == '-i':
+ val = abspath(val)
+ opts.setdefault(opt, []).append(val)
+
+ if len(args) == 0:
+ path = '.'
+ elif len(args) == 1:
+ path = args[0]
+ else:
+ print("Usage: %s [-v] [-i ignorepath]* [path]" % argv[0])
+ return 2
+
+ verbose = '-v' in opts
+
+ num = 0
+ out = io.StringIO()
+
+ for root, dirs, files in os.walk(path):
+ for excl in ['.tox', '.git', 'examplefiles']:
+ if excl in dirs:
+ dirs.remove(excl)
+ if '-i' in opts and abspath(root) in opts['-i']:
+ del dirs[:]
+ continue
+ # XXX: awkward: for the Makefile call: don't check non-package
+ # files for file headers
+ in_pygments_pkg = root.startswith('./pygments')
+ for fn in files:
+
+ fn = join(root, fn)
+ if fn[:2] == './':
+ fn = fn[2:]
+
+ if '-i' in opts and abspath(fn) in opts['-i']:
+ continue
+
+ ext = splitext(fn)[1]
+ checkerlist = checkers.get(ext, None)
+ if not checkerlist:
+ continue
+
+ if verbose:
+ print("Checking %s..." % fn)
+
+ try:
+ with open(fn, 'rb') as f:
+ lines = f.read().decode('utf-8').splitlines()
+ except OSError as err:
+ print("%s: cannot open: %s" % (fn, err))
+ num += 1
+ continue
+
+ for checker in checkerlist:
+ if not in_pygments_pkg and checker.only_pkg:
+ continue
+ for lno, msg in checker(fn, lines):
+ print('%s:%d: %s' % (fn, lno, msg), file=out)
+ num += 1
+ if verbose:
+ print()
+ if num == 0:
+ print("No errors found.")
+ else:
+ print(out.getvalue().rstrip('\n'))
+ print("%d error%s found." % (num, num > 1 and "s" or ""))
+ return int(num > 0)
+
+
+if __name__ == '__main__':
+ sys.exit(main(sys.argv))
diff --git a/scripts/check_whitespace_token.py b/scripts/check_whitespace_token.py
new file mode 100644
index 0000000..f5d0970
--- /dev/null
+++ b/scripts/check_whitespace_token.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python
+"""
+ Checker for whitespace tokens
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ Helper script to find whitespace which is not of token type `Whitespace`
+
+ :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+import argparse
+import sys
+import re
+
+from utility import unpack_output_file, process_output_files
+
+
def check_file(path):
    """Return True if *path* contains no wrongly-tokenized whitespace.

    A token whose value is entirely whitespace must carry a ``Whitespace``
    token type -- except inside literals and comments, where other
    highlighting is acceptable.  On the first offending token the location
    is printed as ``path:linenumber`` and False is returned.
    """
    # Bug fix: use a raw string for the pattern -- '\s' in a plain string
    # literal is an invalid escape sequence (DeprecationWarning, and a
    # SyntaxError in future Python versions).
    whitespace_re = re.compile(r'\s+')

    for value, token, linenumber in unpack_output_file(path):
        if whitespace_re.fullmatch(value):
            # We allow " " if it's inside a Literal.String for example
            if 'Literal' in token:
                continue

            # If whitespace is part of a comment, we accept that as well,
            # as comments may be similarly highlighted to literals
            if 'Comment' in token:
                continue

            if 'Whitespace' in token:
                continue

            print(f'{path}:{linenumber}')
            return False

    return True
+
+
def main(args):
    """Run check_file over every output file below ``args.TEST_ROOT``."""
    failures = process_output_files(args.TEST_ROOT, check_file)
    return 1 if failures > 0 else 0
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument('TEST_ROOT',
+ help='Root directory containing the tests')
+ args = parser.parse_args()
+ sys.exit(main(args))
diff --git a/scripts/count_token_references.py b/scripts/count_token_references.py
new file mode 100755
index 0000000..8e798c2
--- /dev/null
+++ b/scripts/count_token_references.py
@@ -0,0 +1,270 @@
+#!/usr/bin/env python
+"""
+Count number of references to tokens in lexer source
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+:program:`count_token_references` counts how many references to all existing
+tokens it can find by "grepping" the source code of the lexers. This can
+be used to find typos in token names, as those tokens are only used by one lexer.
+
+:program:`count_token_references` supports the following options:
+
+.. program:: count_token_references
+
+.. option:: -v, --verbose
+ This gives output while the script is collecting information.
+
+.. option:: --minfiles <COUNT>
+ Only report about tokens that are referenced in at least this many lexer
+ source files (default 1).
+
+.. option:: --maxfiles <COUNT>
+ Only report about tokens that are referenced in at most this many lexer
+ source files (default 1).
+
+.. option:: --minlines <COUNT>
+ Only report about tokens that are referenced in at least this many lexer
+ source lines (default 1).
+
+.. option:: --maxlines <COUNT>
+ Only report about tokens that are referenced in at most this many lexer
+ source lines (default 10).
+
+.. option:: -s, --subtokens
+ When ``--subtoken`` is given each token is also counted for each of its
+ parent tokens. I.e. if we have 10 occurrences of the token
+ ``Token.Literal.Number.Integer`` and 10 occurrences of the token
+ ``Token.Literal.Number.Hex`` but none for ``Token.Literal.Number``, with
+ ``--subtoken`` ``Token.Literal.Number`` would be counted as having
+ 20 references.
+"""
+
+import sys, argparse, re, pathlib
+
+from pygments import token, lexers
+
+
def lookup_all_lexers():
    """
    Iterate through all lexers and fetch them.
    This should create all tokens that any of the lexers produce.

    Returns the number of lexers visited.
    """
    count = 0
    for name, aliases, patterns, mimetypes in lexers.get_all_lexers():
        # Instantiate each lexer once via whichever lookup key it has:
        # first alias, else first filename pattern, else first mimetype.
        if aliases:
            lexers.get_lexer_by_name(aliases[0])
        elif patterns:
            lexers.get_lexer_for_filename(patterns[0])
        elif mimetypes:
            lexers.get_lexer_for_mimetype(mimetypes[0])
        count += 1
    return count
+
+
+def fetch_lexer_sources():
+ """
+ Return the source code of all lexers as a dictionary, mapping filenames
+ to a list of lines.
+ """
+ lexer_dir = (pathlib.Path(__file__).parent / "../pygments/lexers").resolve()
+ lexer_sources = {
+ fn: fn.read_text(encoding='utf-8').splitlines(keepends=False)
+ for fn in lexer_dir.glob("*.py")
+ }
+ return lexer_sources
+
+
def sub_tokens(token):
    """Depth-first generator over ``token`` and all of its sub-tokens.

    The token itself is yielded first, then each entry of
    ``token.subtypes`` is expanded recursively, preserving the
    container's iteration order.
    """
    yield token
    for child in token.subtypes:
        yield from sub_tokens(child)
+
+
class FileCount:
    """
    Stores information about line numbers in a file.

    This is used to store from which lines in a file a certain token is
    referenced.
    """
    def __init__(self, filename):
        # ``filename`` is a pathlib.Path (``.name`` is used in __str__).
        self.filename = filename
        # Line numbers (ints) at which the token is referenced.
        self.lines = []

    def __str__(self):
        if len(self.lines) > 3:
            # NOTE(review): previews up to 5 entries although the cutoff
            # is 3 -- presumably an intentional preview size; kept as-is.
            lines = ", ".join(f"{line:,}" for line in self.lines[:5])
            # Bug fix: the total must be the number of referencing lines
            # (len(self.lines)), not the length of the joined preview
            # string (len(lines)).
            lines = f"{lines}, ... ({len(self.lines):,} lines)"
        else:
            lines = ", ".join(f"{line:,}" for line in self.lines)
        return f"{self.filename.name}[{lines}]"

    def add(self, linenumber):
        """Record one referencing line number."""
        self.lines.append(linenumber)

    def count_lines(self):
        """Return how many lines reference the token in this file."""
        return len(self.lines)
+
+
class TokenCount:
    """
    Stores information about a token and in which files it is referenced.
    """
    def __init__(self, token):
        self.token = token
        # Maps filename -> FileCount with the referencing line numbers.
        self.files = {}

    def add(self, filename, linenumber):
        """Record one reference to the token at ``filename:linenumber``."""
        filecount = self.files.get(filename)
        if filecount is None:
            filecount = self.files[filename] = FileCount(filename)
        filecount.add(linenumber)

    def __str__(self):
        per_file = list(self.files.values())
        if len(per_file) > 3:
            # Preview only the first few files, then an ellipsis summary.
            shown = ", ".join(str(fc) for fc in per_file[:6])
            files = f"{shown}, ... ({len(self.files):,} files)"
        else:
            files = ", ".join(str(fc) for fc in per_file)
        return f"{self.count_files():,} files, {self.count_lines():,} locations: {files}"

    def count_files(self):
        """Return the number of files referencing the token."""
        return len(self.files)

    def count_lines(self):
        """Return the total number of referencing source lines."""
        return sum(fc.count_lines() for fc in self.files.values())
+
+
+def find_token_references(lexer_sources, args):
+ """
+ Find all references to all tokens in the source code of all lexers.
+
+ Note that this can't be 100% reliable, as it searches the source code for
+ certain patterns: It searches for the last two components of a token name,
+ i.e. to find references to the token ``Token.Literal.Number.Integer.Long``
+ it searches for the regular expression ``\\bInteger.Long\\b``. This
+ won't work reliably for top level token like ``Token.String`` since this
+ is often referred to as ``String``, but searching for ``\\bString\\b``
+ yields too many false positives.
+ """
+
+ # Maps token to :class:`TokenCount` objects.
+ token_references = {}
+
+ # Search for each token in each lexer source file and record in which file
+ # and in which line they are referenced
+ for t in sub_tokens(token.Token):
+ parts = list(t)[-2:]
+ if len(parts) == 0:
+ name = "Token"
+ elif len(parts) == 1:
+ name = f"Token.{parts[0]}"
+ else:
+ name = ".".join(parts)
+
+ token_references[t] = tokencount = TokenCount(t)
+
+ if name != "Token":
+ pattern = re.compile(f"\\b{name}\\b")
+
+ for (filename, sourcelines) in lexer_sources.items():
+ for (i, line) in enumerate(sourcelines, 1):
+ if pattern.search(line) is not None:
+ tokencount.add(filename, i)
+ if args.subtoken:
+ t2 = t
+ while t2 is not token.Token:
+ t2 = t2.parent
+ tokencount2 = token_references[t2]
+ tokencount2.add(filename, i)
+
+ return token_references
+
+
+def print_result(token_references, args):
+ def key(item):
+ return (item[1].count_files(), item[1].count_lines())
+
+ for (token, locations) in sorted(token_references.items(), key=key):
+ if args.minfiles <= locations.count_files() <= args.maxfiles and \
+ args.minlines <= locations.count_lines() <= args.maxlines:
+ print(f"{token}: {locations}")
+
+
+def main(args=None):
+ p = argparse.ArgumentParser(description="Count how often each token is used by the lexers")
+ p.add_argument(
+ "-v", "--verbose",
+ dest="verbose", help="Give more output.",
+ default=False, action="store_true"
+ )
+ p.add_argument(
+ "--minfiles",
+ dest="minfiles", metavar="COUNT", type=int,
+ help="Report all tokens referenced by at least COUNT lexer source files (default %(default)s)",
+ default=1
+ )
+ p.add_argument(
+ "--maxfiles",
+ dest="maxfiles", metavar="COUNT", type=int,
+ help="Report all tokens referenced by at most COUNT lexer source files (default %(default)s)",
+ default=1
+ )
+ p.add_argument(
+ "--minlines",
+ dest="minlines", metavar="COUNT", type=int,
+ help="Report all tokens referenced by at least COUNT lexer source lines (default %(default)s)",
+ default=1
+ )
+ p.add_argument(
+ "--maxlines",
+ dest="maxlines", metavar="COUNT", type=int,
+ help="Report all tokens referenced by at most COUNT lexer source lines (default %(default)s)",
+ default=10
+ )
+ p.add_argument(
+ "-s", "--subtoken",
+ dest="subtoken",
+ help="Include count of references to subtokens in the count for each token (default %(default)s)",
+ default=False, action="store_true"
+ )
+
+ args = p.parse_args(args)
+
+ if args.verbose:
+ print("Looking up all lexers ... ", end="", flush=True)
+ count = lookup_all_lexers()
+ if args.verbose:
+ print(f"found {count:,} lexers")
+
+ if args.verbose:
+ print("Fetching lexer source code ... ", end="", flush=True)
+ lexer_sources = fetch_lexer_sources()
+ if args.verbose:
+ print(f"found {len(lexer_sources):,} lexer source files")
+
+ if args.verbose:
+ print("Finding token references ... ", end="", flush=True)
+ token_references = find_token_references(lexer_sources, args)
+ if args.verbose:
+ print(f"found references to {len(token_references):,} tokens")
+
+ if args.verbose:
+ print()
+ print("Result:")
+ print_result(token_references, args)
+
+
+if __name__ == "__main__":
+ sys.exit(main())
diff --git a/scripts/debug_lexer.py b/scripts/debug_lexer.py
new file mode 100755
index 0000000..6323d9c
--- /dev/null
+++ b/scripts/debug_lexer.py
@@ -0,0 +1,306 @@
+#!/usr/bin/python
+"""
+ Lexing error finder
+ ~~~~~~~~~~~~~~~~~~~
+
+ For the source files given on the command line, display
+ the text where Error tokens are being generated, along
+ with some context.
+
+ :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+import os
+import sys
+import struct
+
+# always prefer Pygments from source if exists
+srcpath = os.path.join(os.path.dirname(__file__), '..')
+if os.path.isdir(os.path.join(srcpath, 'pygments')):
+ sys.path.insert(0, srcpath)
+
+
+from pygments.lexer import RegexLexer, ExtendedRegexLexer, LexerContext, \
+ ProfilingRegexLexer, ProfilingRegexLexerMeta
+from pygments.lexers import get_lexer_by_name, find_lexer_class, \
+ find_lexer_class_for_filename, guess_lexer
+from pygments.token import Error, Text, _TokenType
+from pygments.cmdline import _parse_options
+
+
+class DebuggingRegexLexer(ExtendedRegexLexer):
+ """Make the state stack, position and current match instance attributes."""
+
+ def get_tokens_unprocessed(self, text, stack=('root',)):
+ """
+ Split ``text`` into (tokentype, text) pairs.
+
+ ``stack`` is the initial stack (default: ``['root']``)
+ """
+ tokendefs = self._tokens
+ self.ctx = ctx = LexerContext(text, 0)
+ ctx.stack = list(stack)
+ statetokens = tokendefs[ctx.stack[-1]]
+ while 1:
+ for rexmatch, action, new_state in statetokens:
+ self.m = m = rexmatch(text, ctx.pos, ctx.end)
+ if m:
+ if action is not None:
+ if type(action) is _TokenType:
+ yield ctx.pos, action, m.group()
+ ctx.pos = m.end()
+ else:
+ if not isinstance(self, ExtendedRegexLexer):
+ yield from action(self, m)
+ ctx.pos = m.end()
+ else:
+ yield from action(self, m, ctx)
+ if not new_state:
+ # altered the state stack?
+ statetokens = tokendefs[ctx.stack[-1]]
+ if new_state is not None:
+ # state transition
+ if isinstance(new_state, tuple):
+ for state in new_state:
+ if state == '#pop':
+ ctx.stack.pop()
+ elif state == '#push':
+ ctx.stack.append(ctx.stack[-1])
+ else:
+ ctx.stack.append(state)
+ elif isinstance(new_state, int):
+ # pop
+ del ctx.stack[new_state:]
+ elif new_state == '#push':
+ ctx.stack.append(ctx.stack[-1])
+ else:
+ assert False, 'wrong state def: %r' % new_state
+ statetokens = tokendefs[ctx.stack[-1]]
+ break
+ else:
+ try:
+ if ctx.pos >= ctx.end:
+ break
+ if text[ctx.pos] == '\n':
+ # at EOL, reset state to 'root'
+ ctx.stack = ['root']
+ statetokens = tokendefs['root']
+ yield ctx.pos, Text, '\n'
+ ctx.pos += 1
+ continue
+ yield ctx.pos, Error, text[ctx.pos]
+ ctx.pos += 1
+ except IndexError:
+ break
+
+
def decode_atheris(bstr):
    """Decode a byte string into a Unicode string using the algorithm
    of Google's Atheris fuzzer library, which aims to produce a wide
    range of possible Unicode inputs.

    Corresponds to ConsumeUnicodeImpl() with filter_surrogates=false in
    https://github.com/google/atheris/blob/master/fuzzed_data_provider.cc
    """
    if len(bstr) < 2:
        return ''
    # The first byte only selects if the rest is decoded as ascii, "utf-16" or "utf-32"
    spec, bstr = bstr[0], bstr[1:]
    if spec & 1:  # pure ASCII
        return ''.join(chr(ch & 0x7f) for ch in bstr)
    elif spec & 2:  # UTF-16
        # Drop a trailing odd byte so the length is a multiple of 2.
        bstr = bstr if len(bstr) % 2 == 0 else bstr[:-1]
        return bstr.decode('utf16')

    # else UTF-32
    def valid_codepoint(ch):
        # Mask into the 21-bit code point range and clear bits that would
        # produce values above U+10FFFF.
        ch &= 0x1fffff
        if ch & 0x100000:
            ch &= ~0x0f0000
        return chr(ch)

    # Unpack as many whole 32-bit words as fit; leftover bytes are padding.
    chars = struct.unpack('%dI%dx' % divmod(len(bstr), 4), bstr)
    # Bug fix: the original read ``map(valid_codepoint), chars`` which
    # calls map() with a single argument and passes ``chars`` to join()
    # -- a TypeError at runtime.
    return ''.join(map(valid_codepoint, chars))
+
+
+def main(fn, lexer=None, options={}):
+ if fn == '-':
+ text = sys.stdin.read()
+ else:
+ with open(fn, 'rb') as fp:
+ text = fp.read()
+ if decode_strategy == 'latin1':
+ try:
+ text = text.decode('utf8')
+ except UnicodeError:
+ print('Warning: non-UTF8 input, using latin1')
+ text = text.decode('latin1')
+ elif decode_strategy == 'utf8-ignore':
+ try:
+ text = text.decode('utf8')
+ except UnicodeError:
+ print('Warning: ignoring non-UTF8 bytes in input')
+ text = text.decode('utf8', 'ignore')
+ elif decode_strategy == 'atheris':
+ text = decode_atheris(text)
+
+ text = text.strip('\n') + '\n'
+
+ if lexer is not None:
+ lxcls = get_lexer_by_name(lexer).__class__
+ elif guess:
+ lxcls = guess_lexer(text).__class__
+ print('Using lexer: %s (%s.%s)' % (lxcls.name, lxcls.__module__,
+ lxcls.__name__))
+ else:
+ lxcls = find_lexer_class_for_filename(os.path.basename(fn))
+ if lxcls is None:
+ name, rest = fn.split('_', 1)
+ lxcls = find_lexer_class(name)
+ if lxcls is None:
+ raise AssertionError('no lexer found for file %r' % fn)
+ print('Using lexer: %s (%s.%s)' % (lxcls.name, lxcls.__module__,
+ lxcls.__name__))
+ debug_lexer = False
+ # if profile:
+ # # does not work for e.g. ExtendedRegexLexers
+ # if lxcls.__bases__ == (RegexLexer,):
+ # # yes we can! (change the metaclass)
+ # lxcls.__class__ = ProfilingRegexLexerMeta
+ # lxcls.__bases__ = (ProfilingRegexLexer,)
+ # lxcls._prof_sort_index = profsort
+ # else:
+ # if lxcls.__bases__ == (RegexLexer,):
+ # lxcls.__bases__ = (DebuggingRegexLexer,)
+ # debug_lexer = True
+ # elif lxcls.__bases__ == (DebuggingRegexLexer,):
+ # # already debugged before
+ # debug_lexer = True
+ # else:
+ # # HACK: ExtendedRegexLexer subclasses will only partially work here.
+ # lxcls.__bases__ = (DebuggingRegexLexer,)
+ # debug_lexer = True
+
+ lx = lxcls(**options)
+ lno = 1
+ tokens = []
+ states = []
+
+ def show_token(tok, state):
+ reprs = list(map(repr, tok))
+ print(' ' + reprs[1] + ' ' + ' ' * (29-len(reprs[1])) + reprs[0], end=' ')
+ if debug_lexer:
+ print(' ' + ' ' * (29-len(reprs[0])) + ' : '.join(state)
+ if state else '', end=' ')
+ print()
+
+ for type, val in lx.get_tokens(text):
+ lno += val.count('\n')
+ if type == Error and not ignerror:
+ print('Error parsing', fn, 'on line', lno)
+ if not showall:
+ print('Previous tokens' + (debug_lexer and ' and states' or '') + ':')
+ for i in range(max(len(tokens) - num, 0), len(tokens)):
+ if debug_lexer:
+ show_token(tokens[i], states[i])
+ else:
+ show_token(tokens[i], None)
+ print('Error token:')
+ vlen = len(repr(val))
+ print(' ' + repr(val), end=' ')
+ if debug_lexer and hasattr(lx, 'ctx'):
+ print(' ' * (60-vlen) + ' : '.join(lx.ctx.stack), end=' ')
+ print()
+ print()
+ return 1
+ tokens.append((type, val))
+ if debug_lexer:
+ if hasattr(lx, 'ctx'):
+ states.append(lx.ctx.stack[:])
+ else:
+ states.append(None)
+ if showall:
+ show_token((type, val), states[-1] if debug_lexer else None)
+ return 0
+
+
+def print_help():
+ print('''\
+Pygments development helper to quickly debug lexers.
+
+ scripts/debug_lexer.py [options] file ...
+
+Give one or more filenames to lex them and display possible error tokens
+and/or profiling info. Files are assumed to be encoded in UTF-8.
+
+Selecting lexer and options:
+
+ -l NAME use lexer named NAME (default is to guess from
+ the given filenames)
+ -g guess lexer from content
+ -u if input is non-utf8, use "ignore" handler instead
+ of using latin1 encoding
+ -U use Atheris fuzzer's method of converting
+ byte input to Unicode
+ -O OPTIONSTR use lexer options parsed from OPTIONSTR
+
+Debugging lexing errors:
+
+ -n N show the last N tokens on error
+ -a always show all lexed tokens (default is only
+ to show them when an error occurs)
+ -e do not stop on error tokens
+
+Profiling:
+
+ -p use the ProfilingRegexLexer to profile regexes
+ instead of the debugging lexer
+ -s N sort profiling output by column N (default is
+ column 4, the time per call)
+''')
+
+
+num = 10
+showall = False
+ignerror = False
+lexer = None
+options = {}
+profile = False
+profsort = 4
+guess = False
+decode_strategy = 'latin1'
+
+if __name__ == '__main__':
+ import getopt
+ opts, args = getopt.getopt(sys.argv[1:], 'n:l:aepO:s:hguU')
+ for opt, val in opts:
+ if opt == '-n':
+ num = int(val)
+ elif opt == '-a':
+ showall = True
+ elif opt == '-e':
+ ignerror = True
+ elif opt == '-l':
+ lexer = val
+ elif opt == '-p':
+ profile = True
+ elif opt == '-s':
+ profsort = int(val)
+ elif opt == '-O':
+ options = _parse_options([val])
+ elif opt == '-g':
+ guess = True
+ elif opt == '-u':
+ decode_strategy = 'utf8-ignore'
+ elif opt == '-U':
+ decode_strategy = 'atheris'
+ elif opt == '-h':
+ print_help()
+ sys.exit(0)
+ ret = 0
+ if not args:
+ print_help()
+ for f in args:
+ ret += main(f, lexer, options)
+ sys.exit(bool(ret))
diff --git a/scripts/detect_missing_analyse_text.py b/scripts/detect_missing_analyse_text.py
new file mode 100644
index 0000000..c377b0b
--- /dev/null
+++ b/scripts/detect_missing_analyse_text.py
@@ -0,0 +1,48 @@
+"""
+ detect_missing_analyse_text
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+import sys
+
+from pygments.lexers import get_all_lexers, find_lexer_class
+from pygments.lexer import Lexer
+
+import argparse
+
+
def main(args):
    """Report lexers that likely need an ``analyse_text`` implementation.

    Returns a bitmask exit status: bit 1 set if some lexer has
    ``analyse_text`` equal to None, bit 2 set if lexers sharing a
    filename pattern lack their own ``analyse_text``.
    """
    uses = {}

    for name, aliases, filenames, mimetypes in get_all_lexers(plugins=False):
        cls = find_lexer_class(name)
        if not cls.aliases and not args.skip_no_aliases:
            print(cls, "has no aliases")
        for pattern in filenames:
            uses.setdefault(pattern, []).append(cls)

    ret = 0
    for pattern, classes in uses.items():
        if len(classes) <= 1:
            # Unambiguous pattern -- analyse_text is not needed.
            continue
        for cls in classes:
            if cls.analyse_text is None:
                print(cls, "has a None analyse_text")
                ret |= 1
            elif Lexer.analyse_text.__doc__ == cls.analyse_text.__doc__:
                # Identical docstring to the base class means the method
                # was inherited, not overridden.
                print(cls, "needs analyse_text, multiple lexers for", pattern)
                ret |= 2
    return ret
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument('--skip-no-aliases',
+ help='Skip checks for a lexer with no aliases',
+ action='store_true',
+ default=False)
+ args = parser.parse_args()
+ sys.exit(main(args))
diff --git a/scripts/gen_mapfiles.py b/scripts/gen_mapfiles.py
new file mode 100644
index 0000000..a5aed0c
--- /dev/null
+++ b/scripts/gen_mapfiles.py
@@ -0,0 +1,53 @@
+"""
+ scripts/gen_mapfiles.py
+ ~~~~~~~~~~~~~~~~~~~~~~~
+
+ Regenerate mapping files.
+
+ :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+from importlib import import_module
+from pathlib import Path
+import re
+import sys
+
+top_src_dir = Path(__file__).parent.parent
+pygments_package = top_src_dir / 'pygments'
+sys.path.insert(0, str(pygments_package.parent.resolve()))
+
+from pygments.util import docstring_headline
+
+def main():
+ for key in ['lexers', 'formatters']:
+ lines = []
+ for file in (pygments_package / key).glob('[!_]*.py'):
+ module_name = '.'.join(file.relative_to(pygments_package.parent).with_suffix('').parts)
+ print(module_name)
+ module = import_module(module_name)
+ for obj_name in module.__all__:
+ obj = getattr(module, obj_name)
+ desc = (module_name, obj.name, tuple(obj.aliases), tuple(obj.filenames))
+ if key == 'lexers':
+ desc += (tuple(obj.mimetypes),)
+ elif key == 'formatters':
+ desc += (docstring_headline(obj),)
+ else:
+ assert False
+ lines.append(f' {obj_name!r}: {desc!r},')
+ # Sort to make diffs minimal.
+ lines.sort()
+ new_dict = '\n'.join(lines)
+ content = f'''# Automatically generated by scripts/gen_mapfiles.py.
+# DO NOT EDIT BY HAND; run `make mapfiles` instead.
+
+{key.upper()} = {{
+{new_dict}
+}}
+'''
+ (pygments_package / key / '_mapping.py').write_text(content, encoding='utf8')
+ print(f'=== {len(lines)} {key} processed.')
+
+if __name__ == '__main__':
+ main()
diff --git a/scripts/get_css_properties.py b/scripts/get_css_properties.py
new file mode 100644
index 0000000..3afe98e
--- /dev/null
+++ b/scripts/get_css_properties.py
@@ -0,0 +1,33 @@
+"""
+ get_css_properties
+ ~~~~~~~~~~~~~~~~~~
+
+ :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+
+from pygments.util import format_lines
+import json
+import urllib.request
+
+HEADER = '''\
+"""
+ pygments.lexers._css_builtins
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ This file is autogenerated by scripts/get_css_properties.py
+
+ :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+'''
+
+if __name__ == "__main__":
+ data_request = urllib.request.urlopen('https://www.w3.org/Style/CSS/all-properties.en.json')
+ data = json.load(data_request)
+ names = set([p['property'] for p in data if p['property'] != '--*'])
+
+ with open('../pygments/lexers/_css_builtins.py', 'w') as builtin_file:
+ builtin_file.write(HEADER)
+ builtin_file.write(format_lines('_css_properties', sorted(names)))
diff --git a/scripts/get_vimkw.py b/scripts/get_vimkw.py
new file mode 100644
index 0000000..0b2d82e
--- /dev/null
+++ b/scripts/get_vimkw.py
@@ -0,0 +1,72 @@
+import re
+
+from pygments.util import format_lines
+
+r_line = re.compile(r"^(syn keyword vimCommand contained|syn keyword vimOption "
+ r"contained|syn keyword vimAutoEvent contained)\s+(.*)")
+r_item = re.compile(r"(\w+)(?:\[(\w+)\])?")
+
+HEADER = '''\
+# -*- coding: utf-8 -*-
+"""
+ pygments.lexers._vim_builtins
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ This file is autogenerated by scripts/get_vimkw.py
+
+ :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+# Split up in multiple functions so it's importable by jython, which has a
+# per-method size limit.
+'''
+
+METHOD = '''\
+def _get%(key)s():
+%(body)s
+ return var
+%(key)s = _get%(key)s()
+'''
+
def getkw(input, output):
    """Parse the vim syntax file *input* and write the extracted keyword
    tables to *output* as an importable Python module.
    """
    # The original used the ``file()`` builtin, which only exists on
    # Python 2 (the script otherwise uses Python 3 ``print(..., file=)``).
    # Use ``open()`` inside ``with`` so handles are closed deterministically.
    with open(output, 'w') as out:
        # Copy template from an existing file.
        print(HEADER, file=out)

        output_info = {'command': [], 'option': [], 'auto': []}
        with open(input) as vimfile:
            for line in vimfile:
                m = r_line.match(line)
                if not m:
                    continue
                # Decide which output gets mapped to d
                if 'vimCommand' in m.group(1):
                    d = output_info['command']
                elif 'AutoEvent' in m.group(1):
                    d = output_info['auto']
                else:
                    d = output_info['option']

                # Extract all the shortened versions
                for i in r_item.finditer(m.group(2)):
                    d.append('(%r,%r)' %
                             (i.group(1), "%s%s" % (i.group(1), i.group(2) or '')))

        # These mappings are not in the syntax file but expected by the lexer.
        output_info['option'].append("('nnoremap','nnoremap')")
        output_info['option'].append("('inoremap','inoremap')")
        output_info['option'].append("('vnoremap','vnoremap')")

        for key, keywordlist in output_info.items():
            keywordlist.sort()
            body = format_lines('var', keywordlist, raw=True, indent_level=1)
            # METHOD interpolates ``key`` and ``body`` from the local scope.
            print(METHOD % locals(), file=out)
+
def is_keyword(w, keywords):
    """Return True if *w* is a valid, possibly abbreviated, keyword.

    *keywords* maps each minimal abbreviation to its full spelling.  *w*
    matches when one of its prefixes is a known abbreviation and *w* itself
    is a prefix of the corresponding full keyword.
    """
    # Try the longest prefix first so the most specific abbreviation wins.
    for length in reversed(range(1, len(w) + 1)):
        prefix = w[:length]
        if prefix in keywords:
            return keywords[prefix][:len(w)] == w
    return False
+
+if __name__ == "__main__":
+ getkw("/usr/share/vim/vim74/syntax/vim.vim",
+ "pygments/lexers/_vim_builtins.py")
diff --git a/scripts/pylintrc b/scripts/pylintrc
new file mode 100644
index 0000000..b602eaa
--- /dev/null
+++ b/scripts/pylintrc
@@ -0,0 +1,301 @@
+# lint Python modules using external checkers.
+#
+# This is the main checker controlling the other ones and the reports
+# generation. It is itself both a raw checker and an astng checker in order
+# to:
+# * handle message activation / deactivation at the module level
+# * handle some basic but necessary stats'data (number of classes, methods...)
+#
+[MASTER]
+
+# Specify a configuration file.
+#rcfile=
+
+# Profiled execution.
+profile=no
+
+# Add <file or directory> to the black list. It should be a base name, not a
+# path. You may set this option multiple times.
+ignore=.svn
+
+# Pickle collected data for later comparisons.
+persistent=yes
+
+# Set the cache size for astng objects.
+cache-size=500
+
+# List of plugins (as comma separated values of python modules names) to load,
+# usually to register additional checkers.
+load-plugins=
+
+
+[MESSAGES CONTROL]
+
+# Enable only checker(s) with the given id(s). This option conflicts with the
+# disable-checker option.
+#enable-checker=
+
+# Enable all checker(s) except those with the given id(s). This option
+# conflicts with the disable-checker option.
+#disable-checker=
+
+# Enable all messages in the listed categories.
+#enable-msg-cat=
+
+# Disable all messages in the listed categories.
+#disable-msg-cat=
+
+# Enable the message(s) with the given id(s).
+#enable-msg=
+
+# Disable the message(s) with the given id(s).
+disable-msg=C0323,W0142,C0301,C0103,C0111,E0213,C0302,C0203,W0703,R0201
+
+
+[REPORTS]
+
+# set the output format. Available formats are text, parseable, colorized and
+# html
+output-format=colorized
+
+# Include message's id in output
+include-ids=yes
+
+# Put messages in a separate file for each module / package specified on the
+# command line instead of printing them on stdout. Reports (if any) will be
+# written in a file name "pylint_global.[txt|html]".
+files-output=no
+
+# Tells whether to display a full report or only the messages
+reports=yes
+
+# Python expression which should return a note less than 10 (10 is the highest
+# note). You have access to the variables errors, warning and statement, which
+# respectively contain the number of errors / warnings messages and the total
+# number of statements analyzed. This is used by the global evaluation report
+# (R0004).
+evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
+
+# Add a comment according to your evaluation note. This is used by the global
+# evaluation report (R0004).
+comment=no
+
+# Enable the report(s) with the given id(s).
+#enable-report=
+
+# Disable the report(s) with the given id(s).
+#disable-report=
+
+
+# checks for
+# * unused variables / imports
+# * undefined variables
+# * redefinition of variable from builtins or from an outer scope
+# * use of variable before assignment
+#
+[VARIABLES]
+
+# Tells whether we should check for unused import in __init__ files.
+init-import=no
+
+# A regular expression matching names used for dummy variables (i.e. not used).
+dummy-variables-rgx=_|dummy
+
+# List of additional names supposed to be defined in builtins. Remember that
+# you should avoid to define new builtins when possible.
+additional-builtins=
+
+
+# try to find bugs in the code using type inference
+#
+[TYPECHECK]
+
+# Tells whether missing members accessed in mixin class should be ignored. A
+# mixin class is detected if its name ends with "mixin" (case insensitive).
+ignore-mixin-members=yes
+
+# When zope mode is activated, consider the acquired-members option to ignore
+# access to some undefined attributes.
+zope=no
+
+# List of members which are usually get through zope's acquisition mechanism and
+# so shouldn't trigger E0201 when accessed (need zope=yes to be considered).
+acquired-members=REQUEST,acl_users,aq_parent
+
+
+# checks for :
+# * doc strings
+# * modules / classes / functions / methods / arguments / variables name
+# * number of arguments, local variables, branches, returns and statements in
+# functions, methods
+# * required module attributes
+# * dangerous default values as arguments
+# * redefinition of function / method / class
+# * uses of the global statement
+#
+[BASIC]
+
+# Required attributes for module, separated by a comma
+required-attributes=
+
+# Regular expression which should only match functions or classes name which do
+# not require a docstring
+no-docstring-rgx=__.*__
+
+# Regular expression which should only match correct module names
+module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
+
+# Regular expression which should only match correct module level names
+const-rgx=(([A-Z_][A-Z1-9_]*)|(__.*__))$
+
+# Regular expression which should only match correct class names
+class-rgx=[A-Z_][a-zA-Z0-9]+$
+
+# Regular expression which should only match correct function names
+function-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct method names
+method-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct instance attribute names
+attr-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct argument names
+argument-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct variable names
+variable-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct list comprehension /
+# generator expression variable names
+inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$
+
+# Good variable names which should always be accepted, separated by a comma
+good-names=i,j,k,ex,Run,_
+
+# Bad variable names which should always be refused, separated by a comma
+bad-names=foo,bar,baz,toto,tutu,tata
+
+# List of builtins function names that should not be used, separated by a comma
+bad-functions=apply,input
+
+
+# checks for sign of poor/misdesign:
+# * number of methods, attributes, local variables...
+# * size, complexity of functions, methods
+#
+[DESIGN]
+
+# Maximum number of arguments for function / method
+max-args=12
+
+# Maximum number of locals for function / method body
+max-locals=30
+
+# Maximum number of return / yield for function / method body
+max-returns=12
+
+# Maximum number of branch for function / method body
+max-branchs=30
+
+# Maximum number of statements in function / method body
+max-statements=60
+
+# Maximum number of parents for a class (see R0901).
+max-parents=7
+
+# Maximum number of attributes for a class (see R0902).
+max-attributes=20
+
+# Minimum number of public methods for a class (see R0903).
+min-public-methods=0
+
+# Maximum number of public methods for a class (see R0904).
+max-public-methods=20
+
+
+# checks for
+# * external modules dependencies
+# * relative / wildcard imports
+# * cyclic imports
+# * uses of deprecated modules
+#
+[IMPORTS]
+
+# Deprecated modules which should not be used, separated by a comma
+deprecated-modules=regsub,string,TERMIOS,Bastion,rexec
+
+# Create a graph of every (i.e. internal and external) dependencies in the
+# given file (report R0402 must not be disabled)
+import-graph=
+
+# Create a graph of external dependencies in the given file (report R0402 must
+# not be disabled)
+ext-import-graph=
+
+# Create a graph of internal dependencies in the given file (report R0402 must
+# not be disabled)
+int-import-graph=
+
+
+# checks for :
+# * methods without self as first argument
+# * overridden methods signature
+# * access only to existent members via self
+# * attributes not defined in the __init__ method
+# * supported interfaces implementation
+# * unreachable code
+#
+[CLASSES]
+
+# List of interface methods to ignore, separated by a comma. This is used for
+# instance to not check methods defines in Zope's Interface base class.
+ignore-iface-methods=isImplementedBy,deferred,extends,names,namesAndDescriptions,queryDescriptionFor,getBases,getDescriptionFor,getDoc,getName,getTaggedValue,getTaggedValueTags,isEqualOrExtendedBy,setTaggedValue,isImplementedByInstancesOf,adaptWith,is_implemented_by
+
+# List of method names used to declare (i.e. assign) instance attributes.
+defining-attr-methods=__init__,__new__,setUp
+
+
+# checks for similarities and duplicated code. This computation may be
+# memory / CPU intensive, so you should disable it if you experience some
+# problems.
+#
+[SIMILARITIES]
+
+# Minimum lines number of a similarity.
+min-similarity-lines=10
+
+# Ignore comments when computing similarities.
+ignore-comments=yes
+
+# Ignore docstrings when computing similarities.
+ignore-docstrings=yes
+
+
+# checks for:
+# * warning notes in the code like FIXME, XXX
+# * PEP 263: source code with non ascii character but no encoding declaration
+#
+[MISCELLANEOUS]
+
+# List of note tags to take in consideration, separated by a comma.
+notes=FIXME,XXX,TODO
+
+
+# checks for :
+# * unauthorized constructions
+# * strict indentation
+# * line length
+# * use of <> instead of !=
+#
+[FORMAT]
+
+# Maximum number of characters on a single line.
+max-line-length=90
+
+# Maximum number of lines in a module
+max-module-lines=1000
+
+# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
+# tab).
+indent-string=' '
diff --git a/scripts/release-checklist b/scripts/release-checklist
new file mode 100644
index 0000000..087917f
--- /dev/null
+++ b/scripts/release-checklist
@@ -0,0 +1,24 @@
+Release checklist
+=================
+
+* Check ``git status``
+* ``make check``
+* LATER when configured properly: ``make pylint``
+* ``tox``
+* Update version in ``pygments/__init__.py``
+* Check setup.py metadata: long description, trove classifiers
+* Update release date/code name in ``CHANGES``
+* ``git commit``
+* Wait for the CI to finish
+* ``make clean``
+* ``python3 -m build``
+* Check the size of the generated packages. If they're significantly different from the last release, check if the repository is in a modified state and that ``make clean`` was run.
+* ``twine upload dist/Pygments-$NEWVER*``
+* Check PyPI release page for obvious errors (like different file sizes!)
+* ``git tag -a``
+* Add new ``CHANGES`` heading for next version
+* ``git commit``
+* ``git push``, ``git push --tags``
+* Add new release on https://github.com/pygments/pygments/releases
+* Add new milestone on https://github.com/pygments/pygments/milestones if needed
+* Write announcement and send to mailing list/python-announce
diff --git a/scripts/update_contrasts.py b/scripts/update_contrasts.py
new file mode 100755
index 0000000..156bc5c
--- /dev/null
+++ b/scripts/update_contrasts.py
@@ -0,0 +1,21 @@
#!/usr/bin/env python3
"""
    Updates tests/contrast/min_contrasts.json
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    Whenever you have improved the minimum contrast of a style you should run
    this script, so that the test_contrasts.py test prevents future
    degradations.
"""

import os
import sys

# always prefer Pygments from source if exists
srcpath = os.path.join(os.path.dirname(__file__), "..")
if os.path.isdir(os.path.join(srcpath, "pygments")):
    sys.path.insert(0, srcpath)

# Imported after the sys.path tweak above so the in-tree Pygments is used.
import tests.contrast.test_contrasts

# Re-run the contrast check without failing on improvements, then rewrite the
# JSON baseline so that future regressions are caught by the test suite.
tests.contrast.test_contrasts.test_contrasts(fail_if_improved=False)
tests.contrast.test_contrasts.update_json()
diff --git a/scripts/utility.py b/scripts/utility.py
new file mode 100644
index 0000000..4d59a1b
--- /dev/null
+++ b/scripts/utility.py
@@ -0,0 +1,69 @@
+"""
+ Utility functions for test scripts
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+import os
+import os.path
+
+
def unpack_output_file(path):
    """
    Unpack an output file into objects containing the line number, the text,
    and the token name. The output file can be either a ``.output`` file
    containing a token stream, or a ``.txt`` with input and tokens.

    Yields ``OutputEntry(text, token, linenumber)`` namedtuples; line numbers
    are 1-based and refer to the file, not the token stream.
    """
    from collections import namedtuple
    entry = namedtuple('OutputEntry', ['text', 'token', 'linenumber'])

    skip_until_tokens = path.endswith('.txt')

    # Context manager so the handle is closed deterministically; explicit
    # encoding because output fixtures are presumably UTF-8 (TODO confirm)
    # and must not depend on the locale.
    with open(path, encoding='utf-8') as output_file:
        for linenumber, line in enumerate(output_file, start=1):
            line = line.strip()
            if not line:
                continue

            if skip_until_tokens:
                # In .txt files everything before the marker is input text.
                if line == '---tokens---':
                    skip_until_tokens = False
                continue

            # Line can start with ' or ", so let's check which one it is
            # and find the matching one
            quotation_end = line.rfind(line[0])
            text = line[1:quotation_end]
            token = line.split()[-1]
            # Undo the escaping used when the token stream was written out.
            text = text.replace('\\n', '\n')
            text = text.replace('\\t', '\t')
            yield entry(text, token, linenumber)
+
+
def process_output_files(root_directory, callback):
    """
    Process all output (i.e. .output and .txt files for snippets) files
    in a directory tree using the provided callback.
    The callback should return ``True`` in case of success, ``False``
    otherwise.

    The function returns the number of files for which the callback returned
    ``False``.
    """
    errors = 0
    # 'dirpath'/'filename' instead of 'dir'/'file' so the builtins are not
    # shadowed; behavior is otherwise unchanged.
    for dirpath, _, filenames in os.walk(root_directory):
        for filename in filenames:
            _, ext = os.path.splitext(filename)

            if ext not in {'.txt', '.output'}:
                continue

            path = os.path.join(dirpath, filename)
            if not callback(path):
                errors += 1

    return errors
diff --git a/scripts/vim2pygments.py b/scripts/vim2pygments.py
new file mode 100755
index 0000000..ec9b63b
--- /dev/null
+++ b/scripts/vim2pygments.py
@@ -0,0 +1,932 @@
+#!/usr/bin/env python
+"""
+ Vim Colorscheme Converter
+ ~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ This script converts vim colorscheme files to valid pygments
+ style classes meant for putting into modules.
+
+ :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+import sys
+import re
+from os import path
+from io import StringIO
+
+split_re = re.compile(r'(?<!\\)\s+')
+
+SCRIPT_NAME = 'Vim Colorscheme Converter'
+SCRIPT_VERSION = '0.1'
+
+
+COLORS = {
+ # Numeric Colors
+ '0': '#000000',
+ '1': '#c00000',
+ '2': '#008000',
+ '3': '#808000',
+ '4': '#0000c0',
+ '5': '#c000c0',
+ '6': '#008080',
+ '7': '#c0c0c0',
+ '8': '#808080',
+ '9': '#ff6060',
+ '10': '#00ff00',
+ '11': '#ffff00',
+ '12': '#8080ff',
+ '13': '#ff40ff',
+ '14': '#00ffff',
+ '15': '#ffffff',
+ # Named Colors
+ 'alice': '#f0f8ff',
+ 'aliceblue': '#f0f8ff',
+ 'antique': '#faebd7',
+ 'antiquewhite': '#faebd7',
+ 'antiquewhite1': '#ffefdb',
+ 'antiquewhite2': '#eedfcc',
+ 'antiquewhite3': '#cdc0b0',
+ 'antiquewhite4': '#8b8378',
+ 'aquamarine': '#7fffd4',
+ 'aquamarine1': '#7fffd4',
+ 'aquamarine2': '#76eec6',
+ 'aquamarine3': '#66cdaa',
+ 'aquamarine4': '#458b74',
+ 'azure': '#f0ffff',
+ 'azure1': '#f0ffff',
+ 'azure2': '#e0eeee',
+ 'azure3': '#c1cdcd',
+ 'azure4': '#838b8b',
+ 'beige': '#f5f5dc',
+ 'bisque': '#ffe4c4',
+ 'bisque1': '#ffe4c4',
+ 'bisque2': '#eed5b7',
+ 'bisque3': '#cdb79e',
+ 'bisque4': '#8b7d6b',
+ 'black': '#000000',
+ 'blanched': '#ffebcd',
+ 'blanchedalmond': '#ffebcd',
+ 'blue': '#8a2be2',
+ 'blue1': '#0000ff',
+ 'blue2': '#0000ee',
+ 'blue3': '#0000cd',
+ 'blue4': '#00008b',
+ 'blueviolet': '#8a2be2',
+ 'brown': '#a52a2a',
+ 'brown1': '#ff4040',
+ 'brown2': '#ee3b3b',
+ 'brown3': '#cd3333',
+ 'brown4': '#8b2323',
+ 'burlywood': '#deb887',
+ 'burlywood1': '#ffd39b',
+ 'burlywood2': '#eec591',
+ 'burlywood3': '#cdaa7d',
+ 'burlywood4': '#8b7355',
+ 'cadet': '#5f9ea0',
+ 'cadetblue': '#5f9ea0',
+ 'cadetblue1': '#98f5ff',
+ 'cadetblue2': '#8ee5ee',
+ 'cadetblue3': '#7ac5cd',
+ 'cadetblue4': '#53868b',
+ 'chartreuse': '#7fff00',
+ 'chartreuse1': '#7fff00',
+ 'chartreuse2': '#76ee00',
+ 'chartreuse3': '#66cd00',
+ 'chartreuse4': '#458b00',
+ 'chocolate': '#d2691e',
+ 'chocolate1': '#ff7f24',
+ 'chocolate2': '#ee7621',
+ 'chocolate3': '#cd661d',
+ 'chocolate4': '#8b4513',
+ 'coral': '#ff7f50',
+ 'coral1': '#ff7256',
+ 'coral2': '#ee6a50',
+ 'coral3': '#cd5b45',
+ 'coral4': '#8b3e2f',
+ 'cornflower': '#6495ed',
+ 'cornflowerblue': '#6495ed',
+ 'cornsilk': '#fff8dc',
+ 'cornsilk1': '#fff8dc',
+ 'cornsilk2': '#eee8cd',
+ 'cornsilk3': '#cdc8b1',
+ 'cornsilk4': '#8b8878',
+ 'cyan': '#00ffff',
+ 'cyan1': '#00ffff',
+ 'cyan2': '#00eeee',
+ 'cyan3': '#00cdcd',
+ 'cyan4': '#008b8b',
+ 'dark': '#8b0000',
+ 'darkblue': '#00008b',
+ 'darkcyan': '#008b8b',
+ 'darkgoldenrod': '#b8860b',
+ 'darkgoldenrod1': '#ffb90f',
+ 'darkgoldenrod2': '#eead0e',
+ 'darkgoldenrod3': '#cd950c',
+ 'darkgoldenrod4': '#8b6508',
+ 'darkgray': '#a9a9a9',
+ 'darkgreen': '#006400',
+ 'darkgrey': '#a9a9a9',
+ 'darkkhaki': '#bdb76b',
+ 'darkmagenta': '#8b008b',
+ 'darkolivegreen': '#556b2f',
+ 'darkolivegreen1': '#caff70',
+ 'darkolivegreen2': '#bcee68',
+ 'darkolivegreen3': '#a2cd5a',
+ 'darkolivegreen4': '#6e8b3d',
+ 'darkorange': '#ff8c00',
+ 'darkorange1': '#ff7f00',
+ 'darkorange2': '#ee7600',
+ 'darkorange3': '#cd6600',
+ 'darkorange4': '#8b4500',
+ 'darkorchid': '#9932cc',
+ 'darkorchid1': '#bf3eff',
+ 'darkorchid2': '#b23aee',
+ 'darkorchid3': '#9a32cd',
+ 'darkorchid4': '#68228b',
+ 'darkred': '#8b0000',
+ 'darksalmon': '#e9967a',
+ 'darkseagreen': '#8fbc8f',
+ 'darkseagreen1': '#c1ffc1',
+ 'darkseagreen2': '#b4eeb4',
+ 'darkseagreen3': '#9bcd9b',
+ 'darkseagreen4': '#698b69',
+ 'darkslateblue': '#483d8b',
+ 'darkslategray': '#2f4f4f',
+ 'darkslategray1': '#97ffff',
+ 'darkslategray2': '#8deeee',
+ 'darkslategray3': '#79cdcd',
+ 'darkslategray4': '#528b8b',
+ 'darkslategrey': '#2f4f4f',
+ 'darkturquoise': '#00ced1',
+ 'darkviolet': '#9400d3',
+ 'deep': '#ff1493',
+ 'deeppink': '#ff1493',
+ 'deeppink1': '#ff1493',
+ 'deeppink2': '#ee1289',
+ 'deeppink3': '#cd1076',
+ 'deeppink4': '#8b0a50',
+ 'deepskyblue': '#00bfff',
+ 'deepskyblue1': '#00bfff',
+ 'deepskyblue2': '#00b2ee',
+ 'deepskyblue3': '#009acd',
+ 'deepskyblue4': '#00688b',
+ 'dim': '#696969',
+ 'dimgray': '#696969',
+ 'dimgrey': '#696969',
+ 'dodger': '#1e90ff',
+ 'dodgerblue': '#1e90ff',
+ 'dodgerblue1': '#1e90ff',
+ 'dodgerblue2': '#1c86ee',
+ 'dodgerblue3': '#1874cd',
+ 'dodgerblue4': '#104e8b',
+ 'firebrick': '#b22222',
+ 'firebrick1': '#ff3030',
+ 'firebrick2': '#ee2c2c',
+ 'firebrick3': '#cd2626',
+ 'firebrick4': '#8b1a1a',
+ 'floral': '#fffaf0',
+ 'floralwhite': '#fffaf0',
+ 'forest': '#228b22',
+ 'forestgreen': '#228b22',
+ 'gainsboro': '#dcdcdc',
+ 'ghost': '#f8f8ff',
+ 'ghostwhite': '#f8f8ff',
+ 'gold': '#ffd700',
+ 'gold1': '#ffd700',
+ 'gold2': '#eec900',
+ 'gold3': '#cdad00',
+ 'gold4': '#8b7500',
+ 'goldenrod': '#daa520',
+ 'goldenrod1': '#ffc125',
+ 'goldenrod2': '#eeb422',
+ 'goldenrod3': '#cd9b1d',
+ 'goldenrod4': '#8b6914',
+ 'gray': '#bebebe',
+ 'gray0': '#000000',
+ 'gray1': '#030303',
+ 'gray10': '#1a1a1a',
+ 'gray100': '#ffffff',
+ 'gray11': '#1c1c1c',
+ 'gray12': '#1f1f1f',
+ 'gray13': '#212121',
+ 'gray14': '#242424',
+ 'gray15': '#262626',
+ 'gray16': '#292929',
+ 'gray17': '#2b2b2b',
+ 'gray18': '#2e2e2e',
+ 'gray19': '#303030',
+ 'gray2': '#050505',
+ 'gray20': '#333333',
+ 'gray21': '#363636',
+ 'gray22': '#383838',
+ 'gray23': '#3b3b3b',
+ 'gray24': '#3d3d3d',
+ 'gray25': '#404040',
+ 'gray26': '#424242',
+ 'gray27': '#454545',
+ 'gray28': '#474747',
+ 'gray29': '#4a4a4a',
+ 'gray3': '#080808',
+ 'gray30': '#4d4d4d',
+ 'gray31': '#4f4f4f',
+ 'gray32': '#525252',
+ 'gray33': '#545454',
+ 'gray34': '#575757',
+ 'gray35': '#595959',
+ 'gray36': '#5c5c5c',
+ 'gray37': '#5e5e5e',
+ 'gray38': '#616161',
+ 'gray39': '#636363',
+ 'gray4': '#0a0a0a',
+ 'gray40': '#666666',
+ 'gray41': '#696969',
+ 'gray42': '#6b6b6b',
+ 'gray43': '#6e6e6e',
+ 'gray44': '#707070',
+ 'gray45': '#737373',
+ 'gray46': '#757575',
+ 'gray47': '#787878',
+ 'gray48': '#7a7a7a',
+ 'gray49': '#7d7d7d',
+ 'gray5': '#0d0d0d',
+ 'gray50': '#7f7f7f',
+ 'gray51': '#828282',
+ 'gray52': '#858585',
+ 'gray53': '#878787',
+ 'gray54': '#8a8a8a',
+ 'gray55': '#8c8c8c',
+ 'gray56': '#8f8f8f',
+ 'gray57': '#919191',
+ 'gray58': '#949494',
+ 'gray59': '#969696',
+ 'gray6': '#0f0f0f',
+ 'gray60': '#999999',
+ 'gray61': '#9c9c9c',
+ 'gray62': '#9e9e9e',
+ 'gray63': '#a1a1a1',
+ 'gray64': '#a3a3a3',
+ 'gray65': '#a6a6a6',
+ 'gray66': '#a8a8a8',
+ 'gray67': '#ababab',
+ 'gray68': '#adadad',
+ 'gray69': '#b0b0b0',
+ 'gray7': '#121212',
+ 'gray70': '#b3b3b3',
+ 'gray71': '#b5b5b5',
+ 'gray72': '#b8b8b8',
+ 'gray73': '#bababa',
+ 'gray74': '#bdbdbd',
+ 'gray75': '#bfbfbf',
+ 'gray76': '#c2c2c2',
+ 'gray77': '#c4c4c4',
+ 'gray78': '#c7c7c7',
+ 'gray79': '#c9c9c9',
+ 'gray8': '#141414',
+ 'gray80': '#cccccc',
+ 'gray81': '#cfcfcf',
+ 'gray82': '#d1d1d1',
+ 'gray83': '#d4d4d4',
+ 'gray84': '#d6d6d6',
+ 'gray85': '#d9d9d9',
+ 'gray86': '#dbdbdb',
+ 'gray87': '#dedede',
+ 'gray88': '#e0e0e0',
+ 'gray89': '#e3e3e3',
+ 'gray9': '#171717',
+ 'gray90': '#e5e5e5',
+ 'gray91': '#e8e8e8',
+ 'gray92': '#ebebeb',
+ 'gray93': '#ededed',
+ 'gray94': '#f0f0f0',
+ 'gray95': '#f2f2f2',
+ 'gray96': '#f5f5f5',
+ 'gray97': '#f7f7f7',
+ 'gray98': '#fafafa',
+ 'gray99': '#fcfcfc',
+ 'green': '#adff2f',
+ 'green1': '#00ff00',
+ 'green2': '#00ee00',
+ 'green3': '#00cd00',
+ 'green4': '#008b00',
+ 'greenyellow': '#adff2f',
+ 'grey': '#bebebe',
+ 'grey0': '#000000',
+ 'grey1': '#030303',
+ 'grey10': '#1a1a1a',
+ 'grey100': '#ffffff',
+ 'grey11': '#1c1c1c',
+ 'grey12': '#1f1f1f',
+ 'grey13': '#212121',
+ 'grey14': '#242424',
+ 'grey15': '#262626',
+ 'grey16': '#292929',
+ 'grey17': '#2b2b2b',
+ 'grey18': '#2e2e2e',
+ 'grey19': '#303030',
+ 'grey2': '#050505',
+ 'grey20': '#333333',
+ 'grey21': '#363636',
+ 'grey22': '#383838',
+ 'grey23': '#3b3b3b',
+ 'grey24': '#3d3d3d',
+ 'grey25': '#404040',
+ 'grey26': '#424242',
+ 'grey27': '#454545',
+ 'grey28': '#474747',
+ 'grey29': '#4a4a4a',
+ 'grey3': '#080808',
+ 'grey30': '#4d4d4d',
+ 'grey31': '#4f4f4f',
+ 'grey32': '#525252',
+ 'grey33': '#545454',
+ 'grey34': '#575757',
+ 'grey35': '#595959',
+ 'grey36': '#5c5c5c',
+ 'grey37': '#5e5e5e',
+ 'grey38': '#616161',
+ 'grey39': '#636363',
+ 'grey4': '#0a0a0a',
+ 'grey40': '#666666',
+ 'grey41': '#696969',
+ 'grey42': '#6b6b6b',
+ 'grey43': '#6e6e6e',
+ 'grey44': '#707070',
+ 'grey45': '#737373',
+ 'grey46': '#757575',
+ 'grey47': '#787878',
+ 'grey48': '#7a7a7a',
+ 'grey49': '#7d7d7d',
+ 'grey5': '#0d0d0d',
+ 'grey50': '#7f7f7f',
+ 'grey51': '#828282',
+ 'grey52': '#858585',
+ 'grey53': '#878787',
+ 'grey54': '#8a8a8a',
+ 'grey55': '#8c8c8c',
+ 'grey56': '#8f8f8f',
+ 'grey57': '#919191',
+ 'grey58': '#949494',
+ 'grey59': '#969696',
+ 'grey6': '#0f0f0f',
+ 'grey60': '#999999',
+ 'grey61': '#9c9c9c',
+ 'grey62': '#9e9e9e',
+ 'grey63': '#a1a1a1',
+ 'grey64': '#a3a3a3',
+ 'grey65': '#a6a6a6',
+ 'grey66': '#a8a8a8',
+ 'grey67': '#ababab',
+ 'grey68': '#adadad',
+ 'grey69': '#b0b0b0',
+ 'grey7': '#121212',
+ 'grey70': '#b3b3b3',
+ 'grey71': '#b5b5b5',
+ 'grey72': '#b8b8b8',
+ 'grey73': '#bababa',
+ 'grey74': '#bdbdbd',
+ 'grey75': '#bfbfbf',
+ 'grey76': '#c2c2c2',
+ 'grey77': '#c4c4c4',
+ 'grey78': '#c7c7c7',
+ 'grey79': '#c9c9c9',
+ 'grey8': '#141414',
+ 'grey80': '#cccccc',
+ 'grey81': '#cfcfcf',
+ 'grey82': '#d1d1d1',
+ 'grey83': '#d4d4d4',
+ 'grey84': '#d6d6d6',
+ 'grey85': '#d9d9d9',
+ 'grey86': '#dbdbdb',
+ 'grey87': '#dedede',
+ 'grey88': '#e0e0e0',
+ 'grey89': '#e3e3e3',
+ 'grey9': '#171717',
+ 'grey90': '#e5e5e5',
+ 'grey91': '#e8e8e8',
+ 'grey92': '#ebebeb',
+ 'grey93': '#ededed',
+ 'grey94': '#f0f0f0',
+ 'grey95': '#f2f2f2',
+ 'grey96': '#f5f5f5',
+ 'grey97': '#f7f7f7',
+ 'grey98': '#fafafa',
+ 'grey99': '#fcfcfc',
+ 'honeydew': '#f0fff0',
+ 'honeydew1': '#f0fff0',
+ 'honeydew2': '#e0eee0',
+ 'honeydew3': '#c1cdc1',
+ 'honeydew4': '#838b83',
+ 'hot': '#ff69b4',
+ 'hotpink': '#ff69b4',
+ 'hotpink1': '#ff6eb4',
+ 'hotpink2': '#ee6aa7',
+ 'hotpink3': '#cd6090',
+ 'hotpink4': '#8b3a62',
+ 'indian': '#cd5c5c',
+ 'indianred': '#cd5c5c',
+ 'indianred1': '#ff6a6a',
+ 'indianred2': '#ee6363',
+ 'indianred3': '#cd5555',
+ 'indianred4': '#8b3a3a',
+ 'ivory': '#fffff0',
+ 'ivory1': '#fffff0',
+ 'ivory2': '#eeeee0',
+ 'ivory3': '#cdcdc1',
+ 'ivory4': '#8b8b83',
+ 'khaki': '#f0e68c',
+ 'khaki1': '#fff68f',
+ 'khaki2': '#eee685',
+ 'khaki3': '#cdc673',
+ 'khaki4': '#8b864e',
+ 'lavender': '#fff0f5',
+ 'lavenderblush': '#fff0f5',
+ 'lavenderblush1': '#fff0f5',
+ 'lavenderblush2': '#eee0e5',
+ 'lavenderblush3': '#cdc1c5',
+ 'lavenderblush4': '#8b8386',
+ 'lawn': '#7cfc00',
+ 'lawngreen': '#7cfc00',
+ 'lemon': '#fffacd',
+ 'lemonchiffon': '#fffacd',
+ 'lemonchiffon1': '#fffacd',
+ 'lemonchiffon2': '#eee9bf',
+ 'lemonchiffon3': '#cdc9a5',
+ 'lemonchiffon4': '#8b8970',
+ 'light': '#90ee90',
+ 'lightblue': '#add8e6',
+ 'lightblue1': '#bfefff',
+ 'lightblue2': '#b2dfee',
+ 'lightblue3': '#9ac0cd',
+ 'lightblue4': '#68838b',
+ 'lightcoral': '#f08080',
+ 'lightcyan': '#e0ffff',
+ 'lightcyan1': '#e0ffff',
+ 'lightcyan2': '#d1eeee',
+ 'lightcyan3': '#b4cdcd',
+ 'lightcyan4': '#7a8b8b',
+ 'lightgoldenrod': '#eedd82',
+ 'lightgoldenrod1': '#ffec8b',
+ 'lightgoldenrod2': '#eedc82',
+ 'lightgoldenrod3': '#cdbe70',
+ 'lightgoldenrod4': '#8b814c',
+ 'lightgoldenrodyellow': '#fafad2',
+ 'lightgray': '#d3d3d3',
+ 'lightgreen': '#90ee90',
+ 'lightgrey': '#d3d3d3',
+ 'lightpink': '#ffb6c1',
+ 'lightpink1': '#ffaeb9',
+ 'lightpink2': '#eea2ad',
+ 'lightpink3': '#cd8c95',
+ 'lightpink4': '#8b5f65',
+ 'lightsalmon': '#ffa07a',
+ 'lightsalmon1': '#ffa07a',
+ 'lightsalmon2': '#ee9572',
+ 'lightsalmon3': '#cd8162',
+ 'lightsalmon4': '#8b5742',
+ 'lightseagreen': '#20b2aa',
+ 'lightskyblue': '#87cefa',
+ 'lightskyblue1': '#b0e2ff',
+ 'lightskyblue2': '#a4d3ee',
+ 'lightskyblue3': '#8db6cd',
+ 'lightskyblue4': '#607b8b',
+ 'lightslateblue': '#8470ff',
+ 'lightslategray': '#778899',
+ 'lightslategrey': '#778899',
+ 'lightsteelblue': '#b0c4de',
+ 'lightsteelblue1': '#cae1ff',
+ 'lightsteelblue2': '#bcd2ee',
+ 'lightsteelblue3': '#a2b5cd',
+ 'lightsteelblue4': '#6e7b8b',
+ 'lightyellow': '#ffffe0',
+ 'lightyellow1': '#ffffe0',
+ 'lightyellow2': '#eeeed1',
+ 'lightyellow3': '#cdcdb4',
+ 'lightyellow4': '#8b8b7a',
+ 'lime': '#32cd32',
+ 'limegreen': '#32cd32',
+ 'linen': '#faf0e6',
+ 'magenta': '#ff00ff',
+ 'magenta1': '#ff00ff',
+ 'magenta2': '#ee00ee',
+ 'magenta3': '#cd00cd',
+ 'magenta4': '#8b008b',
+ 'maroon': '#b03060',
+ 'maroon1': '#ff34b3',
+ 'maroon2': '#ee30a7',
+ 'maroon3': '#cd2990',
+ 'maroon4': '#8b1c62',
+ 'medium': '#9370db',
+ 'mediumaquamarine': '#66cdaa',
+ 'mediumblue': '#0000cd',
+ 'mediumorchid': '#ba55d3',
+ 'mediumorchid1': '#e066ff',
+ 'mediumorchid2': '#d15fee',
+ 'mediumorchid3': '#b452cd',
+ 'mediumorchid4': '#7a378b',
+ 'mediumpurple': '#9370db',
+ 'mediumpurple1': '#ab82ff',
+ 'mediumpurple2': '#9f79ee',
+ 'mediumpurple3': '#8968cd',
+ 'mediumpurple4': '#5d478b',
+ 'mediumseagreen': '#3cb371',
+ 'mediumslateblue': '#7b68ee',
+ 'mediumspringgreen': '#00fa9a',
+ 'mediumturquoise': '#48d1cc',
+ 'mediumvioletred': '#c71585',
+ 'midnight': '#191970',
+ 'midnightblue': '#191970',
+ 'mint': '#f5fffa',
+ 'mintcream': '#f5fffa',
+ 'misty': '#ffe4e1',
+ 'mistyrose': '#ffe4e1',
+ 'mistyrose1': '#ffe4e1',
+ 'mistyrose2': '#eed5d2',
+ 'mistyrose3': '#cdb7b5',
+ 'mistyrose4': '#8b7d7b',
+ 'moccasin': '#ffe4b5',
+ 'navajo': '#ffdead',
+ 'navajowhite': '#ffdead',
+ 'navajowhite1': '#ffdead',
+ 'navajowhite2': '#eecfa1',
+ 'navajowhite3': '#cdb38b',
+ 'navajowhite4': '#8b795e',
+ 'navy': '#000080',
+ 'navyblue': '#000080',
+ 'old': '#fdf5e6',
+ 'oldlace': '#fdf5e6',
+ 'olive': '#6b8e23',
+ 'olivedrab': '#6b8e23',
+ 'olivedrab1': '#c0ff3e',
+ 'olivedrab2': '#b3ee3a',
+ 'olivedrab3': '#9acd32',
+ 'olivedrab4': '#698b22',
+ 'orange': '#ff4500',
+ 'orange1': '#ffa500',
+ 'orange2': '#ee9a00',
+ 'orange3': '#cd8500',
+ 'orange4': '#8b5a00',
+ 'orangered': '#ff4500',
+ 'orangered1': '#ff4500',
+ 'orangered2': '#ee4000',
+ 'orangered3': '#cd3700',
+ 'orangered4': '#8b2500',
+ 'orchid': '#da70d6',
+ 'orchid1': '#ff83fa',
+ 'orchid2': '#ee7ae9',
+ 'orchid3': '#cd69c9',
+ 'orchid4': '#8b4789',
+ 'pale': '#db7093',
+ 'palegoldenrod': '#eee8aa',
+ 'palegreen': '#98fb98',
+ 'palegreen1': '#9aff9a',
+ 'palegreen2': '#90ee90',
+ 'palegreen3': '#7ccd7c',
+ 'palegreen4': '#548b54',
+ 'paleturquoise': '#afeeee',
+ 'paleturquoise1': '#bbffff',
+ 'paleturquoise2': '#aeeeee',
+ 'paleturquoise3': '#96cdcd',
+ 'paleturquoise4': '#668b8b',
+ 'palevioletred': '#db7093',
+ 'palevioletred1': '#ff82ab',
+ 'palevioletred2': '#ee799f',
+ 'palevioletred3': '#cd6889',
+ 'palevioletred4': '#8b475d',
+ 'papaya': '#ffefd5',
+ 'papayawhip': '#ffefd5',
+ 'peach': '#ffdab9',
+ 'peachpuff': '#ffdab9',
+ 'peachpuff1': '#ffdab9',
+ 'peachpuff2': '#eecbad',
+ 'peachpuff3': '#cdaf95',
+ 'peachpuff4': '#8b7765',
+ 'peru': '#cd853f',
+ 'pink': '#ffc0cb',
+ 'pink1': '#ffb5c5',
+ 'pink2': '#eea9b8',
+ 'pink3': '#cd919e',
+ 'pink4': '#8b636c',
+ 'plum': '#dda0dd',
+ 'plum1': '#ffbbff',
+ 'plum2': '#eeaeee',
+ 'plum3': '#cd96cd',
+ 'plum4': '#8b668b',
+ 'powder': '#b0e0e6',
+ 'powderblue': '#b0e0e6',
+ 'purple': '#a020f0',
+ 'purple1': '#9b30ff',
+ 'purple2': '#912cee',
+ 'purple3': '#7d26cd',
+ 'purple4': '#551a8b',
+ 'red': '#ff0000',
+ 'red1': '#ff0000',
+ 'red2': '#ee0000',
+ 'red3': '#cd0000',
+ 'red4': '#8b0000',
+ 'rosy': '#bc8f8f',
+ 'rosybrown': '#bc8f8f',
+ 'rosybrown1': '#ffc1c1',
+ 'rosybrown2': '#eeb4b4',
+ 'rosybrown3': '#cd9b9b',
+ 'rosybrown4': '#8b6969',
+ 'royal': '#4169e1',
+ 'royalblue': '#4169e1',
+ 'royalblue1': '#4876ff',
+ 'royalblue2': '#436eee',
+ 'royalblue3': '#3a5fcd',
+ 'royalblue4': '#27408b',
+ 'saddle': '#8b4513',
+ 'saddlebrown': '#8b4513',
+ 'salmon': '#fa8072',
+ 'salmon1': '#ff8c69',
+ 'salmon2': '#ee8262',
+ 'salmon3': '#cd7054',
+ 'salmon4': '#8b4c39',
+ 'sandy': '#f4a460',
+ 'sandybrown': '#f4a460',
+ 'sea': '#2e8b57',
+ 'seagreen': '#2e8b57',
+ 'seagreen1': '#54ff9f',
+ 'seagreen2': '#4eee94',
+ 'seagreen3': '#43cd80',
+ 'seagreen4': '#2e8b57',
+ 'seashell': '#fff5ee',
+ 'seashell1': '#fff5ee',
+ 'seashell2': '#eee5de',
+ 'seashell3': '#cdc5bf',
+ 'seashell4': '#8b8682',
+ 'sienna': '#a0522d',
+ 'sienna1': '#ff8247',
+ 'sienna2': '#ee7942',
+ 'sienna3': '#cd6839',
+ 'sienna4': '#8b4726',
+ 'sky': '#87ceeb',
+ 'skyblue': '#87ceeb',
+ 'skyblue1': '#87ceff',
+ 'skyblue2': '#7ec0ee',
+ 'skyblue3': '#6ca6cd',
+ 'skyblue4': '#4a708b',
+ 'slate': '#6a5acd',
+ 'slateblue': '#6a5acd',
+ 'slateblue1': '#836fff',
+ 'slateblue2': '#7a67ee',
+ 'slateblue3': '#6959cd',
+ 'slateblue4': '#473c8b',
+ 'slategray': '#708090',
+ 'slategray1': '#c6e2ff',
+ 'slategray2': '#b9d3ee',
+ 'slategray3': '#9fb6cd',
+ 'slategray4': '#6c7b8b',
+ 'slategrey': '#708090',
+ 'snow': '#fffafa',
+ 'snow1': '#fffafa',
+ 'snow2': '#eee9e9',
+ 'snow3': '#cdc9c9',
+ 'snow4': '#8b8989',
+ 'spring': '#00ff7f',
+ 'springgreen': '#00ff7f',
+ 'springgreen1': '#00ff7f',
+ 'springgreen2': '#00ee76',
+ 'springgreen3': '#00cd66',
+ 'springgreen4': '#008b45',
+ 'steel': '#4682b4',
+ 'steelblue': '#4682b4',
+ 'steelblue1': '#63b8ff',
+ 'steelblue2': '#5cacee',
+ 'steelblue3': '#4f94cd',
+ 'steelblue4': '#36648b',
+ 'tan': '#d2b48c',
+ 'tan1': '#ffa54f',
+ 'tan2': '#ee9a49',
+ 'tan3': '#cd853f',
+ 'tan4': '#8b5a2b',
+ 'thistle': '#d8bfd8',
+ 'thistle1': '#ffe1ff',
+ 'thistle2': '#eed2ee',
+ 'thistle3': '#cdb5cd',
+ 'thistle4': '#8b7b8b',
+ 'tomato': '#ff6347',
+ 'tomato1': '#ff6347',
+ 'tomato2': '#ee5c42',
+ 'tomato3': '#cd4f39',
+ 'tomato4': '#8b3626',
+ 'turquoise': '#40e0d0',
+ 'turquoise1': '#00f5ff',
+ 'turquoise2': '#00e5ee',
+ 'turquoise3': '#00c5cd',
+ 'turquoise4': '#00868b',
+ 'violet': '#ee82ee',
+ 'violetred': '#d02090',
+ 'violetred1': '#ff3e96',
+ 'violetred2': '#ee3a8c',
+ 'violetred3': '#cd3278',
+ 'violetred4': '#8b2252',
+ 'wheat': '#f5deb3',
+ 'wheat1': '#ffe7ba',
+ 'wheat2': '#eed8ae',
+ 'wheat3': '#cdba96',
+ 'wheat4': '#8b7e66',
+ 'white': '#ffffff',
+ 'whitesmoke': '#f5f5f5',
+ 'yellow': '#ffff00',
+ 'yellow1': '#ffff00',
+ 'yellow2': '#eeee00',
+ 'yellow3': '#cdcd00',
+ 'yellow4': '#8b8b00',
+ 'yellowgreen': '#9acd32'
+}
+
#: Map of Vim highlight-group names to Pygments token names.  A tuple
#: means the Vim group feeds several Pygments tokens; the empty string
#: for 'normal' denotes the bare ``Token`` root (the style default).
#: (The original dict listed 'number' twice; the duplicate is removed.)
TOKENS = {
    'normal': '',
    'string': 'String',
    'number': 'Number',
    'float': 'Number.Float',
    'constant': 'Name.Constant',
    'statement': ('Keyword', 'Name.Tag'),
    'identifier': 'Name.Variable',
    'operator': 'Operator.Word',
    'label': 'Name.Label',
    'exception': 'Name.Exception',
    'function': ('Name.Function', 'Name.Attribute'),
    'preproc': 'Comment.Preproc',
    'comment': 'Comment',
    'type': 'Keyword.Type',
    'diffadd': 'Generic.Inserted',
    'diffdelete': 'Generic.Deleted',
    'error': 'Generic.Error',
    'errormsg': 'Generic.Traceback',
    'title': ('Generic.Heading', 'Generic.Subheading'),
    'underlined': 'Generic.Emph',
    'special': 'Name.Entity',
    'nontext': 'Generic.Output'
}

#: Top-level Pygments token names referenced by TOKENS (e.g. 'Name',
#: 'Generic'); used to build the import line of generated style modules.
TOKEN_TYPES = {
    name.split('.')[0]
    for value in TOKENS.values()
    for name in (value if isinstance(value, tuple) else (value,))
    if name
}
+
+
def get_vim_color(color):
    """Normalize a Vim color value to a ``#rrggbb`` hex string.

    Full 7-character hex values are returned unchanged; the short
    ``#rgb`` form is widened by zero-padding each digit
    (``#abc`` -> ``#a0b0c0``).  Anything else is treated as a color
    name and looked up in COLORS; unknown names yield None.
    """
    if color.startswith('#'):
        if len(color) == 7:
            return color
        # Bug fix: the original ``'0'.join(color)[1:]`` kept the '#'
        # inside the join and produced an invalid 8-character value
        # such as '#0a0b0c0'; join only the digits instead.
        return '#%s0' % '0'.join(color[1:])
    return COLORS.get(color.lower())
+
+
def find_colors(code):
    """Parse Vim colorscheme source and extract Pygments style data.

    Returns a ``(default_token, color_map)`` pair: ``default_token`` is
    the style string for the bare ``Token`` root (from the 'Normal'
    group) and ``color_map`` maps Pygments token names to style strings.

    NOTE(review): depends on the module-level ``split_re`` regex (defined
    earlier in the file) to tokenize each line, and on the loop variables
    ``token`` and ``key`` deliberately leaking into the ``set`` closure
    below — statement order here must not be changed.
    """
    colors = {'Normal': {}}
    bg_color = None
    # Closure helper: record *value* under *attrib* for the highlight
    # group currently bound to the enclosing loop variable ``token``.
    # gui* keys (checked via the leaked ``key``) override earlier
    # cterm/term settings; otherwise the first setting wins.
    def set(attrib, value):
        if token not in colors:
            colors[token] = {}
        if key.startswith('gui') or attrib not in colors[token]:
            colors[token][attrib] = value

    for line in code.splitlines():
        if line.startswith('"'):
            # Vim comment line.
            continue
        parts = split_re.split(line.strip())
        if len(parts) == 2 and parts[0] == 'set':
            # ':set background dark' implies a black default background
            # (applied at the end only if 'Normal' sets no bgcolor).
            p = parts[1].split()
            if p[0] == 'background' and p[1] == 'dark':
                token = 'Normal'
                bg_color = '#000000'
        elif len(parts) > 2 and \
             len(parts[0]) >= 2 and \
             'highlight'.startswith(parts[0]):
            # Any unambiguous abbreviation of ':highlight' (hi, hig, ...).
            token = parts[1].lower()
            if token not in TOKENS:
                continue
            for item in parts[2:]:
                # Each argument is a key=value pair, e.g. guifg=#ffffff.
                p = item.split('=', 1)
                if not len(p) == 2:
                    continue
                key, value = p
                if key in ('ctermfg', 'guifg'):
                    color = get_vim_color(value)
                    if color:
                        set('color', color)
                elif key in ('ctermbg', 'guibg'):
                    color = get_vim_color(value)
                    if color:
                        set('bgcolor', color)
                elif key in ('term', 'cterm', 'gui'):
                    # Comma-separated attribute list, e.g. gui=bold,italic.
                    items = value.split(',')
                    for item in items:
                        item = item.lower()
                        if item == 'none':
                            set('noinherit', True)
                        elif item == 'bold':
                            set('bold', True)
                        elif item == 'underline':
                            set('underline', True)
                        elif item == 'italic':
                            set('italic', True)

    if bg_color is not None and not colors['Normal'].get('bgcolor'):
        colors['Normal']['bgcolor'] = bg_color

    # Flatten the per-group attribute dicts into Pygments style strings.
    color_map = {}
    for token, styles in colors.items():
        if token in TOKENS:
            tmp = []
            if styles.get('noinherit'):
                tmp.append('noinherit')
            if 'color' in styles:
                tmp.append(styles['color'])
            if 'bgcolor' in styles:
                tmp.append('bg:' + styles['bgcolor'])
            if styles.get('bold'):
                tmp.append('bold')
            if styles.get('italic'):
                tmp.append('italic')
            if styles.get('underline'):
                tmp.append('underline')
            tokens = TOKENS[token]
            if not isinstance(tokens, tuple):
                tokens = (tokens,)
            for token in tokens:
                color_map[token] = ' '.join(tmp)

    # 'normal' maps to '' in TOKENS, so the root style lands under key ''.
    # NOTE(review): this raises KeyError when the scheme never highlights
    # 'Normal' (the seeded capital-N entry is not in TOKENS) — confirm
    # all input schemes define it.
    default_token = color_map.pop('')
    return default_token, color_map
+
+
class StyleWriter:
    """Render a parsed Vim colorscheme as a Pygments style module."""

    def __init__(self, code, name):
        self.code = code          # raw .vim colorscheme source
        self.name = name.lower()  # scheme name, lowercased for naming

    def write_header(self, out):
        """Write the module docstring, imports and the class statement."""
        out.write('# -*- coding: utf-8 -*-\n"""\n')
        out.write('    %s Colorscheme\n' % self.name.title())
        out.write('    %s\n\n' % ('~' * (len(self.name) + 12)))
        out.write('    Converted by %s\n' % SCRIPT_NAME)
        out.write('"""\nfrom pygments.style import Style\n')
        out.write('from pygments.token import Token, %s\n\n' % ', '.join(TOKEN_TYPES))
        out.write('class %sStyle(Style):\n\n' % self.name.title())

    def write(self, out):
        """Write the complete generated style module to *out*."""
        self.write_header(out)
        default_token, tokens = find_colors(self.code)
        tokens = list(tokens.items())
        # Sort entries by token-name length so parent tokens precede
        # their children.  Bug fix: the original used a Python 2 cmp
        # function (a TypeError on Python 3) that also compared
        # len(a[0]) with len(a[1]) of the *same* item.
        tokens.sort(key=lambda item: len(item[0]))
        bg_color = [x[3:] for x in default_token.split() if x.startswith('bg:')]
        if bg_color:
            out.write('    background_color = %r\n' % bg_color[0])
        out.write('    styles = {\n')
        out.write('        %-20s%r,\n' % ('Token:', default_token))
        for token, definition in tokens:
            if definition:
                out.write('        %-20s%r,\n' % (token + ':', definition))
        out.write('    }')

    def __repr__(self):
        # Bug fix: originally called the nonexistent ``self.write_style``.
        out = StringIO()
        self.write(out)
        return out.getvalue()
+
+
def convert(filename, stream=None):
    """Convert the Vim colorscheme in *filename* to a Pygments style.

    The style class is named after the file's basename (a trailing
    '.vim' is stripped).  If *stream* is given the generated module is
    written to it and None is returned; otherwise the module source is
    returned as a string.
    """
    name = path.basename(filename)
    if name.endswith('.vim'):
        name = name[:-4]
    # Bug fix: the Python 2 ``file()`` builtin no longer exists; a
    # context manager also guarantees the handle is closed on error.
    with open(filename) as f:
        code = f.read()
    writer = StyleWriter(code, name)
    out = stream if stream is not None else StringIO()
    writer.write(out)
    if stream is None:
        return out.getvalue()
+
+
def main():
    """Command-line entry point: convert the given .vim file to stdout.

    Returns 2 on usage errors, 1 when the file is missing, and None
    (success) otherwise.
    """
    args = sys.argv
    if len(args) != 2 or args[1] in ('-h', '--help'):
        print('Usage: %s <filename.vim>' % args[0])
        return 2
    if args[1] in ('-v', '--version'):
        print('%s %s' % (SCRIPT_NAME, SCRIPT_VERSION))
        return
    target = args[1]
    if not path.exists(target) or not path.isfile(target):
        print('Error: %s not found' % target)
        return 1
    convert(target, sys.stdout)
    sys.stdout.write('\n')
+
+
if __name__ == '__main__':
    # Exit with main()'s return code; 'or 0' maps the None success
    # return to exit status 0.
    sys.exit(main() or 0)