summaryrefslogtreecommitdiffstats
path: root/scripts
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 11:33:32 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 11:33:32 +0000
commit1f403ad2197fc7442409f434ee574f3e6b46fb73 (patch)
tree0299c6dd11d5edfa918a29b6456bc1875f1d288c /scripts
parentInitial commit. (diff)
downloadpygments-1f403ad2197fc7442409f434ee574f3e6b46fb73.tar.xz
pygments-1f403ad2197fc7442409f434ee574f3e6b46fb73.zip
Adding upstream version 2.14.0+dfsg.upstream/2.14.0+dfsgupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'scripts')
-rw-r--r--scripts/check_crlf.py32
-rwxr-xr-xscripts/check_repeated_token.py77
-rwxr-xr-xscripts/check_sources.py201
-rw-r--r--scripts/check_whitespace_token.py52
-rwxr-xr-xscripts/count_token_references.py270
-rwxr-xr-xscripts/debug_lexer.py306
-rw-r--r--scripts/detect_missing_analyse_text.py48
-rw-r--r--scripts/gen_mapfiles.py53
-rw-r--r--scripts/get_css_properties.py33
-rw-r--r--scripts/get_vimkw.py72
-rw-r--r--scripts/pylintrc301
-rw-r--r--scripts/release-checklist24
-rwxr-xr-xscripts/update_contrasts.py21
-rw-r--r--scripts/utility.py69
-rwxr-xr-xscripts/vim2pygments.py932
15 files changed, 2491 insertions, 0 deletions
diff --git a/scripts/check_crlf.py b/scripts/check_crlf.py
new file mode 100644
index 0000000..c03b68d
--- /dev/null
+++ b/scripts/check_crlf.py
@@ -0,0 +1,32 @@
+#!/usr/bin/env python
+"""
+ Checker for line endings
+ ~~~~~~~~~~~~~~~~~~~~~~~~
+
+ Make sure Python (.py) and Bash completion (.bashcomp) files do not
+ contain CR/LF newlines.
+
+ :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+import sys
+import os
+
if __name__ == '__main__':
    # Walk every directory given on the command line and fail (exit 1)
    # as soon as any checked file contains a CR/LF line ending.
    for directory in sys.argv[1:]:
        if not os.path.exists(directory):
            continue

        for root, dirs, files in os.walk(directory):
            for filename in files:
                # Only Python sources and bash completion files are checked.
                if not filename.endswith(('.py', '.bashcomp')):
                    continue

                full_path = os.path.join(root, filename)
                with open(full_path, 'rb') as f:
                    if b'\r\n' in f.read():
                        print('CR/LF found in', full_path)
                        sys.exit(1)

    sys.exit(0)
diff --git a/scripts/check_repeated_token.py b/scripts/check_repeated_token.py
new file mode 100755
index 0000000..1636281
--- /dev/null
+++ b/scripts/check_repeated_token.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+"""
+ Checker for repeated tokens
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ Helper script to find suspicious lexers which produce the same token
+ repeatedly, i.e. for example:
+
+ .. code::
+
+ 'd' Text
+ 'a' Text
+ 't' Text
+ 'a' Text
+ 'b' Text
+ 'a' Text
+ 's' Text
+ 'e' Text
+
+ This script has two test modes: Check for tokens repeating more often than
+ a given threshold, and exclude anything but single-character tokens.
+ Repeated single-character tokens are quite problematic as they result in
+ bloated output and are usually an indication that someone is missing
+ a + or * in the regex.
+
+ :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+import argparse
+import sys
+
+from utility import unpack_output_file, process_output_files
+
+
def check_file(path, threshold, single_only):
    """Scan one lexer test output file for runs of identical tokens.

    path: path of the ``*.output`` file, read via ``unpack_output_file``.
    threshold: maximum allowed number of consecutive identical tokens.
    single_only: if true, only single-character token values are counted;
        longer values interrupt (reset) any run being tracked.

    Returns True when the file is clean; prints ``path:linenumber`` and
    returns False at the first over-threshold run.
    """
    current_token = ''
    current_token_repeat_count = 1

    for value, token, linenumber in unpack_output_file(path):
        if single_only and len(value) > 1:
            # Bug fix: the original assigned to the loop variable
            # ``token`` here, which had no effect -- the tracked state
            # lives in ``current_token`` and must be reset instead.
            current_token = ''
            current_token_repeat_count = 1
            continue

        if token != current_token:
            current_token = token
            current_token_repeat_count = 1
        else:
            current_token_repeat_count += 1

        if current_token_repeat_count > threshold:
            print(f'{path}:{linenumber}')
            return False

    return True
+
+
def main(args):
    """Return 1 if any output file under ``args.TEST_ROOT`` fails the check."""
    def run_check(path):
        # Bind the command-line options into the per-file checker.
        return check_file(path, args.threshold, args.single)

    failed = process_output_files(args.TEST_ROOT, run_check)
    return 1 if failed > 0 else 0
+
+
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('TEST_ROOT',
                        help='Root directory containing the tests')
    # Fixed typo in the help text: "more often then" -> "more often than".
    parser.add_argument('-t', '--threshold', type=int, default=5,
                        help='Warn if a token repeats itself more often than '
                             'this number.')
    parser.add_argument('-s', '--single', action='store_true', default=False,
                        help='Only look at tokens matching a single character')
    args = parser.parse_args()
    sys.exit(main(args))
diff --git a/scripts/check_sources.py b/scripts/check_sources.py
new file mode 100755
index 0000000..1feb1a3
--- /dev/null
+++ b/scripts/check_sources.py
@@ -0,0 +1,201 @@
+#!/usr/bin/env python
+"""
+ Checker for file headers
+ ~~~~~~~~~~~~~~~~~~~~~~~~
+
+ Make sure each Python file has a correct file header
+ including copyright and license information.
+
+ :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+import io
+import os
+import re
+import sys
+import getopt
+from os.path import join, splitext, abspath
+
+
# Registry mapping a file suffix (e.g. '.py') to the list of checker
# functions that should run on files with that suffix.
checkers = {}


def checker(*suffixes, **kwds):
    """Decorator: register the decorated function as a checker.

    The function is appended to ``checkers`` under every given suffix.
    The keyword ``only_pkg`` (default False) is stored on the function
    itself and marks checkers that apply only to package modules.
    """
    only_pkg = kwds.pop('only_pkg', False)

    def register(func):
        func.only_pkg = only_pkg
        for suffix in suffixes:
            checkers.setdefault(suffix, []).append(func)
        return func

    return register
+
+
+name_mail_re = r'[\w ]+(<.*?>)?'
+copyright_re = re.compile(r'^ :copyright: Copyright 2006-2022 by '
+ r'the Pygments team, see AUTHORS\.$')
+copyright_2_re = re.compile(r'^ %s(, %s)*[,.]$' %
+ (name_mail_re, name_mail_re))
+is_const_re = re.compile(r'if.*?==\s+(None|False|True)\b')
+
+misspellings = ["developement", "adress", "verificate", # ALLOW-MISSPELLING
+ "informations", "unlexer"] # ALLOW-MISSPELLING
+
+
@checker('.py')
def check_syntax(fn, lines):
    """Ensure the file is non-empty and compiles as Python source.

    Yields ``(lineno, message)`` pairs for any problem found.
    """
    if not lines:
        yield 0, "empty file"
        return
    # Drop a shebang line first; it is not valid Python for compile().
    source_lines = lines[1:] if '#!/' in lines[0] else lines
    try:
        compile('\n'.join(source_lines), fn, "exec")
    except SyntaxError as err:
        yield 0, "not compilable: %s" % err
+
+
@checker('.py')
def check_style_and_encoding(fn, lines):
    """Flag comparisons against None/True/False written with ``==``."""
    for lineno, line in enumerate(lines, start=1):
        if is_const_re.search(line):
            yield lineno, 'using == None/True/False'
+
+
+@checker('.py', only_pkg=True)
+def check_fileheader(fn, lines):
+ # line number correction
+ c = 1
+ if lines[0:1] == ['#!/usr/bin/env python']:
+ lines = lines[1:]
+ c = 2
+
+ llist = []
+ docopen = False
+ for lno, line in enumerate(lines):
+ llist.append(line)
+ if lno == 0:
+ if line != '"""' and line != 'r"""':
+ yield 2, f'missing docstring begin ("""), found {line!r}'
+ else:
+ docopen = True
+ elif docopen:
+ if line == '"""':
+ # end of docstring
+ if lno <= 3:
+ yield lno+c, "missing module name in docstring"
+ break
+
+ if line != "" and line[:4] != ' ' and docopen:
+ yield lno+c, "missing correct docstring indentation"
+
+ if lno == 1:
+ # if not in package, don't check the module name
+ modname = fn[:-3].replace('/', '.').replace('.__init__', '')
+ while modname:
+ if line.lower()[4:] == modname:
+ break
+ modname = '.'.join(modname.split('.')[1:])
+ else:
+ yield 3, "wrong module name in docstring heading"
+ modnamelen = len(line.strip())
+ elif lno == 2:
+ if line.strip() != modnamelen * "~":
+ yield 4, "wrong module name underline, should be ~~~...~"
+
+ else:
+ yield 0, "missing end and/or start of docstring..."
+
+ # check for copyright and license fields
+ license = llist[-2:-1]
+ if license != [" :license: BSD, see LICENSE for details."]:
+ yield 0, "no correct license info"
+
+ ci = -3
+ copyright = llist[ci:ci+1]
+ while copyright and copyright_2_re.match(copyright[0]):
+ ci -= 1
+ copyright = llist[ci:ci+1]
+ if not copyright or not copyright_re.match(copyright[0]):
+ yield 0, "no correct copyright info"
+
+
+def main(argv):
+ try:
+ gopts, args = getopt.getopt(argv[1:], "vi:")
+ except getopt.GetoptError:
+ print("Usage: %s [-v] [-i ignorepath]* [path]" % argv[0])
+ return 2
+ opts = {}
+ for opt, val in gopts:
+ if opt == '-i':
+ val = abspath(val)
+ opts.setdefault(opt, []).append(val)
+
+ if len(args) == 0:
+ path = '.'
+ elif len(args) == 1:
+ path = args[0]
+ else:
+ print("Usage: %s [-v] [-i ignorepath]* [path]" % argv[0])
+ return 2
+
+ verbose = '-v' in opts
+
+ num = 0
+ out = io.StringIO()
+
+ for root, dirs, files in os.walk(path):
+ for excl in ['.tox', '.git', 'examplefiles']:
+ if excl in dirs:
+ dirs.remove(excl)
+ if '-i' in opts and abspath(root) in opts['-i']:
+ del dirs[:]
+ continue
+ # XXX: awkward: for the Makefile call: don't check non-package
+ # files for file headers
+ in_pygments_pkg = root.startswith('./pygments')
+ for fn in files:
+
+ fn = join(root, fn)
+ if fn[:2] == './':
+ fn = fn[2:]
+
+ if '-i' in opts and abspath(fn) in opts['-i']:
+ continue
+
+ ext = splitext(fn)[1]
+ checkerlist = checkers.get(ext, None)
+ if not checkerlist:
+ continue
+
+ if verbose:
+ print("Checking %s..." % fn)
+
+ try:
+ with open(fn, 'rb') as f:
+ lines = f.read().decode('utf-8').splitlines()
+ except OSError as err:
+ print("%s: cannot open: %s" % (fn, err))
+ num += 1
+ continue
+
+ for checker in checkerlist:
+ if not in_pygments_pkg and checker.only_pkg:
+ continue
+ for lno, msg in checker(fn, lines):
+ print('%s:%d: %s' % (fn, lno, msg), file=out)
+ num += 1
+ if verbose:
+ print()
+ if num == 0:
+ print("No errors found.")
+ else:
+ print(out.getvalue().rstrip('\n'))
+ print("%d error%s found." % (num, num > 1 and "s" or ""))
+ return int(num > 0)
+
+
+if __name__ == '__main__':
+ sys.exit(main(sys.argv))
diff --git a/scripts/check_whitespace_token.py b/scripts/check_whitespace_token.py
new file mode 100644
index 0000000..f5d0970
--- /dev/null
+++ b/scripts/check_whitespace_token.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python
+"""
+ Checker for whitespace tokens
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ Helper script to find whitespace which is not of token type `Whitespace`
+
+ :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+import argparse
+import sys
+import re
+
+from utility import unpack_output_file, process_output_files
+
+
def check_file(path):
    """Return True if *path* contains no wrongly-tokenized whitespace.

    A token whose value is entirely whitespace must carry a ``Whitespace``
    token type -- except inside literals and comments, where other
    highlighting is acceptable.  On the first offending token the location
    is printed as ``path:linenumber`` and False is returned.
    """
    # Bug fix: use a raw string for the pattern -- '\s' in a plain string
    # literal is an invalid escape sequence (DeprecationWarning, and a
    # SyntaxError in future Python versions).
    whitespace_re = re.compile(r'\s+')

    for value, token, linenumber in unpack_output_file(path):
        if whitespace_re.fullmatch(value):
            # We allow " " if it's inside a Literal.String for example
            if 'Literal' in token:
                continue

            # If whitespace is part of a comment, we accept that as well,
            # as comments may be similarly highlighted to literals
            if 'Comment' in token:
                continue

            if 'Whitespace' in token:
                continue

            print(f'{path}:{linenumber}')
            return False

    return True
+
+
def main(args):
    """Run check_file over every output file below ``args.TEST_ROOT``."""
    failures = process_output_files(args.TEST_ROOT, check_file)
    return 1 if failures > 0 else 0
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument('TEST_ROOT',
+ help='Root directory containing the tests')
+ args = parser.parse_args()
+ sys.exit(main(args))
diff --git a/scripts/count_token_references.py b/scripts/count_token_references.py
new file mode 100755
index 0000000..8e798c2
--- /dev/null
+++ b/scripts/count_token_references.py
@@ -0,0 +1,270 @@
+#!/usr/bin/env python
+"""
+Count number of references to tokens in lexer source
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+:program:`count_token_references` counts how many references to all existing
+tokens it can find by "grepping" the source code of the lexers. This can
+be used to find typos in token names, as those tokens are only used by one lexer.
+
+:program:`count_token_references` supports the following options:
+
+.. program:: count_token_references
+
+.. option:: -v, --verbose
+ This gives output while the script is collecting information.
+
+.. option:: --minfiles <COUNT>
+ Only report about tokens that are referenced in at least this many lexer
+ source files (default 1).
+
+.. option:: --maxfiles <COUNT>
+ Only report about tokens that are referenced in at most this many lexer
+ source files (default 1).
+
+.. option:: --minlines <COUNT>
+ Only report about tokens that are referenced in at least this many lexer
+ source lines (default 1).
+
+.. option:: --maxlines <COUNT>
+ Only report about tokens that are referenced in at most this many lexer
+ source lines (default 10).
+
+.. option:: -s, --subtokens
+ When ``--subtoken`` is given each token is also counted for each of its
+ parent tokens. I.e. if we have 10 occurrences of the token
+ ``Token.Literal.Number.Integer`` and 10 occurrences of the token
+ ``Token.Literal.Number.Hex`` but none for ``Token.Literal.Number``, with
+ ``--subtoken`` ``Token.Literal.Number`` would be counted as having
+ 20 references.
+"""
+
+import sys, argparse, re, pathlib
+
+from pygments import token, lexers
+
+
def lookup_all_lexers():
    """
    Iterate through all lexers and fetch them.
    This should create all tokens that any of the lexers produce.

    Returns the number of lexers visited.
    """
    count = 0
    for name, aliases, patterns, mimetypes in lexers.get_all_lexers():
        # Instantiate each lexer once via whichever lookup key it has:
        # first alias, else first filename pattern, else first mimetype.
        if aliases:
            lexers.get_lexer_by_name(aliases[0])
        elif patterns:
            lexers.get_lexer_for_filename(patterns[0])
        elif mimetypes:
            lexers.get_lexer_for_mimetype(mimetypes[0])
        count += 1
    return count
+
+
+def fetch_lexer_sources():
+ """
+ Return the source code of all lexers as a dictionary, mapping filenames
+ to a list of lines.
+ """
+ lexer_dir = (pathlib.Path(__file__).parent / "../pygments/lexers").resolve()
+ lexer_sources = {
+ fn: fn.read_text(encoding='utf-8').splitlines(keepends=False)
+ for fn in lexer_dir.glob("*.py")
+ }
+ return lexer_sources
+
+
def sub_tokens(token):
    """Depth-first generator over ``token`` and all of its sub-tokens.

    The token itself is yielded first, then each entry of
    ``token.subtypes`` is expanded recursively, preserving the
    container's iteration order.
    """
    yield token
    for child in token.subtypes:
        yield from sub_tokens(child)
+
+
class FileCount:
    """
    Stores information about line numbers in a file.

    This is used to store from which lines in a file a certain token is
    referenced.
    """
    def __init__(self, filename):
        # ``filename`` is a pathlib.Path (``.name`` is used in __str__).
        self.filename = filename
        # Line numbers (ints) at which the token is referenced.
        self.lines = []

    def __str__(self):
        if len(self.lines) > 3:
            # NOTE(review): previews up to 5 entries although the cutoff
            # is 3 -- presumably an intentional preview size; kept as-is.
            lines = ", ".join(f"{line:,}" for line in self.lines[:5])
            # Bug fix: the total must be the number of referencing lines
            # (len(self.lines)), not the length of the joined preview
            # string (len(lines)).
            lines = f"{lines}, ... ({len(self.lines):,} lines)"
        else:
            lines = ", ".join(f"{line:,}" for line in self.lines)
        return f"{self.filename.name}[{lines}]"

    def add(self, linenumber):
        """Record one referencing line number."""
        self.lines.append(linenumber)

    def count_lines(self):
        """Return how many lines reference the token in this file."""
        return len(self.lines)
+
+
class TokenCount:
    """
    Stores information about a token and in which files it is referenced.
    """
    def __init__(self, token):
        self.token = token
        # Maps filename -> FileCount with the referencing line numbers.
        self.files = {}

    def add(self, filename, linenumber):
        """Record one reference to the token at ``filename:linenumber``."""
        filecount = self.files.get(filename)
        if filecount is None:
            filecount = self.files[filename] = FileCount(filename)
        filecount.add(linenumber)

    def __str__(self):
        per_file = list(self.files.values())
        if len(per_file) > 3:
            # Preview only the first few files, then an ellipsis summary.
            shown = ", ".join(str(fc) for fc in per_file[:6])
            files = f"{shown}, ... ({len(self.files):,} files)"
        else:
            files = ", ".join(str(fc) for fc in per_file)
        return f"{self.count_files():,} files, {self.count_lines():,} locations: {files}"

    def count_files(self):
        """Return the number of files referencing the token."""
        return len(self.files)

    def count_lines(self):
        """Return the total number of referencing source lines."""
        return sum(fc.count_lines() for fc in self.files.values())
+
+
+def find_token_references(lexer_sources, args):
+ """
+ Find all references to all tokens in the source code of all lexers.
+
+ Note that this can't be 100% reliable, as it searches the source code for
+ certain patterns: It searches for the last two components of a token name,
+ i.e. to find references to the token ``Token.Literal.Number.Integer.Long``
+ it searches for the regular expression ``\\bInteger.Long\\b``. This
+ won't work reliably for top level token like ``Token.String`` since this
+ is often referred to as ``String``, but searching for ``\\bString\\b``
+ yields too many false positives.
+ """
+
+ # Maps token to :class:`TokenCount` objects.
+ token_references = {}
+
+ # Search for each token in each lexer source file and record in which file
+ # and in which line they are referenced
+ for t in sub_tokens(token.Token):
+ parts = list(t)[-2:]
+ if len(parts) == 0:
+ name = "Token"
+ elif len(parts) == 1:
+ name = f"Token.{parts[0]}"
+ else:
+ name = ".".join(parts)
+
+ token_references[t] = tokencount = TokenCount(t)
+
+ if name != "Token":
+ pattern = re.compile(f"\\b{name}\\b")
+
+ for (filename, sourcelines) in lexer_sources.items():
+ for (i, line) in enumerate(sourcelines, 1):
+ if pattern.search(line) is not None:
+ tokencount.add(filename, i)
+ if args.subtoken:
+ t2 = t
+ while t2 is not token.Token:
+ t2 = t2.parent
+ tokencount2 = token_references[t2]
+ tokencount2.add(filename, i)
+
+ return token_references
+
+
+def print_result(token_references, args):
+ def key(item):
+ return (item[1].count_files(), item[1].count_lines())
+
+ for (token, locations) in sorted(token_references.items(), key=key):
+ if args.minfiles <= locations.count_files() <= args.maxfiles and \
+ args.minlines <= locations.count_lines() <= args.maxlines:
+ print(f"{token}: {locations}")
+
+
+def main(args=None):
+ p = argparse.ArgumentParser(description="Count how often each token is used by the lexers")
+ p.add_argument(
+ "-v", "--verbose",
+ dest="verbose", help="Give more output.",
+ default=False, action="store_true"
+ )
+ p.add_argument(
+ "--minfiles",
+ dest="minfiles", metavar="COUNT", type=int,
+ help="Report all tokens referenced by at least COUNT lexer source files (default %(default)s)",
+ default=1
+ )
+ p.add_argument(
+ "--maxfiles",
+ dest="maxfiles", metavar="COUNT", type=int,
+ help="Report all tokens referenced by at most COUNT lexer source files (default %(default)s)",
+ default=1
+ )
+ p.add_argument(
+ "--minlines",
+ dest="minlines", metavar="COUNT", type=int,
+ help="Report all tokens referenced by at least COUNT lexer source lines (default %(default)s)",
+ default=1
+ )
+ p.add_argument(
+ "--maxlines",
+ dest="maxlines", metavar="COUNT", type=int,
+ help="Report all tokens referenced by at most COUNT lexer source lines (default %(default)s)",
+ default=10
+ )
+ p.add_argument(
+ "-s", "--subtoken",
+ dest="subtoken",
+ help="Include count of references to subtokens in the count for each token (default %(default)s)",
+ default=False, action="store_true"
+ )
+
+ args = p.parse_args(args)
+
+ if args.verbose:
+ print("Looking up all lexers ... ", end="", flush=True)
+ count = lookup_all_lexers()
+ if args.verbose:
+ print(f"found {count:,} lexers")
+
+ if args.verbose:
+ print("Fetching lexer source code ... ", end="", flush=True)
+ lexer_sources = fetch_lexer_sources()
+ if args.verbose:
+ print(f"found {len(lexer_sources):,} lexer source files")
+
+ if args.verbose:
+ print("Finding token references ... ", end="", flush=True)
+ token_references = find_token_references(lexer_sources, args)
+ if args.verbose:
+ print(f"found references to {len(token_references):,} tokens")
+
+ if args.verbose:
+ print()
+ print("Result:")
+ print_result(token_references, args)
+
+
+if __name__ == "__main__":
+ sys.exit(main())
diff --git a/scripts/debug_lexer.py b/scripts/debug_lexer.py
new file mode 100755
index 0000000..6323d9c
--- /dev/null
+++ b/scripts/debug_lexer.py
@@ -0,0 +1,306 @@
+#!/usr/bin/python
+"""
+ Lexing error finder
+ ~~~~~~~~~~~~~~~~~~~
+
+ For the source files given on the command line, display
+ the text where Error tokens are being generated, along
+ with some context.
+
+ :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+import os
+import sys
+import struct
+
+# always prefer Pygments from source if exists
+srcpath = os.path.join(os.path.dirname(__file__), '..')
+if os.path.isdir(os.path.join(srcpath, 'pygments')):
+ sys.path.insert(0, srcpath)
+
+
+from pygments.lexer import RegexLexer, ExtendedRegexLexer, LexerContext, \
+ ProfilingRegexLexer, ProfilingRegexLexerMeta
+from pygments.lexers import get_lexer_by_name, find_lexer_class, \
+ find_lexer_class_for_filename, guess_lexer
+from pygments.token import Error, Text, _TokenType
+from pygments.cmdline import _parse_options
+
+
+class DebuggingRegexLexer(ExtendedRegexLexer):
+ """Make the state stack, position and current match instance attributes."""
+
+ def get_tokens_unprocessed(self, text, stack=('root',)):
+ """
+ Split ``text`` into (tokentype, text) pairs.
+
+ ``stack`` is the initial stack (default: ``['root']``)
+ """
+ tokendefs = self._tokens
+ self.ctx = ctx = LexerContext(text, 0)
+ ctx.stack = list(stack)
+ statetokens = tokendefs[ctx.stack[-1]]
+ while 1:
+ for rexmatch, action, new_state in statetokens:
+ self.m = m = rexmatch(text, ctx.pos, ctx.end)
+ if m:
+ if action is not None:
+ if type(action) is _TokenType:
+ yield ctx.pos, action, m.group()
+ ctx.pos = m.end()
+ else:
+ if not isinstance(self, ExtendedRegexLexer):
+ yield from action(self, m)
+ ctx.pos = m.end()
+ else:
+ yield from action(self, m, ctx)
+ if not new_state:
+ # altered the state stack?
+ statetokens = tokendefs[ctx.stack[-1]]
+ if new_state is not None:
+ # state transition
+ if isinstance(new_state, tuple):
+ for state in new_state:
+ if state == '#pop':
+ ctx.stack.pop()
+ elif state == '#push':
+ ctx.stack.append(ctx.stack[-1])
+ else:
+ ctx.stack.append(state)
+ elif isinstance(new_state, int):
+ # pop
+ del ctx.stack[new_state:]
+ elif new_state == '#push':
+ ctx.stack.append(ctx.stack[-1])
+ else:
+ assert False, 'wrong state def: %r' % new_state
+ statetokens = tokendefs[ctx.stack[-1]]
+ break
+ else:
+ try:
+ if ctx.pos >= ctx.end:
+ break
+ if text[ctx.pos] == '\n':
+ # at EOL, reset state to 'root'
+ ctx.stack = ['root']
+ statetokens = tokendefs['root']
+ yield ctx.pos, Text, '\n'
+ ctx.pos += 1
+ continue
+ yield ctx.pos, Error, text[ctx.pos]
+ ctx.pos += 1
+ except IndexError:
+ break
+
+
def decode_atheris(bstr):
    """Decode a byte string into a Unicode string using the algorithm
    of Google's Atheris fuzzer library, which aims to produce a wide
    range of possible Unicode inputs.

    Corresponds to ConsumeUnicodeImpl() with filter_surrogates=false in
    https://github.com/google/atheris/blob/master/fuzzed_data_provider.cc
    """
    if len(bstr) < 2:
        return ''
    # The first byte only selects if the rest is decoded as ascii, "utf-16" or "utf-32"
    spec, bstr = bstr[0], bstr[1:]
    if spec & 1:  # pure ASCII
        return ''.join(chr(ch & 0x7f) for ch in bstr)
    elif spec & 2:  # UTF-16
        # Drop a trailing odd byte so the length is a multiple of 2.
        bstr = bstr if len(bstr) % 2 == 0 else bstr[:-1]
        return bstr.decode('utf16')

    # else UTF-32
    def valid_codepoint(ch):
        # Mask into the 21-bit code point range and clear bits that would
        # produce values above U+10FFFF.
        ch &= 0x1fffff
        if ch & 0x100000:
            ch &= ~0x0f0000
        return chr(ch)

    # Unpack as many whole 32-bit words as fit; leftover bytes are padding.
    chars = struct.unpack('%dI%dx' % divmod(len(bstr), 4), bstr)
    # Bug fix: the original read ``map(valid_codepoint), chars`` which
    # calls map() with a single argument and passes ``chars`` to join()
    # -- a TypeError at runtime.
    return ''.join(map(valid_codepoint, chars))
+
+
+def main(fn, lexer=None, options={}):
+ if fn == '-':
+ text = sys.stdin.read()
+ else:
+ with open(fn, 'rb') as fp:
+ text = fp.read()
+ if decode_strategy == 'latin1':
+ try:
+ text = text.decode('utf8')
+ except UnicodeError:
+ print('Warning: non-UTF8 input, using latin1')
+ text = text.decode('latin1')
+ elif decode_strategy == 'utf8-ignore':
+ try:
+ text = text.decode('utf8')
+ except UnicodeError:
+ print('Warning: ignoring non-UTF8 bytes in input')
+ text = text.decode('utf8', 'ignore')
+ elif decode_strategy == 'atheris':
+ text = decode_atheris(text)
+
+ text = text.strip('\n') + '\n'
+
+ if lexer is not None:
+ lxcls = get_lexer_by_name(lexer).__class__
+ elif guess:
+ lxcls = guess_lexer(text).__class__
+ print('Using lexer: %s (%s.%s)' % (lxcls.name, lxcls.__module__,
+ lxcls.__name__))
+ else:
+ lxcls = find_lexer_class_for_filename(os.path.basename(fn))
+ if lxcls is None:
+ name, rest = fn.split('_', 1)
+ lxcls = find_lexer_class(name)
+ if lxcls is None:
+ raise AssertionError('no lexer found for file %r' % fn)
+ print('Using lexer: %s (%s.%s)' % (lxcls.name, lxcls.__module__,
+ lxcls.__name__))
+ debug_lexer = False
+ # if profile:
+ # # does not work for e.g. ExtendedRegexLexers
+ # if lxcls.__bases__ == (RegexLexer,):
+ # # yes we can! (change the metaclass)
+ # lxcls.__class__ = ProfilingRegexLexerMeta
+ # lxcls.__bases__ = (ProfilingRegexLexer,)
+ # lxcls._prof_sort_index = profsort
+ # else:
+ # if lxcls.__bases__ == (RegexLexer,):
+ # lxcls.__bases__ = (DebuggingRegexLexer,)
+ # debug_lexer = True
+ # elif lxcls.__bases__ == (DebuggingRegexLexer,):
+ # # already debugged before
+ # debug_lexer = True
+ # else:
+ # # HACK: ExtendedRegexLexer subclasses will only partially work here.
+ # lxcls.__bases__ = (DebuggingRegexLexer,)
+ # debug_lexer = True
+
+ lx = lxcls(**options)
+ lno = 1
+ tokens = []
+ states = []
+
+ def show_token(tok, state):
+ reprs = list(map(repr, tok))
+ print(' ' + reprs[1] + ' ' + ' ' * (29-len(reprs[1])) + reprs[0], end=' ')
+ if debug_lexer:
+ print(' ' + ' ' * (29-len(reprs[0])) + ' : '.join(state)
+ if state else '', end=' ')
+ print()
+
+ for type, val in lx.get_tokens(text):
+ lno += val.count('\n')
+ if type == Error and not ignerror:
+ print('Error parsing', fn, 'on line', lno)
+ if not showall:
+ print('Previous tokens' + (debug_lexer and ' and states' or '') + ':')
+ for i in range(max(len(tokens) - num, 0), len(tokens)):
+ if debug_lexer:
+ show_token(tokens[i], states[i])
+ else:
+ show_token(tokens[i], None)
+ print('Error token:')
+ vlen = len(repr(val))
+ print(' ' + repr(val), end=' ')
+ if debug_lexer and hasattr(lx, 'ctx'):
+ print(' ' * (60-vlen) + ' : '.join(lx.ctx.stack), end=' ')
+ print()
+ print()
+ return 1
+ tokens.append((type, val))
+ if debug_lexer:
+ if hasattr(lx, 'ctx'):
+ states.append(lx.ctx.stack[:])
+ else:
+ states.append(None)
+ if showall:
+ show_token((type, val), states[-1] if debug_lexer else None)
+ return 0
+
+
+def print_help():
+ print('''\
+Pygments development helper to quickly debug lexers.
+
+ scripts/debug_lexer.py [options] file ...
+
+Give one or more filenames to lex them and display possible error tokens
+and/or profiling info. Files are assumed to be encoded in UTF-8.
+
+Selecting lexer and options:
+
+ -l NAME use lexer named NAME (default is to guess from
+ the given filenames)
+ -g guess lexer from content
+ -u if input is non-utf8, use "ignore" handler instead
+ of using latin1 encoding
+ -U use Atheris fuzzer's method of converting
+ byte input to Unicode
+ -O OPTIONSTR use lexer options parsed from OPTIONSTR
+
+Debugging lexing errors:
+
+ -n N show the last N tokens on error
+ -a always show all lexed tokens (default is only
+ to show them when an error occurs)
+ -e do not stop on error tokens
+
+Profiling:
+
+ -p use the ProfilingRegexLexer to profile regexes
+ instead of the debugging lexer
+ -s N sort profiling output by column N (default is
+ column 4, the time per call)
+''')
+
+
+num = 10
+showall = False
+ignerror = False
+lexer = None
+options = {}
+profile = False
+profsort = 4
+guess = False
+decode_strategy = 'latin1'
+
+if __name__ == '__main__':
+ import getopt
+ opts, args = getopt.getopt(sys.argv[1:], 'n:l:aepO:s:hguU')
+ for opt, val in opts:
+ if opt == '-n':
+ num = int(val)
+ elif opt == '-a':
+ showall = True
+ elif opt == '-e':
+ ignerror = True
+ elif opt == '-l':
+ lexer = val
+ elif opt == '-p':
+ profile = True
+ elif opt == '-s':
+ profsort = int(val)
+ elif opt == '-O':
+ options = _parse_options([val])
+ elif opt == '-g':
+ guess = True
+ elif opt == '-u':
+ decode_strategy = 'utf8-ignore'
+ elif opt == '-U':
+ decode_strategy = 'atheris'
+ elif opt == '-h':
+ print_help()
+ sys.exit(0)
+ ret = 0
+ if not args:
+ print_help()
+ for f in args:
+ ret += main(f, lexer, options)
+ sys.exit(bool(ret))
diff --git a/scripts/detect_missing_analyse_text.py b/scripts/detect_missing_analyse_text.py
new file mode 100644
index 0000000..c377b0b
--- /dev/null
+++ b/scripts/detect_missing_analyse_text.py
@@ -0,0 +1,48 @@
+"""
+ detect_missing_analyse_text
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+import sys
+
+from pygments.lexers import get_all_lexers, find_lexer_class
+from pygments.lexer import Lexer
+
+import argparse
+
+
def main(args):
    """Report lexers that likely need an ``analyse_text`` implementation.

    Returns a bitmask exit status: bit 1 set if some lexer has
    ``analyse_text`` equal to None, bit 2 set if lexers sharing a
    filename pattern lack their own ``analyse_text``.
    """
    uses = {}

    for name, aliases, filenames, mimetypes in get_all_lexers(plugins=False):
        cls = find_lexer_class(name)
        if not cls.aliases and not args.skip_no_aliases:
            print(cls, "has no aliases")
        for pattern in filenames:
            uses.setdefault(pattern, []).append(cls)

    ret = 0
    for pattern, classes in uses.items():
        if len(classes) <= 1:
            # Unambiguous pattern -- analyse_text is not needed.
            continue
        for cls in classes:
            if cls.analyse_text is None:
                print(cls, "has a None analyse_text")
                ret |= 1
            elif Lexer.analyse_text.__doc__ == cls.analyse_text.__doc__:
                # Identical docstring to the base class means the method
                # was inherited, not overridden.
                print(cls, "needs analyse_text, multiple lexers for", pattern)
                ret |= 2
    return ret
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument('--skip-no-aliases',
+ help='Skip checks for a lexer with no aliases',
+ action='store_true',
+ default=False)
+ args = parser.parse_args()
+ sys.exit(main(args))
diff --git a/scripts/gen_mapfiles.py b/scripts/gen_mapfiles.py
new file mode 100644
index 0000000..a5aed0c
--- /dev/null
+++ b/scripts/gen_mapfiles.py
@@ -0,0 +1,53 @@
+"""
+ scripts/gen_mapfiles.py
+ ~~~~~~~~~~~~~~~~~~~~~~~
+
+ Regenerate mapping files.
+
+ :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+from importlib import import_module
+from pathlib import Path
+import re
+import sys
+
+top_src_dir = Path(__file__).parent.parent
+pygments_package = top_src_dir / 'pygments'
+sys.path.insert(0, str(pygments_package.parent.resolve()))
+
+from pygments.util import docstring_headline
+
+def main():
+ for key in ['lexers', 'formatters']:
+ lines = []
+ for file in (pygments_package / key).glob('[!_]*.py'):
+ module_name = '.'.join(file.relative_to(pygments_package.parent).with_suffix('').parts)
+ print(module_name)
+ module = import_module(module_name)
+ for obj_name in module.__all__:
+ obj = getattr(module, obj_name)
+ desc = (module_name, obj.name, tuple(obj.aliases), tuple(obj.filenames))
+ if key == 'lexers':
+ desc += (tuple(obj.mimetypes),)
+ elif key == 'formatters':
+ desc += (docstring_headline(obj),)
+ else:
+ assert False
+ lines.append(f' {obj_name!r}: {desc!r},')
+ # Sort to make diffs minimal.
+ lines.sort()
+ new_dict = '\n'.join(lines)
+ content = f'''# Automatically generated by scripts/gen_mapfiles.py.
+# DO NOT EDIT BY HAND; run `make mapfiles` instead.
+
+{key.upper()} = {{
+{new_dict}
+}}
+'''
+ (pygments_package / key / '_mapping.py').write_text(content, encoding='utf8')
+ print(f'=== {len(lines)} {key} processed.')
+
+if __name__ == '__main__':
+ main()
diff --git a/scripts/get_css_properties.py b/scripts/get_css_properties.py
new file mode 100644
index 0000000..3afe98e
--- /dev/null
+++ b/scripts/get_css_properties.py
@@ -0,0 +1,33 @@
+"""
+ get_css_properties
+ ~~~~~~~~~~~~~~~~~~
+
+ :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+
+from pygments.util import format_lines
+import json
+import urllib.request
+
+HEADER = '''\
+"""
+ pygments.lexers._css_builtins
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ This file is autogenerated by scripts/get_css_properties.py
+
+ :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+'''
+
+if __name__ == "__main__":
+ data_request = urllib.request.urlopen('https://www.w3.org/Style/CSS/all-properties.en.json')
+ data = json.load(data_request)
+ names = set([p['property'] for p in data if p['property'] != '--*'])
+
+ with open('../pygments/lexers/_css_builtins.py', 'w') as builtin_file:
+ builtin_file.write(HEADER)
+ builtin_file.write(format_lines('_css_properties', sorted(names)))
diff --git a/scripts/get_vimkw.py b/scripts/get_vimkw.py
new file mode 100644
index 0000000..0b2d82e
--- /dev/null
+++ b/scripts/get_vimkw.py
@@ -0,0 +1,72 @@
+import re
+
+from pygments.util import format_lines
+
+r_line = re.compile(r"^(syn keyword vimCommand contained|syn keyword vimOption "
+ r"contained|syn keyword vimAutoEvent contained)\s+(.*)")
+r_item = re.compile(r"(\w+)(?:\[(\w+)\])?")
+
+HEADER = '''\
+# -*- coding: utf-8 -*-
+"""
+ pygments.lexers._vim_builtins
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ This file is autogenerated by scripts/get_vimkw.py
+
+ :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+# Split up in multiple functions so it's importable by jython, which has a
+# per-method size limit.
+'''
+
+METHOD = '''\
+def _get%(key)s():
+%(body)s
+ return var
+%(key)s = _get%(key)s()
+'''
+
def getkw(input, output):
    """Parse the vim syntax file *input* and write the extracted keyword
    tables to *output* as an importable Python module.
    """
    # The original used the ``file()`` builtin, which only exists on
    # Python 2 (the script otherwise uses Python 3 ``print(..., file=)``).
    # Use ``open()`` inside ``with`` so handles are closed deterministically.
    with open(output, 'w') as out:
        # Copy template from an existing file.
        print(HEADER, file=out)

        output_info = {'command': [], 'option': [], 'auto': []}
        with open(input) as vimfile:
            for line in vimfile:
                m = r_line.match(line)
                if not m:
                    continue
                # Decide which output gets mapped to d
                if 'vimCommand' in m.group(1):
                    d = output_info['command']
                elif 'AutoEvent' in m.group(1):
                    d = output_info['auto']
                else:
                    d = output_info['option']

                # Extract all the shortened versions
                for i in r_item.finditer(m.group(2)):
                    d.append('(%r,%r)' %
                             (i.group(1), "%s%s" % (i.group(1), i.group(2) or '')))

        # These mappings are not in the syntax file but expected by the lexer.
        output_info['option'].append("('nnoremap','nnoremap')")
        output_info['option'].append("('inoremap','inoremap')")
        output_info['option'].append("('vnoremap','vnoremap')")

        for key, keywordlist in output_info.items():
            keywordlist.sort()
            body = format_lines('var', keywordlist, raw=True, indent_level=1)
            # METHOD interpolates ``key`` and ``body`` from the local scope.
            print(METHOD % locals(), file=out)
+
def is_keyword(w, keywords):
    """Return True if *w* is a valid, possibly abbreviated, keyword.

    *keywords* maps each minimal abbreviation to its full spelling.  *w*
    matches when one of its prefixes is a known abbreviation and *w* itself
    is a prefix of the corresponding full keyword.
    """
    # Try the longest prefix first so the most specific abbreviation wins.
    for length in reversed(range(1, len(w) + 1)):
        prefix = w[:length]
        if prefix in keywords:
            return keywords[prefix][:len(w)] == w
    return False
+
+if __name__ == "__main__":
+ getkw("/usr/share/vim/vim74/syntax/vim.vim",
+ "pygments/lexers/_vim_builtins.py")
diff --git a/scripts/pylintrc b/scripts/pylintrc
new file mode 100644
index 0000000..b602eaa
--- /dev/null
+++ b/scripts/pylintrc
@@ -0,0 +1,301 @@
+# lint Python modules using external checkers.
+#
+# This is the main checker controlling the other ones and the reports
+# generation. It is itself both a raw checker and an astng checker in order
+# to:
+# * handle message activation / deactivation at the module level
+# * handle some basic but necessary stats'data (number of classes, methods...)
+#
+[MASTER]
+
+# Specify a configuration file.
+#rcfile=
+
+# Profiled execution.
+profile=no
+
+# Add <file or directory> to the black list. It should be a base name, not a
+# path. You may set this option multiple times.
+ignore=.svn
+
+# Pickle collected data for later comparisons.
+persistent=yes
+
+# Set the cache size for astng objects.
+cache-size=500
+
+# List of plugins (as comma separated values of python modules names) to load,
+# usually to register additional checkers.
+load-plugins=
+
+
+[MESSAGES CONTROL]
+
+# Enable only checker(s) with the given id(s). This option conflicts with the
+# disable-checker option.
+#enable-checker=
+
+# Enable all checker(s) except those with the given id(s). This option
+# conflicts with the disable-checker option.
+#disable-checker=
+
+# Enable all messages in the listed categories.
+#enable-msg-cat=
+
+# Disable all messages in the listed categories.
+#disable-msg-cat=
+
+# Enable the message(s) with the given id(s).
+#enable-msg=
+
+# Disable the message(s) with the given id(s).
+disable-msg=C0323,W0142,C0301,C0103,C0111,E0213,C0302,C0203,W0703,R0201
+
+
+[REPORTS]
+
+# set the output format. Available formats are text, parseable, colorized and
+# html
+output-format=colorized
+
+# Include message's id in output
+include-ids=yes
+
+# Put messages in a separate file for each module / package specified on the
+# command line instead of printing them on stdout. Reports (if any) will be
+# written in a file name "pylint_global.[txt|html]".
+files-output=no
+
+# Tells whether to display a full report or only the messages
+reports=yes
+
+# Python expression which should return a note less than 10 (10 is the highest
+# note). You have access to the variables errors, warning and statement, which
+# respectively contain the number of errors / warnings messages and the total
+# number of statements analyzed. This is used by the global evaluation report
+# (R0004).
+evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
+
+# Add a comment according to your evaluation note. This is used by the global
+# evaluation report (R0004).
+comment=no
+
+# Enable the report(s) with the given id(s).
+#enable-report=
+
+# Disable the report(s) with the given id(s).
+#disable-report=
+
+
+# checks for
+# * unused variables / imports
+# * undefined variables
+# * redefinition of variable from builtins or from an outer scope
+# * use of variable before assignment
+#
+[VARIABLES]
+
+# Tells whether we should check for unused import in __init__ files.
+init-import=no
+
+# A regular expression matching names used for dummy variables (i.e. not used).
+dummy-variables-rgx=_|dummy
+
+# List of additional names supposed to be defined in builtins. Remember that
+# you should avoid to define new builtins when possible.
+additional-builtins=
+
+
+# try to find bugs in the code using type inference
+#
+[TYPECHECK]
+
+# Tells whether missing members accessed in mixin class should be ignored. A
+# mixin class is detected if its name ends with "mixin" (case insensitive).
+ignore-mixin-members=yes
+
+# When zope mode is activated, consider the acquired-members option to ignore
+# access to some undefined attributes.
+zope=no
+
+# List of members which are usually get through zope's acquisition mechanism and
+# so shouldn't trigger E0201 when accessed (need zope=yes to be considered).
+acquired-members=REQUEST,acl_users,aq_parent
+
+
+# checks for :
+# * doc strings
+# * modules / classes / functions / methods / arguments / variables name
+# * number of arguments, local variables, branches, returns and statements in
+# functions, methods
+# * required module attributes
+# * dangerous default values as arguments
+# * redefinition of function / method / class
+# * uses of the global statement
+#
+[BASIC]
+
+# Required attributes for module, separated by a comma
+required-attributes=
+
+# Regular expression which should only match functions or classes name which do
+# not require a docstring
+no-docstring-rgx=__.*__
+
+# Regular expression which should only match correct module names
+module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
+
+# Regular expression which should only match correct module level names
+const-rgx=(([A-Z_][A-Z1-9_]*)|(__.*__))$
+
+# Regular expression which should only match correct class names
+class-rgx=[A-Z_][a-zA-Z0-9]+$
+
+# Regular expression which should only match correct function names
+function-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct method names
+method-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct instance attribute names
+attr-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct argument names
+argument-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct variable names
+variable-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct list comprehension /
+# generator expression variable names
+inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$
+
+# Good variable names which should always be accepted, separated by a comma
+good-names=i,j,k,ex,Run,_
+
+# Bad variable names which should always be refused, separated by a comma
+bad-names=foo,bar,baz,toto,tutu,tata
+
+# List of builtins function names that should not be used, separated by a comma
+bad-functions=apply,input
+
+
+# checks for sign of poor/misdesign:
+# * number of methods, attributes, local variables...
+# * size, complexity of functions, methods
+#
+[DESIGN]
+
+# Maximum number of arguments for function / method
+max-args=12
+
+# Maximum number of locals for function / method body
+max-locals=30
+
+# Maximum number of return / yield for function / method body
+max-returns=12
+
+# Maximum number of branch for function / method body
+max-branchs=30
+
+# Maximum number of statements in function / method body
+max-statements=60
+
+# Maximum number of parents for a class (see R0901).
+max-parents=7
+
+# Maximum number of attributes for a class (see R0902).
+max-attributes=20
+
+# Minimum number of public methods for a class (see R0903).
+min-public-methods=0
+
+# Maximum number of public methods for a class (see R0904).
+max-public-methods=20
+
+
+# checks for
+# * external modules dependencies
+# * relative / wildcard imports
+# * cyclic imports
+# * uses of deprecated modules
+#
+[IMPORTS]
+
+# Deprecated modules which should not be used, separated by a comma
+deprecated-modules=regsub,string,TERMIOS,Bastion,rexec
+
+# Create a graph of every (i.e. internal and external) dependencies in the
+# given file (report R0402 must not be disabled)
+import-graph=
+
+# Create a graph of external dependencies in the given file (report R0402 must
+# not be disabled)
+ext-import-graph=
+
+# Create a graph of internal dependencies in the given file (report R0402 must
+# not be disabled)
+int-import-graph=
+
+
+# checks for :
+# * methods without self as first argument
+# * overridden methods signature
+# * access only to existent members via self
+# * attributes not defined in the __init__ method
+# * supported interfaces implementation
+# * unreachable code
+#
+[CLASSES]
+
+# List of interface methods to ignore, separated by a comma. This is used for
+# instance to not check methods defines in Zope's Interface base class.
+ignore-iface-methods=isImplementedBy,deferred,extends,names,namesAndDescriptions,queryDescriptionFor,getBases,getDescriptionFor,getDoc,getName,getTaggedValue,getTaggedValueTags,isEqualOrExtendedBy,setTaggedValue,isImplementedByInstancesOf,adaptWith,is_implemented_by
+
+# List of method names used to declare (i.e. assign) instance attributes.
+defining-attr-methods=__init__,__new__,setUp
+
+
+# checks for similarities and duplicated code. This computation may be
+# memory / CPU intensive, so you should disable it if you experience some
+# problems.
+#
+[SIMILARITIES]
+
+# Minimum lines number of a similarity.
+min-similarity-lines=10
+
+# Ignore comments when computing similarities.
+ignore-comments=yes
+
+# Ignore docstrings when computing similarities.
+ignore-docstrings=yes
+
+
+# checks for:
+# * warning notes in the code like FIXME, XXX
+# * PEP 263: source code with non ascii character but no encoding declaration
+#
+[MISCELLANEOUS]
+
+# List of note tags to take in consideration, separated by a comma.
+notes=FIXME,XXX,TODO
+
+
+# checks for :
+# * unauthorized constructions
+# * strict indentation
+# * line length
+# * use of <> instead of !=
+#
+[FORMAT]
+
+# Maximum number of characters on a single line.
+max-line-length=90
+
+# Maximum number of lines in a module
+max-module-lines=1000
+
+# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
+# tab).
+indent-string=' '
diff --git a/scripts/release-checklist b/scripts/release-checklist
new file mode 100644
index 0000000..087917f
--- /dev/null
+++ b/scripts/release-checklist
@@ -0,0 +1,24 @@
+Release checklist
+=================
+
+* Check ``git status``
+* ``make check``
+* LATER when configured properly: ``make pylint``
+* ``tox``
+* Update version in ``pygments/__init__.py``
+* Check setup.py metadata: long description, trove classifiers
+* Update release date/code name in ``CHANGES``
+* ``git commit``
+* Wait for the CI to finish
+* ``make clean``
+* ``python3 -m build``
+* Check the size of the generated packages. If they're significantly different from the last release, check if the repository is in a modified state and that ``make clean`` was run.
+* ``twine upload dist/Pygments-$NEWVER*``
+* Check PyPI release page for obvious errors (like different file sizes!)
+* ``git tag -a``
+* Add new ``CHANGES`` heading for next version
+* ``git commit``
+* ``git push``, ``git push --tags``
+* Add new release on https://github.com/pygments/pygments/releases
+* Add new milestone on https://github.com/pygments/pygments/milestones if needed
+* Write announcement and send to mailing list/python-announce
diff --git a/scripts/update_contrasts.py b/scripts/update_contrasts.py
new file mode 100755
index 0000000..156bc5c
--- /dev/null
+++ b/scripts/update_contrasts.py
@@ -0,0 +1,21 @@
#!/usr/bin/env python3
"""
    Updates tests/contrast/min_contrasts.json
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    Whenever you have improved the minimum contrast of a style you should run
    this script, so that the test_contrasts.py test prevents future
    degradations.
"""

import os
import sys

# always prefer Pygments from source if exists
srcpath = os.path.join(os.path.dirname(__file__), "..")
if os.path.isdir(os.path.join(srcpath, "pygments")):
    sys.path.insert(0, srcpath)

# Imported after the sys.path tweak above so the in-tree Pygments is used.
import tests.contrast.test_contrasts

# Re-run the contrast check without failing on improvements, then rewrite the
# JSON baseline so that future regressions are caught by the test suite.
tests.contrast.test_contrasts.test_contrasts(fail_if_improved=False)
tests.contrast.test_contrasts.update_json()
diff --git a/scripts/utility.py b/scripts/utility.py
new file mode 100644
index 0000000..4d59a1b
--- /dev/null
+++ b/scripts/utility.py
@@ -0,0 +1,69 @@
+"""
+ Utility functions for test scripts
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+import os
+import os.path
+
+
def unpack_output_file(path):
    """
    Unpack an output file into objects containing the line number, the text,
    and the token name. The output file can be either a ``.output`` file
    containing a token stream, or a ``.txt`` with input and tokens.

    Yields ``OutputEntry(text, token, linenumber)`` namedtuples; line numbers
    are 1-based and refer to the file, not the token stream.
    """
    from collections import namedtuple
    entry = namedtuple('OutputEntry', ['text', 'token', 'linenumber'])

    skip_until_tokens = path.endswith('.txt')

    # Context manager so the handle is closed deterministically; explicit
    # encoding because output fixtures are presumably UTF-8 (TODO confirm)
    # and must not depend on the locale.
    with open(path, encoding='utf-8') as output_file:
        for linenumber, line in enumerate(output_file, start=1):
            line = line.strip()
            if not line:
                continue

            if skip_until_tokens:
                # In .txt files everything before the marker is input text.
                if line == '---tokens---':
                    skip_until_tokens = False
                continue

            # Line can start with ' or ", so let's check which one it is
            # and find the matching one
            quotation_end = line.rfind(line[0])
            text = line[1:quotation_end]
            token = line.split()[-1]
            # Undo the escaping used when the token stream was written out.
            text = text.replace('\\n', '\n')
            text = text.replace('\\t', '\t')
            yield entry(text, token, linenumber)
+
+
def process_output_files(root_directory, callback):
    """
    Process all output (i.e. .output and .txt files for snippets) files
    in a directory tree using the provided callback.
    The callback should return ``True`` in case of success, ``False``
    otherwise.

    The function returns the number of files for which the callback returned
    ``False``.
    """
    errors = 0
    # 'dirpath'/'filename' instead of 'dir'/'file' so the builtins are not
    # shadowed; behavior is otherwise unchanged.
    for dirpath, _, filenames in os.walk(root_directory):
        for filename in filenames:
            _, ext = os.path.splitext(filename)

            if ext not in {'.txt', '.output'}:
                continue

            path = os.path.join(dirpath, filename)
            if not callback(path):
                errors += 1

    return errors
diff --git a/scripts/vim2pygments.py b/scripts/vim2pygments.py
new file mode 100755
index 0000000..ec9b63b
--- /dev/null
+++ b/scripts/vim2pygments.py
@@ -0,0 +1,932 @@
+#!/usr/bin/env python
+"""
+ Vim Colorscheme Converter
+ ~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ This script converts vim colorscheme files to valid pygments
+ style classes meant for putting into modules.
+
+ :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+import sys
+import re
+from os import path
+from io import StringIO
+
+split_re = re.compile(r'(?<!\\)\s+')
+
+SCRIPT_NAME = 'Vim Colorscheme Converter'
+SCRIPT_VERSION = '0.1'
+
+
+COLORS = {
+ # Numeric Colors
+ '0': '#000000',
+ '1': '#c00000',
+ '2': '#008000',
+ '3': '#808000',
+ '4': '#0000c0',
+ '5': '#c000c0',
+ '6': '#008080',
+ '7': '#c0c0c0',
+ '8': '#808080',
+ '9': '#ff6060',
+ '10': '#00ff00',
+ '11': '#ffff00',
+ '12': '#8080ff',
+ '13': '#ff40ff',
+ '14': '#00ffff',
+ '15': '#ffffff',
+ # Named Colors
+ 'alice': '#f0f8ff',
+ 'aliceblue': '#f0f8ff',
+ 'antique': '#faebd7',
+ 'antiquewhite': '#faebd7',
+ 'antiquewhite1': '#ffefdb',
+ 'antiquewhite2': '#eedfcc',
+ 'antiquewhite3': '#cdc0b0',
+ 'antiquewhite4': '#8b8378',
+ 'aquamarine': '#7fffd4',
+ 'aquamarine1': '#7fffd4',
+ 'aquamarine2': '#76eec6',
+ 'aquamarine3': '#66cdaa',
+ 'aquamarine4': '#458b74',
+ 'azure': '#f0ffff',
+ 'azure1': '#f0ffff',
+ 'azure2': '#e0eeee',
+ 'azure3': '#c1cdcd',
+ 'azure4': '#838b8b',
+ 'beige': '#f5f5dc',
+ 'bisque': '#ffe4c4',
+ 'bisque1': '#ffe4c4',
+ 'bisque2': '#eed5b7',
+ 'bisque3': '#cdb79e',
+ 'bisque4': '#8b7d6b',
+ 'black': '#000000',
+ 'blanched': '#ffebcd',
+ 'blanchedalmond': '#ffebcd',
+ 'blue': '#8a2be2',
+ 'blue1': '#0000ff',
+ 'blue2': '#0000ee',
+ 'blue3': '#0000cd',
+ 'blue4': '#00008b',
+ 'blueviolet': '#8a2be2',
+ 'brown': '#a52a2a',
+ 'brown1': '#ff4040',
+ 'brown2': '#ee3b3b',
+ 'brown3': '#cd3333',
+ 'brown4': '#8b2323',
+ 'burlywood': '#deb887',
+ 'burlywood1': '#ffd39b',
+ 'burlywood2': '#eec591',
+ 'burlywood3': '#cdaa7d',
+ 'burlywood4': '#8b7355',
+ 'cadet': '#5f9ea0',
+ 'cadetblue': '#5f9ea0',
+ 'cadetblue1': '#98f5ff',
+ 'cadetblue2': '#8ee5ee',
+ 'cadetblue3': '#7ac5cd',
+ 'cadetblue4': '#53868b',
+ 'chartreuse': '#7fff00',
+ 'chartreuse1': '#7fff00',
+ 'chartreuse2': '#76ee00',
+ 'chartreuse3': '#66cd00',
+ 'chartreuse4': '#458b00',
+ 'chocolate': '#d2691e',
+ 'chocolate1': '#ff7f24',
+ 'chocolate2': '#ee7621',
+ 'chocolate3': '#cd661d',
+ 'chocolate4': '#8b4513',
+ 'coral': '#ff7f50',
+ 'coral1': '#ff7256',
+ 'coral2': '#ee6a50',
+ 'coral3': '#cd5b45',
+ 'coral4': '#8b3e2f',
+ 'cornflower': '#6495ed',
+ 'cornflowerblue': '#6495ed',
+ 'cornsilk': '#fff8dc',
+ 'cornsilk1': '#fff8dc',
+ 'cornsilk2': '#eee8cd',
+ 'cornsilk3': '#cdc8b1',
+ 'cornsilk4': '#8b8878',
+ 'cyan': '#00ffff',
+ 'cyan1': '#00ffff',
+ 'cyan2': '#00eeee',
+ 'cyan3': '#00cdcd',
+ 'cyan4': '#008b8b',
+ 'dark': '#8b0000',
+ 'darkblue': '#00008b',
+ 'darkcyan': '#008b8b',
+ 'darkgoldenrod': '#b8860b',
+ 'darkgoldenrod1': '#ffb90f',
+ 'darkgoldenrod2': '#eead0e',
+ 'darkgoldenrod3': '#cd950c',
+ 'darkgoldenrod4': '#8b6508',
+ 'darkgray': '#a9a9a9',
+ 'darkgreen': '#006400',
+ 'darkgrey': '#a9a9a9',
+ 'darkkhaki': '#bdb76b',
+ 'darkmagenta': '#8b008b',
+ 'darkolivegreen': '#556b2f',
+ 'darkolivegreen1': '#caff70',
+ 'darkolivegreen2': '#bcee68',
+ 'darkolivegreen3': '#a2cd5a',
+ 'darkolivegreen4': '#6e8b3d',
+ 'darkorange': '#ff8c00',
+ 'darkorange1': '#ff7f00',
+ 'darkorange2': '#ee7600',
+ 'darkorange3': '#cd6600',
+ 'darkorange4': '#8b4500',
+ 'darkorchid': '#9932cc',
+ 'darkorchid1': '#bf3eff',
+ 'darkorchid2': '#b23aee',
+ 'darkorchid3': '#9a32cd',
+ 'darkorchid4': '#68228b',
+ 'darkred': '#8b0000',
+ 'darksalmon': '#e9967a',
+ 'darkseagreen': '#8fbc8f',
+ 'darkseagreen1': '#c1ffc1',
+ 'darkseagreen2': '#b4eeb4',
+ 'darkseagreen3': '#9bcd9b',
+ 'darkseagreen4': '#698b69',
+ 'darkslateblue': '#483d8b',
+ 'darkslategray': '#2f4f4f',
+ 'darkslategray1': '#97ffff',
+ 'darkslategray2': '#8deeee',
+ 'darkslategray3': '#79cdcd',
+ 'darkslategray4': '#528b8b',
+ 'darkslategrey': '#2f4f4f',
+ 'darkturquoise': '#00ced1',
+ 'darkviolet': '#9400d3',
+ 'deep': '#ff1493',
+ 'deeppink': '#ff1493',
+ 'deeppink1': '#ff1493',
+ 'deeppink2': '#ee1289',
+ 'deeppink3': '#cd1076',
+ 'deeppink4': '#8b0a50',
+ 'deepskyblue': '#00bfff',
+ 'deepskyblue1': '#00bfff',
+ 'deepskyblue2': '#00b2ee',
+ 'deepskyblue3': '#009acd',
+ 'deepskyblue4': '#00688b',
+ 'dim': '#696969',
+ 'dimgray': '#696969',
+ 'dimgrey': '#696969',
+ 'dodger': '#1e90ff',
+ 'dodgerblue': '#1e90ff',
+ 'dodgerblue1': '#1e90ff',
+ 'dodgerblue2': '#1c86ee',
+ 'dodgerblue3': '#1874cd',
+ 'dodgerblue4': '#104e8b',
+ 'firebrick': '#b22222',
+ 'firebrick1': '#ff3030',
+ 'firebrick2': '#ee2c2c',
+ 'firebrick3': '#cd2626',
+ 'firebrick4': '#8b1a1a',
+ 'floral': '#fffaf0',
+ 'floralwhite': '#fffaf0',
+ 'forest': '#228b22',
+ 'forestgreen': '#228b22',
+ 'gainsboro': '#dcdcdc',
+ 'ghost': '#f8f8ff',
+ 'ghostwhite': '#f8f8ff',
+ 'gold': '#ffd700',
+ 'gold1': '#ffd700',
+ 'gold2': '#eec900',
+ 'gold3': '#cdad00',
+ 'gold4': '#8b7500',
+ 'goldenrod': '#daa520',
+ 'goldenrod1': '#ffc125',
+ 'goldenrod2': '#eeb422',
+ 'goldenrod3': '#cd9b1d',
+ 'goldenrod4': '#8b6914',
+ 'gray': '#bebebe',
+ 'gray0': '#000000',
+ 'gray1': '#030303',
+ 'gray10': '#1a1a1a',
+ 'gray100': '#ffffff',
+ 'gray11': '#1c1c1c',
+ 'gray12': '#1f1f1f',
+ 'gray13': '#212121',
+ 'gray14': '#242424',
+ 'gray15': '#262626',
+ 'gray16': '#292929',
+ 'gray17': '#2b2b2b',
+ 'gray18': '#2e2e2e',
+ 'gray19': '#303030',
+ 'gray2': '#050505',
+ 'gray20': '#333333',
+ 'gray21': '#363636',
+ 'gray22': '#383838',
+ 'gray23': '#3b3b3b',
+ 'gray24': '#3d3d3d',
+ 'gray25': '#404040',
+ 'gray26': '#424242',
+ 'gray27': '#454545',
+ 'gray28': '#474747',
+ 'gray29': '#4a4a4a',
+ 'gray3': '#080808',
+ 'gray30': '#4d4d4d',
+ 'gray31': '#4f4f4f',
+ 'gray32': '#525252',
+ 'gray33': '#545454',
+ 'gray34': '#575757',
+ 'gray35': '#595959',
+ 'gray36': '#5c5c5c',
+ 'gray37': '#5e5e5e',
+ 'gray38': '#616161',
+ 'gray39': '#636363',
+ 'gray4': '#0a0a0a',
+ 'gray40': '#666666',
+ 'gray41': '#696969',
+ 'gray42': '#6b6b6b',
+ 'gray43': '#6e6e6e',
+ 'gray44': '#707070',
+ 'gray45': '#737373',
+ 'gray46': '#757575',
+ 'gray47': '#787878',
+ 'gray48': '#7a7a7a',
+ 'gray49': '#7d7d7d',
+ 'gray5': '#0d0d0d',
+ 'gray50': '#7f7f7f',
+ 'gray51': '#828282',
+ 'gray52': '#858585',
+ 'gray53': '#878787',
+ 'gray54': '#8a8a8a',
+ 'gray55': '#8c8c8c',
+ 'gray56': '#8f8f8f',
+ 'gray57': '#919191',
+ 'gray58': '#949494',
+ 'gray59': '#969696',
+ 'gray6': '#0f0f0f',
+ 'gray60': '#999999',
+ 'gray61': '#9c9c9c',
+ 'gray62': '#9e9e9e',
+ 'gray63': '#a1a1a1',
+ 'gray64': '#a3a3a3',
+ 'gray65': '#a6a6a6',
+ 'gray66': '#a8a8a8',
+ 'gray67': '#ababab',
+ 'gray68': '#adadad',
+ 'gray69': '#b0b0b0',
+ 'gray7': '#121212',
+ 'gray70': '#b3b3b3',
+ 'gray71': '#b5b5b5',
+ 'gray72': '#b8b8b8',
+ 'gray73': '#bababa',
+ 'gray74': '#bdbdbd',
+ 'gray75': '#bfbfbf',
+ 'gray76': '#c2c2c2',
+ 'gray77': '#c4c4c4',
+ 'gray78': '#c7c7c7',
+ 'gray79': '#c9c9c9',
+ 'gray8': '#141414',
+ 'gray80': '#cccccc',
+ 'gray81': '#cfcfcf',
+ 'gray82': '#d1d1d1',
+ 'gray83': '#d4d4d4',
+ 'gray84': '#d6d6d6',
+ 'gray85': '#d9d9d9',
+ 'gray86': '#dbdbdb',
+ 'gray87': '#dedede',
+ 'gray88': '#e0e0e0',
+ 'gray89': '#e3e3e3',
+ 'gray9': '#171717',
+ 'gray90': '#e5e5e5',
+ 'gray91': '#e8e8e8',
+ 'gray92': '#ebebeb',
+ 'gray93': '#ededed',
+ 'gray94': '#f0f0f0',
+ 'gray95': '#f2f2f2',
+ 'gray96': '#f5f5f5',
+ 'gray97': '#f7f7f7',
+ 'gray98': '#fafafa',
+ 'gray99': '#fcfcfc',
+ 'green': '#adff2f',
+ 'green1': '#00ff00',
+ 'green2': '#00ee00',
+ 'green3': '#00cd00',
+ 'green4': '#008b00',
+ 'greenyellow': '#adff2f',
+ 'grey': '#bebebe',
+ 'grey0': '#000000',
+ 'grey1': '#030303',
+ 'grey10': '#1a1a1a',
+ 'grey100': '#ffffff',
+ 'grey11': '#1c1c1c',
+ 'grey12': '#1f1f1f',
+ 'grey13': '#212121',
+ 'grey14': '#242424',
+ 'grey15': '#262626',
+ 'grey16': '#292929',
+ 'grey17': '#2b2b2b',
+ 'grey18': '#2e2e2e',
+ 'grey19': '#303030',
+ 'grey2': '#050505',
+ 'grey20': '#333333',
+ 'grey21': '#363636',
+ 'grey22': '#383838',
+ 'grey23': '#3b3b3b',
+ 'grey24': '#3d3d3d',
+ 'grey25': '#404040',
+ 'grey26': '#424242',
+ 'grey27': '#454545',
+ 'grey28': '#474747',
+ 'grey29': '#4a4a4a',
+ 'grey3': '#080808',
+ 'grey30': '#4d4d4d',
+ 'grey31': '#4f4f4f',
+ 'grey32': '#525252',
+ 'grey33': '#545454',
+ 'grey34': '#575757',
+ 'grey35': '#595959',
+ 'grey36': '#5c5c5c',
+ 'grey37': '#5e5e5e',
+ 'grey38': '#616161',
+ 'grey39': '#636363',
+ 'grey4': '#0a0a0a',
+ 'grey40': '#666666',
+ 'grey41': '#696969',
+ 'grey42': '#6b6b6b',
+ 'grey43': '#6e6e6e',
+ 'grey44': '#707070',
+ 'grey45': '#737373',
+ 'grey46': '#757575',
+ 'grey47': '#787878',
+ 'grey48': '#7a7a7a',
+ 'grey49': '#7d7d7d',
+ 'grey5': '#0d0d0d',
+ 'grey50': '#7f7f7f',
+ 'grey51': '#828282',
+ 'grey52': '#858585',
+ 'grey53': '#878787',
+ 'grey54': '#8a8a8a',
+ 'grey55': '#8c8c8c',
+ 'grey56': '#8f8f8f',
+ 'grey57': '#919191',
+ 'grey58': '#949494',
+ 'grey59': '#969696',
+ 'grey6': '#0f0f0f',
+ 'grey60': '#999999',
+ 'grey61': '#9c9c9c',
+ 'grey62': '#9e9e9e',
+ 'grey63': '#a1a1a1',
+ 'grey64': '#a3a3a3',
+ 'grey65': '#a6a6a6',
+ 'grey66': '#a8a8a8',
+ 'grey67': '#ababab',
+ 'grey68': '#adadad',
+ 'grey69': '#b0b0b0',
+ 'grey7': '#121212',
+ 'grey70': '#b3b3b3',
+ 'grey71': '#b5b5b5',
+ 'grey72': '#b8b8b8',
+ 'grey73': '#bababa',
+ 'grey74': '#bdbdbd',
+ 'grey75': '#bfbfbf',
+ 'grey76': '#c2c2c2',
+ 'grey77': '#c4c4c4',
+ 'grey78': '#c7c7c7',
+ 'grey79': '#c9c9c9',
+ 'grey8': '#141414',
+ 'grey80': '#cccccc',
+ 'grey81': '#cfcfcf',
+ 'grey82': '#d1d1d1',
+ 'grey83': '#d4d4d4',
+ 'grey84': '#d6d6d6',
+ 'grey85': '#d9d9d9',
+ 'grey86': '#dbdbdb',
+ 'grey87': '#dedede',
+ 'grey88': '#e0e0e0',
+ 'grey89': '#e3e3e3',
+ 'grey9': '#171717',
+ 'grey90': '#e5e5e5',
+ 'grey91': '#e8e8e8',
+ 'grey92': '#ebebeb',
+ 'grey93': '#ededed',
+ 'grey94': '#f0f0f0',
+ 'grey95': '#f2f2f2',
+ 'grey96': '#f5f5f5',
+ 'grey97': '#f7f7f7',
+ 'grey98': '#fafafa',
+ 'grey99': '#fcfcfc',
+ 'honeydew': '#f0fff0',
+ 'honeydew1': '#f0fff0',
+ 'honeydew2': '#e0eee0',
+ 'honeydew3': '#c1cdc1',
+ 'honeydew4': '#838b83',
+ 'hot': '#ff69b4',
+ 'hotpink': '#ff69b4',
+ 'hotpink1': '#ff6eb4',
+ 'hotpink2': '#ee6aa7',
+ 'hotpink3': '#cd6090',
+ 'hotpink4': '#8b3a62',
+ 'indian': '#cd5c5c',
+ 'indianred': '#cd5c5c',
+ 'indianred1': '#ff6a6a',
+ 'indianred2': '#ee6363',
+ 'indianred3': '#cd5555',
+ 'indianred4': '#8b3a3a',
+ 'ivory': '#fffff0',
+ 'ivory1': '#fffff0',
+ 'ivory2': '#eeeee0',
+ 'ivory3': '#cdcdc1',
+ 'ivory4': '#8b8b83',
+ 'khaki': '#f0e68c',
+ 'khaki1': '#fff68f',
+ 'khaki2': '#eee685',
+ 'khaki3': '#cdc673',
+ 'khaki4': '#8b864e',
+ 'lavender': '#fff0f5',
+ 'lavenderblush': '#fff0f5',
+ 'lavenderblush1': '#fff0f5',
+ 'lavenderblush2': '#eee0e5',
+ 'lavenderblush3': '#cdc1c5',
+ 'lavenderblush4': '#8b8386',
+ 'lawn': '#7cfc00',
+ 'lawngreen': '#7cfc00',
+ 'lemon': '#fffacd',
+ 'lemonchiffon': '#fffacd',
+ 'lemonchiffon1': '#fffacd',
+ 'lemonchiffon2': '#eee9bf',
+ 'lemonchiffon3': '#cdc9a5',
+ 'lemonchiffon4': '#8b8970',
+ 'light': '#90ee90',
+ 'lightblue': '#add8e6',
+ 'lightblue1': '#bfefff',
+ 'lightblue2': '#b2dfee',
+ 'lightblue3': '#9ac0cd',
+ 'lightblue4': '#68838b',
+ 'lightcoral': '#f08080',
+ 'lightcyan': '#e0ffff',
+ 'lightcyan1': '#e0ffff',
+ 'lightcyan2': '#d1eeee',
+ 'lightcyan3': '#b4cdcd',
+ 'lightcyan4': '#7a8b8b',
+ 'lightgoldenrod': '#eedd82',
+ 'lightgoldenrod1': '#ffec8b',
+ 'lightgoldenrod2': '#eedc82',
+ 'lightgoldenrod3': '#cdbe70',
+ 'lightgoldenrod4': '#8b814c',
+ 'lightgoldenrodyellow': '#fafad2',
+ 'lightgray': '#d3d3d3',
+ 'lightgreen': '#90ee90',
+ 'lightgrey': '#d3d3d3',
+ 'lightpink': '#ffb6c1',
+ 'lightpink1': '#ffaeb9',
+ 'lightpink2': '#eea2ad',
+ 'lightpink3': '#cd8c95',
+ 'lightpink4': '#8b5f65',
+ 'lightsalmon': '#ffa07a',
+ 'lightsalmon1': '#ffa07a',
+ 'lightsalmon2': '#ee9572',
+ 'lightsalmon3': '#cd8162',
+ 'lightsalmon4': '#8b5742',
+ 'lightseagreen': '#20b2aa',
+ 'lightskyblue': '#87cefa',
+ 'lightskyblue1': '#b0e2ff',
+ 'lightskyblue2': '#a4d3ee',
+ 'lightskyblue3': '#8db6cd',
+ 'lightskyblue4': '#607b8b',
+ 'lightslateblue': '#8470ff',
+ 'lightslategray': '#778899',
+ 'lightslategrey': '#778899',
+ 'lightsteelblue': '#b0c4de',
+ 'lightsteelblue1': '#cae1ff',
+ 'lightsteelblue2': '#bcd2ee',
+ 'lightsteelblue3': '#a2b5cd',
+ 'lightsteelblue4': '#6e7b8b',
+ 'lightyellow': '#ffffe0',
+ 'lightyellow1': '#ffffe0',
+ 'lightyellow2': '#eeeed1',
+ 'lightyellow3': '#cdcdb4',
+ 'lightyellow4': '#8b8b7a',
+ 'lime': '#32cd32',
+ 'limegreen': '#32cd32',
+ 'linen': '#faf0e6',
+ 'magenta': '#ff00ff',
+ 'magenta1': '#ff00ff',
+ 'magenta2': '#ee00ee',
+ 'magenta3': '#cd00cd',
+ 'magenta4': '#8b008b',
+ 'maroon': '#b03060',
+ 'maroon1': '#ff34b3',
+ 'maroon2': '#ee30a7',
+ 'maroon3': '#cd2990',
+ 'maroon4': '#8b1c62',
+ 'medium': '#9370db',
+ 'mediumaquamarine': '#66cdaa',
+ 'mediumblue': '#0000cd',
+ 'mediumorchid': '#ba55d3',
+ 'mediumorchid1': '#e066ff',
+ 'mediumorchid2': '#d15fee',
+ 'mediumorchid3': '#b452cd',
+ 'mediumorchid4': '#7a378b',
+ 'mediumpurple': '#9370db',
+ 'mediumpurple1': '#ab82ff',
+ 'mediumpurple2': '#9f79ee',
+ 'mediumpurple3': '#8968cd',
+ 'mediumpurple4': '#5d478b',
+ 'mediumseagreen': '#3cb371',
+ 'mediumslateblue': '#7b68ee',
+ 'mediumspringgreen': '#00fa9a',
+ 'mediumturquoise': '#48d1cc',
+ 'mediumvioletred': '#c71585',
+ 'midnight': '#191970',
+ 'midnightblue': '#191970',
+ 'mint': '#f5fffa',
+ 'mintcream': '#f5fffa',
+ 'misty': '#ffe4e1',
+ 'mistyrose': '#ffe4e1',
+ 'mistyrose1': '#ffe4e1',
+ 'mistyrose2': '#eed5d2',
+ 'mistyrose3': '#cdb7b5',
+ 'mistyrose4': '#8b7d7b',
+ 'moccasin': '#ffe4b5',
+ 'navajo': '#ffdead',
+ 'navajowhite': '#ffdead',
+ 'navajowhite1': '#ffdead',
+ 'navajowhite2': '#eecfa1',
+ 'navajowhite3': '#cdb38b',
+ 'navajowhite4': '#8b795e',
+ 'navy': '#000080',
+ 'navyblue': '#000080',
+ 'old': '#fdf5e6',
+ 'oldlace': '#fdf5e6',
+ 'olive': '#6b8e23',
+ 'olivedrab': '#6b8e23',
+ 'olivedrab1': '#c0ff3e',
+ 'olivedrab2': '#b3ee3a',
+ 'olivedrab3': '#9acd32',
+ 'olivedrab4': '#698b22',
+ 'orange': '#ff4500',
+ 'orange1': '#ffa500',
+ 'orange2': '#ee9a00',
+ 'orange3': '#cd8500',
+ 'orange4': '#8b5a00',
+ 'orangered': '#ff4500',
+ 'orangered1': '#ff4500',
+ 'orangered2': '#ee4000',
+ 'orangered3': '#cd3700',
+ 'orangered4': '#8b2500',
+ 'orchid': '#da70d6',
+ 'orchid1': '#ff83fa',
+ 'orchid2': '#ee7ae9',
+ 'orchid3': '#cd69c9',
+ 'orchid4': '#8b4789',
+ 'pale': '#db7093',
+ 'palegoldenrod': '#eee8aa',
+ 'palegreen': '#98fb98',
+ 'palegreen1': '#9aff9a',
+ 'palegreen2': '#90ee90',
+ 'palegreen3': '#7ccd7c',
+ 'palegreen4': '#548b54',
+ 'paleturquoise': '#afeeee',
+ 'paleturquoise1': '#bbffff',
+ 'paleturquoise2': '#aeeeee',
+ 'paleturquoise3': '#96cdcd',
+ 'paleturquoise4': '#668b8b',
+ 'palevioletred': '#db7093',
+ 'palevioletred1': '#ff82ab',
+ 'palevioletred2': '#ee799f',
+ 'palevioletred3': '#cd6889',
+ 'palevioletred4': '#8b475d',
+ 'papaya': '#ffefd5',
+ 'papayawhip': '#ffefd5',
+ 'peach': '#ffdab9',
+ 'peachpuff': '#ffdab9',
+ 'peachpuff1': '#ffdab9',
+ 'peachpuff2': '#eecbad',
+ 'peachpuff3': '#cdaf95',
+ 'peachpuff4': '#8b7765',
+ 'peru': '#cd853f',
+ 'pink': '#ffc0cb',
+ 'pink1': '#ffb5c5',
+ 'pink2': '#eea9b8',
+ 'pink3': '#cd919e',
+ 'pink4': '#8b636c',
+ 'plum': '#dda0dd',
+ 'plum1': '#ffbbff',
+ 'plum2': '#eeaeee',
+ 'plum3': '#cd96cd',
+ 'plum4': '#8b668b',
+ 'powder': '#b0e0e6',
+ 'powderblue': '#b0e0e6',
+ 'purple': '#a020f0',
+ 'purple1': '#9b30ff',
+ 'purple2': '#912cee',
+ 'purple3': '#7d26cd',
+ 'purple4': '#551a8b',
+ 'red': '#ff0000',
+ 'red1': '#ff0000',
+ 'red2': '#ee0000',
+ 'red3': '#cd0000',
+ 'red4': '#8b0000',
+ 'rosy': '#bc8f8f',
+ 'rosybrown': '#bc8f8f',
+ 'rosybrown1': '#ffc1c1',
+ 'rosybrown2': '#eeb4b4',
+ 'rosybrown3': '#cd9b9b',
+ 'rosybrown4': '#8b6969',
+ 'royal': '#4169e1',
+ 'royalblue': '#4169e1',
+ 'royalblue1': '#4876ff',
+ 'royalblue2': '#436eee',
+ 'royalblue3': '#3a5fcd',
+ 'royalblue4': '#27408b',
+ 'saddle': '#8b4513',
+ 'saddlebrown': '#8b4513',
+ 'salmon': '#fa8072',
+ 'salmon1': '#ff8c69',
+ 'salmon2': '#ee8262',
+ 'salmon3': '#cd7054',
+ 'salmon4': '#8b4c39',
+ 'sandy': '#f4a460',
+ 'sandybrown': '#f4a460',
+ 'sea': '#2e8b57',
+ 'seagreen': '#2e8b57',
+ 'seagreen1': '#54ff9f',
+ 'seagreen2': '#4eee94',
+ 'seagreen3': '#43cd80',
+ 'seagreen4': '#2e8b57',
+ 'seashell': '#fff5ee',
+ 'seashell1': '#fff5ee',
+ 'seashell2': '#eee5de',
+ 'seashell3': '#cdc5bf',
+ 'seashell4': '#8b8682',
+ 'sienna': '#a0522d',
+ 'sienna1': '#ff8247',
+ 'sienna2': '#ee7942',
+ 'sienna3': '#cd6839',
+ 'sienna4': '#8b4726',
+ 'sky': '#87ceeb',
+ 'skyblue': '#87ceeb',
+ 'skyblue1': '#87ceff',
+ 'skyblue2': '#7ec0ee',
+ 'skyblue3': '#6ca6cd',
+ 'skyblue4': '#4a708b',
+ 'slate': '#6a5acd',
+ 'slateblue': '#6a5acd',
+ 'slateblue1': '#836fff',
+ 'slateblue2': '#7a67ee',
+ 'slateblue3': '#6959cd',
+ 'slateblue4': '#473c8b',
+ 'slategray': '#708090',
+ 'slategray1': '#c6e2ff',
+ 'slategray2': '#b9d3ee',
+ 'slategray3': '#9fb6cd',
+ 'slategray4': '#6c7b8b',
+ 'slategrey': '#708090',
+ 'snow': '#fffafa',
+ 'snow1': '#fffafa',
+ 'snow2': '#eee9e9',
+ 'snow3': '#cdc9c9',
+ 'snow4': '#8b8989',
+ 'spring': '#00ff7f',
+ 'springgreen': '#00ff7f',
+ 'springgreen1': '#00ff7f',
+ 'springgreen2': '#00ee76',
+ 'springgreen3': '#00cd66',
+ 'springgreen4': '#008b45',
+ 'steel': '#4682b4',
+ 'steelblue': '#4682b4',
+ 'steelblue1': '#63b8ff',
+ 'steelblue2': '#5cacee',
+ 'steelblue3': '#4f94cd',
+ 'steelblue4': '#36648b',
+ 'tan': '#d2b48c',
+ 'tan1': '#ffa54f',
+ 'tan2': '#ee9a49',
+ 'tan3': '#cd853f',
+ 'tan4': '#8b5a2b',
+ 'thistle': '#d8bfd8',
+ 'thistle1': '#ffe1ff',
+ 'thistle2': '#eed2ee',
+ 'thistle3': '#cdb5cd',
+ 'thistle4': '#8b7b8b',
+ 'tomato': '#ff6347',
+ 'tomato1': '#ff6347',
+ 'tomato2': '#ee5c42',
+ 'tomato3': '#cd4f39',
+ 'tomato4': '#8b3626',
+ 'turquoise': '#40e0d0',
+ 'turquoise1': '#00f5ff',
+ 'turquoise2': '#00e5ee',
+ 'turquoise3': '#00c5cd',
+ 'turquoise4': '#00868b',
+ 'violet': '#ee82ee',
+ 'violetred': '#d02090',
+ 'violetred1': '#ff3e96',
+ 'violetred2': '#ee3a8c',
+ 'violetred3': '#cd3278',
+ 'violetred4': '#8b2252',
+ 'wheat': '#f5deb3',
+ 'wheat1': '#ffe7ba',
+ 'wheat2': '#eed8ae',
+ 'wheat3': '#cdba96',
+ 'wheat4': '#8b7e66',
+ 'white': '#ffffff',
+ 'whitesmoke': '#f5f5f5',
+ 'yellow': '#ffff00',
+ 'yellow1': '#ffff00',
+ 'yellow2': '#eeee00',
+ 'yellow3': '#cdcd00',
+ 'yellow4': '#8b8b00',
+ 'yellowgreen': '#9acd32'
+}
+
#: Map of Vim highlight-group names to Pygments token names.  A tuple
#: means the Vim group feeds several Pygments tokens; the empty string
#: for 'normal' denotes the bare ``Token`` root (the style default).
#: (The original dict listed 'number' twice; the duplicate is removed.)
TOKENS = {
    'normal': '',
    'string': 'String',
    'number': 'Number',
    'float': 'Number.Float',
    'constant': 'Name.Constant',
    'statement': ('Keyword', 'Name.Tag'),
    'identifier': 'Name.Variable',
    'operator': 'Operator.Word',
    'label': 'Name.Label',
    'exception': 'Name.Exception',
    'function': ('Name.Function', 'Name.Attribute'),
    'preproc': 'Comment.Preproc',
    'comment': 'Comment',
    'type': 'Keyword.Type',
    'diffadd': 'Generic.Inserted',
    'diffdelete': 'Generic.Deleted',
    'error': 'Generic.Error',
    'errormsg': 'Generic.Traceback',
    'title': ('Generic.Heading', 'Generic.Subheading'),
    'underlined': 'Generic.Emph',
    'special': 'Name.Entity',
    'nontext': 'Generic.Output'
}

#: Top-level Pygments token names referenced by TOKENS (e.g. 'Name',
#: 'Generic'); used to build the import line of generated style modules.
TOKEN_TYPES = {
    name.split('.')[0]
    for value in TOKENS.values()
    for name in (value if isinstance(value, tuple) else (value,))
    if name
}
+
+
def get_vim_color(color):
    """Normalize a Vim color value to a ``#rrggbb`` hex string.

    Full 7-character hex values are returned unchanged; the short
    ``#rgb`` form is widened by zero-padding each digit
    (``#abc`` -> ``#a0b0c0``).  Anything else is treated as a color
    name and looked up in COLORS; unknown names yield None.
    """
    if color.startswith('#'):
        if len(color) == 7:
            return color
        # Bug fix: the original ``'0'.join(color)[1:]`` kept the '#'
        # inside the join and produced an invalid 8-character value
        # such as '#0a0b0c0'; join only the digits instead.
        return '#%s0' % '0'.join(color[1:])
    return COLORS.get(color.lower())
+
+
def find_colors(code):
    """Parse Vim colorscheme source and extract Pygments style data.

    Returns a ``(default_token, color_map)`` pair: ``default_token`` is
    the style string for the bare ``Token`` root (from the 'Normal'
    group) and ``color_map`` maps Pygments token names to style strings.

    NOTE(review): depends on the module-level ``split_re`` regex (defined
    earlier in the file) to tokenize each line, and on the loop variables
    ``token`` and ``key`` deliberately leaking into the ``set`` closure
    below — statement order here must not be changed.
    """
    colors = {'Normal': {}}
    bg_color = None
    # Closure helper: record *value* under *attrib* for the highlight
    # group currently bound to the enclosing loop variable ``token``.
    # gui* keys (checked via the leaked ``key``) override earlier
    # cterm/term settings; otherwise the first setting wins.
    def set(attrib, value):
        if token not in colors:
            colors[token] = {}
        if key.startswith('gui') or attrib not in colors[token]:
            colors[token][attrib] = value

    for line in code.splitlines():
        if line.startswith('"'):
            # Vim comment line.
            continue
        parts = split_re.split(line.strip())
        if len(parts) == 2 and parts[0] == 'set':
            # ':set background dark' implies a black default background
            # (applied at the end only if 'Normal' sets no bgcolor).
            p = parts[1].split()
            if p[0] == 'background' and p[1] == 'dark':
                token = 'Normal'
                bg_color = '#000000'
        elif len(parts) > 2 and \
             len(parts[0]) >= 2 and \
             'highlight'.startswith(parts[0]):
            # Any unambiguous abbreviation of ':highlight' (hi, hig, ...).
            token = parts[1].lower()
            if token not in TOKENS:
                continue
            for item in parts[2:]:
                # Each argument is a key=value pair, e.g. guifg=#ffffff.
                p = item.split('=', 1)
                if not len(p) == 2:
                    continue
                key, value = p
                if key in ('ctermfg', 'guifg'):
                    color = get_vim_color(value)
                    if color:
                        set('color', color)
                elif key in ('ctermbg', 'guibg'):
                    color = get_vim_color(value)
                    if color:
                        set('bgcolor', color)
                elif key in ('term', 'cterm', 'gui'):
                    # Comma-separated attribute list, e.g. gui=bold,italic.
                    items = value.split(',')
                    for item in items:
                        item = item.lower()
                        if item == 'none':
                            set('noinherit', True)
                        elif item == 'bold':
                            set('bold', True)
                        elif item == 'underline':
                            set('underline', True)
                        elif item == 'italic':
                            set('italic', True)

    if bg_color is not None and not colors['Normal'].get('bgcolor'):
        colors['Normal']['bgcolor'] = bg_color

    # Flatten the per-group attribute dicts into Pygments style strings.
    color_map = {}
    for token, styles in colors.items():
        if token in TOKENS:
            tmp = []
            if styles.get('noinherit'):
                tmp.append('noinherit')
            if 'color' in styles:
                tmp.append(styles['color'])
            if 'bgcolor' in styles:
                tmp.append('bg:' + styles['bgcolor'])
            if styles.get('bold'):
                tmp.append('bold')
            if styles.get('italic'):
                tmp.append('italic')
            if styles.get('underline'):
                tmp.append('underline')
            tokens = TOKENS[token]
            if not isinstance(tokens, tuple):
                tokens = (tokens,)
            for token in tokens:
                color_map[token] = ' '.join(tmp)

    # 'normal' maps to '' in TOKENS, so the root style lands under key ''.
    # NOTE(review): this raises KeyError when the scheme never highlights
    # 'Normal' (the seeded capital-N entry is not in TOKENS) — confirm
    # all input schemes define it.
    default_token = color_map.pop('')
    return default_token, color_map
+
+
class StyleWriter:
    """Render a parsed Vim colorscheme as a Pygments style module."""

    def __init__(self, code, name):
        self.code = code          # raw .vim colorscheme source
        self.name = name.lower()  # scheme name, lowercased for naming

    def write_header(self, out):
        """Write the module docstring, imports and the class statement."""
        out.write('# -*- coding: utf-8 -*-\n"""\n')
        out.write('    %s Colorscheme\n' % self.name.title())
        out.write('    %s\n\n' % ('~' * (len(self.name) + 12)))
        out.write('    Converted by %s\n' % SCRIPT_NAME)
        out.write('"""\nfrom pygments.style import Style\n')
        out.write('from pygments.token import Token, %s\n\n' % ', '.join(TOKEN_TYPES))
        out.write('class %sStyle(Style):\n\n' % self.name.title())

    def write(self, out):
        """Write the complete generated style module to *out*."""
        self.write_header(out)
        default_token, tokens = find_colors(self.code)
        tokens = list(tokens.items())
        # Sort entries by token-name length so parent tokens precede
        # their children.  Bug fix: the original used a Python 2 cmp
        # function (a TypeError on Python 3) that also compared
        # len(a[0]) with len(a[1]) of the *same* item.
        tokens.sort(key=lambda item: len(item[0]))
        bg_color = [x[3:] for x in default_token.split() if x.startswith('bg:')]
        if bg_color:
            out.write('    background_color = %r\n' % bg_color[0])
        out.write('    styles = {\n')
        out.write('        %-20s%r,\n' % ('Token:', default_token))
        for token, definition in tokens:
            if definition:
                out.write('        %-20s%r,\n' % (token + ':', definition))
        out.write('    }')

    def __repr__(self):
        # Bug fix: originally called the nonexistent ``self.write_style``.
        out = StringIO()
        self.write(out)
        return out.getvalue()
+
+
def convert(filename, stream=None):
    """Convert the Vim colorscheme in *filename* to a Pygments style.

    The style class is named after the file's basename (a trailing
    '.vim' is stripped).  If *stream* is given the generated module is
    written to it and None is returned; otherwise the module source is
    returned as a string.
    """
    name = path.basename(filename)
    if name.endswith('.vim'):
        name = name[:-4]
    # Bug fix: the Python 2 ``file()`` builtin no longer exists; a
    # context manager also guarantees the handle is closed on error.
    with open(filename) as f:
        code = f.read()
    writer = StyleWriter(code, name)
    out = stream if stream is not None else StringIO()
    writer.write(out)
    if stream is None:
        return out.getvalue()
+
+
def main():
    """Command-line entry point: convert the given .vim file to stdout.

    Returns 2 on usage errors, 1 when the file is missing, and None
    (success) otherwise.
    """
    args = sys.argv
    if len(args) != 2 or args[1] in ('-h', '--help'):
        print('Usage: %s <filename.vim>' % args[0])
        return 2
    if args[1] in ('-v', '--version'):
        print('%s %s' % (SCRIPT_NAME, SCRIPT_VERSION))
        return
    target = args[1]
    if not path.exists(target) or not path.isfile(target):
        print('Error: %s not found' % target)
        return 1
    convert(target, sys.stdout)
    sys.stdout.write('\n')
+
+
if __name__ == '__main__':
    # Exit with main()'s return code; 'or 0' maps the None success
    # return to exit status 0.
    sys.exit(main() or 0)