diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 11:33:32 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 11:33:32 +0000 |
commit | 1f403ad2197fc7442409f434ee574f3e6b46fb73 (patch) | |
tree | 0299c6dd11d5edfa918a29b6456bc1875f1d288c | |
parent | Initial commit. (diff) | |
download | pygments-1f403ad2197fc7442409f434ee574f3e6b46fb73.tar.xz pygments-1f403ad2197fc7442409f434ee574f3e6b46fb73.zip |
Adding upstream version 2.14.0+dfsg.upstream/2.14.0+dfsgupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
1029 files changed, 149790 insertions, 0 deletions
diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..a6c6adb --- /dev/null +++ b/.coveragerc @@ -0,0 +1,3 @@ +[run] +include = + pygments/* diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..e08d9e1 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,16 @@ +doc +tests +Pygments.egg-info +TAGS +build +dist +htmlcov +venv +**/__pycache__ +.* +*.rst +*.egg +*.pyo +.*.sw[op] + +!/doc/pyodide/meta.yaml diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..44a295f --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +tests/examplefiles/*/*.output linguist-generated diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml new file mode 100644 index 0000000..2d26dfb --- /dev/null +++ b/.github/workflows/build.yaml @@ -0,0 +1,72 @@ +name: Pygments + +on: [push, pull_request] + +permissions: + contents: read # to fetch code (actions/checkout) + +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, windows-latest] + python-version: ["3.6", "3.7", "3.8", "3.9", "3.10", "3.11"] + exclude: + - os: ubuntu-latest + python-version: "3.6" + include: + - os: ubuntu-20.04 + python-version: "3.6" + max-parallel: 4 + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + - name: Install package + run: | + pip install -r requirements.txt + pip install . 
+ - name: Test package + run: pytest -W error + + check: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + - name: Run make check + run: make check + - name: Fail if the basic checks failed + run: make check + if: runner.os == 'Linux' + + check-mapfiles: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + - name: Regenerate mapfiles + run: make mapfiles + - name: Fail if mapfiles changed + run: | + if git ls-files -m | grep mapping; then + echo 'Please run "make mapfiles" and add the changes to a commit.' + exit 1 + fi + + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: 3.8 + - name: Check out regexlint + run: git clone https://github.com/pygments/regexlint + - name: Run regexlint + run: make regexlint REGEXLINT=`pwd`/regexlint diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml new file mode 100644 index 0000000..41adf4e --- /dev/null +++ b/.github/workflows/docs.yaml @@ -0,0 +1,40 @@ +name: Docs + +on: + push: + branches: + - master + +permissions: {} +jobs: + build: + permissions: + contents: write # to push pages branch (peaceiris/actions-gh-pages) + + runs-on: ubuntu-latest + steps: + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: "3.10" + - name: Checkout Pygments + uses: actions/checkout@v2 + - name: Install Sphinx & WCAG contrast ratio + run: pip install Sphinx wcag-contrast-ratio + - name: Create Pyodide WASM package + run: cd doc && make pyodide + - name: Sphinx build + run: | + cd doc + WEBSITE_BUILD=1 make dirhtml + touch _build/dirhtml/.nojekyll + echo -e 'pygments.org\nwww.pygments.org' > _build/dirhtml/CNAME + echo 'Automated deployment of docs for GitHub pages.' 
> _build/dirhtml/README + - name: Deploy to repo + if: github.repository_owner == 'pygments' + uses: peaceiris/actions-gh-pages@v3 + with: + deploy_key: ${{ secrets.ACTIONS_DEPLOY_KEY }} + external_repository: pygments/pygments.github.io + publish_branch: master + publish_dir: ./doc/_build/dirhtml diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..cbfddbe --- /dev/null +++ b/.gitignore @@ -0,0 +1,21 @@ +*.egg +*.pyc +*.pyo +.*.sw[op] +/.pytest_cache/ +/.idea/ +/.project +/.tags +/.tox/ +/.cache/ +/Pygments.egg-info/* +/TAGS +/build/* +/dist/* +/doc/_build +/.coverage +/htmlcov +/.vscode +venv/ +.venv/ +.DS_Store @@ -0,0 +1,264 @@ +Pygments is written and maintained by Georg Brandl <georg@python.org>. + +Major developers are Tim Hatch <tim@timhatch.com> and Armin Ronacher +<armin.ronacher@active-4.com>. + +Other contributors, listed alphabetically, are: + +* Sam Aaron -- Ioke lexer +* Jean Abou Samra -- LilyPond lexer +* João Abecasis -- JSLT lexer +* Ali Afshar -- image formatter +* Thomas Aglassinger -- Easytrieve, JCL, Rexx, Transact-SQL and VBScript + lexers +* Muthiah Annamalai -- Ezhil lexer +* Kumar Appaiah -- Debian control lexer +* Andreas Amann -- AppleScript lexer +* Timothy Armstrong -- Dart lexer fixes +* Jeffrey Arnold -- R/S, Rd, BUGS, Jags, and Stan lexers +* EirÃkr Ã…sheim -- Uxntal lexer +* Jeremy Ashkenas -- CoffeeScript lexer +* José JoaquÃn Atria -- Praat lexer +* Stefan Matthias Aust -- Smalltalk lexer +* Lucas Bajolet -- Nit lexer +* Ben Bangert -- Mako lexers +* Max Battcher -- Darcs patch lexer +* Thomas Baruchel -- APL lexer +* Tim Baumann -- (Literate) Agda lexer +* Paul Baumgart, 280 North, Inc. 
-- Objective-J lexer +* Michael Bayer -- Myghty lexers +* Thomas Beale -- Archetype lexers +* John Benediktsson -- Factor lexer +* Trevor Bergeron -- mIRC formatter +* Vincent Bernat -- LessCSS lexer +* Christopher Bertels -- Fancy lexer +* Sébastien Bigaret -- QVT Operational lexer +* Jarrett Billingsley -- MiniD lexer +* Adam Blinkinsop -- Haskell, Redcode lexers +* Stéphane Blondon -- Procfile, SGF and Sieve lexers +* Frits van Bommel -- assembler lexers +* Pierre Bourdon -- bugfixes +* Martijn Braam -- Kernel log lexer, BARE lexer +* Matthias Bussonnier -- ANSI style handling for terminal-256 formatter +* chebee7i -- Python traceback lexer improvements +* Hiram Chirino -- Scaml and Jade lexers +* Mauricio Caceres -- SAS and Stata lexers. +* Ian Cooper -- VGL lexer +* David Corbett -- Inform, Jasmin, JSGF, Snowball, and TADS 3 lexers +* Leaf Corcoran -- MoonScript lexer +* Christopher Creutzig -- MuPAD lexer +* Daniël W. Crompton -- Pike lexer +* Pete Curry -- bugfixes +* Bryan Davis -- EBNF lexer +* Bruno Deferrari -- Shen lexer +* Walter Dörwald -- UL4 lexer +* Luke Drummond -- Meson lexer +* Giedrius Dubinskas -- HTML formatter improvements +* Owen Durni -- Haxe lexer +* Alexander Dutton, Oxford University Computing Services -- SPARQL lexer +* James Edwards -- Terraform lexer +* Nick Efford -- Python 3 lexer +* Sven Efftinge -- Xtend lexer +* Artem Egorkine -- terminal256 formatter +* Matthew Fernandez -- CAmkES lexer +* PaweÅ‚ Fertyk -- GDScript lexer, HTML formatter improvements +* Michael Ficarra -- CPSA lexer +* James H. Fisher -- PostScript lexer +* William S. 
Fulton -- SWIG lexer +* Carlos Galdino -- Elixir and Elixir Console lexers +* Michael Galloy -- IDL lexer +* Naveen Garg -- Autohotkey lexer +* Simon Garnotel -- FreeFem++ lexer +* Laurent Gautier -- R/S lexer +* Alex Gaynor -- PyPy log lexer +* Richard Gerkin -- Igor Pro lexer +* Alain Gilbert -- TypeScript lexer +* Alex Gilding -- BlitzBasic lexer +* GitHub, Inc -- DASM16, Augeas, TOML, and Slash lexers +* Bertrand Goetzmann -- Groovy lexer +* Krzysiek Goj -- Scala lexer +* Rostyslav Golda -- FloScript lexer +* Andrey Golovizin -- BibTeX lexers +* Matt Good -- Genshi, Cheetah lexers +* MichaÅ‚ Górny -- vim modeline support +* Alex Gosse -- TrafficScript lexer +* Patrick Gotthardt -- PHP namespaces support +* Hubert Gruniaux -- C and C++ lexer improvements +* Olivier Guibe -- Asymptote lexer +* Phil Hagelberg -- Fennel lexer +* Florian Hahn -- Boogie lexer +* Martin Harriman -- SNOBOL lexer +* Matthew Harrison -- SVG formatter +* Steven Hazel -- Tcl lexer +* Dan Michael Heggø -- Turtle lexer +* Aslak Hellesøy -- Gherkin lexer +* Greg Hendershott -- Racket lexer +* Justin Hendrick -- ParaSail lexer +* Jordi Gutiérrez Hermoso -- Octave lexer +* David Hess, Fish Software, Inc. -- Objective-J lexer +* Ken Hilton -- Typographic Number Theory and Arrow lexers +* Varun Hiremath -- Debian control lexer +* Rob Hoelz -- Perl 6 lexer +* Doug Hogan -- Mscgen lexer +* Ben Hollis -- Mason lexer +* Max Horn -- GAP lexer +* Fred Hornsey -- OMG IDL Lexer +* Alastair Houghton -- Lexer inheritance facility +* Tim Howard -- BlitzMax lexer +* Dustin Howett -- Logos lexer +* Ivan Inozemtsev -- Fantom lexer +* Hiroaki Itoh -- Shell console rewrite, Lexers for PowerShell session, + MSDOS session, BC, WDiff +* Brian R. 
Jackson -- Tea lexer +* Christian Jann -- ShellSession lexer +* Dennis Kaarsemaker -- sources.list lexer +* Dmitri Kabak -- Inferno Limbo lexer +* Igor Kalnitsky -- vhdl lexer +* Colin Kennedy - USD lexer +* Alexander Kit -- MaskJS lexer +* Pekka Klärck -- Robot Framework lexer +* Gerwin Klein -- Isabelle lexer +* Eric Knibbe -- Lasso lexer +* Stepan Koltsov -- Clay lexer +* Oliver Kopp - Friendly grayscale style +* Adam Koprowski -- Opa lexer +* Benjamin Kowarsch -- Modula-2 lexer +* Domen Kožar -- Nix lexer +* Oleh Krekel -- Emacs Lisp lexer +* Alexander Kriegisch -- Kconfig and AspectJ lexers +* Marek Kubica -- Scheme lexer +* Jochen Kupperschmidt -- Markdown processor +* Gerd Kurzbach -- Modelica lexer +* Jon Larimer, Google Inc. -- Smali lexer +* Olov Lassus -- Dart lexer +* Matt Layman -- TAP lexer +* Kristian Lyngstøl -- Varnish lexers +* Sylvestre Ledru -- Scilab lexer +* Chee Sing Lee -- Flatline lexer +* Mark Lee -- Vala lexer +* Pete Lomax -- Phix lexer +* Valentin Lorentz -- C++ lexer improvements +* Ben Mabey -- Gherkin lexer +* Angus MacArthur -- QML lexer +* Louis Mandel -- X10 lexer +* Louis Marchand -- Eiffel lexer +* Simone Margaritelli -- Hybris lexer +* Tim Martin - World of Warcraft TOC lexer +* Kirk McDonald -- D lexer +* Gordon McGregor -- SystemVerilog lexer +* Stephen McKamey -- Duel/JBST lexer +* Brian McKenna -- F# lexer +* Charles McLaughlin -- Puppet lexer +* Kurt McKee -- Tera Term macro lexer, PostgreSQL updates, MySQL overhaul, JSON lexer +* Joe Eli McIlvain -- Savi lexer +* Lukas Meuser -- BBCode formatter, Lua lexer +* Cat Miller -- Pig lexer +* Paul Miller -- LiveScript lexer +* Hong Minhee -- HTTP lexer +* Michael Mior -- Awk lexer +* Bruce Mitchener -- Dylan lexer rewrite +* Reuben Morais -- SourcePawn lexer +* Jon Morton -- Rust lexer +* Paulo Moura -- Logtalk lexer +* Mher Movsisyan -- DTD lexer +* Dejan Muhamedagic -- Crmsh lexer +* Ana Nelson -- Ragel, ANTLR, R console lexers +* Kurt Neufeld -- Markdown lexer +* Nam T. 
Nguyen -- Monokai style +* Jesper Noehr -- HTML formatter "anchorlinenos" +* Mike Nolta -- Julia lexer +* Avery Nortonsmith -- Pointless lexer +* Jonas Obrist -- BBCode lexer +* Edward O'Callaghan -- Cryptol lexer +* David Oliva -- Rebol lexer +* Pat Pannuto -- nesC lexer +* Jon Parise -- Protocol buffers and Thrift lexers +* Benjamin Peterson -- Test suite refactoring +* Ronny Pfannschmidt -- BBCode lexer +* Dominik Picheta -- Nimrod lexer +* Andrew Pinkham -- RTF Formatter Refactoring +* Clément Prévost -- UrbiScript lexer +* Tanner Prynn -- cmdline -x option and loading lexers from files +* Oleh Prypin -- Crystal lexer (based on Ruby lexer) +* Nick Psaris -- K and Q lexers +* Xidorn Quan -- Web IDL lexer +* Elias Rabel -- Fortran fixed form lexer +* raichoo -- Idris lexer +* Daniel Ramirez -- GDScript lexer +* Kashif Rasul -- CUDA lexer +* Nathan Reed -- HLSL lexer +* Justin Reidy -- MXML lexer +* Norman Richards -- JSON lexer +* Corey Richardson -- Rust lexer updates +* Fabrizio Riguzzi -- cplint leder +* Lubomir Rintel -- GoodData MAQL and CL lexers +* Andre Roberge -- Tango style +* Georg Rollinger -- HSAIL lexer +* Michiel Roos -- TypoScript lexer +* Konrad Rudolph -- LaTeX formatter enhancements +* Mario Ruggier -- Evoque lexers +* Miikka Salminen -- Lovelace style, Hexdump lexer, lexer enhancements +* Stou Sandalski -- NumPy, FORTRAN, tcsh and XSLT lexers +* Matteo Sasso -- Common Lisp lexer +* Joe Schafer -- Ada lexer +* Max Schillinger -- TiddlyWiki5 lexer +* Ken Schutte -- Matlab lexers +* René Schwaiger -- Rainbow Dash style +* Sebastian Schweizer -- Whiley lexer +* Tassilo Schweyer -- Io, MOOCode lexers +* Pablo Seminario -- PromQL lexer +* Ted Shaw -- AutoIt lexer +* Joerg Sieker -- ABAP lexer +* Robert Simmons -- Standard ML lexer +* Kirill Simonov -- YAML lexer +* Corbin Simpson -- Monte lexer +* Ville Skyttä -- ASCII armored lexer +* Alexander Smishlajev -- Visual FoxPro lexer +* Steve Spigarelli -- XQuery lexer +* Jerome St-Louis -- eC lexer +* 
Camil Staps -- Clean and NuSMV lexers; Solarized style +* James Strachan -- Kotlin lexer +* Tom Stuart -- Treetop lexer +* Colin Sullivan -- SuperCollider lexer +* Ben Swift -- Extempore lexer +* tatt61880 -- Kuin lexer +* Edoardo Tenani -- Arduino lexer +* Tiberius Teng -- default style overhaul +* Jeremy Thurgood -- Erlang, Squid config lexers +* Brian Tiffin -- OpenCOBOL lexer +* Bob Tolbert -- Hy lexer +* Doug Torrance -- Macaulay2 lexer +* Matthias Trute -- Forth lexer +* Tuoa Spi T4 -- Bdd lexer +* Erick Tryzelaar -- Felix lexer +* Alexander Udalov -- Kotlin lexer improvements +* Thomas Van Doren -- Chapel lexer +* Daniele Varrazzo -- PostgreSQL lexers +* Abe Voelker -- OpenEdge ABL lexer +* Pepijn de Vos -- HTML formatter CTags support +* Matthias Vallentin -- Bro lexer +* Benoît Vinot -- AMPL lexer +* Linh Vu Hong -- RSL lexer +* Immanuel Washington -- Smithy lexer +* Nathan Weizenbaum -- Haml and Sass lexers +* Nathan Whetsell -- Csound lexers +* Dietmar Winkler -- Modelica lexer +* Nils Winter -- Smalltalk lexer +* Davy Wybiral -- Clojure lexer +* Whitney Young -- ObjectiveC lexer +* Diego Zamboni -- CFengine3 lexer +* Enrique Zamudio -- Ceylon lexer +* Alex Zimin -- Nemerle lexer +* Rob Zimmerman -- Kal lexer +* Vincent Zurczak -- Roboconf lexer +* Hubert Gruniaux -- C and C++ lexer improvements +* Thomas Symalla -- AMDGPU Lexer +* 15b3 -- Image Formatter improvements +* Fabian Neumann -- CDDL lexer +* Thomas Duboucher -- CDDL lexer +* Philipp Imhof -- Pango Markup formatter +* Thomas Voss -- Sed lexer +* Martin Fischer -- WCAG contrast testing +* Marc Auberer -- Spice lexer + +Many thanks for all contributions! @@ -0,0 +1,2134 @@ +Pygments changelog +================== + +Pull request numbers before 2.4.2 are not linked as they refer to the now defunct Bitbucket project. 
+ +Version 2.14.0 +-------------- +(released January 1st, 2023) + +- Added lexers: + + * Arturo (#2259) + * GAP session (#2211) + * Fift (#2249) + * func (#2232) + * Jsonnet (#2239) + * Minecraft schema (#2276) + * MIPS (#2228) + * Phix (#2222) + * Portugol (#2300) + * TL-b (#2247) + * World of Warcraft TOC format (#2244, #2245) + * Wren (#2271) + +- Updated lexers: + + * Abap: Update keywords (#2281) + * Alloy: Update for Alloy 6 (#1963) + * C family (C, C++ and many others): + + - Fix an issue where a chunk would be wrongly recognized as a function + definition due to braces in comments (#2210) + - Improve parantheses handling for function definitions (#2207, #2208) + + * C#: Fix number and operator recognition (#2256, #2257) + * CSound: Updated builtins (#2268) + * F#: Add ``.fsx`` file extension (#2282) + * gas (GNU assembler): recognize braces as punctuation (#2230) + * HTTP: Add `CONNECT` keyword (#2242) + * Inform 6: Fix lexing of properties and doubles (#2214) + * INI: Allow comments that are not their own line (#2217, #2161) + * Java properties: Fix issue with whitespace-delimited keys, support + comments starting with `!` and escapes, no longer support undocumented + `;` and `//` comments (#2241) + * LilyPond: Improve heuristics, add ``\maxima`` duration (#2283) + * LLVM: Add opaque pointer type (#2269) + * Macaulay2: Update keywords (#2305) + * Minecraft-related lexers (SNB and Minecraft function) moved to + ``pygments.lexers.minecraft`` (#2276) + * Nim: General improvements (#1970) + * Nix: Fix single quotes inside indented strings (#2289) + * Objective J: Fix catastrophic backtracking (#2225) + * NASM: Add support for SSE/AVX/AVX-512 registers as well as 'rel' + and 'abs' address operators (#2212) + * Powershell: + + - Add ``local:`` keyword (#2254) + - Allow continuations without markers (#2262, #2263) + + * Solidity: Add boolean operators (#2292) + * Spice: Add ``enum`` keyword and fix a bug regarding binary, + hexadecimal and octal number tokens 
(#2227) + * YAML: Accept colons in key names (#2277) + +- Fix `make mapfiles` when Pygments is not installed in editable mode + (#2223) + +- Support more filetypes and compression types in `autopygmentize` (#2219) +- Merge consecutive tokens in Autohotkey, Clay (#2248) +- Add ``.nasm`` as a recognized file type for NASM (#2280) +- Add ``*Spec.hs`` as a recognized file type for ``HSpec`` (#2308) +- Add ``*.pyi`` (for typing stub files) as a recognized file type for + Python (#2331) +- The HTML lexer no longer emits empty spans for whitespace (#2304) +- Fix ``IRCFormatter`` inserting linenumbers incorrectly (#2270) + +Version 2.13.0 +-------------- +(released August 15th, 2022) + +- Added lexers: + + * COMAL-80 (#2180) + * JMESPath (#2174, #2175, #2179, #2182) + * Sql+Jinja (#2148) + +- Updated lexers: + + * Ada: support Ada 2022 (#2121); disable recognition of namespaces + because it disturbs lexing of aspects (#2125) + * Agda: allow straight quotes in module names (#2163) + * C family (C, C++ and many others): allow comments between + elements of function headers, e.g. between the arguments and + the opening brace for the body (#1891) + * C++: Resolve several cases of ``Error`` tokens (#2207, #2208) + * Coq: Add some common keywords, improve recognition of ``Set`` + and qualified identifiers (#2158) + * F*: Allow C-style comments anywhere in a line + * Fortran: Fix catastrophic backtracking with backslashes in strings + (#2194) + * Go: add support for generics (#2167) + * Inform: Update for version 6.40 (#2190) + * Isabelle: recognize cartouches (#2089) + * Java: support multiline strings aka. 
text blocks (#2132) + * Kotlin: Add ``value`` modifier (#2142) + * LilyPond: Add some missing builtins + * Macaulay2: Update builtins (#2139) + * Matlab session: fix traceback when a line continuation ellipsis + appears in the output (#2166) + * .NET: Add aliases for LibreOffice Basic, OpenOfficeBasic and + StarOffice Basic (#2170) + * Nim: Use ``Name.Builtin`` instead of ``Keyword.Type`` (#2136) + * PHP: fix `\"$var\"` inside strings (#2105) + * Python: only recognize ``\N``, ``\u`` and ``\U`` escape sequences + in string literals, but not in bytes literals where they are + not supported (#2204) + * Tcl: support ``${name}`` variables (#2145) + * Terraform: Accept leading whitespace for `<<` heredoc + delimiters (#2162) + * Teraterm: Various improvements (#2165) + * Spice: add support for the recently added features including more + builtin functions and bin, oct, hex number formats (#2206) + +- Added styles: + + * GitHub dark (#2192) + * StarOffice (#2168) + * Nord (`nord` and `nord-darker`; #2189, #1799, #1678) + +- Pygments now tries to use the ``importlib.metadata`` module to + discover plugins instead of the slower ``pkg_resources`` (#2155). In + particular, this largely speeds up the ``pygmentize`` script when + the lexer is not specified. + + ``importlib.metadata`` is only available in the Python standard + library since Python 3.8. For older versions, there exists an + ``importlib_metadata`` backport on PyPI. For this reason, Pygments + now defines a packaging extra ``plugins``, which adds a requirement + on ``importlib_metadata`` if the Python version is older than + 3.8. Thus, in order to install Pygments with optimal plugin + support even for old Python versions, you should do:: + + pip install pygments[plugins] + + Pygments still falls back on ``pkg_resources`` if neither + ``importlib.metadata`` nor ``importlib_metadata`` is found, but it + will be slower. + +- Silently ignore ``BrokenPipeError`` in the command-line interface + (#2193). 
+- The ``HtmlFormatter`` now uses the ``linespans`` attribute for + ``anchorlinenos`` if the ``lineanchors`` attribute is unset (#2026). +- The ``highlight``, ``lex`` and ``format`` functions no longer + wrongly report "argument must be a lexer/formatter instance, not a + class" in some cases where this is not the actual problem (#2123). +- Fix warnings in doc build (#2124). +- The ``codetagify`` filter now recognizes ``FIXME`` tags by default (#2150). +- The ``pygmentize`` command now recognizes if the ``COLORTERM`` + environment variable is set to a value indicating that true-color + support is available. In that case, it uses the ``TerminalTrueColorFormatter`` + by default (#2160) +- Remove redundant caches for filename patterns (#2153) +- Use new non-deprecated Pillow API for text bounding box in ``ImageFormatter`` + (#2198) +- Remove ``default_style`` (#930, #2183) +- Stop treating ``DeprecationWarnings`` as errors in the unit tests (#2196) + +Version 2.12.0 +-------------- +(released April 24th, 2022) + +- Added lexers: + + * Berry (#2070) + * Cplint (#2045) + * Macaulay2 (#1791) + * MCFunction (#2107) + * Minecraft (#2107) + * Qlik (#1925) + * ``UnixConfigLexer`` for "colon-separated" config files, like ``/etc/passwd`` (#2112) + * Uxntal (#2086) + * K and Q (#2073) + +- Updated lexers: + + * Agda: Update keyword list (#2017) + * C family: Fix identifiers after ``case`` statements (#2084) + * Clojure: Highlight ratios (#2042) + * Csound: Update to 6.17 (#2064) + * CSS: Update the list of properties (#2113) + * Elpi: + + - Fix catastrophic backtracking (#2053, #2061) + - Fix handling of ``->`` (#2028) + + * Futhark: Add missing tokens (#2118) + * Gherkin: Add ``But`` (#2046) + * Inform6: Update to 6.36 (#2050) + * Jinja2: add ``.xxx.j2`` and ``.xxx.jinja2`` to relevant lexers + (for ``xxx`` = ``html``, ``xml``, etc.) (#2103) + * JSON: Support C comments in JSON (#2049). 
Note: This doesn't mean the JSON parser now supports JSONC or JSON5 proper, just that it doesn't error out when seeing a ``/* */`` or ``//`` style comment. If you need proper comment handling, consider using the ``JavaScript`` lexer. + * LilyPond: + + - Fix incorrect lexing of names containing a built-in (#2071) + - Fix properties containing dashes (#2099) + + * PHP: Update builtin function and keyword list (#2054, #2056) + * Python: highlight ``EncodingWarning`` (#2106) + * Savi: fix highlighting for underscore/private identifiers, + add string interpolation (#2102); fix nested type name highlighting + (#2110) + * Scheme: Various improvements (#2060) + * Spice: Update the keyword list, add new types (#2063, #2067) + * Terraform: + + - Support non-idiomatic comments (#2065, #2066) + - Fix class name lexing (#2097) + +- Add ``plugins`` argument to ``get_all_lexers()``. +- Bump minimal Python version to 3.6 (#2059) +- Fix multiple lexers marking whitespace as ``Text`` (#2025) +- Remove various redundant uses of ``re.UNICODE`` (#2058) +- Associate ``.resource`` with the Robot framework (#2047) +- Associate ``.cljc`` with Clojure (#2043) +- Associate ``.tpp`` with C++ (#2031) +- Remove traces of Python 2 from the documentation (#2039) +- The ``native`` style was updated to meet the WCAG AAA contrast guidelines (#2038) +- Fix various typos (#2030) +- Fix ``Groff`` formatter not inheriting token styles correctly (#2024) +- Various improvements to the CI (#2036) +- The Ada lexer has been moved to a separate file (#2117) +- When ``linenos=table`` is used, the ``<table>`` itself is now wrapped with a ``<div class="highlight">`` tag instead of placing it inside the ``<td class="code">`` cell (#632.) With this change, the output matches the documented behavior. + +.. note:: + + If you have subclassed ``HtmlFormatter.wrap``, you may have to adjust the logic. 
+ + +Version 2.11.2 +-------------- +(released January 6th, 2022) + +- Updated lexers: + + * C-family: Fix incorrect handling of labels (#2022, #1996, #1182) + * Java: Fixed an issue with ``record`` keywords result in ``Error`` tokens in some cases (#2016, #2018) + +- Fix links to line numbers not working correctly (#2014) +- Remove ``underline`` from ``Whitespace`` style in the ``Tango`` theme (#2020) +- Fix ``IRC`` and ``Terminal256`` formatters not backtracking correctly for custom token types, resulting in some unstyled tokens (#1986) + + +Version 2.11.1 +-------------- +(released December 31st, 2021) + +- Updated lexers: + + * C-family: Handle return types with multiple tokens (e.g. ``unsigned int``) (#2008) + * JSON: Fix a regression which caused whitespace before ``:`` to result in ``Error`` tokens (#2010) + * SPICE: Various improvements (#2009) + + +Version 2.11.0 +-------------- +(released December 30th, 2021) + +- Added lexers: + + * BDD (#1803) + * Elpi (#1894) + * LilyPond (#1845, #1968, #1971, #2001). This comes with a custom style as well. 
+ * Maxima (#1885) + * Rita (#1541, #2003) + * Savi (#1863) + * Sed (#1935) + * Sophia contracts (#1974) + * Spice (#1980) + * ``.SRCINFO`` (#1951) + +- Updated lexers: + + * ABNF: Allow one-character rules (#1804) + * Assembly: Fix incorrect token endings (#1895, #1961) + * Bibtex: Distinguish between ``comment`` and ``commentary`` (#1899, #1806) + * C family: Support unicode identifiers (#1848) + * CDDL: Fix slow lexing speed (#1959) + * Debian control: Add missing fields (#1946) + * Devicetree: Recognize hexadecimal addresses for nodes (#1949) + * GDScript: Add ``void`` data type (#1948) + * GSQL + + - Fix comment handling (#2002) + - Fix catastrophic backtracking (#2006) + + * HTML, XML: Improve comment handling (#1896) + * Java: Add ``yield`` (#1941) and sealed classes/record (#1902) + * Makefiles (#1860, #1898) + * objdump-nasm: Improve handling of ``--no-show-raw-insn`` dumps (#1981) + * Prolog: Support escaped ``\`` inside quoted strings (#1479) + * Python: + + - Support ``~`` in tracebacks (#2004) + - Support the pattern matching keywords (#1797, #1994) + + * RobotFramework: Improve empty brace handling (#1921, #1922) + * Terraform + + - Add the 'set' type (#1909) + - Support heredocs (#1909) + +- Added styles: + + * Dracula (#1796) + * Friendly Grayscale (#1040, #1273) + * LilyPond (#1845) -- to be used for the ``LilyPond`` language. + * One-Dark (#1924, #1979) + +.. note:: + + All of the new styles unfortunately do not conform to WCAG recommendations. + +- There is new infrastructure in place to improve style accessibility. The default style has been updated to conform to WCAG recommendations. All styles are now checked for sufficient contrast by default to prevent regressions. 
(#1919, #1937, #1938, #1940) +- Clean up unused imports (#1887) +- Fix multiple lexers producing repeated single-character tokens +- Fix multiple lexers marking whitespace as ``Text`` (#1237, #1905, #1908, #1914, #1911, #1923, #1939, #1957, #1978) +- Remove duplicated assignments in the Paraiso style (#1934) +- ``pygmentize`` supports JSON output for the various list functions now, making it easier to consume them from scripts. (#1437, #1890) +- Use the ``shell`` lexer for ``kshrc`` files (#1947) +- Use the ``ruby`` lexer for ``Vagrantfile`` files (#1936) +- Use the C lexer for ``.xbm`` and ``.xpm`` files (#1802) +- Add a ``groff`` formatter (#1873) +- Update documentation (#1928) +- Line anchors now link to themselves (#1973) +- Add official support for Python 3.10 (#1917) +- Fix several missing colors in dark styles: Gruvbox dark, Monokai, Rrt, Sas, Strata dark (#1955) +- Associate more file types with ``man`` pages +- The ``HtmlFormatter`` can now emit tooltips for each token to ease debugging of lexers (#1822) +- Add ``f90`` as an alias for ``fortran`` (#2000) + + +Version 2.10.0 +-------------- +(released August 15th, 2021) + +- Added lexers: + + * ASC armored files (#1807) + * GSQL (#1809, #1866) + * Javascript REPL (#1825) + * procfile (#1808) + * Smithy (#1878, #1879) + +- Updated lexers: + + * C-family: Fix preprocessor token issues (#1830) + * C# (#1573, #1869) + * CSound (#1837) + * Fennel (#1862) + * JavaScript (#1741, #1814) + * LLVM (#1824) + * Python (#1852) + * Rust + + - Fix lexing of "break" and "continue" (#1843) + - Improve attribute handling (#1813) + + * Scala: Add support for the ``\`` operator (#1857) + * Swift (#1767, #1842) + * Tcl: Allow ``,`` and ``@`` in strings (#1834, #1742) + * TOML (#1870, #1872) + +- Fix assert statements in TNT lexer. 
+- Token types across all lexers have been unified (using the most common token + type name) (#1816, #1819) +- Improve Jasmin min score analysis (#1619) +- Add new alias for Go files (#1827) +- Fix multi-line console highlighting (#1833) +- Add a new trivial lexer which outputs everything as `Text.Generic.Output` (#1835, #1836) +- Use the ``.ini`` lexer for ``systemd`` files (#1849) +- Fix a ``FutureWarning`` related to ``words()`` (#1854) +- ``pwsh`` is now recognized as an alias for PowerShell (#1876) + + +Version 2.9.0 +------------- +(released May 3rd, 2021) + +- Added lexers: + + * APDL, gcode (#1714) + * Kuin (#1300) + * NestedText (#1578) + * OMG IDL (#1595) + * TEAL (#1671) + * ThingsDB (#1295) + * WebAssembly (#1416, #1564) + +- Updated lexers: + + * AMDGPU (#1717, #1775) + * APL (#1747) + * C/C++: Improve namespace handling (#1722, #1561, #1719, #1746) + * Chapel (#1743) + * Coq (#1721) + * Cython (#853) + * DeviceTree (#1755) + * Groovy (#1765) + * Julia (#1715) + * Octave: Allow multiline and block-percent comments (#1726) + * PowerShell: Improve lexing of ``:`` (#1682, #1758) + * PromQL (#1783) + * Python: Improve float parsing (#1768, #1740) + * Rust (#1061) + * Scala: Rewrite to support Scala3 (#1694, #1035, #1121) + * Terraform: Support 0.14 syntax (#1756) + * Velocity: Detect multi-line patterns (#1776) + +- Add Pango formatter (#1727) +- Autopygmentize uses ``file`` first instead of ``pygments -N`` (#1786) +- Fix links (#1716) +- Fix issue with LaTeX formatter and ``minted`` (#1734, #1735, #1736, #1737) +- Improve alias order (#1780) +- Improve line number colors (#1779, #1778) +- Fix CTag related issue (#1724) +- Recognize ``.leex`` as Elixir templates +- Fix incorrect variable being accessed (#1748) + +- Updated `filename` handling in HTML formatter if `linenos='table'` (#1757) + + * Previously the filename would be emitted within the `<td>` holding the + code, but outside the `<pre>`. 
This would invariably break the alignment + with line numbers. + * Now if `filename` is specified, a separate `<tr>` is emitted before the + table content which contains a single `<th>` with `colspan=2` so it + spans both the line number and code columns. The filename is still + within `<span class="filename">...</span>` so any existing styles + should still apply, although the CSS path may need to change. + * For an example of the new output format see + `table_cls_step_1_start_1_special_0_noanchor_filename.html` + in the `tests/html_linenos_expected_output/` directory. + * For more details and discussion see the issue + https://github.com/pygments/pygments/issues/1757 + +- Added styles: + + * Gruvbox light+dark (#1763) + + +Version 2.8.0 +------------- +(released February 14, 2021) + +- Added lexers: + + * AMDGPU (#1626) + * CDDL (#1379, #1239) + * Futhark (#1691) + * Graphviz/DOT (#1657, #731) + +- Updated lexers: + + * AutoIt: Support single quoted strings (#1667, #1663) + * C/C++ & related: Fix mishandling ``*/`` (#1695) + * Cocoa: Add builtin types (#1703) + * Console (#1672) + * Eiffel: Fix performance issues (#1658) + * Fortran: Improve combined keyword detection (#1677, #1188) + * J: Fix operator ``?`` lexing (#1700, #1149) + * JavaScript/TypeScript: Fix escapes in backtick strings (#1679, #1686) + * Kotlin: Improve string interpolation, modifier keyword handling, and various small issues (#1699) + * LESS: Support single-line comments (#1046) + * Matlab: + + - Add support for class properties (#1466) + - Update builtin functions (#1705) + - Various cleanups (#1673) + + * OpenEdge (#1696) + * Python: Improve handling of raw f-strings (#1681, #1683) + * Ruby: Better method name handling (#1531) + * Stata: Updated keywords (#1470) + +- Added styles: + + * Material (#1662) + * Zenburn (#1659) + +- The `pygmentize` script now uses `argparse`, all options should work + as before + +- Add `pygmentize -C` option to guess a lexer from content + +- With this 
release, Pygments moves to a new internal testing system (#1649). + See ``Contributing.md`` for details. The main advantage of this new change + is a much better test coverage of all existing example lexers. It also makes + it much easier to add new test snippets. +- Make guessing prefer Python 3 lexer +- Do not guess MIME or SQL without reason +- Changed setuptools to use a declarative config through ``setup.cfg``. + Building Pygments now requires setuptools 39.2+. +- Add markdown to MarkdownLexer aliases (#1687) +- Change line number handling + + * In ``<table>`` based output, the ``td.linenos`` element will have either a + ``normal`` or ``special`` class attached. Previously, only ``special`` line + numbers got a class. This prevents styles from getting applied twice - + once via ``<pre>``, once via ``<span class="special">``. This also means + that ``td.linenos pre`` is no longer styled, instead, use + ``td.linenos .normal`` and ``td.linenos .special``. + * In the "inline" style, the DOM element order was changed. The line number + is added first, then the line is wrapped by the highlighter. + This fixes lines not being fully highlighted. + * The visual output for inline and non-inline line numbers & highlighting, + as well as class-based and inline styling is now consistent. + * Line number styles are set to ``background-color: transparent`` and + ``color: inherit`` by default. This works much better with dark styles + which don't have colors set for line numbers. + +- Remove "raw" alias from RawTokenLexer, so that it cannot be + selected by alias. +- Fix RawTokenLexer to work in Python 3 and handle exceptions. 
+- Add prompt colors to the Solarized theme (#1529) +- Image formatter supports background colors now (#1374) +- Add support for anchors in conjunction with inline line numbers (#1591) +- Modernize the codebase using ``pyupgrade`` (#1622) +- Add support for line numbers to the ``terminal256`` formatter (#1674, #1653) +- Improve ``analyze_text`` logic for ``ECL`` (#1610) +- Improve ``analyze_text`` logic for ``CBM Basic V2`` (#1607) +- Improve LaTeX formatter (#1708, #1709) + + +Version 2.7.4 +------------- +(released January 12, 2021) + +- Updated lexers: + + - Apache configurations: Improve handling of malformed tags (#1656) + - CSS: Add support for variables (#1633, #1666) + - Crystal (#1650, #1670) + - Coq (#1648) + - Fortran: Add missing keywords (#1635, #1665) + - Ini (#1624) + - JavaScript and variants (#1647 -- missing regex flags, #1651) + - Markdown (#1623, #1617) + - Shell + + - Lex trailing whitespace as part of the prompt (#1645) + - Add missing ``in`` keyword (#1652) + + - SQL - Fix keywords (#1668) + - Typescript: Fix incorrect punctuation handling (#1510, #1511) + +- Fix infinite loop in SML lexer (#1625), `CVE-2021-20270 <https://nvd.nist.gov/vuln/detail/CVE-2021-20270>`_ +- Fix backtracking string regexes in JavaScript/TypeScript, Modula2 + and many other lexers (#1637) `CVE-2021-27291 <https://nvd.nist.gov/vuln/detail/CVE-2021-27291>`_ +- Limit recursion with nesting Ruby heredocs (#1638) +- Fix a few inefficient regexes for guessing lexers +- Fix the raw token lexer handling of Unicode (#1616) +- Revert a private API change in the HTML formatter (#1655) -- + please note that private APIs remain subject to change! +- Fix several exponential/cubic-complexity regexes found by + Ben Caller/Doyensec (#1675) +- Fix incorrect MATLAB example (#1582) + +Thanks to Google's OSS-Fuzz project for finding many of these bugs. 
+ + +Version 2.7.3 +------------- +(released December 6, 2020) + +- Updated lexers: + + * Ada (#1581) + * HTML (#1615, #1614) + * Java (#1594, #1586) + * JavaScript (#1605, #1589, #1588) + * JSON (#1569 -- this is a complete rewrite) + * Lean (#1601) + * LLVM (#1612) + * Mason (#1592) + * MySQL (#1555, #1551) + * Rust (#1608) + * Turtle (#1590, #1553) + +- Deprecated JsonBareObjectLexer, which is now identical to JsonLexer (#1600) +- The ``ImgFormatter`` now calculates the exact character width, which fixes some issues with overlapping text (#1213, #1611) +- Documentation fixes (#1609, #1599, #1598) +- Fixed duplicated Juttle language alias (#1604, #1606) +- Added support for Kotlin scripts (#1587) +- Removed CSS rule which forced margin to 0 + + +Version 2.7.2 +------------- +(released October 24, 2020) + +- Updated lexers: + + * Latex (#1517, #1516) + * LLVM (#1565) + * SPARQL (#1559) + +- Fix Python console/traceback lexer problems with custom exceptions without messages (#1548) +- Allow loading ttc fonts on Mac/image formatter (#1223) +- Improve ``analyze_text`` across a variety of lexers (#1549) +- Remove CSS rule which forced the vertical padding to 0 for line numbers (#1583, #1579) +- Fix ``TNTLexer`` crashing on unexpected EOL (#1568, #1570) +- ``regexlint`` can be now run locally as part of ``tox`` tests (#1557) +- Fix typos (#1550, #1562) +- Add Python 3.9 as a supported version (#1554) + + +Version 2.7.1 +------------- +(released September 16, 2020) + +- Fixed a regression in the JSON lexer (#1544) + + +Version 2.7.0 +------------- +(released September 12, 2020) + +- Added lexers: + + * Arrow (#1481, #1499) + * BARE (#1488) + * Devicetree (#1434) + * F* (#1409) + * GDScript (#1457) + * Pointless (#1494) + * PromQL (#1506) + * PsySH (#1438) + * Singularity (#1285) + * TiddlyWiki5 (#1390) + * TNT (#1414) + * YANG (#1408, #1428) + +- Updated lexers: + + * APL (#1503) + * C++ (#1350, which also fixes: #1222, #996, #906, #828, #1162, #1166, + #1396) + * 
Chapel (#1423) + * CMake (#1491) + * CSound (#1509) + * Cython (#1507) + * Dart (#1449) + * Fennel (#1535) + * Fortran (#1442) + * GAS (#1530) + * HTTP (#1432, #1520, #1521) + * Inform 6 (#1461) + * Javascript (#1533) + * JSON (#1065, #1528) + * Lean (#1415) + * Matlab (#1399) + * Markdown (#1492, #1495) + * MySQL (#975, #1063, #1453, #1527) + * NASM (#1465) + * Nim (#1426) + * PostgreSQL (#1513) + * PowerShell (#1398, #1497) + * Protobuf (#1505) + * Robot (#1480) + * SQL (#1402) + * SystemVerilog (#1436, #1452, #1454, #1460, #1462, #1463, #1464, #1471, #1496, #1504) + * TeraTerm (#1337) + * XML (#1502) + +- Added a new filter for math symbols (#1406) +- The Kconfig lexer will match Kconfig derivative names now (#1458) +- Improved HTML formatter output (#1500) +- ``.markdown`` is now recognized as an extension for Markdown files (#1476) +- Fixed line number colors for Solarized (#1477, #1356) +- Improvements to exception handling (#1478) +- Improvements to tests (#1532, #1533, #1539) +- Various code cleanups (#1536, #1537, #1538) + + +Version 2.6.1 +------------- +(released March 8, 2020) + +- This release fixes a packaging issue. No functional changes. + + +Version 2.6 +----------- +(released March 8, 2020) + +- Running Pygments on Python 2.x is no longer supported. + (The Python 2 lexer still exists.) 
+ +- Added lexers: + + * Linux kernel logs (#1310) + * LLVM MIR (#1361) + * MiniScript (#1397) + * Mosel (#1287, #1326) + * Parsing Expression Grammar (#1336) + * ReasonML (#1386) + * Ride (#1319, #1321) + * Sieve (#1257) + * USD (#1290) + * WebIDL (#1309) + +- Updated lexers: + + * Apache2 (#1378) + * Chapel (#1357) + * CSound (#1383) + * D (#1375, #1362) + * Haskell (#1347, #1177) + * Idris (#1360) + * Perl6/Raku lexer (#1344) + * Python3 (#1382, #1385) + * Rust: Updated lexer to cover more builtins (mostly macros) and miscellaneous + new syntax (#1320) + * SQL: Add temporal support keywords (#1402) + +- The 256-color/true-color terminal formatters now support the italic attribute + in styles (#1288) +- Support HTTP 2/3 header (#1308) +- Support missing reason in HTTP header (#1322) +- Boogie/Silver: support line continuations and triggers, move contract keywords + to separate category (#1299) +- GAS: support C-style comments (#1291) +- Fix names in S lexer (#1330, #1333) +- Fix numeric literals in Ada (#1334) +- Recognize ``.mjs`` files as Javascript (#1392) +- Recognize ``.eex`` files as Elixir (#1387) +- Fix ``re.MULTILINE`` usage (#1388) +- Recognize ``pipenv`` and ``poetry`` dependency & lock files (PR#1376) +- Improve font search on Windows (#1247) +- Remove unused script block (#1401) + + +Version 2.5.2 +------------- +(released November 29, 2019) + +- Fix incompatibility with some setuptools versions (PR#1316) + +- Fix lexing of ReST field lists (PR#1279) +- Fix lexing of Matlab keywords as field names (PR#1282) +- Recognize double-quoted strings in Matlab (PR#1278) +- Avoid slow backtracking in Vim lexer (PR#1312) +- Fix Scala highlighting of types (PR#1315) +- Highlight field lists more consistently in ReST (PR#1279) +- Fix highlighting Matlab keywords in field names (PR#1282) +- Recognize Matlab double quoted strings (PR#1278) +- Add some Terraform keywords +- Update Modelica lexer to 3.4 +- Update Crystal examples + + +Version 2.5.1 +------------- 
+(released November 26, 2019) + +- This release fixes a packaging issue. No functional changes. + + +Version 2.5.0 +------------- +(released November 26, 2019) + +- Added lexers: + + * Email (PR#1246) + * Erlang, Elixir shells (PR#823, #1521) + * Notmuch (PR#1264) + * `Scdoc <https://git.sr.ht/~sircmpwn/scdoc>`_ (PR#1268) + * `Solidity <https://solidity.readthedocs.io/>`_ (#1214) + * `Zeek <https://www.zeek.org>`_ (new name for Bro) (PR#1269) + * `Zig <https://ziglang.org/>`_ (PR#820) + +- Updated lexers: + + * Apache2 Configuration (PR#1251) + * Bash sessions (#1253) + * CSound (PR#1250) + * Dart + * Dockerfile + * Emacs Lisp + * Handlebars (PR#773) + * Java (#1101, #987) + * Logtalk (PR#1261) + * Matlab (PR#1271) + * Praat (PR#1277) + * Python3 (PR#1255, PR#1400) + * Ruby + * YAML (#1528) + * Velocity + +- Added styles: + + * Inkpot (PR#1276) + +- The ``PythonLexer`` class is now an alias for the former ``Python3Lexer``. + The old ``PythonLexer`` is available as ``Python2Lexer``. Same change has + been done for the ``PythonTracebackLexer``. The ``python3`` option for + the ``PythonConsoleLexer`` is now true by default. + +- Bump ``NasmLexer`` priority over ``TasmLexer`` for ``.asm`` files + (fixes #1326) +- Default font in the ``ImageFormatter`` has been updated (#928, PR#1245) +- Test suite switched to py.test, removed nose dependency (#1490) +- Reduce ``TeraTerm`` lexer score -- it used to match nearly all languages + (#1256) +- Treat ``Skylark``/``Starlark`` files as Python files (PR#1259) +- Image formatter: actually respect ``line_number_separator`` option + +- Add LICENSE file to wheel builds +- Agda: fix lambda highlighting +- Dart: support ``@`` annotations +- Dockerfile: accept ``FROM ... 
AS`` syntax +- Emacs Lisp: add more string functions +- GAS: accept registers in directive arguments +- Java: make structural punctuation (braces, parens, colon, comma) ``Punctuation``, not ``Operator`` (#987) +- Java: support ``var`` contextual keyword (#1101) +- Matlab: Fix recognition of ``function`` keyword (PR#1271) +- Python: recognize ``.jy`` filenames (#976) +- Python: recognize ``f`` string prefix (#1156) +- Ruby: support squiggly heredocs +- Shell sessions: recognize Virtualenv prompt (PR#1266) +- Velocity: support silent reference syntax + + +Version 2.4.2 +------------- +(released May 28, 2019) + +- Fix encoding error when guessing lexer with given ``encoding`` option + (#1438) + + +Version 2.4.1 +------------- +(released May 24, 2019) + +- Updated lexers: + + * Coq (#1430) + * MSDOS Session (PR#734) + * NASM (#1517) + * Objective-C (PR#813, #1508) + * Prolog (#1511) + * TypeScript (#1515) + +- Support CSS variables in stylesheets (PR#814, #1356) +- Fix F# lexer name (PR#709) +- Fix ``TerminalFormatter`` using bold for bright text (#1480) + + +Version 2.4.0 +------------- +(released May 8, 2019) + +- Added lexers: + + * Augeas (PR#807) + * BBC Basic (PR#806) + * Boa (PR#756) + * Charm++ CI (PR#788) + * DASM16 (PR#807) + * FloScript (PR#750) + * FreeFem++ (PR#785) + * Hspec (PR#790) + * Pony (PR#627) + * SGF (PR#780) + * Slash (PR#807) + * Slurm (PR#760) + * Tera Term Language (PR#749) + * TOML (PR#807) + * Unicon (PR#731) + * VBScript (PR#673) + +- Updated lexers: + + * Apache2 (PR#766) + * Cypher (PR#746) + * LLVM (PR#792) + * Makefiles (PR#766) + * PHP (#1482) + * Rust + * SQL (PR#672) + * Stan (PR#774) + * Stata (PR#800) + * Terraform (PR#787) + * YAML + +- Add solarized style (PR#708) +- Add support for Markdown reference-style links (PR#753) +- Add license information to generated HTML/CSS files (#1496) +- Change ANSI color names (PR#777) +- Fix catastrophic backtracking in the bash lexer (#1494) +- Fix documentation failing to build using Sphinx 
2.0 (#1501) +- Fix incorrect links in the Lisp and R lexer documentation (PR#775) +- Fix rare unicode errors on Python 2.7 (PR#798, #1492) +- Fix lexers popping from an empty stack (#1506) +- TypoScript uses ``.typoscript`` now (#1498) +- Updated Trove classifiers and ``pip`` requirements (PR#799) + + + +Version 2.3.1 +------------- +(released Dec 16, 2018) + +- Updated lexers: + + * ASM (PR#784) + * Chapel (PR#735) + * Clean (PR#621) + * CSound (PR#684) + * Elm (PR#744) + * Fortran (PR#747) + * GLSL (PR#740) + * Haskell (PR#745) + * Hy (PR#754) + * Igor Pro (PR#764) + * PowerShell (PR#705) + * Python (PR#720, #1299, PR#715) + * SLexer (PR#680) + * YAML (PR#762, PR#724) + +- Fix invalid string escape sequences +- Fix `FutureWarning` introduced by regex changes in Python 3.7 + + +Version 2.3.0 +------------- +(released Nov 25, 2018) + +- Added lexers: + + * Fennel (PR#783) + * HLSL (PR#675) + +- Updated lexers: + + * Dockerfile (PR#714) + +- Minimum Python versions changed to 2.7 and 3.5 +- Added support for Python 3.7 generator changes (PR#772) +- Fix incorrect token type in SCSS for single-quote strings (#1322) +- Use `terminal256` formatter if `TERM` contains `256` (PR#666) +- Fix incorrect handling of GitHub style fences in Markdown (PR#741, #1389) +- Fix `%a` not being highlighted in Python3 strings (PR#727) + + +Version 2.2.0 +------------- +(released Jan 22, 2017) + +- Added lexers: + + * AMPL + * TypoScript (#1173) + * Varnish config (PR#554) + * Clean (PR#503) + * WDiff (PR#513) + * Flatline (PR#551) + * Silver (PR#537) + * HSAIL (PR#518) + * JSGF (PR#546) + * NCAR command language (PR#536) + * Extempore (PR#530) + * Cap'n Proto (PR#595) + * Whiley (PR#573) + * Monte (PR#592) + * Crystal (PR#576) + * Snowball (PR#589) + * CapDL (PR#579) + * NuSMV (PR#564) + * SAS, Stata (PR#593) + +- Added the ability to load lexer and formatter classes directly from files + with the `-x` command line option and the `lexers.load_lexer_from_file()` + and 
`formatters.load_formatter_from_file()` functions. (PR#559) + +- Added `lexers.find_lexer_class_by_name()`. (#1203) + +- Added new token types and lexing for magic methods and variables in Python + and PHP. + +- Added a new token type for string affixes and lexing for them in Python, C++ + and Postgresql lexers. + +- Added a new token type for heredoc (and similar) string delimiters and + lexing for them in C++, Perl, PHP, Postgresql and Ruby lexers. + +- Styles can now define colors with ANSI colors for use in the 256-color + terminal formatter. (PR#531) + +- Improved the CSS lexer. (#1083, #1130) + +- Added "Rainbow Dash" style. (PR#623) + +- Delay loading `pkg_resources`, which takes a long while to import. (PR#690) + + +Version 2.1.3 +------------- +(released Mar 2, 2016) + +- Fixed regression in Bash lexer (PR#563) + + +Version 2.1.2 +------------- +(released Feb 29, 2016) + +- Fixed Python 3 regression in image formatter (#1215) +- Fixed regression in Bash lexer (PR#562) + + +Version 2.1.1 +------------- +(released Feb 14, 2016) + +- Fixed Jython compatibility (#1205) +- Fixed HTML formatter output with leading empty lines (#1111) +- Added a mapping table for LaTeX encodings and added utf8 (#1152) +- Fixed image formatter font searching on Macs (#1188) +- Fixed deepcopy-ing of Token instances (#1168) +- Fixed Julia string interpolation (#1170) +- Fixed statefulness of HttpLexer between get_tokens calls +- Many smaller fixes to various lexers + + +Version 2.1 +----------- +(released Jan 17, 2016) + +- Added lexers: + + * Emacs Lisp (PR#431) + * Arduino (PR#442) + * Modula-2 with multi-dialect support (#1090) + * Fortran fixed format (PR#213) + * Archetype Definition language (PR#483) + * Terraform (PR#432) + * Jcl, Easytrieve (PR#208) + * ParaSail (PR#381) + * Boogie (PR#420) + * Turtle (PR#425) + * Fish Shell (PR#422) + * Roboconf (PR#449) + * Test Anything Protocol (PR#428) + * Shen (PR#385) + * Component Pascal (PR#437) + * SuperCollider (PR#472) + * Shell 
consoles (Tcsh, PowerShell, MSDOS) (PR#479) + * Elm and J (PR#452) + * Crmsh (PR#440) + * Praat (PR#492) + * CSound (PR#494) + * Ezhil (PR#443) + * Thrift (PR#469) + * QVT Operational (PR#204) + * Hexdump (PR#508) + * CAmkES Configuration (PR#462) + +- Added styles: + + * Lovelace (PR#456) + * Algol and Algol-nu (#1090) + +- Added formatters: + + * IRC (PR#458) + * True color (24-bit) terminal ANSI sequences (#1142) + (formatter alias: "16m") + +- New "filename" option for HTML formatter (PR#527). + +- Improved performance of the HTML formatter for long lines (PR#504). + +- Updated autopygmentize script (PR#445). + +- Fixed style inheritance for non-standard token types in HTML output. + +- Added support for async/await to Python 3 lexer. + +- Rewrote linenos option for TerminalFormatter (it's better, but slightly + different output than before) (#1147). + +- Javascript lexer now supports most of ES6 (#1100). + +- Cocoa builtins updated for iOS 8.1 (PR#433). + +- Combined BashSessionLexer and ShellSessionLexer, new version should support + the prompt styles of either. + +- Added option to pygmentize to show a full traceback on exceptions. + +- Fixed incomplete output on Windows and Python 3 (e.g. when using iPython + Notebook) (#1153). + +- Allowed more traceback styles in Python console lexer (PR#253). + +- Added decorators to TypeScript (PR#509). + +- Fix highlighting of certain IRC logs formats (#1076). + + +Version 2.0.2 +------------- +(released Jan 20, 2015) + +- Fix Python tracebacks getting duplicated in the console lexer (#1068). + +- Backquote-delimited identifiers are now recognized in F# (#1062). + + +Version 2.0.1 +------------- +(released Nov 10, 2014) + +- Fix an encoding issue when using ``pygmentize`` with the ``-o`` option. + + +Version 2.0 +----------- +(released Nov 9, 2014) + +- Default lexer encoding is now "guess", i.e. UTF-8 / Locale / Latin1 is + tried in that order. + +- Major update to Swift lexer (PR#410). 
+ +- Multiple fixes to lexer guessing in conflicting cases: + + * recognize HTML5 by doctype + * recognize XML by XML declaration + * don't recognize C/C++ as SystemVerilog + +- Simplified regexes and builtin lists. + + +Version 2.0rc1 +-------------- +(released Oct 16, 2014) + +- Dropped Python 2.4 and 2.5 compatibility. This is in favor of single-source + compatibility between Python 2.6, 2.7 and 3.3+. + +- New website and documentation based on Sphinx (finally!) + +- Lexers added: + + * APL (#969) + * Agda and Literate Agda (PR#203) + * Alloy (PR#355) + * AmbientTalk + * BlitzBasic (PR#197) + * ChaiScript (PR#24) + * Chapel (PR#256) + * Cirru (PR#275) + * Clay (PR#184) + * ColdFusion CFC (PR#283) + * Cryptol and Literate Cryptol (PR#344) + * Cypher (PR#257) + * Docker config files + * EBNF (PR#193) + * Eiffel (PR#273) + * GAP (PR#311) + * Golo (PR#309) + * Handlebars (PR#186) + * Hy (PR#238) + * Idris and Literate Idris (PR#210) + * Igor Pro (PR#172) + * Inform 6/7 (PR#281) + * Intel objdump (PR#279) + * Isabelle (PR#386) + * Jasmin (PR#349) + * JSON-LD (PR#289) + * Kal (PR#233) + * Lean (PR#399) + * LSL (PR#296) + * Limbo (PR#291) + * Liquid (#977) + * MQL (PR#285) + * MaskJS (PR#280) + * Mozilla preprocessors + * Mathematica (PR#245) + * NesC (PR#166) + * Nit (PR#375) + * Nix (PR#267) + * Pan + * Pawn (PR#211) + * Perl 6 (PR#181) + * Pig (PR#304) + * Pike (PR#237) + * QBasic (PR#182) + * Red (PR#341) + * ResourceBundle (#1038) + * Rexx (PR#199) + * Rql (PR#251) + * Rsl + * SPARQL (PR#78) + * Slim (PR#366) + * Swift (PR#371) + * Swig (PR#168) + * TADS 3 (PR#407) + * Todo.txt todo lists + * Twig (PR#404) + +- Added a helper to "optimize" regular expressions that match one of many + literal words; this can save 20% and more lexing time with lexers that + highlight many keywords or builtins. + +- New styles: "xcode" and "igor", similar to the default highlighting of + the respective IDEs. 
+ +- The command-line "pygmentize" tool now tries a little harder to find the + correct encoding for files and the terminal (#979). + +- Added "inencoding" option for lexers to override "encoding" analogous + to "outencoding" (#800). + +- Added line-by-line "streaming" mode for pygmentize with the "-s" option. + (PR#165) Only fully works for lexers that have no constructs spanning + lines! + +- Added an "envname" option to the LaTeX formatter to select a replacement + verbatim environment (PR#235). + +- Updated the Makefile lexer to yield a little more useful highlighting. + +- Lexer aliases passed to ``get_lexer_by_name()`` are now case-insensitive. + +- File name matching in lexers and formatters will now use a regex cache + for speed (PR#205). + +- Pygments will now recognize "vim" modelines when guessing the lexer for + a file based on content (PR#118). + +- Major restructure of the ``pygments.lexers`` module namespace. There are now + many more modules with less lexers per module. Old modules are still around + and re-export the lexers they previously contained. + +- The NameHighlightFilter now works with any Name.* token type (#790). + +- Python 3 lexer: add new exceptions from PEP 3151. + +- Opa lexer: add new keywords (PR#170). + +- Julia lexer: add keywords and underscore-separated number + literals (PR#176). + +- Lasso lexer: fix method highlighting, update builtins. Fix + guessing so that plain XML isn't always taken as Lasso (PR#163). + +- Objective C/C++ lexers: allow "@" prefixing any expression (#871). + +- Ruby lexer: fix lexing of Name::Space tokens (#860) and of symbols + in hashes (#873). + +- Stan lexer: update for version 2.4.0 of the language (PR#162, PR#255, PR#377). + +- JavaScript lexer: add the "yield" keyword (PR#196). + +- HTTP lexer: support for PATCH method (PR#190). + +- Koka lexer: update to newest language spec (PR#201). + +- Haxe lexer: rewrite and support for Haxe 3 (PR#174). 
+ +- Prolog lexer: add different kinds of numeric literals (#864). + +- F# lexer: rewrite with newest spec for F# 3.0 (#842), fix a bug with + dotted chains (#948). + +- Kotlin lexer: general update (PR#271). + +- Rebol lexer: fix comment detection and analyse_text (PR#261). + +- LLVM lexer: update keywords to v3.4 (PR#258). + +- PHP lexer: add new keywords and binary literals (PR#222). + +- external/markdown-processor.py updated to newest python-markdown (PR#221). + +- CSS lexer: some highlighting order fixes (PR#231). + +- Ceylon lexer: fix parsing of nested multiline comments (#915). + +- C family lexers: fix parsing of indented preprocessor directives (#944). + +- Rust lexer: update to 0.9 language version (PR#270, PR#388). + +- Elixir lexer: update to 0.15 language version (PR#392). + +- Fix swallowing incomplete tracebacks in Python console lexer (#874). + + +Version 1.6 +----------- +(released Feb 3, 2013) + +- Lexers added: + + * Dylan console (PR#149) + * Logos (PR#150) + * Shell sessions (PR#158) + +- Fix guessed lexers not receiving lexer options (#838). + +- Fix unquoted HTML attribute lexing in Opa (#841). + +- Fixes to the Dart lexer (PR#160). 
+ + +Version 1.6rc1 +-------------- +(released Jan 9, 2013) + +- Lexers added: + + * AspectJ (PR#90) + * AutoIt (PR#122) + * BUGS-like languages (PR#89) + * Ceylon (PR#86) + * Croc (new name for MiniD) + * CUDA (PR#75) + * Dg (PR#116) + * IDL (PR#115) + * Jags (PR#89) + * Julia (PR#61) + * Kconfig (#711) + * Lasso (PR#95, PR#113) + * LiveScript (PR#84) + * Monkey (PR#117) + * Mscgen (PR#80) + * NSIS scripts (PR#136) + * OpenCOBOL (PR#72) + * QML (PR#123) + * Puppet (PR#133) + * Racket (PR#94) + * Rdoc (PR#99) + * Robot Framework (PR#137) + * RPM spec files (PR#124) + * Rust (PR#67) + * Smali (Dalvik assembly) + * SourcePawn (PR#39) + * Stan (PR#89) + * Treetop (PR#125) + * TypeScript (PR#114) + * VGL (PR#12) + * Visual FoxPro (#762) + * Windows Registry (#819) + * Xtend (PR#68) + +- The HTML formatter now supports linking to tags using CTags files, when the + python-ctags package is installed (PR#87). + +- The HTML formatter now has a "linespans" option that wraps every line in a + <span> tag with a specific id (PR#82). + +- When deriving a lexer from another lexer with token definitions, definitions + for states not in the child lexer are now inherited. If you override a state + in the child lexer, an "inherit" keyword has been added to insert the base + state at that position (PR#141). + +- The C family lexers now inherit token definitions from a common base class, + removing code duplication (PR#141). + +- Use "colorama" on Windows for console color output (PR#142). + +- Fix Template Haskell highlighting (PR#63). + +- Fix some S/R lexer errors (PR#91). + +- Fix a bug in the Prolog lexer with names that start with 'is' (#810). + +- Rewrite Dylan lexer, add Dylan LID lexer (PR#147). + +- Add a Java quickstart document (PR#146). + +- Add a "external/autopygmentize" file that can be used as .lessfilter (#802). 
+ + +Version 1.5 +----------- +(codename Zeitdilatation, released Mar 10, 2012) + +- Lexers added: + + * Awk (#630) + * Fancy (#633) + * PyPy Log + * eC + * Nimrod + * Nemerle (#667) + * F# (#353) + * Groovy (#501) + * PostgreSQL (#660) + * DTD + * Gosu (#634) + * Octave (PR#22) + * Standard ML (PR#14) + * CFengine3 (#601) + * Opa (PR#37) + * HTTP sessions (PR#42) + * JSON (PR#31) + * SNOBOL (PR#30) + * MoonScript (PR#43) + * ECL (PR#29) + * Urbiscript (PR#17) + * OpenEdge ABL (PR#27) + * SystemVerilog (PR#35) + * Coq (#734) + * PowerShell (#654) + * Dart (#715) + * Fantom (PR#36) + * Bro (PR#5) + * NewLISP (PR#26) + * VHDL (PR#45) + * Scilab (#740) + * Elixir (PR#57) + * Tea (PR#56) + * Kotlin (PR#58) + +- Fix Python 3 terminal highlighting with pygmentize (#691). + +- In the LaTeX formatter, escape special &, < and > chars (#648). + +- In the LaTeX formatter, fix display problems for styles with token + background colors (#670). + +- Enhancements to the Squid conf lexer (#664). + +- Several fixes to the reStructuredText lexer (#636). + +- Recognize methods in the ObjC lexer (#638). + +- Fix Lua "class" highlighting: it does not have classes (#665). + +- Fix degenerate regex in Scala lexer (#671) and highlighting bugs (#713, 708). + +- Fix number pattern order in Ocaml lexer (#647). + +- Fix generic type highlighting in ActionScript 3 (#666). + +- Fixes to the Clojure lexer (PR#9). + +- Fix degenerate regex in Nemerle lexer (#706). + +- Fix infinite looping in CoffeeScript lexer (#729). + +- Fix crashes and analysis with ObjectiveC lexer (#693, #696). + +- Add some Fortran 2003 keywords. + +- Fix Boo string regexes (#679). + +- Add "rrt" style (#727). + +- Fix infinite looping in Darcs Patch lexer. + +- Lots of misc fixes to character-eating bugs and ordering problems in many + different lexers. 
+ + +Version 1.4 +----------- +(codename Unschärfe, released Jan 03, 2011) + +- Lexers added: + + * Factor (#520) + * PostScript (#486) + * Verilog (#491) + * BlitzMax Basic (#478) + * Ioke (#465) + * Java properties, split out of the INI lexer (#445) + * Scss (#509) + * Duel/JBST + * XQuery (#617) + * Mason (#615) + * GoodData (#609) + * SSP (#473) + * Autohotkey (#417) + * Google Protocol Buffers + * Hybris (#506) + +- Do not fail in analyse_text methods (#618). + +- Performance improvements in the HTML formatter (#523). + +- With the ``noclasses`` option in the HTML formatter, some styles + present in the stylesheet were not added as inline styles. + +- Four fixes to the Lua lexer (#480, #481, #482, #497). + +- More context-sensitive Gherkin lexer with support for more i18n translations. + +- Support new OO keywords in Matlab lexer (#521). + +- Small fix in the CoffeeScript lexer (#519). + +- A bugfix for backslashes in ocaml strings (#499). + +- Fix unicode/raw docstrings in the Python lexer (#489). + +- Allow PIL to work without PIL.pth (#502). + +- Allow seconds as a unit in CSS (#496). + +- Support ``application/javascript`` as a JavaScript mime type (#504). + +- Support `Offload <https://offload.codeplay.com/>`_ C++ Extensions as + keywords in the C++ lexer (#484). + +- Escape more characters in LaTeX output (#505). + +- Update Haml/Sass lexers to version 3 (#509). + +- Small PHP lexer string escaping fix (#515). + +- Support comments before preprocessor directives, and unsigned/ + long long literals in C/C++ (#613, #616). + +- Support line continuations in the INI lexer (#494). + +- Fix lexing of Dylan string and char literals (#628). + +- Fix class/procedure name highlighting in VB.NET lexer (#624). + + +Version 1.3.1 +------------- +(bugfix release, released Mar 05, 2010) + +- The ``pygmentize`` script was missing from the distribution. 
+ + +Version 1.3 +----------- +(codename Schneeglöckchen, released Mar 01, 2010) + +- Added the ``ensurenl`` lexer option, which can be used to suppress the + automatic addition of a newline to the lexer input. + +- Lexers added: + + * Ada + * Coldfusion + * Modula-2 + * Haxe + * R console + * Objective-J + * Haml and Sass + * CoffeeScript + +- Enhanced reStructuredText highlighting. + +- Added support for PHP 5.3 namespaces in the PHP lexer. + +- Added a bash completion script for `pygmentize`, to the external/ + directory (#466). + +- Fixed a bug in `do_insertions()` used for multi-lexer languages. + +- Fixed a Ruby regex highlighting bug (#476). + +- Fixed regex highlighting bugs in Perl lexer (#258). + +- Add small enhancements to the C lexer (#467) and Bash lexer (#469). + +- Small fixes for the Tcl, Debian control file, Nginx config, + Smalltalk, Objective-C, Clojure, Lua lexers. + +- Gherkin lexer: Fixed single apostrophe bug and added new i18n keywords. + + +Version 1.2.2 +------------- +(bugfix release, released Jan 02, 2010) + +* Removed a backwards incompatibility in the LaTeX formatter that caused + Sphinx to produce invalid commands when writing LaTeX output (#463). + +* Fixed a forever-backtracking regex in the BashLexer (#462). + + +Version 1.2.1 +------------- +(bugfix release, released Jan 02, 2010) + +* Fixed mishandling of an ellipsis in place of the frames in a Python + console traceback, resulting in clobbered output. + + +Version 1.2 +----------- +(codename Neujahr, released Jan 01, 2010) + +- Dropped Python 2.3 compatibility. + +- Lexers added: + + * Asymptote + * Go + * Gherkin (Cucumber) + * CMake + * Ooc + * Coldfusion + * Haxe + * R console + +- Added options for rendering LaTeX in source code comments in the + LaTeX formatter (#461). + +- Updated the Logtalk lexer. + +- Added `line_number_start` option to image formatter (#456). + +- Added `hl_lines` and `hl_color` options to image formatter (#457). 
+ +- Fixed the HtmlFormatter's handling of noclasses=True to not output any + classes (#427). + +- Added the Monokai style (#453). + +- Fixed LLVM lexer identifier syntax and added new keywords (#442). + +- Fixed the PythonTracebackLexer to handle non-traceback data in header or + trailer, and support more partial tracebacks that start on line 2 (#437). + +- Fixed the CLexer to not highlight ternary statements as labels. + +- Fixed lexing of some Ruby quoting peculiarities (#460). + +- A few ASM lexer fixes (#450). + + +Version 1.1.1 +------------- +(bugfix release, released Sep 15, 2009) + +- Fixed the BBCode lexer (#435). + +- Added support for new Jinja2 keywords. + +- Fixed test suite failures. + +- Added Gentoo-specific suffixes to Bash lexer. + + +Version 1.1 +----------- +(codename Brillouin, released Sep 11, 2009) + +- Ported Pygments to Python 3. This needed a few changes in the way + encodings are handled; they may affect corner cases when used with + Python 2 as well. + +- Lexers added: + + * Antlr/Ragel, thanks to Ana Nelson + * (Ba)sh shell + * Erlang shell + * GLSL + * Prolog + * Evoque + * Modelica + * Rebol + * MXML + * Cython + * ABAP + * ASP.net (VB/C#) + * Vala + * Newspeak + +- Fixed the LaTeX formatter's output so that output generated for one style + can be used with the style definitions of another (#384). + +- Added "anchorlinenos" and "noclobber_cssfile" (#396) options to HTML + formatter. + +- Support multiline strings in Lua lexer. + +- Rewrite of the JavaScript lexer by Pumbaa80 to better support regular + expression literals (#403). + +- When pygmentize is asked to highlight a file for which multiple lexers + match the filename, use the analyse_text guessing engine to determine the + winner (#355). + +- Fixed minor bugs in the JavaScript lexer (#383), the Matlab lexer (#378), + the Scala lexer (#392), the INI lexer (#391), the Clojure lexer (#387) + and the AS3 lexer (#389). + +- Fixed three Perl heredoc lexing bugs (#379, #400, #422). 
+ +- Fixed a bug in the image formatter which misdetected lines (#380). + +- Fixed bugs lexing extended Ruby strings and regexes. + +- Fixed a bug when lexing git diffs. + +- Fixed a bug lexing the empty commit in the PHP lexer (#405). + +- Fixed a bug causing Python numbers to be mishighlighted as floats (#397). + +- Fixed a bug when backslashes are used in odd locations in Python (#395). + +- Fixed various bugs in Matlab and S-Plus lexers, thanks to Winston Chang (#410, + #411, #413, #414) and fmarc (#419). + +- Fixed a bug in Haskell single-line comment detection (#426). + +- Added new-style reStructuredText directive for docutils 0.5+ (#428). + + +Version 1.0 +----------- +(codename Dreiundzwanzig, released Nov 23, 2008) + +- Don't use join(splitlines()) when converting newlines to ``\n``, + because that doesn't keep all newlines at the end when the + ``stripnl`` lexer option is False. + +- Added ``-N`` option to command-line interface to get a lexer name + for a given filename. + +- Added Tango style, written by Andre Roberge for the Crunchy project. + +- Added Python3TracebackLexer and ``python3`` option to + PythonConsoleLexer. + +- Fixed a few bugs in the Haskell lexer. + +- Fixed PythonTracebackLexer to be able to recognize SyntaxError and + KeyboardInterrupt (#360). + +- Provide one formatter class per image format, so that surprises like:: + + pygmentize -f gif -o foo.gif foo.py + + creating a PNG file are avoided. + +- Actually use the `font_size` option of the image formatter. + +- Fixed numpy lexer that it doesn't listen for `*.py` any longer. + +- Fixed HTML formatter so that text options can be Unicode + strings (#371). + +- Unified Diff lexer supports the "udiff" alias now. + +- Fixed a few issues in Scala lexer (#367). + +- RubyConsoleLexer now supports simple prompt mode (#363). + +- JavascriptLexer is smarter about what constitutes a regex (#356). + +- Add Applescript lexer, thanks to Andreas Amann (#330). 
+ +- Make the codetags more strict about matching words (#368). + +- NginxConfLexer is a little more accurate on mimetypes and + variables (#370). + + +Version 0.11.1 +-------------- +(released Aug 24, 2008) + +- Fixed a Jython compatibility issue in pygments.unistring (#358). + + +Version 0.11 +------------ +(codename Straußenei, released Aug 23, 2008) + +Many thanks go to Tim Hatch for writing or integrating most of the bug +fixes and new features. + +- Lexers added: + + * Nasm-style assembly language, thanks to delroth + * YAML, thanks to Kirill Simonov + * ActionScript 3, thanks to Pierre Bourdon + * Cheetah/Spitfire templates, thanks to Matt Good + * Lighttpd config files + * Nginx config files + * Gnuplot plotting scripts + * Clojure + * POV-Ray scene files + * Sqlite3 interactive console sessions + * Scala source files, thanks to Krzysiek Goj + +- Lexers improved: + + * C lexer highlights standard library functions now and supports C99 + types. + * Bash lexer now correctly highlights heredocs without preceding + whitespace. + * Vim lexer now highlights hex colors properly and knows a couple + more keywords. + * Irc logs lexer now handles xchat's default time format (#340) and + correctly highlights lines ending in ``>``. + * Support more delimiters for perl regular expressions (#258). + * ObjectiveC lexer now supports 2.0 features. + +- Added "Visual Studio" style. + +- Updated markdown processor to Markdown 1.7. + +- Support roman/sans/mono style defs and use them in the LaTeX + formatter. + +- The RawTokenFormatter is no longer registered to ``*.raw`` and it's + documented that tokenization with this lexer may raise exceptions. + +- New option ``hl_lines`` to HTML formatter, to highlight certain + lines. + +- New option ``prestyles`` to HTML formatter. + +- New option *-g* to pygmentize, to allow lexer guessing based on + filetext (can be slowish, so file extensions are still checked + first). 
+ +- ``guess_lexer()`` now makes its decision much faster due to a cache + of whether data is xml-like (a check which is used in several + versions of ``analyse_text()``). Several lexers also have more + accurate ``analyse_text()`` now. + + +Version 0.10 +------------ +(codename Malzeug, released May 06, 2008) + +- Lexers added: + + * Io + * Smalltalk + * Darcs patches + * Tcl + * Matlab + * Matlab sessions + * FORTRAN + * XSLT + * tcsh + * NumPy + * Python 3 + * S, S-plus, R statistics languages + * Logtalk + +- In the LatexFormatter, the *commandprefix* option is now by default + 'PY' instead of 'C', since the latter resulted in several collisions + with other packages. Also, the special meaning of the *arg* + argument to ``get_style_defs()`` was removed. + +- Added ImageFormatter, to format code as PNG, JPG, GIF or BMP. + (Needs the Python Imaging Library.) + +- Support doc comments in the PHP lexer. + +- Handle format specifications in the Perl lexer. + +- Fix comment handling in the Batch lexer. + +- Add more file name extensions for the C++, INI and XML lexers. + +- Fixes in the IRC and MuPad lexers. + +- Fix function and interface name highlighting in the Java lexer. + +- Fix at-rule handling in the CSS lexer. + +- Handle KeyboardInterrupts gracefully in pygmentize. + +- Added BlackWhiteStyle. + +- Bash lexer now correctly highlights math, does not require + whitespace after semicolons, and correctly highlights boolean + operators. + +- Makefile lexer is now capable of handling BSD and GNU make syntax. + + +Version 0.9 +----------- +(codename Herbstzeitlose, released Oct 14, 2007) + +- Lexers added: + + * Erlang + * ActionScript + * Literate Haskell + * Common Lisp + * Various assembly languages + * Gettext catalogs + * Squid configuration + * Debian control files + * MySQL-style SQL + * MOOCode + +- Lexers improved: + + * Greatly improved the Haskell and OCaml lexers. + * Improved the Bash lexer's handling of nested constructs. 
+ * The C# and Java lexers exhibited abysmal performance with some + input code; this should now be fixed. + * The IRC logs lexer is now able to colorize weechat logs too. + * The Lua lexer now recognizes multi-line comments. + * Fixed bugs in the D and MiniD lexer. + +- The encoding handling of the command line mode (pygmentize) was + enhanced. You shouldn't get UnicodeErrors from it anymore if you + don't give an encoding option. + +- Added a ``-P`` option to the command line mode which can be used to + give options whose values contain commas or equals signs. + +- Added 256-color terminal formatter. + +- Added an experimental SVG formatter. + +- Added the ``lineanchors`` option to the HTML formatter, thanks to + Ian Charnas for the idea. + +- Gave the line numbers table a CSS class in the HTML formatter. + +- Added a Vim 7-like style. + + +Version 0.8.1 +------------- +(released Jun 27, 2007) + +- Fixed POD highlighting in the Ruby lexer. + +- Fixed Unicode class and namespace name highlighting in the C# lexer. + +- Fixed Unicode string prefix highlighting in the Python lexer. + +- Fixed a bug in the D and MiniD lexers. + +- Fixed the included MoinMoin parser. + + +Version 0.8 +----------- +(codename Maikäfer, released May 30, 2007) + +- Lexers added: + + * Haskell, thanks to Adam Blinkinsop + * Redcode, thanks to Adam Blinkinsop + * D, thanks to Kirk McDonald + * MuPad, thanks to Christopher Creutzig + * MiniD, thanks to Jarrett Billingsley + * Vim Script, by Tim Hatch + +- The HTML formatter now has a second line-numbers mode in which it + will just integrate the numbers in the same ``<pre>`` tag as the + code. + +- The `CSharpLexer` now is Unicode-aware, which means that it has an + option that can be set so that it correctly lexes Unicode + identifiers allowed by the C# specs. 
+ +- Added a `RaiseOnErrorTokenFilter` that raises an exception when the + lexer generates an error token, and a `VisibleWhitespaceFilter` that + converts whitespace (spaces, tabs, newlines) into visible + characters. + +- Fixed the `do_insertions()` helper function to yield correct + indices. + +- The ReST lexer now automatically highlights source code blocks in + ".. sourcecode:: language" and ".. code:: language" directive + blocks. + +- Improved the default style (thanks to Tiberius Teng). The old + default is still available as the "emacs" style (which was an alias + before). + +- The `get_style_defs` method of HTML formatters now uses the + `cssclass` option as the default selector if it was given. + +- Improved the ReST and Bash lexers a bit. + +- Fixed a few bugs in the Makefile and Bash lexers, thanks to Tim + Hatch. + +- Fixed a bug in the command line code that disallowed ``-O`` options + when using the ``-S`` option. + +- Fixed a bug in the `RawTokenFormatter`. + + +Version 0.7.1 +------------- +(released Feb 15, 2007) + +- Fixed little highlighting bugs in the Python, Java, Scheme and + Apache Config lexers. + +- Updated the included manpage. + +- Included a built version of the documentation in the source tarball. + + +Version 0.7 +----------- +(codename Faschingskrapfn, released Feb 14, 2007) + +- Added a MoinMoin parser that uses Pygments. With it, you get + Pygments highlighting in Moin Wiki pages. + +- Changed the exception raised if no suitable lexer, formatter etc. is + found in one of the `get_*_by_*` functions to a custom exception, + `pygments.util.ClassNotFound`. It is, however, a subclass of + `ValueError` in order to retain backwards compatibility. + +- Added a `-H` command line option which can be used to get the + docstring of a lexer, formatter or filter. + +- Made the handling of lexers and formatters more consistent. The + aliases and filename patterns of formatters are now attributes on + them. 
+ +- Added an OCaml lexer, thanks to Adam Blinkinsop. + +- Made the HTML formatter more flexible, and easily subclassable in + order to make it easy to implement custom wrappers, e.g. alternate + line number markup. See the documentation. + +- Added an `outencoding` option to all formatters, making it possible + to override the `encoding` (which is used by lexers and formatters) + when using the command line interface. Also, if using the terminal + formatter and the output file is a terminal and has an encoding + attribute, use it if no encoding is given. + +- Made it possible to just drop style modules into the `styles` + subpackage of the Pygments installation. + +- Added a "state" keyword argument to the `using` helper. + +- Added a `commandprefix` option to the `LatexFormatter` which allows + to control how the command names are constructed. + +- Added quite a few new lexers, thanks to Tim Hatch: + + * Java Server Pages + * Windows batch files + * Trac Wiki markup + * Python tracebacks + * ReStructuredText + * Dylan + * and the Befunge esoteric programming language (yay!) + +- Added Mako lexers by Ben Bangert. + +- Added "fruity" style, another dark background originally vim-based + theme. + +- Added sources.list lexer by Dennis Kaarsemaker. + +- Added token stream filters, and a pygmentize option to use them. + +- Changed behavior of `in` Operator for tokens. + +- Added mimetypes for all lexers. + +- Fixed some problems lexing Python strings. + +- Fixed tickets: #167, #178, #179, #180, #185, #201. + + +Version 0.6 +----------- +(codename Zimtstern, released Dec 20, 2006) + +- Added option for the HTML formatter to write the CSS to an external + file in "full document" mode. + +- Added RTF formatter. + +- Added Bash and Apache configuration lexers (thanks to Tim Hatch). + +- Improved guessing methods for various lexers. + +- Added `@media` support to CSS lexer (thanks to Tim Hatch). + +- Added a Groff lexer (thanks to Tim Hatch). + +- License change to BSD. 
+ +- Added lexers for the Myghty template language. + +- Added a Scheme lexer (thanks to Marek Kubica). + +- Added some functions to iterate over existing lexers, formatters and + filters. + +- The HtmlFormatter's `get_style_defs()` can now take a list as an + argument to generate CSS with multiple prefixes. + +- Support for guessing input encoding added. + +- Encoding support added: all processing is now done with Unicode + strings, input and output are converted from and optionally to byte + strings (see the ``encoding`` option of lexers and formatters). + +- Some improvements in the C(++) lexers handling comments and line + continuations. + + +Version 0.5.1 +------------- +(released Oct 30, 2006) + +- Fixed traceback in ``pygmentize -L`` (thanks to Piotr Ozarowski). + + +Version 0.5 +----------- +(codename PyKleur, released Oct 30, 2006) + +- Initial public release. diff --git a/Contributing.md b/Contributing.md new file mode 100644 index 0000000..93da428 --- /dev/null +++ b/Contributing.md @@ -0,0 +1,167 @@ +Licensing +========= + +The code is distributed under the BSD 2-clause license. Contributors making pull +requests must agree that they are able and willing to put their contributions +under that license. + +Goals & non-goals of Pygments +============================= + +Python support +-------------- + +Pygments supports all supported Python versions as per the [Python Developer's Guide](https://devguide.python.org/versions/). Additionally, the default Python version of the latest stable version of RHEL, Ubuntu LTS, and Debian are supported, even if they're officially EOL. Supporting other end-of-life versions is a non-goal of Pygments. + +Validation +---------- + +Pygments does not attempt to validate the input. Accepting code that is not legal for a given language is acceptable if it simplifies the codebase and does not result in surprising behavior. 
For instance, in C89, accepting `//` based comments would be fine because de-facto all compilers supported it, and having a separate lexer for it would not be worth it. + +Contribution checklist +====================== + +* Check the documentation for how to write + [a new lexer](https://pygments.org/docs/lexerdevelopment/), + [a new formatter](https://pygments.org/docs/formatterdevelopment/) or + [a new filter](https://pygments.org/docs/filterdevelopment/) + +* Make sure to add a test for your new functionality, and where applicable, + write documentation. + +* When writing rules, try to merge simple rules. For instance, combine: + + ```python + _PUNCTUATION = [ + (r"\(", token.Punctuation), + (r"\)", token.Punctuation), + (r"\[", token.Punctuation), + (r"\]", token.Punctuation), + ("{", token.Punctuation), + ("}", token.Punctuation), + ] + ``` + + into: + + ```python + (r"[\(\)\[\]{}]", token.Punctuation) + ``` + +* Be careful with ``.*``. This matches greedily as much as it can. For instance, + a rule like ``@.*@`` will match the whole string ``@first@ second @third@``, + instead of matching ``@first@`` and ``@second@``. You can use ``@.*?@`` in + this case to stop early. The ``?`` tries to match _as few times_ as possible. + +* Beware of so-called "catastrophic backtracking". As a first example, consider + the regular expression ``(A+)*B``. This is equivalent to ``A*B`` regarding + what it matches, but *non*-matches will take very long. This is because + of the way the regular expression engine works. Suppose you feed it 50 + 'A's, and a 'C' at the end. It first matches the 'A's greedily in ``A+``, + but finds that it cannot match the end since 'B' is not the same as 'C'. + Then it backtracks, removing one 'A' from the first ``A+`` and trying to + match the rest as another ``(A+)*``. This fails again, so it backtracks + further left in the input string, etc. 
In effect, it tries all combinations + + ``` + (AAAAAAAAAAAAAAAAA) + (AAAAAAAAAAAAAAAA)(A) + (AAAAAAAAAAAAAAA)(AA) + (AAAAAAAAAAAAAAA)(A)(A) + (AAAAAAAAAAAAAA)(AAA) + (AAAAAAAAAAAAAA)(AA)(A) + ... + ``` + + Thus, the matching has exponential complexity. In a lexer, the + effect is that Pygments will seemingly hang when parsing invalid + input. + + ```python + >>> import re + >>> re.match('(A+)*B', 'A'*50 + 'C') # hangs + ``` + + As a more subtle and real-life example, here is a badly written + regular expression to match strings: + + ```python + r'"(\\?.)*?"' + ``` + + If the ending quote is missing, the regular expression engine will + find that it cannot match at the end, and try to backtrack with less + matches in the ``*?``. When it finds a backslash, as it has already + tried the possibility ``\\.``, it tries ``.`` (recognizing it as a + simple character without meaning), which leads to the same + exponential backtracking problem if there are lots of backslashes in + the (invalid) input string. A good way to write this would be + ``r'"([^\\]|\\.)*?"'``, where the inner group can only match in one + way. Better yet is to use a dedicated state, which not only + sidesteps the issue without headaches, but allows you to highlight + string escapes. + + ```python + 'root': [ + ..., + (r'"', String, 'string'), + ... + ], + 'string': [ + (r'\\.', String.Escape), + (r'"', String, '#pop'), + (r'[^\\"]+', String), + ] + ``` + +* When writing rules for patterns such as comments or strings, match as many + characters as possible in each token. This is an example of what not to + do: + + ```python + 'comment': [ + (r'\*/', Comment.Multiline, '#pop'), + (r'.', Comment.Multiline), + ] + ``` + + This generates one token per character in the comment, which slows + down the lexing process, and also makes the raw token output (and in + particular the test output) hard to read. 
Do this instead: + + ```python + 'comment': [ + (r'\*/', Comment.Multiline, '#pop'), + (r'[^*]+', Comment.Multiline), + (r'\*', Comment.Multiline), + ] + ``` + +* Don't add imports of your lexer anywhere in the codebase. (In case you're + curious about ``compiled.py`` -- this file exists for backwards compatibility + reasons.) + +* Use the standard importing convention: ``from token import Punctuation`` + +* For test cases that assert on the tokens produced by a lexer, use tools: + + * You can use the ``testcase`` formatter to produce a piece of code that + can be pasted into a unittest file: + ``python -m pygments -l lua -f testcase <<< "local a = 5"`` + + * Most snippets should instead be put as a sample file under + ``tests/snippets/<lexer_alias>/*.txt``. These files are automatically + picked up as individual tests, asserting that the input produces the + expected tokens. + + To add a new test, create a file with just your code snippet under a + subdirectory based on your lexer's main alias. Then run + ``pytest --update-goldens <filename.txt>`` to auto-populate the currently + expected tokens. Check that they look good and check in the file. + + Also run the same command whenever you need to update the test if the + actual produced tokens change (assuming the change is expected). + + * Large test files should go in ``tests/examplefiles``. This works + similar to ``snippets``, but the token output is stored in a separate + file. Output can also be regenerated with ``--update-goldens``. @@ -0,0 +1,25 @@ +Copyright (c) 2006-2022 by the respective authors (see AUTHORS file). +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ +* Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..c6a8567 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,5 @@ +include Makefile CHANGES LICENSE AUTHORS +include external/* +recursive-include tests * +recursive-include doc * +recursive-include scripts * diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..e674732 --- /dev/null +++ b/Makefile @@ -0,0 +1,68 @@ +# +# Makefile for Pygments +# ~~~~~~~~~~~~~~~~~~~~~ +# +# Combines scripts for common tasks. +# +# :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS. +# :license: BSD, see LICENSE for details. 
+# + +PYTHON ?= python3 + +export PYTHONPATH = $(shell echo "$$PYTHONPATH"):$(shell python -c 'import os; print ":".join(os.path.abspath(line.strip()) for line in file("PYTHONPATH"))' 2>/dev/null) + +.PHONY: all check clean clean-pyc docs mapfiles \ + pylint reindent test test-coverage \ + tox-test tox-test-coverage regexlint + +all: clean-pyc check test + +check: + @$(PYTHON) scripts/check_crlf.py pygments build external + @$(PYTHON) scripts/detect_missing_analyse_text.py --skip-no-aliases + @pyflakes pygments | grep -v 'but unused' || true + @$(PYTHON) scripts/check_sources.py -i build -i dist -i pygments/lexers/_mapping.py \ + -i docs/build -i pygments/formatters/_mapping.py -i pygments/unistring.py \ + -i tests/support/empty.py + @$(PYTHON) scripts/count_token_references.py --minfiles=1 --maxfiles=1 \ + --minlines=1 --maxlines=3 --subtoken + +clean: clean-pyc + -rm -rf doc/_build build Pygments.egg-info + -rm -f codetags.html + +clean-pyc: + find . -name '__pycache__' -exec rm -rf {} + + +docs: + make -C doc html + +mapfiles: + $(PYTHON) scripts/gen_mapfiles.py + +pylint: + @pylint --rcfile scripts/pylintrc pygments + +reindent: + @$(PYTHON) scripts/reindent.py -r -B . + +TEST = tests + +test: + @$(PYTHON) -m pytest $(TEST) + +test-coverage: + @$(PYTHON) -m pytest --cov --cov-report=html --cov-report=term $(TEST) + +tox-test: + @tox -- $(TEST) + +tox-test-coverage: + @tox -- --with-coverage --cover-package=pygments --cover-erase $(TEST) + +RLMODULES = pygments.lexers + +regexlint: + @if [ -z "$(REGEXLINT)" ]; then echo "Please set REGEXLINT=checkout path"; exit 1; fi + PYTHONPATH=`pwd`:$(REGEXLINT) $(PYTHON) $(REGEXLINT)/regexlint/cmdline.py $(RLMODULES) diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..204e46b --- /dev/null +++ b/README.rst @@ -0,0 +1,93 @@ +Welcome to Pygments +=================== + +This is the source of Pygments. 
It is a **generic syntax highlighter** written +in Python that supports over 500 languages and text formats, for use in code +hosting, forums, wikis or other applications that need to prettify source code. + +Installing +---------- + +... works as usual, use ``pip install Pygments`` to get published versions, +or ``python setup.py install`` to install from a checkout. + +Documentation +------------- + +... can be found online at https://pygments.org/ or created with Sphinx by :: + + make docs + +By default, the documentation does not include the demo page, as it requires +having Docker installed for building Pyodide. To build the documentation with +the demo page, use :: + + WEBSITE_BUILD=1 make docs + +The initial build might take some time, but subsequent ones should be instant +because of Docker caching. + +To view the generated documentation, serve it using Python's ``http.server`` +module (this step is required for the demo to work) :: + + python3 -m http.server --directory doc/_build/html + + +Development +----------- + +... takes place on `GitHub <https://github.com/pygments/pygments>`_, where the +Git repository, tickets and pull requests can be viewed. + +Continuous testing runs on GitHub workflows: + +.. image:: https://github.com/pygments/pygments/workflows/Pygments/badge.svg + :target: https://github.com/pygments/pygments/actions?query=workflow%3APygments + +Contribution guidelines are found in Contributing.md_. + +.. _Contributing.md: https://github.com/pygments/pygments/blob/master/Contributing.md + +Security considerations +----------------------- + +Pygments provides no guarantees on execution time, which needs to be taken +into consideration when using Pygments to process arbitrary user inputs. For +example, if you have a web service which uses Pygments for highlighting, there +may be inputs which will cause the Pygments process to run "forever" and/or use +significant amounts of memory. 
This can subsequently be used to perform a +remote denial-of-service attack on the server if the processes are not +terminated quickly. + +Unfortunately, it's practically impossible to harden Pygments itself against +those issues: Some regular expressions can result in "catastrophic +backtracking", but other bugs like incorrect matchers can also +cause similar problems, and there is no way to find them in an automated fashion +(short of solving the halting problem.) Pygments has extensive unit tests, +automated randomized testing, and is also tested by `OSS-Fuzz <https://github.com/google/oss-fuzz/tree/master/projects/pygments>`_, +but we will never be able to eliminate all bugs in this area. + +Our recommendations are: + +* Ensure that the Pygments process is *terminated* after a reasonably short + timeout. In general Pygments should take seconds at most for reasonably-sized + input. +* *Limit* the number of concurrent Pygments processes to avoid oversubscription + of resources. + +The Pygments authors will treat any bug resulting in long processing times with +high priority -- it's one of those things that will be fixed in a patch release. +When reporting a bug where you suspect super-linear execution times, please make +sure to attach an input to reproduce it. + +The authors +----------- + +Pygments is maintained by **Georg Brandl**, e-mail address *georg*\ *@*\ *python.org*, **Matthäus Chajdas** and **Jean Abou-Samra**. + +Many lexers and fixes have been contributed by **Armin Ronacher**, the rest of +the `Pocoo <https://www.pocoo.org/>`_ team and **Tim Hatch**. + +The code is distributed under the BSD 2-clause license. Contributors making pull +requests must agree that they are able and willing to put their contributions +under that license. 
diff --git a/description.rst b/description.rst new file mode 100644 index 0000000..de34cf1 --- /dev/null +++ b/description.rst @@ -0,0 +1,18 @@ +Pygments +~~~~~~~~ + +Pygments is a syntax highlighting package written in Python. + +It is a generic syntax highlighter suitable for use in code hosting, forums, +wikis or other applications that need to prettify source code. Highlights +are: + +* a wide range of over 500 languages and other text formats is supported +* special attention is paid to details, increasing quality by a fair amount +* support for new languages and formats are added easily +* a number of output formats, presently HTML, LaTeX, RTF, SVG, all image + formats that PIL supports and ANSI sequences +* it is usable as a command-line tool and as a library + +Copyright 2006-2022 by the Pygments team, see ``AUTHORS``. +Licensed under the BSD, see ``LICENSE`` for details.
\ No newline at end of file diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 0000000..a0dcaaa --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,163 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = PYTHONPATH=.. sphinx-build +PAPER = +BUILDDIR = _build + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . + +.PHONY: help clean pyodide html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext + +help: + @echo "Please use \`make <target>' where <target> is one of" + @echo " pyodide to make Pyodide with currently checked out Pygments" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO message catalogs" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation 
(if enabled)" + +clean: + -rm -rf $(BUILDDIR)/* + +pyodide: + $(if $(test ! -f docker), $(error "Could not find Docker. Please install that before continuing.")) + # Enable the BuildKit backend to use the --output option. + DOCKER_BUILDKIT=1 docker build --file pyodide/Dockerfile --output $(BUILDDIR)/pyodide/pyodide .. + @echo + @echo "Pyodide build finished. The Pyodide artifacts are in $(BUILDDIR)/pyodide." + +html: + $(if $(WEBSITE_BUILD), $(MAKE) pyodide) + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +dirhtml: + $(if $(WEBSITE_BUILD), $(MAKE) pyodide) + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." + +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." + +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Pygments.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Pygments.qhc" + +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." 
+ @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/Pygments" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Pygments" + @echo "# devhelp" + +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." + +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +info: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + make -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." 
+ +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." diff --git a/doc/_static/demo-worker.js b/doc/_static/demo-worker.js new file mode 100644 index 0000000..22b8b3d --- /dev/null +++ b/doc/_static/demo-worker.js @@ -0,0 +1,74 @@ +importScripts('/_static/pyodide/pyodide.js'); + +async function loadPyodideAndPygments() { + self.pyodide = await loadPyodide(); + await self.pyodide.loadPackage(["Pygments"]); + const styles = self.pyodide.runPython(` + from pygments.formatters.html import HtmlFormatter + from pygments.styles import STYLE_MAP + {s: HtmlFormatter(style=s).get_style_defs('.demo-highlight') for s in STYLE_MAP} + `).toJs(); + self.postMessage({loaded: {styles}}) +} +let pyodideReadyPromise = loadPyodideAndPygments(); + +self.onmessage = async (event) => { + // Make sure loading is done. 
+ await pyodideReadyPromise; + if (event.data.highlight) { + self.pyodide.globals.set('code', event.data.highlight.code); + self.pyodide.globals.set('lexer_name', event.data.highlight.lexer); + + self.pyodide.runPython(` + import pygments.lexers + + lexer = pygments.lexers.get_lexer_by_name(lexer_name) + if type(code) == memoryview: + code = bytes(code) + tokens = lexer.get_tokens(code) + `); + + const formatter = event.data.highlight.formatter; + if (formatter == 'html') { + + const html = self.pyodide.runPython(` + import io + from pygments.formatters.html import HtmlFormatter + + fmter = HtmlFormatter(cssclass='demo-highlight') + buf = io.StringIO() + fmter.format(tokens, buf) + buf.getvalue() + `); + self.postMessage({html}); + } else if (formatter == 'tokens') { + const tokens = self.pyodide.runPython('list(tokens)').toJs(); + self.postMessage({tokens}); + } else { + console.warn('unknown formatter:', formatter); + } + } else if (event.data.guess_lexer) { + self.pyodide.globals.set('code', event.data.guess_lexer.code); + self.pyodide.globals.set('filename', event.data.guess_lexer.filename); + const lexer = self.pyodide.runPython(` + import sys + sys.setrecursionlimit(1000) + # TODO: remove after upgrading to Pyodide 0.19 + + import pygments.lexers + import pygments.util + + if type(code) == memoryview: + code = bytes(code) + + if filename: + lexer = pygments.lexers.guess_lexer_for_filename(filename, code) + else: + lexer = pygments.lexers.guess_lexer(code) + lexer.aliases[0] + `); + self.postMessage({lexer}); + } else { + console.warn('unknown command: expected highlight or guess_lexer but received ', event.data); + } +} diff --git a/doc/_static/demo.css b/doc/_static/demo.css new file mode 100644 index 0000000..eaa4410 --- /dev/null +++ b/doc/_static/demo.css @@ -0,0 +1,89 @@ +#try { + background-color: #f6f6f6; + border-radius: 0; + border: 1px solid #ccc; + margin-top: 15px; + margin-bottom: 10px; + padding: 10px 15px 5px 10px; + position: relative; +} + 
+#try h2 { + margin-top: 0; +} + +#try textarea { + border: 1px solid #999; + padding: 2px; + width: 100%; + min-height: 150px; + resize: vertical; +} + +#hlcode { + margin: 10px 0; + max-height: 500px; + overflow: auto; + border: 1px solid #ccc; +} + +#hlcode:empty { + display: none; +} + +#hlcode pre { + background-color: transparent; + border: 0; + margin: 0; +} +#hlcode table { + /* unset negative margin from pygments14.css */ + margin: unset; +} + +#code-header:not([hidden]) { + display: flex; + gap: 1em; + padding: 0 15px; +} +.flex-grow-1 { + flex-grow: 1; +} +#lexer { + margin-right: 0.5em; +} +#guessed-lexer:not(:empty):before { + content: '(guessed '; +} +#guessed-lexer:not(:empty):after { + content: ')'; +} + +#loading[hidden] { + visibility: hidden; + display: flex; +} + +#loading { + display: flex; + align-items: center; + gap: 1em; +} + +#format-settings { + display: flex; + gap: 1em; + border-top: 1px solid #ccc; + padding-top: 0.5em; + margin-top: 0.5em; +} + +.tokens code { + /* make whitespace visible */ + white-space: pre; + background: #d9d9d9; +} + +#contrast-warning { + color: darkred; +} diff --git a/doc/_static/demo.js b/doc/_static/demo.js new file mode 100644 index 0000000..b193d10 --- /dev/null +++ b/doc/_static/demo.js @@ -0,0 +1,200 @@ +const loadingDiv = document.getElementById("loading"); +const langSelect = document.getElementById("lang"); +const styleSelect = document.getElementById("style"); +const formatterSelect = document.getElementById("formatter"); +const outputDiv = document.getElementById("hlcode"); +const codeHeader = document.getElementById("code-header"); +const copyLink = document.getElementById("copylink"); +const style = document.getElementById("css-style"); +const textarea = document.getElementById("code"); +const uriTooLongMsg = document.getElementById('uri-too-long'); +const contrastWarning = document.getElementById('contrast-warning'); +const fileInput = document.getElementById("file"); +const fileInputResetButton 
= document.getElementById('reset-file'); + +const qvars = Object.fromEntries(new URLSearchParams(window.location.search)); +if (qvars.lexer) { + langSelect.value = qvars.lexer; +} +if (qvars.code !== undefined) { + textarea.value = qvars.code; + loadingDiv.hidden = false; +} +if (qvars.style !== undefined) { + styleSelect.value = qvars.style; + updateContrastWarning(); +} +if (qvars.formatter !== undefined) { + formatterSelect.value = qvars.formatter; +} + +styleSelect.addEventListener('change', () => { + if (!styles) + // Worker has not loaded yet. + return; + style.textContent = styles.get(styleSelect.value); + updateCopyLink(); + updateContrastWarning(); +}); + +function updateContrastWarning() { + contrastWarning.hidden = styleSelect.selectedOptions[0].dataset.wcag == 'aa'; +} + +function debounce(func, timeout) { + let timer; + return (...args) => { + clearTimeout(timer); + timer = setTimeout(() => func.apply(this, args), timeout); + }; +} + +const highlightShortDebounce = debounce(highlight, 50); +const highlightLongDebounce = debounce(highlight, 500); + +function debouncedUpdate() { + if (fileInput.files.length > 0) + return; + + if (textarea.value.length < 1000) { + highlightShortDebounce(); + } else { + highlightLongDebounce(); + } +} + +langSelect.addEventListener('change', debouncedUpdate); +textarea.addEventListener('input', debouncedUpdate); +formatterSelect.addEventListener('change', debouncedUpdate); +fileInput.addEventListener('change', () => { + fileInputResetButton.hidden = false; + highlight(); +}); +fileInputResetButton.hidden = fileInput.files.length == 0; +fileInputResetButton.addEventListener('click', () => { + fileInputResetButton.hidden = true; + fileInput.value = ''; + highlight(); +}); + +let styles; + +const highlightWorker = new Worker("/_static/demo-worker.js"); +highlightWorker.onmessage = (msg) => { + if (msg.data.loaded) { + styles = msg.data.loaded.styles; + + if (qvars.code !== undefined || textarea.value) { + loadingDiv.hidden = 
true; + highlight(); + } + } else if (msg.data.html) { + outputDiv.innerHTML = msg.data.html; + codeHeader.hidden = false; + loadingDiv.hidden = true; + style.textContent = styles.get(styleSelect.value); + } else if (msg.data.tokens) { + const table = document.createElement('table'); + table.className = 'tokens'; + for (const [tokenType, value] of msg.data.tokens) { + const tr = document.createElement('tr'); + const td1 = document.createElement('td'); + td1.textContent = tokenType.join('.'); + const td2 = document.createElement('td'); + const inlineCode = document.createElement('code'); + inlineCode.textContent = value; + td2.appendChild(inlineCode); + tr.appendChild(td1); + tr.appendChild(td2); + table.appendChild(tr); + } + outputDiv.innerHTML = ''; + outputDiv.appendChild(table); + + codeHeader.hidden = false; + loadingDiv.hidden = true; + } else if (msg.data.lexer) { + highlight(msg.data.lexer); + } else { + console.warn('unexpected message from highlight worker', msg); + } +}; + +function updateCopyLink() { + var url = document.location.origin + document.location.pathname + + "?" 
+ new URLSearchParams({ + lexer: langSelect.value, + style: styleSelect.value, + formatter: formatterSelect.value, + code: textarea.value, + }).toString() + if (url.length > 8201) { + // pygments.org is hosted on GitHub pages which does not support URIs longer than 8201 + copyLink.hidden = true; + uriTooLongMsg.hidden = false; + } else { + copyLink.href = url; + copyLink.textContent = 'Copy link'; + copyLink.hidden = false; + uriTooLongMsg.hidden = true; + } +} + +async function highlight(guessedLexer) { + var lexer = langSelect.value || guessedLexer; + var file = fileInput.files[0]; + + let code; + if (file) { + code = await file.arrayBuffer(); + } else { + code = textarea.value; + } + + loadingDiv.hidden = false; + + if (!lexer) { + const guess_lexer = {code}; + if (file) + guess_lexer.filename = file.name; + highlightWorker.postMessage({guess_lexer}); + document.getElementById('loading-text').textContent = 'guessing lexer...'; + return; + } + + document.getElementById('loading-text').textContent = 'highlighting code...'; + + document.getElementById('guessed-lexer').textContent = guessedLexer; + + highlightWorker.postMessage({highlight: {code, lexer, formatter: formatterSelect.value}}); + + if (code instanceof ArrayBuffer) { + copyLink.hidden = true; + uriTooLongMsg.hidden = true; + } else { + updateCopyLink(); + } +} + +copyLink.addEventListener('click', async (e) => { + e.preventDefault(); + await navigator.clipboard.writeText(e.target.href); +}); + +function download_code() { + var filename = "highlighted.html"; + var hlcode = document.getElementById("hlcode").innerHTML + style.outerHTML; + var blob = new Blob([hlcode], {type: 'text/html'}); + if (window.navigator.msSaveOrOpenBlob) { + window.navigator.msSaveBlob(blob, filename); + } + else{ + var elem = window.document.createElement('a'); + elem.href = window.URL.createObjectURL(blob); + elem.download = filename; + document.body.appendChild(elem); + elem.click(); + document.body.removeChild(elem); + 
window.URL.revokeObjectURL(elem.href); + } +} diff --git a/doc/_static/favicon.ico b/doc/_static/favicon.ico Binary files differnew file mode 100644 index 0000000..777f617 --- /dev/null +++ b/doc/_static/favicon.ico diff --git a/doc/_static/github.png b/doc/_static/github.png Binary files differnew file mode 100644 index 0000000..5d146ad --- /dev/null +++ b/doc/_static/github.png diff --git a/doc/_static/logo_new.png b/doc/_static/logo_new.png Binary files differnew file mode 100644 index 0000000..0ae4b20 --- /dev/null +++ b/doc/_static/logo_new.png diff --git a/doc/_static/logo_only.png b/doc/_static/logo_only.png Binary files differnew file mode 100644 index 0000000..fdebcc4 --- /dev/null +++ b/doc/_static/logo_only.png diff --git a/doc/_static/spinner.gif b/doc/_static/spinner.gif Binary files differnew file mode 100644 index 0000000..2212db9 --- /dev/null +++ b/doc/_static/spinner.gif diff --git a/doc/_templates/demo.html b/doc/_templates/demo.html new file mode 100644 index 0000000..8e2a7c6 --- /dev/null +++ b/doc/_templates/demo.html @@ -0,0 +1,97 @@ +{% extends "layout.html" %} +{% set sidebars = sidebars + ["demo_sidebar.html"] %} + +{% block extrahead %} +{{ super() }} +<link rel="stylesheet" type="text/css" href="{{ pathto("_static/demo.css", 1) }}"> +{% endblock %} + +{% block htmltitle %}<title>Demo{{ titlesuffix }}</title>{% endblock %} + +{% block body %} +{{ body }} + +<h1>Try out Pygments!</h1> + +<noscript> + <h2>This website requires JavaScript (and WebAssembly)</h2> + + You can also try out pygments locally by running <code>pip install pygments</code>. + Then you can use <a href="{{pathto('docs/cmdline')}}">the command-line interface</a>. 
+</noscript> + +<div id="try"> + <p> + <label>Language + <select id="lang" autofocus> + <option value="">guess the language</option> + {% for name, info, _, _ in lexers %} + <option value="{{info.0}}">{{name}}</option> + {% endfor %} + </select> + </label> + <span id=guessed-lexer></span> + </p> + <p> + <label> + Enter some code: + <textarea id="code" rows="1" cols="60" spellcheck="false"></textarea> + </label> + </p> + <p> + <label> + Alternatively you can upload a file: + <input type="file" id="file"> + </label> + <button id="reset-file">Reset</button> + </p> + <div id="format-settings"> + <label> + Formatter + <select id=formatter> + <option value=html>HTML</option> + <option value=tokens>tokens</option> + </select> + </label> + <label>Style + <select id="style"> + <optgroup label="Good contrast"> + {% for style in styles_aa %} + <option data-wcag=aa>{{style.name}}</option> + {% endfor %} + </optgroup> + <optgroup label="Suboptimal contrast"> + {% for style in styles_sub_aa %} + <option>{{style.name}}</option> + {% endfor %} + </optgroup> + </select> + </label> + <span id=contrast-warning hidden>style may have poor contrast</span> + </div> + </form> +</div> + +<div id="loading" hidden> + <img src="{{ pathto("_static/spinner.gif", 1) }}" width="20"> + <span id="loading-text">loading Python...</span> +</div> + +<style id=css-style></style> + +<div id="hlcode"></div> + +<div id="code-header" hidden> + <div class=flex-grow-1></div> + <button onclick="download_code()">Download</button> + <a id="copylink" role="button">Copy link</a> + <span hidden id="uri-too-long">(Copy link unavailable because code too long)</span> +</div> + +<p>The highlighting here is performed in-browser using + a WebAssembly translation of the latest Pygments master branch, courtesy of + <a href="https://github.com/iodide-project/pyodide">Pyodide</a>.</p> +<p>Your content is neither sent over the web nor stored anywhere.</p> + +<script type="text/javascript" src="{{ pathto("_static/demo.js", 1) 
}}"></script> +{% endblock %} diff --git a/doc/_templates/demo_sidebar.html b/doc/_templates/demo_sidebar.html new file mode 100644 index 0000000..3f2a86c --- /dev/null +++ b/doc/_templates/demo_sidebar.html @@ -0,0 +1 @@ +<p><a href="#try">Back to top</a></p> diff --git a/doc/_templates/docssidebar.html b/doc/_templates/docssidebar.html new file mode 100644 index 0000000..913acaa --- /dev/null +++ b/doc/_templates/docssidebar.html @@ -0,0 +1,3 @@ +{% if pagename != 'docs/index' %} +<strong>« <a href="{{ pathto('docs/index') }}">Back to docs index</a></strong> +{% endif %} diff --git a/doc/_templates/index_with_try.html b/doc/_templates/index_with_try.html new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/doc/_templates/index_with_try.html diff --git a/doc/_templates/indexsidebar.html b/doc/_templates/indexsidebar.html new file mode 100644 index 0000000..5aa5019 --- /dev/null +++ b/doc/_templates/indexsidebar.html @@ -0,0 +1,18 @@ +<section> +<h3>Download</h3> +<p>Current version: <b>{{ version }}</b><br><a href="{{ pathto('docs/changelog') }}">Changelog</a></p> +<p>Get Pygments from the <a href="https://pypi.python.org/pypi/Pygments">Python Package + Index</a>, or install it with:</p> +<pre>pip install Pygments</pre> +</section> +<section> +<h3>Questions? 
Suggestions?</h3> + +<p><img src="{{ pathto("_static/github.png", 1) }}" width="24" /> + Clone at <a href="https://github.com/pygments/pygments">GitHub</a>.</p> +<p>You can also open an issue at the + <a href="https://github.com/pygments/pygments/issues">tracker</a>.</p> +</section> + +<p class="logo">A <a href="https://www.pocoo.org/"> + <img src="{{ pathto("_static/pocoo.png", 1) }}" /></a> project</a></p> diff --git a/doc/_templates/styles.html b/doc/_templates/styles.html new file mode 100644 index 0000000..137fa24 --- /dev/null +++ b/doc/_templates/styles.html @@ -0,0 +1,55 @@ +{% extends "layout.html" %} + +{% block htmltitle %}<title>Styles{{ titlesuffix }}</title>{% endblock %} + +{% block body %} +<style> +.style-gallery { + display: flex; + flex-wrap: wrap; + justify-content: space-around; +} +h2 { + margin-top: 2em; +} +.style-gallery h3 { + margin-bottom: 0.1em; +} +.style-gallery pre { + background-color: inherit; +} +</style> +{{ body }} + +<h1>Styles</h1> + +<p>Pygments comes with the following builtin styles. +For more information about styles refer to <a href="{{ pathto('docs/styles') }}">the documentation</a>. +</p> + +<div class=style-gallery> +{% for style in styles_aa %} + <div> + <h3 id="{{style.name}}">{{style.name}}</h3> + {{style.html|safe}} + </div> +{% endfor %} +</div> + +<h2>Styles with a lower contrast</h2> +<p> +The following styles do not meet the <a href="https://www.w3.org/WAI/WCAG21/Understanding/contrast-minimum.html">WCAG 2.1 AA contrast minimum</a>, +so they might be difficult to read for people with suboptimal vision. +If you want your highlighted code to be well readable for other people, you +should use one of the earlier styles instead. 
+</p> +<div class=style-gallery> + {% for style in styles_sub_aa %} + <div> + <h3 id="{{style.name}}">{{style.name}}</h3> + {{style.html|safe}} + </div> + {% endfor %} +</div> + +{% endblock %} diff --git a/doc/_themes/pygments14/layout.html b/doc/_themes/pygments14/layout.html new file mode 100644 index 0000000..34e86ef --- /dev/null +++ b/doc/_themes/pygments14/layout.html @@ -0,0 +1,101 @@ +{# + sphinxdoc/layout.html + ~~~~~~~~~~~~~~~~~~~~~ + + Sphinx layout template for the sphinxdoc theme. + + :copyright: Copyright 2007-2013 by the Sphinx team, see AUTHORS. + :license: BSD, see LICENSE for details. +#} +{%- extends "basic/layout.html" %} + +{# put the sidebar before the body #} +{% block sidebar1 %}{{ sidebar() }}{% endblock %} +{% block sidebar2 %}{% endblock %} + +{% block relbar1 %}{% endblock %} +{% block relbar2 %}{% endblock %} + +{% block extrahead %} + <link href='https://fonts.googleapis.com/css?family={{ theme_font|replace(' ', '+') }}:300,400,700' + rel='stylesheet' type='text/css'> +{{ super() }} +{%- if not embedded %} + <style type="text/css"> + table.right { float: right; margin-left: 20px; } + table.right td { border: 1px solid #ccc; } + {% if pagename == 'index' %} + .related { display: none; } + {% endif %} + </style> + <script type="text/javascript"> + // intelligent scrolling of the sidebar content + $(window).scroll(function() { + var sb = $('.sphinxsidebarwrapper'); + var win = $(window); + var sbh = sb.height(); + var offset = $('.sphinxsidebar').position()['top']; + var wintop = win.scrollTop(); + var winbot = wintop + win.innerHeight(); + var curtop = sb.position()['top']; + var curbot = curtop + sbh; + // does sidebar fit in window? 
+ if (sbh < win.innerHeight()) { + // yes: easy case -- always keep at the top + sb.css('top', $u.min([$u.max([0, wintop - offset - 10]), + $(document).height() - sbh - 200])); + } else { + // no: only scroll if top/bottom edge of sidebar is at + // top/bottom edge of window + if (curtop > wintop && curbot > winbot) { + sb.css('top', $u.max([wintop - offset - 10, 0])); + } else if (curtop < wintop && curbot < winbot) { + sb.css('top', $u.min([winbot - sbh - offset - 20, + $(document).height() - sbh - 200])); + } + } + }); + </script> +{%- endif %} +{% endblock %} + +{% block header %} +<div class="outerwrapper"> +<div class="pageheader"> + <ul> + <li><a href="{{ pathto('index') }}">Home</a></li> + {% if demo_active %} + <li><a href="{{ pathto('demo') }}">Demo</a></li> + {% endif %} + <li><a href="{{ pathto('languages') }}">Languages</a></li> + <li><a href="{{ pathto('styles') }}">Styles</a></li> + <li><a href="{{ pathto('faq') }}">FAQ</a></li> + <li><a href="{{ pathto('download') }}">Get it</a></li> + <li><a href="{{ pathto('docs/index') }}">Docs</a></li> + </ul> + <div> + <a href="{{ pathto('index') }}"> + <img src="{{ pathto('_static/logo.png', 1) }}" alt="Pygments logo" /> + </a> + </div> +</div> +<div class="flexwrapper"> +{% endblock %} + +{% block footer %} + </div> {# closes "flexwrapper" div #} + <div class="footer" role="contentinfo"> + © Copyright 2006-2022, Georg Brandl and Pygments contributors. + Created using <a href="https://sphinx-doc.org/">Sphinx</a> {{ + sphinx_version }}. <br/> + Pygments logo created by <a href="https://joelunger.com">Joel Unger</a>. + Backgrounds from <a href="https://subtlepatterns.com">subtlepatterns.com</a>. 
+ </div> + </div> {# closes "outerwrapper" div #} +{% endblock %} + +{% block sidebarrel %} +{% endblock %} + +{% block sidebarsourcelink %} +{% endblock %} diff --git a/doc/_themes/pygments14/localtoc.html b/doc/_themes/pygments14/localtoc.html new file mode 100644 index 0000000..c0e2de0 --- /dev/null +++ b/doc/_themes/pygments14/localtoc.html @@ -0,0 +1,17 @@ +{# + basic/localtoc.html + ~~~~~~~~~~~~~~~~~~~ + + Sphinx sidebar template: local table of contents. + + This file can be removed once https://github.com/sphinx-doc/sphinx/pull/9815 has landed. + + :copyright: Copyright 2007-2021 by the Sphinx team, see AUTHORS. + :license: BSD, see LICENSE for details. +#} +{%- if display_toc %} + <div> + <h3><a href="{{ pathto(root_doc)|e }}">{{ _('Table of Contents') }}</a></h3> + {{ toc }} + </div> +{%- endif %} diff --git a/doc/_themes/pygments14/relations.html b/doc/_themes/pygments14/relations.html new file mode 100644 index 0000000..372894d --- /dev/null +++ b/doc/_themes/pygments14/relations.html @@ -0,0 +1,25 @@ +{# + basic/relations.html + ~~~~~~~~~~~~~~~~~~~~ + + Sphinx sidebar template: relation links. + + This file can be removed once https://github.com/sphinx-doc/sphinx/pull/9815 has landed. + + :copyright: Copyright 2007-2021 by the Sphinx team, see AUTHORS. + :license: BSD, see LICENSE for details. 
+#} +{%- if prev %} +<div> + <h4>{{ _('Previous topic') }}</h4> + <p class="topless"><a href="{{ prev.link|e }}" + title="{{ _('previous chapter') }}">{{ prev.title }}</a></p> +</div> +{%- endif %} +{%- if next %} +<div> + <h4>{{ _('Next topic') }}</h4> + <p class="topless"><a href="{{ next.link|e }}" + title="{{ _('next chapter') }}">{{ next.title }}</a></p> +</div> +{%- endif %} diff --git a/doc/_themes/pygments14/static/bodybg.png b/doc/_themes/pygments14/static/bodybg.png Binary files differnew file mode 100644 index 0000000..46892b8 --- /dev/null +++ b/doc/_themes/pygments14/static/bodybg.png diff --git a/doc/_themes/pygments14/static/docbg.png b/doc/_themes/pygments14/static/docbg.png Binary files differnew file mode 100644 index 0000000..13e61f3 --- /dev/null +++ b/doc/_themes/pygments14/static/docbg.png diff --git a/doc/_themes/pygments14/static/listitem.png b/doc/_themes/pygments14/static/listitem.png Binary files differnew file mode 100644 index 0000000..e45715f --- /dev/null +++ b/doc/_themes/pygments14/static/listitem.png diff --git a/doc/_themes/pygments14/static/logo.png b/doc/_themes/pygments14/static/logo.png Binary files differnew file mode 100644 index 0000000..2c1a24d --- /dev/null +++ b/doc/_themes/pygments14/static/logo.png diff --git a/doc/_themes/pygments14/static/pocoo.png b/doc/_themes/pygments14/static/pocoo.png Binary files differnew file mode 100644 index 0000000..4174149 --- /dev/null +++ b/doc/_themes/pygments14/static/pocoo.png diff --git a/doc/_themes/pygments14/static/pygments14.css_t b/doc/_themes/pygments14/static/pygments14.css_t new file mode 100644 index 0000000..4355074 --- /dev/null +++ b/doc/_themes/pygments14/static/pygments14.css_t @@ -0,0 +1,422 @@ +/* + * pygments14.css + * ~~~~~~~~~~~~~~ + * + * Sphinx stylesheet -- pygments14 theme. Heavily copied from sphinx13. + * + * :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS. + * :license: BSD, see LICENSE for details. 
+ * + */ + +@import url("basic.css"); + +/* -- page layout ----------------------------------------------------------- */ + +body { + font-family: {{ theme_font }}, 'Lucida Grande', 'Lucida Sans Unicode', 'Geneva', + 'Verdana', sans-serif; + font-size: 14px; + text-align: center; + background-image: url(bodybg.png); + background-color: {{ theme_background }}; + color: black; + padding: 0; + /* + border-right: 1px solid {{ theme_border }}; + border-left: 1px solid {{ theme_border }}; + */ + + margin: 0 auto; + max-width: 1080px; +} + +.outerwrapper { + background-image: url(docbg.png); + background-attachment: fixed; +} + +.pageheader { + text-align: left; + padding: 10px 15px; +} + +.pageheader ul { + float: right; + color: white; + list-style-type: none; + padding-left: 0; + margin-top: 40px; + margin-right: 10px; +} + +.pageheader li { + float: left; + margin: 0 0 0 10px; +} + +.pageheader li a { + border-radius: 3px; + padding: 8px 12px; + color: {{ theme_darkgray }}; + text-shadow: 0 0 5px rgba(0, 0, 0, 0.2); +} + +.pageheader li a:hover { + background-color: {{ theme_yellow }}; + color: black; + text-shadow: none; +} + +div.document { + width: 700px; + flex-grow: 100; + text-align: left; + /*border-left: 1em solid {{ theme_lightyellow }};*/ + min-width: 500px; +} + +@media screen and (max-width: 550px) { + div.document { + min-width: inherit; + } +} + +div.bodywrapper { + background-color: white; +/* border-right: 1px solid {{ theme_border }}; */ +} + +.flexwrapper { + display: flex; + gap: 15px; + flex-wrap: wrap; + padding-right: 12px; +} + +div.body { + margin: 0; + padding: 0.5em 20px 20px 20px; + width: 100%; + box-sizing: border-box; +} + +div.related { + font-size: 1em; + color: {{ theme_darkgray }}; +} + +div.related ul { + background-image: url(relbg.png); + background-repeat: repeat-y; + background-color: {{ theme_yellow }}; + height: 1.9em; + /* + border-top: 1px solid {{ theme_border }}; + border-bottom: 1px solid {{ theme_border }}; + */ +} + 
+div.related ul li { + margin: 0 5px 0 0; + padding: 0; + float: left; +} + +div.related ul li.right { + float: right; + margin-right: 5px; +} + +div.related ul li a { + margin: 0; + padding: 0 5px 0 5px; + line-height: 1.75em; + color: {{ theme_darkgray }}; + /*text-shadow: 0px 0px 1px rgba(0, 0, 0, 0.5);*/ +} + +div.related ul li a:hover { + text-decoration: underline; + text-shadow: 0px 0px 1px rgba(255, 255, 255, 0.5); +} + +div.sphinxsidebar { + margin: 0; + padding: 0 0px 15px 15px; + width: 210px; + float: none; + font-size: 1em; + text-align: left; + flex-grow: 1; +} + +.sphinxsidebarwrapper > * { + flex: 1 1 0px; + min-width: 200px; +} + +div.sphinxsidebar .logo { + font-size: 1.8em; + color: #666; + font-weight: 300; + text-align: center; +} + +div.sphinxsidebar .logo img { + vertical-align: middle; +} + +div.sphinxsidebar input { + border: 1px solid #aaa; + font-family: {{ theme_font }}, 'Lucida Grande', 'Lucida Sans Unicode', 'Geneva', + 'Verdana', sans-serif; + font-size: 1em; +} + +div.sphinxsidebar h3 { + font-size: 1.5em; + /* border-top: 1px solid {{ theme_border }}; */ + margin-top: 0; + margin-bottom: 0.5em; + padding-top: 0.5em; +} + +div.sphinxsidebar h4 { + font-size: 1.2em; + margin-bottom: 0; +} + +div.sphinxsidebar h3, div.sphinxsidebar h4 { + margin-left: -15px; + padding-right: 14px; + padding-left: 14px; + color: #333; + font-weight: 300; + /*text-shadow: 0px 0px 0.5px rgba(0, 0, 0, 0.4);*/ +} + +div.sphinxsidebarwrapper { + padding: 0; + display: flex; + flex-wrap: wrap; + gap: 15px; +} + +div.sphinxsidebarwrapper > h3:first-child { + margin-top: 0.5em; + border: none; +} + +div.sphinxsidebar h3 a { + color: #333; +} + +div.sphinxsidebar ul { + color: #444; + margin-top: 7px; + padding: 0; + line-height: 130%; +} + +div.sphinxsidebar ul ul { + margin-left: 20px; + list-style-image: url(listitem.png); +} + +div.footer { + color: {{ theme_darkgray }}; + text-shadow: 0 0 .2px rgba(255, 255, 255, 0.8); + padding: 2em; + text-align: center; 
+ clear: both; + font-size: 0.8em; +} + +/* -- body styles ----------------------------------------------------------- */ + +p { + margin: 0.8em 0 0.5em 0; +} + +a { + color: {{ theme_darkgreen }}; + text-decoration: none; +} + +a:hover { + color: {{ theme_darkyellow }}; +} + +div.body a { + text-decoration: underline; +} + +h1 { + margin: 10px 0 0 0; + font-size: 2.4em; + color: {{ theme_darkgray }}; + font-weight: 300; +} + +h2 { + margin: 1.em 0 0.2em 0; + font-size: 1.5em; + font-weight: 300; + padding: 0; + color: {{ theme_darkgreen }}; +} + +h3 { + margin: 1em 0 -0.3em 0; + font-size: 1.3em; + font-weight: 300; +} + +div.body h1 a, div.body h2 a, div.body h3 a, div.body h4 a, div.body h5 a, div.body h6 a { + text-decoration: none; +} + +div.body h1 a tt, div.body h2 a tt, div.body h3 a tt, div.body h4 a tt, div.body h5 a tt, div.body h6 a tt { + color: {{ theme_darkgreen }} !important; + font-size: inherit !important; +} + +a.headerlink { + color: {{ theme_green }} !important; + font-size: 12px; + margin-left: 6px; + padding: 0 4px 0 4px; + text-decoration: none !important; + float: right; +} + +a.headerlink:hover { + background-color: #ccc; + color: white!important; +} + +cite, code, tt { + font-family: 'Consolas', 'DejaVu Sans Mono', + 'Bitstream Vera Sans Mono', monospace; + font-size: 14px; + letter-spacing: -0.02em; +} + +tt { + background-color: #f2f2f2; + border: 1px solid #ddd; + border-radius: 2px; + color: #333; + padding: 1px; +} + +tt.descname, tt.descclassname, tt.xref { + border: 0; +} + +hr { + border: 1px solid #abc; + margin: 2em; +} + +a tt { + border: 0; + color: {{ theme_darkgreen }}; +} + +a tt:hover { + color: {{ theme_darkyellow }}; +} + +pre { + font-family: 'Consolas', 'DejaVu Sans Mono', + 'Bitstream Vera Sans Mono', monospace; + font-size: 13px; + letter-spacing: 0.015em; + line-height: 120%; + padding: 0.5em; + border: 1px solid #ccc; + border-radius: 2px; + background-color: #f8f8f8; +} + +pre a { + color: inherit; + 
text-decoration: underline; +} + +td.linenos pre { + padding: 0.5em 0; +} + +div.quotebar { + background-color: #f8f8f8; + max-width: 250px; + float: right; + padding: 0px 7px; + border: 1px solid #ccc; + margin-left: 1em; +} + +div.topic { + background-color: #f8f8f8; +} + +table { + border-collapse: collapse; + margin: 0 -0.5em 0 -0.5em; +} + +table td, table th { + padding: 0.2em 0.5em 0.2em 0.5em; +} + +div.admonition, div.warning { + font-size: 0.9em; + margin: 1em 0 1em 0; + border: 1px solid #86989B; + border-radius: 2px; + background-color: #f7f7f7; + padding: 0; + padding-bottom: 0.5rem; +} + +div.admonition p, div.warning p { + margin: 0.5em 1em 0.5em 1em; + padding: 0; +} + +div.admonition pre, div.warning pre { + margin: 0.4em 1em 0.4em 1em; +} + +div.admonition p.admonition-title, +div.warning p.admonition-title { + font-weight: bold; +} + +div.warning { + border: 1px solid #940000; +/* background-color: #FFCCCF;*/ +} + +div.warning p.admonition-title { +} + +div.admonition ul, div.admonition ol, +div.warning ul, div.warning ol { + margin: 0.1em 0.5em 0.5em 3em; + padding: 0; +} + +.viewcode-back { + font-family: {{ theme_font }}, 'Lucida Grande', 'Lucida Sans Unicode', 'Geneva', + 'Verdana', sans-serif; +} + +div.viewcode-block:target { + background-color: #f4debf; + border-top: 1px solid #ac9; + border-bottom: 1px solid #ac9; +} diff --git a/doc/_themes/pygments14/theme.conf b/doc/_themes/pygments14/theme.conf new file mode 100644 index 0000000..8d2988f --- /dev/null +++ b/doc/_themes/pygments14/theme.conf @@ -0,0 +1,17 @@ +[theme] +inherit = basic +stylesheet = pygments14.css +pygments_style = friendly + +[options] +body_min_width = inherit +body_max_width = inherit +green = #66b55e +darkgreen = #36852e +darkgray = #666666 +border = #66b55e +yellow = #f4cd00 +darkyellow = #d4ad00 +lightyellow = #fffbe3 +background = #f9f9f9 +font = PT Sans diff --git a/doc/conf.py b/doc/conf.py new file mode 100644 index 0000000..f42c355 --- /dev/null +++ 
b/doc/conf.py @@ -0,0 +1,291 @@ +# +# Pygments documentation build configuration file +# + +import re, sys, os, itertools + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +sys.path.insert(0, os.path.abspath('..')) + +import pygments +import pygments.formatters +import pygments.lexers +import pygments.styles +import tests.contrast.test_contrasts as test_contrasts + +# -- General configuration ----------------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be extensions +# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx', 'pygments.sphinxext'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = 'Pygments' +copyright = '2006-2022, Georg Brandl and Pygments contributors' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = pygments.__version__ +# The full version, including alpha/beta/rc tags. +release = version + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. 
+#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['_build'] + +# The reST default role (used for this markup: `text`) to use for all documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +#pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + + +# -- Options for HTML output --------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'pygments14' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +html_theme_path = ['_themes'] + +# The name for this set of Sphinx documents. If None, it defaults to +# "<project> v<release> documentation". +html_title = 'Pygments' + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. 
+#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +html_favicon = '_static/favicon.ico' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +html_sidebars = {'index': ['indexsidebar.html', 'searchbox.html']} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +html_additional_pages = { + 'styles': 'styles.html', + } + +if os.environ.get('WEBSITE_BUILD'): + html_additional_pages['demo'] = 'demo.html' + html_static_path.append('_build/pyodide') + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a <link> tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. 
+#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Output file base name for HTML help builder. +htmlhelp_basename = 'Pygments' + + +# -- Options for LaTeX output -------------------------------------------------- + +latex_elements = { +# The paper size ('letterpaper' or 'a4paper'). +#'papersize': 'letterpaper', + +# The font size ('10pt', '11pt' or '12pt'). +#'pointsize': '10pt', + +# Additional stuff for the LaTeX preamble. +#'preamble': '', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, documentclass [howto/manual]). +latex_documents = [ + ('docs/index', 'Pygments.tex', 'Pygments Documentation', + 'Pygments authors', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output -------------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ('docs/index', 'pygments', 'Pygments Documentation', + ['Pygments authors'], 1) +] + +# If true, show URL addresses after external links. +#man_show_urls = False + + +# Example configuration for intersphinx: refer to the Python standard library. +#intersphinx_mapping = {'http://docs.python.org/': None} + +rst_prolog = '.. 
|language_count| replace:: {}'.format(len(list(pygments.lexers.get_all_lexers()))) + +def pg_context(app, pagename, templatename, ctx, event_arg): + ctx['demo_active'] = bool(os.environ.get('WEBSITE_BUILD')) + + if pagename == 'demo': + ctx['lexers'] = sorted(pygments.lexers.get_all_lexers(plugins=False), key=lambda x: x[0].lower()) + + if pagename in ('styles', 'demo'): + with open('examples/example.py') as f: + html = f.read() + lexer = pygments.lexers.get_lexer_for_filename('example.py') + min_contrasts = test_contrasts.min_contrasts() + ctx['styles_aa'] = [] + ctx['styles_sub_aa'] = [] + # Use STYLE_MAP directly so we don't get plugins as with get_all_styles(). + for style in pygments.styles.STYLE_MAP: + if not pygments.styles.get_style_by_name(style).web_style_gallery_exclude: + aa = min_contrasts[style] >= test_contrasts.WCAG_AA_CONTRAST + bg_r, bg_g, bg_b = test_contrasts.hex2rgb(pygments.styles.get_style_by_name(style).background_color) + ctx['styles_aa' if aa else 'styles_sub_aa'].append( + dict( + name=style, + html=pygments.highlight( + html, + lexer, + pygments.formatters.HtmlFormatter(noclasses=True, style=style), + ), + # from https://en.wikipedia.org/wiki/Relative_luminance + bg_luminance=(0.2126*bg_r + 0.7152*bg_g + 0.0722*bg_b) + ) + ) + + # sort styles according to their background luminance (light styles first) + # if styles have the same background luminance sort them by their name + sortkey = lambda s: (-s['bg_luminance'], s['name']) + # the default style is always displayed first + default_style = ctx['styles_aa'].pop(0) + ctx['styles_aa'].sort(key=sortkey) + ctx['styles_aa'].insert(0, default_style) + ctx['styles_sub_aa'].sort(key=sortkey) + + +def source_read(app, docname, source): + # linkify issue / PR numbers in changelog + if docname == 'docs/changelog': + with open('../CHANGES') as f: + changelog = f.read() + + idx = changelog.find('\nVersion 2.4.2\n') + + def linkify(match): + url = 'https://github.com/pygments/pygments/issues/' + 
match[1] + return '`{} <{}>`_'.format(match[0], url) + + linkified = re.sub(r'(?:PR)?#([0-9]+)\b', linkify, changelog[:idx]) + source[0] = linkified + changelog[idx:] + + +def setup(app): + app.connect('html-page-context', pg_context) + app.connect('source-read', source_read) diff --git a/doc/docs/api.rst b/doc/docs/api.rst new file mode 100644 index 0000000..4d330bf --- /dev/null +++ b/doc/docs/api.rst @@ -0,0 +1,360 @@ +.. -*- mode: rst -*- + +===================== +The full Pygments API +===================== + +This page describes the Pygments API. + +High-level API +============== + +.. module:: pygments + +Functions from the :mod:`pygments` module: + +.. function:: lex(code, lexer) + + Lex `code` with the `lexer` (must be a `Lexer` instance) + and return an iterable of tokens. Currently, this only calls + `lexer.get_tokens()`. + +.. function:: format(tokens, formatter, outfile=None) + + Format a token stream (iterable of tokens) `tokens` with the + `formatter` (must be a `Formatter` instance). The result is + written to `outfile`, or if that is ``None``, returned as a + string. + +.. function:: highlight(code, lexer, formatter, outfile=None) + + This is the most high-level highlighting function. + It combines `lex` and `format` in one function. + + +.. module:: pygments.lexers + +Functions from :mod:`pygments.lexers`: + +.. function:: get_lexer_by_name(alias, **options) + + Return an instance of a `Lexer` subclass that has `alias` in its + aliases list. The lexer is given the `options` at its + instantiation. + + Will raise :exc:`pygments.util.ClassNotFound` if no lexer with that alias is + found. + +.. function:: get_lexer_for_filename(fn, **options) + + Return a `Lexer` subclass instance that has a filename pattern + matching `fn`. The lexer is given the `options` at its + instantiation. + + Will raise :exc:`pygments.util.ClassNotFound` if no lexer for that filename + is found. + +.. 
function:: get_lexer_for_mimetype(mime, **options)
+
+   Return a `Lexer` subclass instance that has `mime` in its mimetype
+   list. The lexer is given the `options` at its instantiation.
+
+   Will raise :exc:`pygments.util.ClassNotFound` if no lexer for that mimetype
+   is found.
+
+.. function:: load_lexer_from_file(filename, lexername="CustomLexer", **options)
+
+   Return a `Lexer` subclass instance loaded from the provided file, relative
+   to the current directory. The file is expected to contain a Lexer class
+   named `lexername` (by default, CustomLexer). Users should be very careful with
+   the input, because this method is equivalent to running eval on the input file.
+   The lexer is given the `options` at its instantiation.
+
+   :exc:`ClassNotFound` is raised if there are any errors loading the Lexer
+
+   .. versionadded:: 2.2
+
+.. function:: guess_lexer(text, **options)
+
+   Return a `Lexer` subclass instance that's guessed from the text in
+   `text`. For that, the :meth:`.analyse_text()` method of every known lexer
+   class is called with the text as argument, and the lexer which returned the
+   highest value will be instantiated and returned.
+
+   :exc:`pygments.util.ClassNotFound` is raised if no lexer thinks it can
+   handle the content.
+
+.. function:: guess_lexer_for_filename(filename, text, **options)
+
+   As :func:`guess_lexer()`, but only lexers which have a pattern in `filenames`
+   or `alias_filenames` that matches `filename` are taken into consideration.
+
+   :exc:`pygments.util.ClassNotFound` is raised if no lexer thinks it can
+   handle the content.
+
+.. function:: get_all_lexers()
+
+   Return an iterable over all registered lexers, yielding tuples in the
+   format::
+
+       (longname, tuple of aliases, tuple of filename patterns, tuple of mimetypes)
+
+   .. versionadded:: 0.6
+
+.. function:: find_lexer_class_by_name(alias)
+
+   Return the `Lexer` subclass that has `alias` in its aliases list, without
+   instantiating it. 
+ + Will raise :exc:`pygments.util.ClassNotFound` if no lexer with that alias is + found. + + .. versionadded:: 2.2 + +.. function:: find_lexer_class(name) + + Return the `Lexer` subclass that with the *name* attribute as given by + the *name* argument. + + +.. module:: pygments.formatters + +Functions from :mod:`pygments.formatters`: + +.. function:: get_formatter_by_name(alias, **options) + + Return an instance of a :class:`.Formatter` subclass that has `alias` in its + aliases list. The formatter is given the `options` at its instantiation. + + Will raise :exc:`pygments.util.ClassNotFound` if no formatter with that + alias is found. + +.. function:: get_formatter_for_filename(fn, **options) + + Return a :class:`.Formatter` subclass instance that has a filename pattern + matching `fn`. The formatter is given the `options` at its instantiation. + + Will raise :exc:`pygments.util.ClassNotFound` if no formatter for that filename + is found. + +.. function:: load_formatter_from_file(filename, formattername="CustomFormatter", **options) + + Return a `Formatter` subclass instance loaded from the provided file, relative + to the current directory. The file is expected to contain a Formatter class + named ``formattername`` (by default, CustomFormatter). Users should be very + careful with the input, because this method is equivalent to running eval + on the input file. The formatter is given the `options` at its instantiation. + + :exc:`ClassNotFound` is raised if there are any errors loading the Formatter + + .. versionadded:: 2.2 + +.. module:: pygments.styles + +Functions from :mod:`pygments.styles`: + +.. function:: get_style_by_name(name) + + Return a style class by its short name. The names of the builtin styles + are listed in :data:`pygments.styles.STYLE_MAP`. + + Will raise :exc:`pygments.util.ClassNotFound` if no style of that name is + found. + +.. function:: get_all_styles() + + Return an iterable over all registered styles, yielding their names. + + .. 
versionadded:: 0.6 + + +.. module:: pygments.lexer + +Lexers +====== + +The base lexer class from which all lexers are derived is: + +.. class:: Lexer(**options) + + The constructor takes a \*\*keywords dictionary of options. + Every subclass must first process its own options and then call + the `Lexer` constructor, since it processes the `stripnl`, + `stripall` and `tabsize` options. + + An example looks like this: + + .. sourcecode:: python + + def __init__(self, **options): + self.compress = options.get('compress', '') + Lexer.__init__(self, **options) + + As these options must all be specifiable as strings (due to the + command line usage), there are various utility functions + available to help with that, see `Option processing`_. + + .. method:: get_tokens(text) + + This method is the basic interface of a lexer. It is called by + the `highlight()` function. It must process the text and return an + iterable of ``(tokentype, value)`` pairs from `text`. + + Normally, you don't need to override this method. The default + implementation processes the `stripnl`, `stripall` and `tabsize` + options and then yields all tokens from `get_tokens_unprocessed()`, + with the ``index`` dropped. + + .. method:: get_tokens_unprocessed(text) + + This method should process the text and return an iterable of + ``(index, tokentype, value)`` tuples where ``index`` is the starting + position of the token within the input text. + + This method must be overridden by subclasses. + + .. staticmethod:: analyse_text(text) + + A static method which is called for lexer guessing. It should analyse + the text and return a float in the range from ``0.0`` to ``1.0``. + If it returns ``0.0``, the lexer will not be selected as the most + probable one, if it returns ``1.0``, it will be selected immediately. + + .. note:: You don't have to add ``@staticmethod`` to the definition of + this method, this will be taken care of by the Lexer's metaclass. 
+ + For a list of known tokens have a look at the :doc:`tokens` page. + + A lexer also can have the following attributes (in fact, they are mandatory + except `alias_filenames`) that are used by the builtin lookup mechanism. + + .. attribute:: name + + Full name for the lexer, in human-readable form. + + .. attribute:: aliases + + A list of short, unique identifiers that can be used to lookup + the lexer from a list, e.g. using `get_lexer_by_name()`. + + .. attribute:: filenames + + A list of `fnmatch` patterns that match filenames which contain + content for this lexer. The patterns in this list should be unique among + all lexers. + + .. attribute:: alias_filenames + + A list of `fnmatch` patterns that match filenames which may or may not + contain content for this lexer. This list is used by the + :func:`.guess_lexer_for_filename()` function, to determine which lexers + are then included in guessing the correct one. That means that + e.g. every lexer for HTML and a template language should include + ``\*.html`` in this list. + + .. attribute:: mimetypes + + A list of MIME types for content that can be lexed with this + lexer. + + +.. module:: pygments.formatter + +Formatters +========== + +A formatter is derived from this class: + + +.. class:: Formatter(**options) + + As with lexers, this constructor processes options and then must call the + base class :meth:`__init__`. + + The :class:`Formatter` class recognizes the options `style`, `full` and + `title`. It is up to the formatter class whether it uses them. + + .. method:: get_style_defs(arg='') + + This method must return statements or declarations suitable to define + the current style for subsequent highlighted text (e.g. CSS classes + in the `HTMLFormatter`). + + The optional argument `arg` can be used to modify the generation and + is formatter dependent (it is standardized because it can be given on + the command line). 
+ + This method is called by the ``-S`` :doc:`command-line option <cmdline>`, + the `arg` is then given by the ``-a`` option. + + .. method:: format(tokensource, outfile) + + This method must format the tokens from the `tokensource` iterable and + write the formatted version to the file object `outfile`. + + Formatter options can control how exactly the tokens are converted. + + .. versionadded:: 0.7 + A formatter must have the following attributes that are used by the + builtin lookup mechanism. + + .. attribute:: name + + Full name for the formatter, in human-readable form. + + .. attribute:: aliases + + A list of short, unique identifiers that can be used to lookup + the formatter from a list, e.g. using :func:`.get_formatter_by_name()`. + + .. attribute:: filenames + + A list of :mod:`fnmatch` patterns that match filenames for which this + formatter can produce output. The patterns in this list should be unique + among all formatters. + + +.. module:: pygments.util + +Option processing +================= + +The :mod:`pygments.util` module has some utility functions usable for processing +command line options. All of the following functions get values from a +dictionary of options. If the value is already in the type expected by the +option, it is returned as-is. Otherwise, if the value is a string, it is first +converted to the expected type if possible. + +.. exception:: OptionError + + This exception will be raised by all option processing functions if + the type or value of the argument is not correct. + +.. function:: get_bool_opt(options, optname, default=None) + + Intuitively, this is `options.get(optname, default)`, but restricted to + Boolean value. The Booleans can be represented as string, in order to accept + Boolean value from the command line arguments. If the key `optname` is + present in the dictionary `options` and is not associated with a Boolean, + raise an `OptionError`. If it is absent, `default` is returned instead. 
+ + The valid string values for ``True`` are ``1``, ``yes``, ``true`` and + ``on``, the ones for ``False`` are ``0``, ``no``, ``false`` and ``off`` + (matched case-insensitively). + +.. function:: get_int_opt(options, optname, default=None) + + As :func:`get_bool_opt`, but interpret the value as an integer. + +.. function:: get_list_opt(options, optname, default=None) + + If the key `optname` from the dictionary `options` is a string, + split it at whitespace and return it. If it is already a list + or a tuple, it is returned as a list. + +.. function:: get_choice_opt(options, optname, allowed, default=None) + + If the key `optname` from the dictionary is not in the sequence + `allowed`, raise an error, otherwise return it. + + .. versionadded:: 0.8 diff --git a/doc/docs/authors.rst b/doc/docs/authors.rst new file mode 100644 index 0000000..f8373f0 --- /dev/null +++ b/doc/docs/authors.rst @@ -0,0 +1,4 @@ +Full contributor list +===================== + +.. include:: ../../AUTHORS diff --git a/doc/docs/changelog.rst b/doc/docs/changelog.rst new file mode 100644 index 0000000..f264cab --- /dev/null +++ b/doc/docs/changelog.rst @@ -0,0 +1 @@ +.. include:: ../../CHANGES diff --git a/doc/docs/cmdline.rst b/doc/docs/cmdline.rst new file mode 100644 index 0000000..b07b3e4 --- /dev/null +++ b/doc/docs/cmdline.rst @@ -0,0 +1,218 @@ +.. -*- mode: rst -*- + +====================== +Command Line Interface +====================== + +You can use Pygments from the shell, provided you installed the +:program:`pygmentize` script:: + + $ pygmentize test.py + print "Hello World" + +will print the file test.py to standard output, using the Python lexer +(inferred from the file name extension) and the terminal formatter (because +you didn't give an explicit formatter name). +:program:`pygmentize` attempts to +detect the maximum number of colors that the terminal supports. 
The difference
+between color formatters for 16 and 256 colors is immense, but there is a less
+noticeable difference between color formatters for 256 and 16 million colors.
+
+Here's the process of how it detects the maximum number of colors
+supported by your terminal. If the ``COLORTERM`` environment variable is set to
+either ``truecolor`` or ``24bit``, it will use a 16 million color representation
+(like ``terminal16m``). Next, it will check whether ``256`` appears anywhere in the
+environment variable ``TERM``, in which case it will use a 256-color representation
+(such as ``terminal256``). When neither of those is found, it falls back to
+the 16-color representation (like ``terminal``).
+
+If you want HTML output::
+
+    $ pygmentize -f html -l python -o test.html test.py
+
+As you can see, the -l option explicitly selects a lexer. As seen above, if you
+give an input file name and it has an extension that Pygments recognizes, you can
+omit this option.
+
+The ``-o`` option gives an output file name. If it is not given, output is
+written to stdout.
+
+The ``-f`` option selects a formatter (as with ``-l``, it can also be omitted
+if an output file name is given and has a supported extension).
+If no output file name is given and ``-f`` is omitted, the
+:class:`.TerminalFormatter` is used.
+
+The above command could therefore also be given as::
+
+    $ pygmentize -o test.html test.py
+
+To create a full HTML document, including line numbers and stylesheet (using the
+"emacs" style), highlighting the Python file ``test.py`` to ``test.html``::
+
+    $ pygmentize -O full,style=emacs,linenos=1 -o test.html test.py
+
+
+Options and filters
+-------------------
+
+Lexer and formatter options can be given using the ``-O`` option::
+
+    $ pygmentize -f html -O style=colorful,linenos=1 -l python test.py
+
+Be sure to enclose the option string in quotes if it contains any special shell
+characters, such as spaces or expansion wildcards like ``*``. 
If an option +expects a list value, separate the list entries with spaces (you'll have to +quote the option value in this case too, so that the shell doesn't split it). + +Since the ``-O`` option argument is split at commas and expects the split values +to be of the form ``name=value``, you can't give an option value that contains +commas or equals signs. Therefore, an option ``-P`` is provided (as of Pygments +0.9) that works like ``-O`` but can only pass one option per ``-P``. Its value +can then contain all characters:: + + $ pygmentize -P "heading=Pygments, the Python highlighter" ... + +Filters are added to the token stream using the ``-F`` option:: + + $ pygmentize -f html -l pascal -F keywordcase:case=upper main.pas + +As you see, options for the filter are given after a colon. As for ``-O``, the +filter name and options must be one shell word, so there may not be any spaces +around the colon. + + +Generating styles +----------------- + +Formatters normally don't output full style information. For example, the HTML +formatter by default only outputs ``<span>`` tags with ``class`` attributes. +Therefore, there's a special ``-S`` option for generating style definitions. +Usage is as follows:: + + $ pygmentize -f html -S colorful -a .syntax + +generates a CSS style sheet (because you selected the HTML formatter) for +the "colorful" style prepending a ".syntax" selector to all style rules. + +For an explanation what ``-a`` means for :doc:`a particular formatter +<formatters>`, look for the `arg` argument for the formatter's +:meth:`.get_style_defs()` method. + + +Getting lexer names +------------------- + +.. versionadded:: 1.0 + +The ``-N`` option guesses a lexer name for a given filename, so that :: + + $ pygmentize -N setup.py + +will print out ``python``. It won't highlight anything yet. If no specific +lexer is known for that filename, ``text`` is printed. 
+
+Additionally, there is the ``-C`` option, which is just like ``-N``, except
+that it prints out a lexer name based solely on a given content from standard
+input.
+
+
+Guessing the lexer from the file contents
+-----------------------------------------
+
+The ``-g`` option will try to guess the correct lexer from the file contents,
+or pass through as plain text if nothing can be guessed. This option also looks
+for Vim modelines in the text, and for *some* languages, shebangs. Usage is as
+follows::
+
+    $ pygmentize -g setup.py
+
+Note though, that this option is not very reliable, and probably should be
+used only if Pygments is not able to guess the correct lexer from the file's
+extension.
+
+
+Highlighting stdin until EOF
+----------------------------
+
+The ``-s`` option processes lines one at a time until EOF, rather than waiting
+to process the entire file. This only works for stdin, only for lexers with no
+line-spanning constructs, and is intended for streaming input such as you get
+from `tail -f`. Usage is as follows::
+
+    $ tail -f sql.log | pygmentize -s -l sql
+
+
+Custom Lexers and Formatters
+----------------------------
+
+.. versionadded:: 2.2
+
+The ``-x`` flag enables custom lexers and formatters to be loaded
+from files relative to the current directory. Create a file with a class named
+CustomLexer or CustomFormatter, then specify it on the command line::
+
+    $ pygmentize -l your_lexer.py -f your_formatter.py -x
+
+You can also specify the name of your class with a colon::
+
+    $ pygmentize -l your_lexer.py:SomeLexer -x
+
+For more information, see :doc:`the Pygments documentation on Lexer development
+<lexerdevelopment>`.
+
+
+Getting help
+------------
+
+The ``-L`` option lists lexers, formatters, along with their short
+names and supported file name extensions, styles and filters. If you want to see
+only one category, give it as an argument::
+
+    $ pygmentize -L filters
+
+will list only all installed filters.
+
+.. 
versionadded:: 2.11 + +The ``--json`` option can be used in conjunction with the ``-L`` option to +output it's contents as JSON. Thus, to print all the installed styles and their +description in JSON, use the command:: + + $ pygmentize -L styles --json + +The ``-H`` option will give you detailed information (the same that can be found +in this documentation) about a lexer, formatter or filter. Usage is as follows:: + + $ pygmentize -H formatter html + +will print the help for the HTML formatter, while :: + + $ pygmentize -H lexer python + +will print the help for the Python lexer, etc. + + +A note on encodings +------------------- + +.. versionadded:: 0.9 + +Pygments tries to be smart regarding encodings in the formatting process: + +* If you give an ``encoding`` option, it will be used as the input and + output encoding. + +* If you give an ``outencoding`` option, it will override ``encoding`` + as the output encoding. + +* If you give an ``inencoding`` option, it will override ``encoding`` + as the input encoding. + +* If you don't give an encoding and have given an output file, the default + encoding for lexer and formatter is the terminal encoding or the default + locale encoding of the system. As a last resort, ``latin1`` is used (which + will pass through all non-ASCII characters). + +* If you don't give an encoding and haven't given an output file (that means + output is written to the console), the default encoding for lexer and + formatter is the terminal encoding (``sys.stdout.encoding``). diff --git a/doc/docs/filterdevelopment.rst b/doc/docs/filterdevelopment.rst new file mode 100644 index 0000000..004919e --- /dev/null +++ b/doc/docs/filterdevelopment.rst @@ -0,0 +1,75 @@ +.. -*- mode: rst -*- + +===================== +Write your own filter +===================== + +.. versionadded:: 0.7 + +Writing own filters is very easy. All you have to do is to subclass +the `Filter` class and override the `filter` method. 
Additionally a +filter is instantiated with some keyword arguments you can use to +adjust the behavior of your filter. + + +Subclassing Filters +=================== + +As an example, we write a filter that converts all `Name.Function` tokens +to normal `Name` tokens to make the output less colorful. + +.. sourcecode:: python + + from pygments.util import get_bool_opt + from pygments.token import Name + from pygments.filter import Filter + + class UncolorFilter(Filter): + + def __init__(self, **options): + Filter.__init__(self, **options) + self.class_too = get_bool_opt(options, 'classtoo') + + def filter(self, lexer, stream): + for ttype, value in stream: + if ttype is Name.Function or (self.class_too and + ttype is Name.Class): + ttype = Name + yield ttype, value + +Some notes on the `lexer` argument: that can be quite confusing since it doesn't +need to be a lexer instance. If a filter was added by using the `add_filter()` +function of lexers, that lexer is registered for the filter. In that case +`lexer` will refer to the lexer that has registered the filter. It *can* be used +to access options passed to a lexer. Because it could be `None` you always have +to check for that case if you access it. + + +Using a decorator +================= + +You can also use the `simplefilter` decorator from the `pygments.filter` module: + +.. sourcecode:: python + + from pygments.util import get_bool_opt + from pygments.token import Name + from pygments.filter import simplefilter + + + @simplefilter + def uncolor(self, lexer, stream, options): + class_too = get_bool_opt(options, 'classtoo') + for ttype, value in stream: + if ttype is Name.Function or (class_too and + ttype is Name.Class): + ttype = Name + yield ttype, value + + +You can instantiate this filter by calling `uncolor(classtoo=True)`, the same +way that you would have instantiated the previous filter by calling +`UncolorFilter(classtoo=True)`. 
Indeed, the decorator automatically ensures that
+`uncolor` is a class which subclasses an internal filter class. The class
+`uncolor` uses the decorated function as a method for filtering. (That's why
+there is a `self` argument that you probably won't end up using in the method.) diff --git a/doc/docs/filters.rst b/doc/docs/filters.rst new file mode 100644 index 0000000..5cdcb4c --- /dev/null +++ b/doc/docs/filters.rst @@ -0,0 +1,48 @@ +.. -*- mode: rst -*-
+
+=======
+Filters
+=======
+
+.. versionadded:: 0.7
+
+Transforming a stream of tokens into another stream is called "filtering" and is
+done by filters. The most common example of filters transform each token by
+applying simple rules such as highlighting the token if it is a TODO or
+another special word, or converting keywords to uppercase to enforce a style
+guide. More complex filters can transform the stream of tokens, such as removing
+the line indentation or merging tokens together. It should be noted that pygments
+filters are entirely unrelated to Python's `filter
+<https://docs.python.org/3/library/functions.html#filter>`_.
+
+An arbitrary number of filters can be applied to token streams coming from
+lexers to improve or annotate the output. To apply a filter, you can use the
+`add_filter()` method of a lexer:
+
+.. sourcecode:: pycon
+
+    >>> from pygments.lexers import PythonLexer
+    >>> l = PythonLexer()
+    >>> # add a filter given by a string and options
+    >>> l.add_filter('codetagify', case='lower')
+    >>> l.filters
+    [<pygments.filters.CodeTagFilter object at 0xb785decc>]
+    >>> from pygments.filters import KeywordCaseFilter
+    >>> # or give an instance
+    >>> l.add_filter(KeywordCaseFilter(case='lower'))
+
+The `add_filter()` method takes keyword arguments which are forwarded to
+the constructor of the filter.
+
+To get a list of all registered filters by name, you can use the
+`get_all_filters()` function from the `pygments.filters` module that returns an
+iterable for all known filters. 
+ +If you want to write your own filter, have a look at :doc:`Write your own filter +<filterdevelopment>`. + + +Builtin Filters +=============== + +.. pygmentsdoc:: filters diff --git a/doc/docs/formatterdevelopment.rst b/doc/docs/formatterdevelopment.rst new file mode 100644 index 0000000..2bfac05 --- /dev/null +++ b/doc/docs/formatterdevelopment.rst @@ -0,0 +1,169 @@ +.. -*- mode: rst -*- + +======================== +Write your own formatter +======================== + +As well as creating :doc:`your own lexer <lexerdevelopment>`, writing a new +formatter for Pygments is easy and straightforward. + +A formatter is a class that is initialized with some keyword arguments (the +formatter options) and that must provides a `format()` method. +Additionally a formatter should provide a `get_style_defs()` method that +returns the style definitions from the style in a form usable for the +formatter's output format. + + +Quickstart +========== + +The most basic formatter shipped with Pygments is the `NullFormatter`. It just +sends the value of a token to the output stream: + +.. sourcecode:: python + + from pygments.formatter import Formatter + + class NullFormatter(Formatter): + def format(self, tokensource, outfile): + for ttype, value in tokensource: + outfile.write(value) + +As you can see, the `format()` method is passed two parameters: `tokensource` +and `outfile`. The first is an iterable of ``(token_type, value)`` tuples, +the latter a file like object with a `write()` method. + +Because the formatter is that basic it doesn't overwrite the `get_style_defs()` +method. + + +Styles +====== + +Styles aren't instantiated but their metaclass provides some class functions +so that you can access the style definitions easily. + +Styles are iterable and yield tuples in the form ``(ttype, d)`` where `ttype` +is a token and `d` is a dict with the following keys: + +``'color'`` + Hexadecimal color value (eg: ``'ff0000'`` for red) or `None` if not + defined. 
+ +``'bold'`` + `True` if the value should be bold + +``'italic'`` + `True` if the value should be italic + +``'underline'`` + `True` if the value should be underlined + +``'bgcolor'`` + Hexadecimal color value for the background (eg: ``'eeeeee'`` for light + gray) or `None` if not defined. + +``'border'`` + Hexadecimal color value for the border (eg: ``'0000aa'`` for a dark + blue) or `None` for no border. + +Additional keys might appear in the future, formatters should ignore all keys +they don't support. + + +HTML 3.2 Formatter +================== + +For a more complex example, let's implement an HTML 3.2 Formatter. We don't +use CSS but inline markup (``<u>``, ``<font>``, etc). Because this isn't good +style this formatter isn't in the standard library ;-) + +.. sourcecode:: python + + from pygments.formatter import Formatter + + class OldHtmlFormatter(Formatter): + + def __init__(self, **options): + Formatter.__init__(self, **options) + + # create a dict of (start, end) tuples that wrap the + # value of a token so that we can use it in the format + # method later + self.styles = {} + + # we iterate over the `_styles` attribute of a style item + # that contains the parsed style values. + for token, style in self.style: + start = end = '' + # a style item is a tuple in the following form: + # colors are readily specified in hex: 'RRGGBB' + if style['color']: + start += '<font color="#%s">' % style['color'] + end = '</font>' + end + if style['bold']: + start += '<b>' + end = '</b>' + end + if style['italic']: + start += '<i>' + end = '</i>' + end + if style['underline']: + start += '<u>' + end = '</u>' + end + self.styles[token] = (start, end) + + def format(self, tokensource, outfile): + # lastval is a string we use for caching + # because it's possible that a lexer yields a number + # of consecutive tokens with the same token type.
+ # to minimize the size of the generated html markup we + # try to join the values of same-type tokens here + lastval = '' + lasttype = None + + # wrap the whole output with <pre> + outfile.write('<pre>') + + for ttype, value in tokensource: + # if the token type doesn't exist in the stylemap + # we try it with the parent of the token type + # eg: parent of Token.Literal.String.Double is + # Token.Literal.String + while ttype not in self.styles: + ttype = ttype.parent + if ttype == lasttype: + # the current token type is the same of the last + # iteration. cache it + lastval += value + else: + # not the same token as last iteration, but we + # have some data in the buffer. wrap it with the + # defined style and write it to the output file + if lastval: + stylebegin, styleend = self.styles[lasttype] + outfile.write(stylebegin + lastval + styleend) + # set lastval/lasttype to current values + lastval = value + lasttype = ttype + + # if something is left in the buffer, write it to the + # output file, then close the opened <pre> tag + if lastval: + stylebegin, styleend = self.styles[lasttype] + outfile.write(stylebegin + lastval + styleend) + outfile.write('</pre>\n') + +The comments should explain it. Again, this formatter doesn't override the +`get_style_defs()` method. If we would have used CSS classes instead of +inline HTML markup, we would need to generate the CSS first. For that +purpose the `get_style_defs()` method exists: + + +Generating Style Definitions +============================ + +Some formatters like the `LatexFormatter` and the `HtmlFormatter` don't +output inline markup but reference either macros or css classes. Because +the definitions of those are not part of the output, the `get_style_defs()` +method exists. It is passed one parameter (if it's used and how it's used +is up to the formatter) and has to return a string or ``None``. 
diff --git a/doc/docs/formatters.rst b/doc/docs/formatters.rst new file mode 100644 index 0000000..9e7074e --- /dev/null +++ b/doc/docs/formatters.rst @@ -0,0 +1,48 @@ +.. -*- mode: rst -*- + +==================== +Available formatters +==================== + +This page lists all builtin formatters. + +Common options +============== + +All formatters support these options: + +`encoding` + If given, must be an encoding name (such as ``"utf-8"``). This will + be used to convert the token strings (which are Unicode strings) + to byte strings in the output (default: ``None``). + It will also be written in an encoding declaration suitable for the + document format if the `full` option is given (e.g. a ``meta + content-type`` directive in HTML or an invocation of the `inputenc` + package in LaTeX). + + If this is ``""`` or ``None``, Unicode strings will be written + to the output file, which most file-like objects do not support. + For example, `pygments.highlight()` will return a Unicode string if + called with no `outfile` argument and a formatter that has `encoding` + set to ``None`` because it uses a `StringIO.StringIO` object that + supports Unicode arguments to `write()`. Using a regular file object + wouldn't work. + + .. versionadded:: 0.6 + +`outencoding` + When using Pygments from the command line, any `encoding` option given is + passed to the lexer and the formatter. This is sometimes not desirable, + for example if you want to set the input encoding to ``"guess"``. + Therefore, `outencoding` has been introduced which overrides `encoding` + for the formatter if given. + + .. versionadded:: 0.7 + + +Formatter classes +================= + +All these classes are importable from :mod:`pygments.formatters`. + +.. 
pygmentsdoc:: formatters diff --git a/doc/docs/index.rst b/doc/docs/index.rst new file mode 100644 index 0000000..d35fe6f --- /dev/null +++ b/doc/docs/index.rst @@ -0,0 +1,64 @@ +Pygments documentation +====================== + +**Starting with Pygments** + +.. toctree:: + :maxdepth: 1 + + ../download + quickstart + cmdline + +**Builtin components** + +.. toctree:: + :maxdepth: 1 + + lexers + filters + formatters + styles + +**Reference** + +.. toctree:: + :maxdepth: 1 + + unicode + tokens + api + terminal-sessions + +**Hacking for Pygments** + +.. toctree:: + :maxdepth: 1 + + lexerdevelopment + formatterdevelopment + filterdevelopment + styledevelopment + plugins + +**Hints and tricks** + +.. toctree:: + :maxdepth: 1 + + rstdirective + moinmoin + java + integrate + +**About Pygments** + +.. toctree:: + :maxdepth: 1 + + changelog + authors + security + +If you find bugs or have suggestions for the documentation, please submit them +on `GitHub <https://github.com/pygments/pygments>`_. diff --git a/doc/docs/integrate.rst b/doc/docs/integrate.rst new file mode 100644 index 0000000..2a030b7 --- /dev/null +++ b/doc/docs/integrate.rst @@ -0,0 +1,40 @@ +.. -*- mode: rst -*- + +=================================== +Using Pygments in various scenarios +=================================== + +Markdown +-------- + +Since Pygments 0.9, the distribution ships Markdown_ preprocessor sample code +that uses Pygments to render source code in +:file:`external/markdown-processor.py`. You can copy and adapt it to your +liking. + +.. _Markdown: https://pypi.org/project/Markdown/ + +TextMate +-------- + +Antonio Cangiano has created a Pygments bundle for TextMate that allows to +colorize code via a simple menu option. It can be found here_. + +.. _here: https://programmingzen.com/pygments-textmate-bundle/ + +Bash completion +--------------- + +The source distribution contains a file ``external/pygments.bashcomp`` that +sets up completion for the ``pygmentize`` command in bash. 
+ +Wrappers for other languages +---------------------------- + +These libraries provide Pygments highlighting for users of other languages +than Python: + +* `pygments.rb <https://github.com/pygments/pygments.rb>`_, a pygments wrapper for Ruby +* `Clygments <https://github.com/bfontaine/clygments>`_, a pygments wrapper for + Clojure +* `PHPygments <https://github.com/capynet/PHPygments>`_, a pygments wrapper for PHP diff --git a/doc/docs/java.rst b/doc/docs/java.rst new file mode 100644 index 0000000..a8a5beb --- /dev/null +++ b/doc/docs/java.rst @@ -0,0 +1,70 @@ +===================== +Use Pygments in Java +===================== + +Thanks to `Jython <https://www.jython.org/>`_ it is possible to use Pygments in +Java. + +This page is a simple tutorial to get an idea of how this works. You can +then look at the `Jython documentation <https://jython.readthedocs.io/en/latest/>`_ for more +advanced uses. + +Since version 1.5, Pygments is deployed on `Maven Central +<https://repo1.maven.org/maven2/org/pygments/pygments/>`_ as a JAR, as is Jython +which makes it a lot easier to create a Java project. + +Here is an example of a `Maven <https://maven.apache.org/>`_ ``pom.xml`` file for a +project running Pygments: + +.. 
sourcecode:: xml + + <?xml version="1.0" encoding="UTF-8"?> + + <project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 + http://maven.apache.org/maven-v4_0_0.xsd"> + <modelVersion>4.0.0</modelVersion> + <groupId>example</groupId> + <artifactId>example</artifactId> + <version>1.0-SNAPSHOT</version> + <dependencies> + <dependency> + <groupId>org.python</groupId> + <artifactId>jython-standalone</artifactId> + <version>2.5.3</version> + </dependency> + <dependency> + <groupId>org.pygments</groupId> + <artifactId>pygments</artifactId> + <version>1.5</version> + <scope>runtime</scope> + </dependency> + </dependencies> + </project> + +The following Java example: + +.. sourcecode:: java + + PythonInterpreter interpreter = new PythonInterpreter(); + + // Set a variable with the content you want to work with + interpreter.set("code", code); + + // Simple use Pygments as you would in Python + interpreter.exec("from pygments import highlight\n" + + "from pygments.lexers import PythonLexer\n" + + "from pygments.formatters import HtmlFormatter\n" + + "\nresult = highlight(code, PythonLexer(), HtmlFormatter())"); + + // Get the result that has been set in a variable + System.out.println(interpreter.get("result", String.class)); + +will print something like: + +.. sourcecode:: html + + <div class="highlight"> + <pre><span class="k">print</span> <span class="s">"Hello World"</span></pre> + </div> diff --git a/doc/docs/lexerdevelopment.rst b/doc/docs/lexerdevelopment.rst new file mode 100644 index 0000000..354b1d4 --- /dev/null +++ b/doc/docs/lexerdevelopment.rst @@ -0,0 +1,748 @@ +.. -*- mode: rst -*- + +.. highlight:: python + +==================== +Write your own lexer +==================== + +If a lexer for your favorite language is missing in the Pygments package, you +can easily write your own and extend Pygments. 
+ +All you need can be found inside the :mod:`pygments.lexer` module. As you can +read in the :doc:`API documentation <api>`, a lexer is a class that is +initialized with some keyword arguments (the lexer options) and that provides a +:meth:`.get_tokens_unprocessed()` method which is given a string or unicode +object with the data to lex. + +The :meth:`.get_tokens_unprocessed()` method must return an iterator or iterable +containing tuples in the form ``(index, token, value)``. Normally you don't +need to do this since there are base lexers that do most of the work and that +you can subclass. + +RegexLexer +========== + +The lexer base class used by almost all of Pygments' lexers is the +:class:`RegexLexer`. This class allows you to define lexing rules in terms of +*regular expressions* for different *states*. + +States are groups of regular expressions that are matched against the input +string at the *current position*. If one of these expressions matches, a +corresponding action is performed (such as yielding a token with a specific +type, or changing state), the current position is set to where the last match +ended and the matching process continues with the first regex of the current +state. + +Lexer states are kept on a stack: each time a new state is entered, the new +state is pushed onto the stack. The most basic lexers (like the `DiffLexer`) +just need one state. + +Each state is defined as a list of tuples in the form (`regex`, `action`, +`new_state`) where the last item is optional. In the most basic form, `action` +is a token type (like `Name.Builtin`). That means: When `regex` matches, emit a +token with the match text and type `tokentype` and push `new_state` on the state +stack. If the new state is ``'#pop'``, the topmost state is popped from the +stack instead. To pop more than one state, use ``'#pop:2'`` and so on. +``'#push'`` is a synonym for pushing a second time the current state on top of +the stack. 
+ +The following example shows the `DiffLexer` from the builtin lexers. Note that +it contains some additional attributes `name`, `aliases` and `filenames` which +aren't required for a lexer. They are used by the builtin lexer lookup +functions. :: + + from pygments.lexer import RegexLexer + from pygments.token import * + + class DiffLexer(RegexLexer): + name = 'Diff' + aliases = ['diff'] + filenames = ['*.diff'] + + tokens = { + 'root': [ + (r' .*\n', Text), + (r'\+.*\n', Generic.Inserted), + (r'-.*\n', Generic.Deleted), + (r'@.*\n', Generic.Subheading), + (r'Index.*\n', Generic.Heading), + (r'=.*\n', Generic.Heading), + (r'.*\n', Text), + ] + } + +As you can see this lexer only uses one state. When the lexer starts scanning +the text, it first checks if the current character is a space. If this is true +it scans everything until newline and returns the data as a `Text` token (which +is the "no special highlighting" token). + +If this rule doesn't match, it checks if the current char is a plus sign. And +so on. + +If no rule matches at the current position, the current char is emitted as an +`Error` token that indicates a lexing error, and the position is increased by +one. + + +Adding and testing a new lexer +============================== + +The easiest way to use a new lexer is to use Pygments' support for loading +the lexer from a file relative to your current directory. + +First, change the name of your lexer class to CustomLexer: + +.. code-block:: python + + from pygments.lexer import RegexLexer + from pygments.token import * + + class CustomLexer(RegexLexer): + """All your lexer code goes here!""" + +Then you can load and test the lexer from the command line with the additional +flag ``-x``: + +.. code-block:: console + + $ python -m pygments -x -l your_lexer_file.py <inputfile> + +To specify a class name other than CustomLexer, append it with a colon: + +.. 
code-block:: console + + $ python -m pygments -x -l your_lexer.py:SomeLexer <inputfile> + +Or, using the Python API: + +.. code-block:: python + + # For a lexer named CustomLexer + your_lexer = load_lexer_from_file(filename, **options) + + # For a lexer named MyNewLexer + your_named_lexer = load_lexer_from_file(filename, "MyNewLexer", **options) + +When loading custom lexers and formatters, be extremely careful to use only +trusted files; Pygments will perform the equivalent of ``eval`` on them. + +If you only want to use your lexer with the Pygments API, you can import and +instantiate the lexer yourself, then pass it to :func:`pygments.highlight`. + +Use the ``-f`` flag to select a different output format than terminal +escape sequences. The :class:`pygments.formatters.html.HtmlFormatter` helps +you with debugging your lexer. You can use the ``debug_token_types`` option +to display the token types assigned to each part of your input file: + +.. code-block:: console + + $ python -m pygments -x -f html -Ofull,debug_token_types -l your_lexer.py:SomeLexer <inputfile> + +Hover over each token to see the token type displayed as a tooltip. + +To prepare your new lexer for inclusion in the Pygments distribution, so that it +will be found when passing filenames or lexer aliases from the command line, you +have to perform the following steps. + +First, change to the current directory containing the Pygments source code. You +will need to have either an unpacked source tarball, or (preferably) a copy +cloned from GitHub. + +.. code-block:: console + + $ cd pygments + +Select a matching module under ``pygments/lexers``, or create a new module for +your lexer class. + +.. note:: + + We encourage you to put your lexer class into its own module, unless it's a + very small derivative of an already existing lexer. + +Next, make sure the lexer is known from outside of the module. All modules in +the ``pygments.lexers`` package specify ``__all__``. 
For example, +``esoteric.py`` sets:: + + __all__ = ['BrainfuckLexer', 'BefungeLexer', ...] + +Add the name of your lexer class to this list (or create the list if your lexer +is the only class in the module). + +Finally the lexer can be made publicly known by rebuilding the lexer mapping. +In the root directory of the source (where the ``Makefile`` is located), run: + +.. code-block:: console + + $ make mapfiles + +To test the new lexer, store an example file in +``tests/examplefiles/<alias>``. For example, to test your +``DiffLexer``, add a ``tests/examplefiles/diff/example.diff`` containing a +sample diff output. To (re)generate the lexer output which the file is checked +against, use the command ``pytest tests/examplefiles/diff --update-goldens``. + +Now you can use ``python -m pygments`` from the current root of the checkout to +render your example to HTML: + +.. code-block:: console + + $ python -m pygments -O full -f html -o /tmp/example.html tests/examplefiles/diff/example.diff + +Note that this explicitly calls the ``pygments`` module in the current +directory. This ensures your modifications are used. Otherwise a possibly +already installed, unmodified version without your new lexer would have been +called from the system search path (``$PATH``). + +To view the result, open ``/tmp/example.html`` in your browser. + +Once the example renders as expected, you should run the complete test suite: + +.. code-block:: console + + $ make test + +It also tests that your lexer fulfills the lexer API and certain invariants, +such as that the concatenation of all token text is the same as the input text. + + +Regex Flags +=========== + +You can either define regex flags locally in the regex (``r'(?x)foo bar'``) or +globally by adding a `flags` attribute to your lexer class. If no attribute is +defined, it defaults to `re.MULTILINE`. For more information about regular +expression flags see the page about `regular expressions`_ in the Python +documentation. + +.. 
_regular expressions: https://docs.python.org/library/re.html#regular-expression-syntax + + +Scanning multiple tokens at once +================================ + +So far, the `action` element in the rule tuple of regex, action and state has +been a single token type. Now we look at the first of several other possible +values. + +Here is a more complex lexer that highlights INI files. INI files consist of +sections, comments and ``key = value`` pairs:: + + from pygments.lexer import RegexLexer, bygroups + from pygments.token import * + + class IniLexer(RegexLexer): + name = 'INI' + aliases = ['ini', 'cfg'] + filenames = ['*.ini', '*.cfg'] + + tokens = { + 'root': [ + (r'\s+', Text), + (r';.*?$', Comment), + (r'\[.*?\]$', Keyword), + (r'(.*?)(\s*)(=)(\s*)(.*?)$', + bygroups(Name.Attribute, Text, Operator, Text, String)) + ] + } + +The lexer first looks for whitespace, comments and section names. Later it +looks for a line that looks like a key, value pair, separated by an ``'='`` +sign, and optional whitespace. + +The `bygroups` helper yields each capturing group in the regex with a different +token type. First the `Name.Attribute` token, then a `Text` token for the +optional whitespace, after that a `Operator` token for the equals sign. Then a +`Text` token for the whitespace again. The rest of the line is returned as +`String`. + +Note that for this to work, every part of the match must be inside a capturing +group (a ``(...)``), and there must not be any nested capturing groups. If you +nevertheless need a group, use a non-capturing group defined using this syntax: +``(?:some|words|here)`` (note the ``?:`` after the beginning parenthesis). + +If you find yourself needing a capturing group inside the regex which shouldn't +be part of the output but is used in the regular expressions for backreferencing +(eg: ``r'(<(foo|bar)>)(.*?)(</\2>)'``), you can pass `None` to the bygroups +function and that group will be skipped in the output. 
+ + +Changing states +=============== + +Many lexers need multiple states to work as expected. For example, some +languages allow multiline comments to be nested. Since this is a recursive +pattern it's impossible to lex just using regular expressions. + +Here is a lexer that recognizes C++ style comments (multi-line with ``/* */`` +and single-line with ``//`` until end of line):: + + from pygments.lexer import RegexLexer + from pygments.token import * + + class CppCommentLexer(RegexLexer): + name = 'Example Lexer with states' + + tokens = { + 'root': [ + (r'[^/]+', Text), + (r'/\*', Comment.Multiline, 'comment'), + (r'//.*?$', Comment.Singleline), + (r'/', Text) + ], + 'comment': [ + (r'[^*/]+', Comment.Multiline), + (r'/\*', Comment.Multiline, '#push'), + (r'\*/', Comment.Multiline, '#pop'), + (r'[*/]', Comment.Multiline) + ] + } + +This lexer starts lexing in the ``'root'`` state. It tries to match as much as +possible until it finds a slash (``'/'``). If the next character after the slash +is an asterisk (``'*'``) the `RegexLexer` sends those two characters to the +output stream marked as `Comment.Multiline` and continues lexing with the rules +defined in the ``'comment'`` state. + +If there wasn't an asterisk after the slash, the `RegexLexer` checks if it's a +Singleline comment (i.e. followed by a second slash). If this also wasn't the +case it must be a single slash, which is not a comment starter (the separate +regex for a single slash must also be given, else the slash would be marked as +an error token). + +Inside the ``'comment'`` state, we do the same thing again. Scan until the +lexer finds a star or slash. If it's the opening of a multiline comment, push +the ``'comment'`` state on the stack and continue scanning, again in the +``'comment'`` state. Else, check if it's the end of the multiline comment. If +yes, pop one state from the stack. + +Note: If you pop from an empty stack you'll get an `IndexError`. 
(There is an +easy way to prevent this from happening: don't ``'#pop'`` in the root state). + +If the `RegexLexer` encounters a newline that is flagged as an error token, the +stack is emptied and the lexer continues scanning in the ``'root'`` state. This +can help producing error-tolerant highlighting for erroneous input, e.g. when a +single-line string is not closed. + + +Advanced state tricks +===================== + +There are a few more things you can do with states: + +- You can push multiple states onto the stack if you give a tuple instead of a + simple string as the third item in a rule tuple. For example, if you want to + match a comment containing a directive, something like: + + .. code-block:: text + + /* <processing directive> rest of comment */ + + you can use this rule:: + + tokens = { + 'root': [ + (r'/\* <', Comment, ('comment', 'directive')), + ... + ], + 'directive': [ + (r'[^>]+', Comment.Directive), + (r'>', Comment, '#pop'), + ], + 'comment': [ + (r'[^*]+', Comment), + (r'\*/', Comment, '#pop'), + (r'\*', Comment), + ] + } + + When this encounters the above sample, first ``'comment'`` and ``'directive'`` + are pushed onto the stack, then the lexer continues in the directive state + until it finds the closing ``>``, then it continues in the comment state until + the closing ``*/``. Then, both states are popped from the stack again and + lexing continues in the root state. + + .. versionadded:: 0.9 + The tuple can contain the special ``'#push'`` and ``'#pop'`` (but not + ``'#pop:n'``) directives. + + +- You can include the rules of a state in the definition of another. 
This is + done by using `include` from `pygments.lexer`:: + + from pygments.lexer import RegexLexer, bygroups, include + from pygments.token import * + + class ExampleLexer(RegexLexer): + tokens = { + 'comments': [ + (r'(?s)/\*.*?\*/', Comment), + (r'//.*?\n', Comment), + ], + 'root': [ + include('comments'), + (r'(function)( )(\w+)( )({)', + bygroups(Keyword, Whitespace, Name, Whitespace, Punctuation), 'function'), + (r'.*\n', Text), + ], + 'function': [ + (r'[^}/]+', Text), + include('comments'), + (r'/', Text), + (r'\}', Punctuation, '#pop'), + ] + } + + This is a hypothetical lexer for a language that consists of functions and + comments. Because comments can occur at toplevel and in functions, we need + rules for comments in both states. As you can see, the `include` helper saves + repeating rules that occur more than once (in this example, the state + ``'comments'`` will never be entered by the lexer, as it's only there to be + included in ``'root'`` and ``'function'``). + +- Sometimes, you may want to "combine" a state from existing ones. This is + possible with the `combined` helper from `pygments.lexer`. + + If you, instead of a new state, write ``combined('state1', 'state2')`` as the + third item of a rule tuple, a new anonymous state will be formed from state1 + and state2 and if the rule matches, the lexer will enter this state. + + This is not used very often, but can be helpful in some cases, such as the + `PythonLexer`'s string literal processing. + +- If you want your lexer to start lexing in a different state you can modify the + stack by overriding the `get_tokens_unprocessed()` method:: + + from pygments.lexer import RegexLexer + + class ExampleLexer(RegexLexer): + tokens = {...} + + def get_tokens_unprocessed(self, text, stack=('root', 'otherstate')): + for item in RegexLexer.get_tokens_unprocessed(self, text, stack): + yield item + + Some lexers like the `PhpLexer` use this to make the leading ``<?php`` + preprocessor comments optional.
Note that you can crash the lexer easily by + putting values into the stack that don't exist in the token map. Also + removing ``'root'`` from the stack can result in strange errors! + +- In some lexers, a state should be popped if anything is encountered that isn't + matched by a rule in the state. You could use an empty regex at the end of + the state list, but Pygments provides a more obvious way of spelling that: + ``default('#pop')`` is equivalent to ``('', Text, '#pop')``. + + .. versionadded:: 2.0 + + +Subclassing lexers derived from RegexLexer +========================================== + +.. versionadded:: 1.6 + +Sometimes multiple languages are very similar, but should still be lexed by +different lexer classes. + +When subclassing a lexer derived from RegexLexer, the ``tokens`` dictionaries +defined in the parent and child class are merged. For example:: + + from pygments.lexer import RegexLexer, inherit + from pygments.token import * + + class BaseLexer(RegexLexer): + tokens = { + 'root': [ + ('[a-z]+', Name), + (r'/\*', Comment, 'comment'), + ('"', String, 'string'), + (r'\s+', Text), + ], + 'string': [ + ('[^"]+', String), + ('"', String, '#pop'), + ], + 'comment': [ + ... + ], + } + + class DerivedLexer(BaseLexer): + tokens = { + 'root': [ + ('[0-9]+', Number), + inherit, + ], + 'string': [ + (r'[^"\\]+', String), + (r'\\.', String.Escape), + ('"', String, '#pop'), + ], + } + +The `BaseLexer` defines two states, lexing names and strings. The +`DerivedLexer` defines its own tokens dictionary, which extends the definitions +of the base lexer: + +* The "root" state has an additional rule and then the special object `inherit`, + which tells Pygments to insert the token definitions of the parent class at + that point. + +* The "string" state is replaced entirely, since there is not `inherit` rule. + +* The "comment" state is inherited entirely. + + +Using multiple lexers +===================== + +Using multiple lexers for the same input can be tricky. 
One of the easiest +combination techniques is shown here: You can replace the action entry in a rule +tuple with a lexer class. The matched text will then be lexed with that lexer, +and the resulting tokens will be yielded. + +For example, look at this stripped-down HTML lexer:: + + from pygments.lexer import RegexLexer, bygroups, using + from pygments.token import * + from pygments.lexers.javascript import JavascriptLexer + + class HtmlLexer(RegexLexer): + name = 'HTML' + aliases = ['html'] + filenames = ['*.html', '*.htm'] + + flags = re.IGNORECASE | re.DOTALL + tokens = { + 'root': [ + ('[^<&]+', Text), + ('&.*?;', Name.Entity), + (r'<\s*script\s*', Name.Tag, ('script-content', 'tag')), + (r'<\s*[a-zA-Z0-9:]+', Name.Tag, 'tag'), + (r'<\s*/\s*[a-zA-Z0-9:]+\s*>', Name.Tag), + ], + 'script-content': [ + (r'(.+?)(<\s*/\s*script\s*>)', + bygroups(using(JavascriptLexer), Name.Tag), + '#pop'), + ] + } + +Here the content of a ``<script>`` tag is passed to a newly created instance of +a `JavascriptLexer` and not processed by the `HtmlLexer`. This is done using +the `using` helper that takes the other lexer class as its parameter. + +Note the combination of `bygroups` and `using`. This makes sure that the +content up to the ``</script>`` end tag is processed by the `JavascriptLexer`, +while the end tag is yielded as a normal token with the `Name.Tag` type. + +Also note the ``(r'<\s*script\s*', Name.Tag, ('script-content', 'tag'))`` rule. +Here, two states are pushed onto the state stack, ``'script-content'`` and +``'tag'``. That means that first ``'tag'`` is processed, which will lex +attributes and the closing ``>``, then the ``'tag'`` state is popped and the +next state on top of the stack will be ``'script-content'``. + +Since you cannot refer to the class currently being defined, use `this` +(imported from `pygments.lexer`) to refer to the current lexer class, i.e. +``using(this)``. 
This construct may seem unnecessary, but this is often the +most obvious way of lexing arbitrary syntax between fixed delimiters without +introducing deeply nested states. + +The `using()` helper has a special keyword argument, `state`, which works as +follows: if given, the lexer to use initially is not in the ``"root"`` state, +but in the state given by this argument. This does not work with advanced +`RegexLexer` subclasses such as `ExtendedRegexLexer` (see below). + +Any other keywords arguments passed to `using()` are added to the keyword +arguments used to create the lexer. + + +Delegating Lexer +================ + +Another approach for nested lexers is the `DelegatingLexer` which is for example +used for the template engine lexers. It takes two lexers as arguments on +initialisation: a `root_lexer` and a `language_lexer`. + +The input is processed as follows: First, the whole text is lexed with the +`language_lexer`. All tokens yielded with the special type of ``Other`` are +then concatenated and given to the `root_lexer`. The language tokens of the +`language_lexer` are then inserted into the `root_lexer`'s token stream at the +appropriate positions. :: + + from pygments.lexer import DelegatingLexer + from pygments.lexers.web import HtmlLexer, PhpLexer + + class HtmlPhpLexer(DelegatingLexer): + def __init__(self, **options): + super().__init__(HtmlLexer, PhpLexer, **options) + +This procedure ensures that e.g. HTML with template tags in it is highlighted +correctly even if the template tags are put into HTML tags or attributes. + +If you want to change the needle token ``Other`` to something else, you can give +the lexer another token type as the third parameter:: + + DelegatingLexer.__init__(MyLexer, OtherLexer, Text, **options) + + +Callbacks +========= + +Sometimes the grammar of a language is so complex that a lexer would be unable +to process it just by using regular expressions and stacks. 
+ +For this, the `RegexLexer` allows callbacks to be given in rule tuples, instead +of token types (`bygroups` and `using` are nothing else but preimplemented +callbacks). The callback must be a function taking two arguments: + +* the lexer itself +* the match object for the last matched rule + +The callback must then return an iterable of (or simply yield) ``(index, +tokentype, value)`` tuples, which are then just passed through by +`get_tokens_unprocessed()`. The ``index`` here is the position of the token in +the input string, ``tokentype`` is the normal token type (like `Name.Builtin`), +and ``value`` the associated part of the input string. + +You can see an example here:: + + from pygments.lexer import RegexLexer + from pygments.token import Generic + + class HypotheticLexer(RegexLexer): + + def headline_callback(lexer, match): + equal_signs = match.group(1) + text = match.group(2) + yield match.start(), Generic.Headline, equal_signs + text + equal_signs + + tokens = { + 'root': [ + (r'(=+)(.*?)(\1)', headline_callback) + ] + } + +If the regex for the `headline_callback` matches, the function is called with +the match object. Note that after the callback is done, processing continues +normally, that is, after the end of the previous match. The callback has no +possibility to influence the position. + +There are not really any simple examples for lexer callbacks, but you can see +them in action e.g. in the `SMLLexer` class in `ml.py`_. + +.. _ml.py: https://github.com/pygments/pygments/blob/master/pygments/lexers/ml.py + + +The ExtendedRegexLexer class +============================ + +The `RegexLexer`, even with callbacks, unfortunately isn't powerful enough for +the funky syntax rules of languages such as Ruby. + +But fear not; even then you don't have to abandon the regular expression +approach: Pygments has a subclass of `RegexLexer`, the `ExtendedRegexLexer`. 
+
+All features known from RegexLexers are available here too, and the tokens are
+specified in exactly the same way, *except* for one detail:
+
+The `get_tokens_unprocessed()` method holds its internal state data not as local
+variables, but in an instance of the `pygments.lexer.LexerContext` class, and
+that instance is passed to callbacks as a third argument. This means that you
+can modify the lexer state in callbacks.
+
+The `LexerContext` class has the following members:
+
+* `text` -- the input text
+* `pos` -- the current starting position that is used for matching regexes
+* `stack` -- a list containing the state stack
+* `end` -- the maximum position to which regexes are matched, this defaults to
+  the length of `text`
+
+Additionally, the `get_tokens_unprocessed()` method can be given a
+`LexerContext` instead of a string and will then process this context instead of
+creating a new one for the string argument.
+
+Note that because you can set the current position to anything in the callback,
+it won't automatically be set by the caller after the callback is finished.
+For example, this is how the hypothetical lexer above would be written with the
+`ExtendedRegexLexer`::
+
+    from pygments.lexer import ExtendedRegexLexer
+    from pygments.token import Generic
+
+    class ExHypotheticLexer(ExtendedRegexLexer):
+
+        def headline_callback(lexer, match, ctx):
+            equal_signs = match.group(1)
+            text = match.group(2)
+            yield match.start(), Generic.Headline, equal_signs + text + equal_signs
+            ctx.pos = match.end()
+
+        tokens = {
+            'root': [
+                (r'(=+)(.*?)(\1)', headline_callback)
+            ]
+        }
+
+This might sound confusing (and it can really be). But it is needed, and for an
+example look at the Ruby lexer in `ruby.py`_.
+
+.. 
_ruby.py: https://github.com/pygments/pygments/blob/master/pygments/lexers/ruby.py
+
+
+Handling Lists of Keywords
+==========================
+
+For a relatively short list (hundreds) you can construct an optimized regular
+expression directly using ``words()`` (longer lists, see next section). This
+function handles a few things for you automatically, including escaping
+metacharacters and Python's first-match rather than longest-match in
+alternations. Feel free to put the lists themselves in
+``pygments/lexers/_$lang_builtins.py`` (see examples there), and have them
+generated by code if possible.
+
+An example of using ``words()`` is something like::
+
+    from pygments.lexer import RegexLexer, words
+    from pygments.token import Name
+
+    class MyLexer(RegexLexer):
+
+        tokens = {
+            'root': [
+                (words(('else', 'elseif'), suffix=r'\b'), Name.Builtin),
+                (r'\w+', Name),
+            ],
+        }
+
+As you can see, you can add ``prefix`` and ``suffix`` parts to the constructed
+regex.
+
+
+Modifying Token Streams
+=======================
+
+Some languages ship a lot of builtin functions (for example PHP). The total
+amount of those functions differs from system to system because not everybody
+has every extension installed. In the case of PHP there are over 3000 builtin
+functions. That's an incredibly huge amount of functions, much more than you
+want to put into a regular expression.
+
+But because only `Name` tokens can be function names this is solvable by
+overriding the ``get_tokens_unprocessed()`` method. 
The following lexer +subclasses the `PythonLexer` so that it highlights some additional names as +pseudo keywords:: + + from pygments.lexers.python import PythonLexer + from pygments.token import Name, Keyword + + class MyPythonLexer(PythonLexer): + EXTRA_KEYWORDS = set(('foo', 'bar', 'foobar', 'barfoo', 'spam', 'eggs')) + + def get_tokens_unprocessed(self, text): + for index, token, value in PythonLexer.get_tokens_unprocessed(self, text): + if token is Name and value in self.EXTRA_KEYWORDS: + yield index, Keyword.Pseudo, value + else: + yield index, token, value + +The `PhpLexer` and `LuaLexer` use this method to resolve builtin functions. diff --git a/doc/docs/lexers.rst b/doc/docs/lexers.rst new file mode 100644 index 0000000..446c5a9 --- /dev/null +++ b/doc/docs/lexers.rst @@ -0,0 +1,69 @@ +.. -*- mode: rst -*- + +================ +Available lexers +================ + +This page lists all available builtin lexers and the options they take. + +Currently, **all lexers** support these options: + +`stripnl` + Strip leading and trailing newlines from the input (default: ``True``) + +`stripall` + Strip all leading and trailing whitespace from the input (default: + ``False``). + +`ensurenl` + Make sure that the input ends with a newline (default: ``True``). This + is required for some lexers that consume input linewise. + + .. versionadded:: 1.3 + +`tabsize` + If given and greater than 0, expand tabs in the input (default: ``0``). + +`encoding` + If given, must be an encoding name (such as ``"utf-8"``). This encoding + will be used to convert the input string to Unicode (if it is not already + a Unicode string). The default is ``"guess"``. + + If this option is set to ``"guess"``, a simple UTF-8 vs. Latin-1 + detection is used, if it is set to ``"chardet"``, the + `chardet library <https://chardet.github.io/>`_ is used to + guess the encoding of the input. + + .. 
versionadded:: 0.6
+
+
+The "Short Names" field lists the identifiers that can be used with the
+`get_lexer_by_name()` function.
+
+These lexers are builtin and can be imported from `pygments.lexers`:
+
+.. pygmentsdoc:: lexers
+
+
+Iterating over all lexers
+-------------------------
+
+.. versionadded:: 0.6
+
+To get all lexers (both the builtin and the plugin ones), you can
+use the `get_all_lexers()` function from the `pygments.lexers`
+module:
+
+.. sourcecode:: pycon
+
+    >>> from pygments.lexers import get_all_lexers
+    >>> i = get_all_lexers()
+    >>> next(i)
+    ('Diff', ('diff',), ('*.diff', '*.patch'), ('text/x-diff', 'text/x-patch'))
+    >>> next(i)
+    ('Delphi', ('delphi', 'objectpascal', 'pas', 'pascal'), ('*.pas',), ('text/x-pascal',))
+    >>> next(i)
+    ('XML+Ruby', ('xml+ruby', 'xml+erb'), (), ())
+
+As you can see, the return value is an iterator which yields tuples
+in the form ``(name, aliases, filetypes, mimetypes)``.
diff --git a/doc/docs/moinmoin.rst b/doc/docs/moinmoin.rst
new file mode 100644
index 0000000..80ed25c
--- /dev/null
+++ b/doc/docs/moinmoin.rst
@@ -0,0 +1,39 @@
+.. -*- mode: rst -*-
+
+============================
+Using Pygments with MoinMoin
+============================
+
+From Pygments 0.7, the source distribution ships a `Moin`_ parser plugin that
+can be used to get Pygments highlighting in Moin wiki pages.
+
+To use it, copy the file `external/moin-parser.py` from the Pygments
+distribution to the `data/plugin/parser` subdirectory of your Moin instance.
+Edit the options at the top of the file (currently ``ATTACHMENTS`` and
+``INLINESTYLES``) and rename the file to the name that the parser directive
+should have. For example, if you name the file ``code.py``, you can get a
+highlighted Python code sample with this Wiki markup::
+
+    {{{
+    #!code python
+    [...]
+    }}}
+
+where ``python`` is the Pygments name of the lexer to use. 
+ +Additionally, if you set the ``ATTACHMENTS`` option to True, Pygments will also +be called for all attachments for whose filenames there is no other parser +registered. + +You are responsible for including CSS rules that will map the Pygments CSS +classes to colors. You can output a stylesheet file with `pygmentize`, put it +into the `htdocs` directory of your Moin instance and then include it in the +`stylesheets` configuration option in the Moin config, e.g.:: + + stylesheets = [('screen', '/htdocs/pygments.css')] + +If you do not want to do that and are willing to accept larger HTML output, you +can set the ``INLINESTYLES`` option to True. + + +.. _Moin: https://moinmo.in/ diff --git a/doc/docs/plugins.rst b/doc/docs/plugins.rst new file mode 100644 index 0000000..6738860 --- /dev/null +++ b/doc/docs/plugins.rst @@ -0,0 +1,122 @@ +======= +Plugins +======= + +If you want to extend Pygments without hacking the sources, but want to +use the lexer/formatter/style/filter lookup functions (`lexers.get_lexer_by_name` +et al.), you can use `setuptools`_ entrypoints to add new lexers, formatters +or styles as if they were in the Pygments core. + +.. _setuptools: https://pypi.org/project/setuptools/ + +That means you can use your highlighter modules with the `pygmentize` script, +which relies on the mentioned functions. + + +Plugin discovery +================ + +At runtime, discovering plugins is preferentially done using Python's +standard library module `importlib.metadata`_, available in Python 3.8 +and higher. In earlier Python versions, Pygments attempts to use the +`importlib_metadata`_ backport, if available. If not available, a +fallback is attempted on the older `pkg_resources`_ module. Finally, if +``pkg_resources`` is not available, no plugins will be loaded at +all. Note that ``pkg_resources`` is distributed with `setuptools`_, and +thus available on most Python environments. 
However, ``pkg_resources`` +is considerably slower than ``importlib.metadata`` or its +``importlib_metadata`` backport. For this reason, if you run Pygments +under Python older than 3.8, it is recommended to install +``importlib-metadata``. Pygments defines a ``plugins`` packaging extra, +so you can ensure it is installed with best plugin support (i.e., that +``importlib-metadata`` is also installed in case you are running Python +earlier than 3.8) by specifying ``pygments[plugins]`` as the +requirement, for example, with ``pip``: + +.. sourcecode:: shell + + $ python -m pip install --user pygments[plugins] + +.. _importlib.metadata: https://docs.python.org/3.10/library/importlib.metadata.html +.. _importlib_metadata: https://pypi.org/project/importlib-metadata +.. _pkg_resources: https://setuptools.pypa.io/en/latest/pkg_resources.html + + +Defining plugins through entrypoints +==================================== + +Here is a list of setuptools entrypoints that Pygments understands: + +`pygments.lexers` + + This entrypoint is used for adding new lexers to the Pygments core. + The name of the entrypoint values doesn't really matter, Pygments extracts + required metadata from the class definition: + + .. sourcecode:: ini + + [pygments.lexers] + yourlexer = yourmodule:YourLexer + + Note that you have to define ``name``, ``aliases`` and ``filename`` + attributes so that you can use the highlighter from the command line: + + .. sourcecode:: python + + class YourLexer(...): + name = 'Name Of Your Lexer' + aliases = ['alias'] + filenames = ['*.ext'] + + +`pygments.formatters` + + You can use this entrypoint to add new formatters to Pygments. The + name of an entrypoint item is the name of the formatter. If you + prefix the name with a slash it's used as a filename pattern: + + .. 
sourcecode:: ini + + [pygments.formatters] + yourformatter = yourmodule:YourFormatter + /.ext = yourmodule:YourFormatter + + +`pygments.styles` + + To add a new style you can use this entrypoint. The name of the entrypoint + is the name of the style: + + .. sourcecode:: ini + + [pygments.styles] + yourstyle = yourmodule:YourStyle + + +`pygments.filters` + + Use this entrypoint to register a new filter. The name of the + entrypoint is the name of the filter: + + .. sourcecode:: ini + + [pygments.filters] + yourfilter = yourmodule:YourFilter + + +How To Use Entrypoints +====================== + +This documentation doesn't explain how to use those entrypoints because this is +covered in the `setuptools documentation`_. That page should cover everything +you need to write a plugin. + +.. _setuptools documentation: https://setuptools.readthedocs.io/en/latest/ + + +Extending The Core +================== + +If you have written a Pygments plugin that is open source, please inform us +about that. There is a high chance that we'll add it to the Pygments +distribution. diff --git a/doc/docs/quickstart.rst b/doc/docs/quickstart.rst new file mode 100644 index 0000000..b2a9c29 --- /dev/null +++ b/doc/docs/quickstart.rst @@ -0,0 +1,205 @@ +.. -*- mode: rst -*- + +=========================== +Introduction and Quickstart +=========================== + + +Welcome to Pygments! This document explains the basic concepts and terms and +gives a few examples of how to use the library. + + +Architecture +============ + +There are four types of components that work together highlighting a piece of +code: + +* A **lexer** splits the source into tokens, fragments of the source that + have a token type that determines what the text represents semantically + (e.g., keyword, string, or comment). There is a lexer for every language + or markup format that Pygments supports. +* The token stream can be piped through **filters**, which usually modify + the token types or text fragments, e.g. 
uppercasing all keywords. +* A **formatter** then takes the token stream and writes it to an output + file, in a format such as HTML, LaTeX or RTF. +* While writing the output, a **style** determines how to highlight all the + different token types. It maps them to attributes like "red and bold". + + +Example +======= + +Here is a small example for highlighting Python code: + +.. sourcecode:: python + + from pygments import highlight + from pygments.lexers import PythonLexer + from pygments.formatters import HtmlFormatter + + code = 'print "Hello World"' + print(highlight(code, PythonLexer(), HtmlFormatter())) + +which prints something like this: + +.. sourcecode:: html + + <div class="highlight"> + <pre><span class="k">print</span> <span class="s">"Hello World"</span></pre> + </div> + +As you can see, Pygments uses CSS classes (by default, but you can change that) +instead of inline styles in order to avoid outputting redundant style information over +and over. A CSS stylesheet that contains all CSS classes possibly used in the output +can be produced by: + +.. sourcecode:: python + + print(HtmlFormatter().get_style_defs('.highlight')) + +The argument to :func:`get_style_defs` is used as an additional CSS selector: +the output may look like this: + +.. sourcecode:: css + + .highlight .k { color: #AA22FF; font-weight: bold } + .highlight .s { color: #BB4444 } + ... + + +Options +======= + +The :func:`highlight()` function supports a fourth argument called *outfile*, it +must be a file object if given. The formatted output will then be written to +this file instead of being returned as a string. + +Lexers and formatters both support options. They are given to them as keyword +arguments either to the class or to the lookup method: + +.. 
sourcecode:: python + + from pygments import highlight + from pygments.lexers import get_lexer_by_name + from pygments.formatters import HtmlFormatter + + lexer = get_lexer_by_name("python", stripall=True) + formatter = HtmlFormatter(linenos=True, cssclass="source") + result = highlight(code, lexer, formatter) + +This makes the lexer strip all leading and trailing whitespace from the input +(`stripall` option), lets the formatter output line numbers (`linenos` option), +and sets the wrapping ``<div>``'s class to ``source`` (instead of +``highlight``). + +Important options include: + +`encoding` : for lexers and formatters + Since Pygments uses Unicode strings internally, this determines which + encoding will be used to convert to or from byte strings. +`style` : for formatters + The name of the style to use when writing the output. + + +For an overview of builtin lexers and formatters and their options, visit the +:doc:`lexer <lexers>` and :doc:`formatters <formatters>` lists. + +For a documentation on filters, see :doc:`this page <filters>`. + + +Lexer and formatter lookup +========================== + +If you want to lookup a built-in lexer by its alias or a filename, you can use +one of the following methods: + +.. sourcecode:: pycon + + >>> from pygments.lexers import (get_lexer_by_name, + ... get_lexer_for_filename, get_lexer_for_mimetype) + + >>> get_lexer_by_name('python') + <pygments.lexers.PythonLexer> + + >>> get_lexer_for_filename('spam.rb') + <pygments.lexers.RubyLexer> + + >>> get_lexer_for_mimetype('text/x-perl') + <pygments.lexers.PerlLexer> + +All these functions accept keyword arguments; they will be passed to the lexer +as options. + +A similar API is available for formatters: use :func:`.get_formatter_by_name()` +and :func:`.get_formatter_for_filename()` from the :mod:`pygments.formatters` +module for this purpose. 
+ + +Guessing lexers +=============== + +If you don't know the content of the file, or you want to highlight a file +whose extension is ambiguous, such as ``.html`` (which could contain plain HTML +or some template tags), use these functions: + +.. sourcecode:: pycon + + >>> from pygments.lexers import guess_lexer, guess_lexer_for_filename + + >>> guess_lexer('#!/usr/bin/python\nprint "Hello World!"') + <pygments.lexers.PythonLexer> + + >>> guess_lexer_for_filename('test.py', 'print "Hello World!"') + <pygments.lexers.PythonLexer> + +:func:`.guess_lexer()` passes the given content to the lexer classes' +:meth:`analyse_text()` method and returns the one for which it returns the +highest number. + +All lexers have two different filename pattern lists: the primary and the +secondary one. The :func:`.get_lexer_for_filename()` function only uses the +primary list, whose entries are supposed to be unique among all lexers. +:func:`.guess_lexer_for_filename()`, however, will first loop through all lexers +and look at the primary and secondary filename patterns if the filename matches. +If only one lexer matches, it is returned, else the guessing mechanism of +:func:`.guess_lexer()` is used with the matching lexers. + +As usual, keyword arguments to these functions are given to the created lexer +as options. + + +Command line usage +================== + +You can use Pygments from the command line, using the :program:`pygmentize` +script:: + + $ pygmentize test.py + +will highlight the Python file test.py using ANSI escape sequences +(a.k.a. terminal colors) and print the result to standard output. + +To output HTML, use the ``-f`` option:: + + $ pygmentize -f html -o test.html test.py + +to write an HTML-highlighted version of test.py to the file test.html. 
+Note that it will only be a snippet of HTML, if you want a full HTML document, +use the "full" option:: + + $ pygmentize -f html -O full -o test.html test.py + +This will produce a full HTML document with included stylesheet. + +A style can be selected with ``-O style=<name>``. + +If you need a stylesheet for an existing HTML file using Pygments CSS classes, +it can be created with:: + + $ pygmentize -S default -f html > style.css + +where ``default`` is the style name. + +More options and tricks can be found in the :doc:`command line reference +<cmdline>`. diff --git a/doc/docs/rstdirective.rst b/doc/docs/rstdirective.rst new file mode 100644 index 0000000..edc117d --- /dev/null +++ b/doc/docs/rstdirective.rst @@ -0,0 +1,22 @@ +.. -*- mode: rst -*- + +================================ +Using Pygments in ReST documents +================================ + +Many Python people use `ReST`_ for documentation their sourcecode, programs, +scripts et cetera. This also means that documentation often includes sourcecode +samples or snippets. + +You can easily enable Pygments support for your ReST texts using a custom +directive -- this is also how this documentation displays source code. + +From Pygments 0.9, the directive is shipped in the distribution as +`external/rst-directive.py`. You can copy and adapt this code to your liking. + +.. removed -- too confusing + *Loosely related note:* The ReST lexer now recognizes ``.. sourcecode::`` and + ``.. code::`` directives and highlights the contents in the specified language + if the `handlecodeblocks` option is true. + +.. _ReST: https://docutils.sourceforge.io/rst.html diff --git a/doc/docs/security.rst b/doc/docs/security.rst new file mode 100644 index 0000000..72f2d05 --- /dev/null +++ b/doc/docs/security.rst @@ -0,0 +1,31 @@ +Security considerations +----------------------- + +Pygments provides no guarantees on execution time, which needs to be taken +into consideration when using Pygments to process arbitrary user inputs. 
For +example, if you have a web service which uses Pygments for highlighting, there +may be inputs which will cause the Pygments process to run "forever" and/or use +significant amounts of memory. This can subsequently be used to perform a +remote denial-of-service attack on the server if the processes are not +terminated quickly. + +Unfortunately, it's practically impossible to harden Pygments itself against +those issues: Some regular expressions can result in "catastrophic +backtracking", but other bugs like incorrect matchers can also +cause similar problems, and there is no way to find them in an automated fashion +(short of solving the halting problem.) Pygments has extensive unit tests, +automated randomized testing, and is also tested by `OSS-Fuzz <https://github.com/google/oss-fuzz/tree/master/projects/pygments>`_, +but we will never be able to eliminate all bugs in this area. + +Our recommendations are: + +* Ensure that the Pygments process is *terminated* after a reasonably short + timeout. In general Pygments should take seconds at most for reasonably-sized + input. +* *Limit* the number of concurrent Pygments processes to avoid oversubscription + of resources. + +The Pygments authors will treat any bug resulting in long processing times with +high priority -- it's one of those things that will be fixed in a patch release. +When reporting a bug where you suspect super-linear execution times, please make +sure to attach an input to reproduce it.
\ No newline at end of file diff --git a/doc/docs/styledevelopment.rst b/doc/docs/styledevelopment.rst new file mode 100644 index 0000000..8c4ec2d --- /dev/null +++ b/doc/docs/styledevelopment.rst @@ -0,0 +1,96 @@ +.. -*- mode: rst -*- + +.. _creating-own-styles: + +Creating Own Styles +=================== + +So, how to create a style? All you have to do is to subclass `Style` and +define some styles: + +.. sourcecode:: python + + from pygments.style import Style + from pygments.token import Token, Comment, Keyword, Name, String, \ + Error, Generic, Number, Operator + + + class YourStyle(Style): + + styles = { + Token: '', + Comment: 'italic #888', + Keyword: 'bold #005', + Name: '#f00', + Name.Class: 'bold #0f0', + Name.Function: '#0f0', + String: 'bg:#eee #111' + } + +That's it, save it as ``your.py``. There are just a few rules. When you define a style for `Name` +the style automatically also affects `Name.Function` and so on. If you +defined ``'bold'`` and you don't want boldface for a subtoken use ``'nobold'``. + +(Philosophy: the styles aren't written in CSS syntax since this way +they can be used for a variety of formatters.) + +``Token`` is the default style inherited by all token types. + +To make the style usable for Pygments, you must + +* either register it as a plugin (see :doc:`the plugin docs <plugins>`) +* or update the ``pygments.styles`` subpackage directory. For example: + + * add ``your.py`` file + * register the new style by adding a line to the ``__init__.py`` file: + + .. sourcecode:: python + + STYLE_MAP = { + ... + 'your': 'your::YourStyle', + +.. note:: + + You should *only* add it to the ``pygments.styles`` subdirectory if you are + working on a contribution to Pygments. You should not use that + method to extend an already existing copy of Pygments, use the plugins + mechanism for that. 
+
+
+Style Rules
+===========
+
+Here is a small overview of all allowed styles:
+
+``bold``
+    render text as bold
+``nobold``
+    don't render text as bold (to prevent subtokens being highlighted bold)
+``italic``
+    render text italic
+``noitalic``
+    don't render text as italic
+``underline``
+    render text underlined
+``nounderline``
+    don't render text underlined
+``bg:``
+    transparent background
+``bg:#000000``
+    background color (black)
+``border:``
+    no border
+``border:#ffffff``
+    border color (white)
+``#ff0000``
+    text color (red)
+``noinherit``
+    don't inherit styles from supertoken
+
+Note that there may not be a space between ``bg:`` and the color value
+since the style definition string is split at whitespace.
+Also, using named colors is not allowed since the supported color names
+vary for different formatters.
+
+Furthermore, not all lexers might support every style. diff --git a/doc/docs/styles.rst b/doc/docs/styles.rst
new file mode 100644
index 0000000..91689d3
--- /dev/null
+++ b/doc/docs/styles.rst
@@ -0,0 +1,157 @@
+.. -*- mode: rst -*-
+
+======
+Styles
+======
+
+Pygments comes with :doc:`some builtin styles </styles/>` that work for both the
+HTML and LaTeX formatter.
+
+The builtin styles can be looked up with the `get_style_by_name` function:
+
+.. sourcecode:: pycon
+
+    >>> from pygments.styles import get_style_by_name
+    >>> get_style_by_name('colorful')
+    <class 'pygments.styles.colorful.ColorfulStyle'>
+
+You can pass an instance of a `Style` class to a formatter as the `style`
+option in form of a string:
+
+.. sourcecode:: pycon
+
+    >>> from pygments.styles import get_style_by_name
+    >>> from pygments.formatters import HtmlFormatter
+    >>> HtmlFormatter(style='colorful').style
+    <class 'pygments.styles.colorful.ColorfulStyle'>
+
+Or you can also import your own style (which must be a subclass of
+`pygments.style.Style`) and pass it to the formatter:
+
+.. 
sourcecode:: pycon
+
+    >>> from yourapp.yourmodule import YourStyle
+    >>> from pygments.formatters import HtmlFormatter
+    >>> HtmlFormatter(style=YourStyle).style
+    <class 'yourapp.yourmodule.YourStyle'>
+
+
+Creating Own Styles
+===================
+
+See :ref:`creating-own-styles`.
+
+
+Builtin Styles
+==============
+
+Pygments ships some builtin styles which are maintained by the Pygments team.
+
+To get a list of known styles you can use this snippet:
+
+.. sourcecode:: pycon
+
+    >>> from pygments.styles import STYLE_MAP
+    >>> STYLE_MAP.keys()
+    ['default', 'emacs', 'friendly', 'colorful']
+
+
+Getting a list of available styles
+==================================
+
+.. versionadded:: 0.6
+
+Because it could be that a plugin registered a style, there is
+a way to iterate over all styles:
+
+.. sourcecode:: pycon
+
+    >>> from pygments.styles import get_all_styles
+    >>> styles = list(get_all_styles())
+
+
+.. _AnsiTerminalStyle:
+
+Terminal Styles
+===============
+
+.. versionadded:: 2.2
+
+Custom styles used with the 256-color terminal formatter can also map colors to
+use the 8 default ANSI colors. To do so, use ``ansigreen``, ``ansibrightred`` or
+any other colors defined in :attr:`pygments.style.ansicolors`. Foreground ANSI
+colors will be mapped to the corresponding `escape codes 30 to 37
+<https://en.wikipedia.org/wiki/ANSI_escape_code#Colors>`_ thus respecting any
+custom color mapping and themes provided by many terminal emulators. Light
+variants are treated as foreground color with an added bold flag.
+``bg:ansi<color>`` will also be respected, except the light variant will be the
+same shade as their dark variant.
+
+See the following example where the color of the string ``"hello world"`` is
+governed by the escape sequence ``\x1b[34;01m`` (Ansi bright blue, Bold, 41 being red
+background) instead of an extended foreground & background color.
+
+.. 
sourcecode:: pycon
+
+    >>> from pygments import highlight
+    >>> from pygments.style import Style
+    >>> from pygments.token import Token
+    >>> from pygments.lexers import Python3Lexer
+    >>> from pygments.formatters import Terminal256Formatter
+
+    >>> class MyStyle(Style):
+            styles = {
+                Token.String: 'ansibrightblue bg:ansibrightred',
+            }
+
+    >>> code = 'print("Hello World")'
+    >>> result = highlight(code, Python3Lexer(), Terminal256Formatter(style=MyStyle))
+    >>> print(result.encode())
+    b'\x1b[34;41;01m"\x1b[39;49;00m\x1b[34;41;01mHello World\x1b[39;49;00m\x1b[34;41;01m"\x1b[39;49;00m'
+
+Colors specified using ``ansi*`` are converted to a default set of RGB colors
+when used with formatters other than the terminal-256 formatter.
+
+By definition of ANSI, the following colors are considered "light" colors, and
+will be rendered by most terminals as bold:
+
+- "brightblack" (darkgrey), "brightred", "brightgreen", "brightyellow", "brightblue",
+  "brightmagenta", "brightcyan", "white"
+
+The following are considered "dark" colors and will be rendered as non-bold:
+
+- "black", "red", "green", "yellow", "blue", "magenta", "cyan",
+  "gray"
+
+Exact behavior might depend on the terminal emulator you are using, and its
+settings.
+
+.. _new-ansi-color-names:
+
+.. versionchanged:: 2.4
+
+The definition of the ANSI color names has changed.
+New names are easier to understand and align to the colors used in other projects. 
+ +===================== ==================== +New names Pygments up to 2.3 +===================== ==================== +``ansiblack`` ``#ansiblack`` +``ansired`` ``#ansidarkred`` +``ansigreen`` ``#ansidarkgreen`` +``ansiyellow`` ``#ansibrown`` +``ansiblue`` ``#ansidarkblue`` +``ansimagenta`` ``#ansipurple`` +``ansicyan`` ``#ansiteal`` +``ansigray`` ``#ansilightgray`` +``ansibrightblack`` ``#ansidarkgray`` +``ansibrightred`` ``#ansired`` +``ansibrightgreen`` ``#ansigreen`` +``ansibrightyellow`` ``#ansiyellow`` +``ansibrightblue`` ``#ansiblue`` +``ansibrightmagenta`` ``#ansifuchsia`` +``ansibrightcyan`` ``#ansiturquoise`` +``ansiwhite`` ``#ansiwhite`` +===================== ==================== + +Old ANSI color names are deprecated but will still work. diff --git a/doc/docs/terminal-sessions.rst b/doc/docs/terminal-sessions.rst new file mode 100644 index 0000000..45af0eb --- /dev/null +++ b/doc/docs/terminal-sessions.rst @@ -0,0 +1,46 @@ +Interactive terminal/shell sessions +----------------------------------- + +To highlight an interactive terminal or shell session, prefix your code snippet +with a specially formatted prompt. + +Supported shells with examples are shown below. In each example, prompt parts in +brackets ``[any]`` represent optional parts of the prompt, and prompt parts +without brackets or in parenthesis ``(any)`` represent required parts of the +prompt. + +* **Bash Session** (console, shell-session): + + .. code-block:: console + + [any@any]$ ls -lh + [any@any]# ls -lh + [any@any]% ls -lh + $ ls -lh + # ls -lh + % ls -lh + > ls -lh + +* **MSDOS Session** (doscon): + + .. code-block:: doscon + + [any]> dir + > dir + More? dir + +* **Tcsh Session** (tcshcon): + + .. code-block:: tcshcon + + (any)> ls -lh + ? ls -lh + +* **PowerShell Session** (ps1con): + + .. 
code-block:: ps1con + + PS[any]> Get-ChildItem + PS> Get-ChildItem + >> Get-ChildItem + diff --git a/doc/docs/tokens.rst b/doc/docs/tokens.rst new file mode 100644 index 0000000..0bc7586 --- /dev/null +++ b/doc/docs/tokens.rst @@ -0,0 +1,376 @@ +.. -*- mode: rst -*- + +============== +Builtin Tokens +============== + +.. module:: pygments.token + +In the :mod:`pygments.token` module, there is a special object called `Token` +that is used to create token types. + +You can create a new token type by accessing an attribute of `Token` whose +name starts with an uppercase letter: + +.. sourcecode:: pycon + + >>> from pygments.token import Token + >>> Token.String + Token.String + >>> Token.String is Token.String + True + +Note that tokens are singletons so you can use the ``is`` operator for comparing +token types. + +You can also use the ``in`` operator to perform set tests: + +.. sourcecode:: pycon + + >>> from pygments.token import Comment + >>> Comment.Single in Comment + True + >>> Comment in Comment.Multi + False + +This can be useful in :doc:`filters <filters>` and if you write lexers on your +own without using the base lexers. + +You can also split a token type into a hierarchy, and get the parent of it: + +.. sourcecode:: pycon + + >>> String.split() + [Token, Token.Literal, Token.Literal.String] + >>> String.parent + Token.Literal + +In principle, you can create an unlimited number of token types but nobody can +guarantee that a style would define style rules for a token type. Because of +that, Pygments proposes some global token types defined in the +`pygments.token.STANDARD_TYPES` dict. + +For some tokens aliases are already defined: + +.. 
sourcecode:: pycon + + >>> from pygments.token import String + >>> String + Token.Literal.String + +Inside the :mod:`pygments.token` module the following aliases are defined: + +============= ============================ ==================================== +`Text` `Token.Text` for any type of text data +`Whitespace` `Token.Text.Whitespace` for whitespace +`Error` `Token.Error` represents lexer errors +`Other` `Token.Other` special token for data not + matched by a parser (e.g. HTML + markup in PHP code) +`Keyword` `Token.Keyword` any kind of keywords +`Name` `Token.Name` variable/function names +`Literal` `Token.Literal` Any literals +`String` `Token.Literal.String` string literals +`Number` `Token.Literal.Number` number literals +`Operator` `Token.Operator` operators (``+``, ``not``...) +`Punctuation` `Token.Punctuation` punctuation (``[``, ``(``...) +`Comment` `Token.Comment` any kind of comments +`Generic` `Token.Generic` generic tokens (have a look at + the explanation below) +============= ============================ ==================================== + +Normally you just create token types using the already defined aliases. For each +of those token aliases, a number of subtypes exists (excluding the special tokens +`Token.Text`, `Token.Error` and `Token.Other`) + +It's also possible to convert strings to token types (for example +if you want to supply a token from the command line): + +.. sourcecode:: pycon + + >>> from pygments.token import String, string_to_tokentype + >>> string_to_tokentype("String") + Token.Literal.String + >>> string_to_tokentype("Token.Literal.String") + Token.Literal.String + >>> string_to_tokentype(String) + Token.Literal.String + + +Keyword Tokens +============== + +`Keyword` + For any kind of keyword (especially if it doesn't match any of the + subtypes of course). + +`Keyword.Constant` + For keywords that are constants (e.g. ``None`` in future Python versions). 
+ +`Keyword.Declaration` + For keywords used for variable declaration (e.g. ``var`` in some programming + languages like JavaScript). + +`Keyword.Namespace` + For keywords used for namespace declarations (e.g. ``import`` in Python and + Java and ``package`` in Java). + +`Keyword.Pseudo` + For keywords that aren't really keywords (e.g. ``None`` in old Python + versions). + +`Keyword.Reserved` + For reserved keywords. + +`Keyword.Type` + For builtin types that can't be used as identifiers (e.g. ``int``, + ``char`` etc. in C). + + +Name Tokens +=========== + +`Name` + For any name (variable names, function names, classes). + +`Name.Attribute` + For all attributes (e.g. in HTML tags). + +`Name.Builtin` + Builtin names; names that are available in the global namespace. + +`Name.Builtin.Pseudo` + Builtin names that are implicit (e.g. ``self`` in Ruby, ``this`` in Java). + +`Name.Class` + Class names. Because no lexer can know if a name is a class or a function + or something else this token is meant for class declarations. + +`Name.Constant` + Token type for constants. In some languages you can recognise a token by the + way it's defined (the value after a ``const`` keyword for example). In + other languages constants are uppercase by definition (Ruby). + +`Name.Decorator` + Token type for decorators. Decorators are syntactic elements in the Python + language. Similar syntax elements exist in C# and Java. + +`Name.Entity` + Token type for special entities. (e.g. `` `` in HTML). + +`Name.Exception` + Token type for exception names (e.g. ``RuntimeError`` in Python). Some languages + define exceptions in the function signature (Java). You can highlight + the name of that exception using this token then. + +`Name.Function` + Token type for function names. + +`Name.Function.Magic` + same as `Name.Function` but for special function names that have an implicit use + in a language (e.g. ``__init__`` method in Python). + +`Name.Label` + Token type for label names (e.g. 
in languages that support ``goto``). + +`Name.Namespace` + Token type for namespaces. (e.g. import paths in Java/Python), names following + the ``module``/``namespace`` keyword in other languages. + +`Name.Other` + Other names. Normally unused. + +`Name.Property` + Additional token type occasionally used for class attributes. + +`Name.Tag` + Tag names (in HTML/XML markup or configuration files). + +`Name.Variable` + Token type for variables. Some languages have prefixes for variable names + (PHP, Ruby, Perl). You can highlight them using this token. + +`Name.Variable.Class` + same as `Name.Variable` but for class variables (also static variables). + +`Name.Variable.Global` + same as `Name.Variable` but for global variables (used in Ruby, for + example). + +`Name.Variable.Instance` + same as `Name.Variable` but for instance variables. + +`Name.Variable.Magic` + same as `Name.Variable` but for special variable names that have an implicit use + in a language (e.g. ``__doc__`` in Python). + + +Literals +======== + +`Literal` + For any literal (if not further defined). + +`Literal.Date` + for date literals (e.g. ``42d`` in Boo). + + +`String` + For any string literal. + +`String.Affix` + Token type for affixes that further specify the type of the string they're + attached to (e.g. the prefixes ``r`` and ``u8`` in ``r"foo"`` and ``u8"foo"``). + +`String.Backtick` + Token type for strings enclosed in backticks. + +`String.Char` + Token type for single characters (e.g. Java, C). + +`String.Delimiter` + Token type for delimiting identifiers in "heredoc", raw and other similar + strings (e.g. the word ``END`` in Perl code ``print <<'END';``). + +`String.Doc` + Token type for documentation strings (for example Python). + +`String.Double` + Double quoted strings. + +`String.Escape` + Token type for escape sequences in strings. + +`String.Heredoc` + Token type for "heredoc" strings (e.g. in Ruby or Perl). + +`String.Interpol` + Token type for interpolated parts in strings (e.g. 
``#{foo}`` in Ruby). + +`String.Other` + Token type for any other strings (for example ``%q{foo}`` string constructs + in Ruby). + +`String.Regex` + Token type for regular expression literals (e.g. ``/foo/`` in JavaScript). + +`String.Single` + Token type for single quoted strings. + +`String.Symbol` + Token type for symbols (e.g. ``:foo`` in LISP or Ruby). + + +`Number` + Token type for any number literal. + +`Number.Bin` + Token type for binary literals (e.g. ``0b101010``). + +`Number.Float` + Token type for float literals (e.g. ``42.0``). + +`Number.Hex` + Token type for hexadecimal number literals (e.g. ``0xdeadbeef``). + +`Number.Integer` + Token type for integer literals (e.g. ``42``). + +`Number.Integer.Long` + Token type for long integer literals (e.g. ``42L`` in Python). + +`Number.Oct` + Token type for octal literals. + + +Operators +========= + +`Operator` + For any punctuation operator (e.g. ``+``, ``-``). + +`Operator.Word` + For any operator that is a word (e.g. ``not``). + + +Punctuation +=========== + +.. versionadded:: 0.7 + +`Punctuation` + For any punctuation which is not an operator (e.g. ``[``, ``(``...) + +`Punctuation.Marker` + For markers that point to a location (e.g., carets in Python + tracebacks for syntax errors). + + .. versionadded:: 2.10 + + +Comments +======== + +`Comment` + Token type for any comment. + +`Comment.Hashbang` + Token type for hashbang comments (i.e. first lines of files that start with + ``#!``). + +`Comment.Multiline` + Token type for multiline comments. + +`Comment.Preproc` + Token type for preprocessor comments (also ``<?php``/``<%`` constructs). + +`Comment.PreprocFile` + Token type for filenames in preprocessor comments, such as include files in C/C++. + +`Comment.Single` + Token type for comments that end at the end of a line (e.g. ``# foo``). + +`Comment.Special` + Special data in comments. For example code tags, author and license + information, etc. 
+ + +Generic Tokens +============== + +Generic tokens are for special lexers like the `DiffLexer` that doesn't really +highlight a programming language but a patch file. + + +`Generic` + A generic, unstyled token. Normally you don't use this token type. + +`Generic.Deleted` + Marks the token value as deleted. + +`Generic.Emph` + Marks the token value as emphasized. + +`Generic.Error` + Marks the token value as an error message. + +`Generic.Heading` + Marks the token value as headline. + +`Generic.Inserted` + Marks the token value as inserted. + +`Generic.Output` + Marks the token value as program output (e.g. for python cli lexer). + +`Generic.Prompt` + Marks the token value as command prompt (e.g. bash lexer). + +`Generic.Strong` + Marks the token value as bold (e.g. for rst lexer). + +`Generic.Subheading` + Marks the token value as subheadline. + +`Generic.Traceback` + Marks the token value as a part of an error traceback. diff --git a/doc/docs/unicode.rst b/doc/docs/unicode.rst new file mode 100644 index 0000000..dca9111 --- /dev/null +++ b/doc/docs/unicode.rst @@ -0,0 +1,58 @@ +===================== +Unicode and Encodings +===================== + +Since Pygments 0.6, all lexers use unicode strings internally. Because of that +you might encounter the occasional :exc:`UnicodeDecodeError` if you pass strings +with the wrong encoding. + +Per default all lexers have their input encoding set to `guess`. This means +that the following encodings are tried: + +* UTF-8 (including BOM handling) +* The locale encoding (i.e. the result of `locale.getpreferredencoding()`) +* As a last resort, `latin1` + +If you pass a lexer a byte string object (not unicode), it tries to decode the +data using this encoding. + +You can override the encoding using the `encoding` or `inencoding` lexer +options. If you have the `chardet`_ library installed and set the encoding to +``chardet`` it will analyse the text and use the encoding it thinks is the +right one automatically: + +.. 
sourcecode:: python + + from pygments.lexers import PythonLexer + lexer = PythonLexer(encoding='chardet') + +The best way is to pass Pygments unicode objects. In that case you can't get +unexpected output. + +The formatters now send Unicode objects to the stream if you don't set the +output encoding. You can do so by passing the formatters an `encoding` option: + +.. sourcecode:: python + + from pygments.formatters import HtmlFormatter + f = HtmlFormatter(encoding='utf-8') + +**You will have to set this option if you have non-ASCII characters in the +source and the output stream does not accept Unicode written to it!** +This is the case for all regular files and for terminals. + +Note: The Terminal formatter tries to be smart: if its output stream has an +`encoding` attribute, and you haven't set the option, it will encode any +Unicode string with this encoding before writing it. This is the case for +`sys.stdout`, for example. The other formatters don't have that behavior. + +Another note: If you call Pygments via the command line (`pygmentize`), +encoding is handled differently, see :doc:`the command line docs <cmdline>`. + +.. versionadded:: 0.7 + The formatters now also accept an `outencoding` option which will override + the `encoding` option if given. This makes it possible to use a single + options dict with lexers and formatters, and still have different input and + output encodings. + +.. _chardet: https://chardet.github.io/ diff --git a/doc/download.rst b/doc/download.rst new file mode 100644 index 0000000..7ac0868 --- /dev/null +++ b/doc/download.rst @@ -0,0 +1,39 @@ +Download and installation +========================= + +The current release is version |version|. + +Packaged versions +----------------- + +You can download it `from the Python Package Index +<https://pypi.python.org/pypi/Pygments>`_. For installation of packages from +PyPI, we recommend `Pip <https://www.pip-installer.org>`_, which works on all +major platforms. 
+ +Under Linux, most distributions include a package for Pygments, usually called +``pygments`` or ``python-pygments``. You can install it with the package +manager as usual. + +Development sources +------------------- + +We're using the Git version control system. You can get the development source +using this command:: + + git clone https://github.com/pygments/pygments + +Development takes place at `GitHub <https://github.com/pygments/pygments>`_. + +The latest changes in the development source code are listed in the `changelog +<https://github.com/pygments/pygments/blob/master/CHANGES>`_. + +.. Documentation + ------------- + +.. XXX todo + + You can download the <a href="/docs/">documentation</a> either as + a bunch of rst files from the Git repository, see above, or + as a tar.gz containing rendered HTML files:</p> + <p><a href="/docs/download/pygmentsdocs.tar.gz">pygmentsdocs.tar.gz</a></p> diff --git a/doc/examples/example.py b/doc/examples/example.py new file mode 100644 index 0000000..6c9e2f1 --- /dev/null +++ b/doc/examples/example.py @@ -0,0 +1,14 @@ +from typing import Iterator + +# This is an example +class Math: + @staticmethod + def fib(n: int) -> Iterator[int]: + """Fibonacci series up to n.""" + a, b = 0, 1 + while a < n: + yield a + a, b = b, a + b + +result = sum(Math.fib(42)) +print("The answer is {}".format(result)) diff --git a/doc/faq.rst b/doc/faq.rst new file mode 100644 index 0000000..4e078dc --- /dev/null +++ b/doc/faq.rst @@ -0,0 +1,142 @@ +:orphan: + +Pygments FAQ +============= + +What is Pygments? +----------------- + +Pygments is a syntax highlighting engine written in Python. That means, it will +take source code (or other markup) in a supported language and output a +processed version (in different formats) containing syntax highlighting markup. 
+ +Its features include: + +* a wide range of common :doc:`languages and markup formats <languages>` is supported +* new languages and formats are added easily +* a number of output formats is available, including: + + - HTML + - ANSI sequences (console output) + - LaTeX + - RTF + +* it is usable as a command-line tool and as a library +* parsing and formatting is fast + +Pygments is licensed under the BSD license. + +Where does the name Pygments come from? +--------------------------------------- + +*Py* of course stands for Python, while *pigments* are used for coloring paint, +and in this case, source code! + +What are the system requirements? +--------------------------------- + +Pygments only needs a standard Python install, version 3.6 or higher. No +additional libraries are needed. + +How can I use Pygments? +----------------------- + +Pygments is usable as a command-line tool as well as a library. + +From the command-line, usage looks like this (assuming the pygmentize script is +properly installed):: + + pygmentize -f html /path/to/file.py + +This will print a HTML-highlighted version of /path/to/file.py to standard output. + +For a complete help, please run ``pygmentize -h``. + +Usage as a library is thoroughly demonstrated in the Documentation section. + +How do I make a new style? +-------------------------- + +Please see the :doc:`documentation on styles <docs/styles>`. + +How can I report a bug or suggest a feature? +-------------------------------------------- + +Please report bugs and feature wishes in the tracker at GitHub. + +You can also e-mail the authors, see the contact details. + +I want this support for this language! +-------------------------------------- + +Instead of waiting for others to include language support, why not write it +yourself? All you have to know is :doc:`outlined in the docs +<docs/lexerdevelopment>`. + +Can I use Pygments for programming language processing? 
+------------------------------------------------------- + +The Pygments lexing machinery is quite powerful and can be used to build lexers for +basically all languages. However, parsing them is not possible, though some +lexers go some steps in this direction in order to e.g. highlight function names +differently. + +Also, error reporting is not the scope of Pygments. It focuses on correctly +highlighting syntactically valid documents, not finding and compensating errors. + +Who uses Pygments? +------------------ + +This is an (incomplete) list of projects and sites known to use the Pygments highlighter. + +* `Wikipedia <https://en.wikipedia.org/>`_ +* `BitBucket <https://bitbucket.org/>`_, a Mercurial and Git hosting site +* `The Sphinx documentation builder <https://sphinx-doc.org/>`_, for embedded source examples +* `rst2pdf <https://github.com/ralsina/rst2pdf>`_, a reStructuredText to PDF converter +* `Codecov <https://codecov.io/>`_, a code coverage CI service +* `Trac <https://trac.edgewall.org/>`_, the universal project management tool +* `AsciiDoc <https://www.methods.co.nz/asciidoc/>`_, a text-based documentation generator +* `ActiveState Code <https://code.activestate.com/>`_, the Python Cookbook successor +* `ViewVC <http://viewvc.org/>`_, a web-based version control repository browser +* `BzrFruit <https://repo.or.cz/w/bzrfruit.git>`_, a Bazaar branch viewer +* `QBzr <http://bazaar-vcs.org/QBzr>`_, a cross-platform Qt-based GUI front end for Bazaar +* `Review Board <https://www.reviewboard.org/>`_, a collaborative code reviewing tool +* `Diamanda <https://code.google.com/archive/p/diamanda/>`_, a Django powered wiki system with support for Pygments +* `Progopedia <http://progopedia.ru/>`_ (`English <http://progopedia.com/>`_), + an encyclopedia of programming languages +* `Bruce <https://sites.google.com/site/r1chardj0n3s/bruce>`_, a reStructuredText presentation tool +* `PIDA <http://pida.co.uk/>`_, a universal IDE written in Python +* `BPython 
<https://bpython-interpreter.org/>`_, a curses-based intelligent Python shell +* `PuDB <https://pypi.org/project/pudb/>`_, a console Python debugger +* `XWiki <https://www.xwiki.org/>`_, a wiki-based development framework in Java, using Jython +* `roux <http://ananelson.com/software/roux/>`_, a script for running R scripts + and creating beautiful output including graphs +* `hurl <http://hurl.it/>`_, a web service for making HTTP requests +* `wxHTMLPygmentizer <http://colinbarnette.net/projects/wxHTMLPygmentizer>`_ is + a GUI utility, used to make code-colorization easier +* `Postmarkup <https://code.google.com/archive/p/postmarkup/>`_, a BBCode to XHTML generator +* `WpPygments <http://blog.mirotin.net/?page_id=49>`_, and `WPygments + <https://github.com/capynet/WPygments>`_, highlighter plugins for WordPress +* `Siafoo <http://siafoo.net>`_, a tool for sharing and storing useful code and programming experience +* `D source <http://www.dsource.org/>`_, a community for the D programming language +* `dpaste.com <http://dpaste.com/>`_, another Django pastebin +* `Django snippets <https://djangosnippets.org/>`_, a pastebin for Django code +* `Fayaa <http://www.fayaa.com/code/>`_, a Chinese pastebin +* `Incollo.com <http://incollo.com>`_, a free collaborative debugging tool +* `PasteBox <https://p.boxnet.eu/>`_, a pastebin focused on privacy +* `hilite.me <http://www.hilite.me/>`_, a site to highlight code snippets +* `patx.me <http://patx.me/paste>`_, a pastebin +* `Fluidic <https://github.com/richsmith/fluidic>`_, an experiment in + integrating shells with a GUI +* `pygments.rb <https://github.com/pygments/pygments.rb>`_, a pygments wrapper for Ruby +* `Clygments <https://github.com/bfontaine/clygments>`_, a pygments wrapper for + Clojure +* `PHPygments <https://github.com/capynet/PHPygments>`_, a pygments wrapper for PHP +* `Spyder <https://www.spyder-ide.org/>`_, the Scientific Python Development + Environment, uses pygments for the multi-language syntax 
highlighting in its + `editor <https://docs.spyder-ide.org/editor.html>`_. +* `snippet.host <https://snippet.host>`_, minimal text and code snippet hosting +* `sourcehut <https://sourcehut.org>`_, the hacker's forge + +If you have a project or web site using Pygments, `open an issue or PR +<https://github.com/pygments/pygments>`_ and we'll add a line here. diff --git a/doc/index.rst b/doc/index.rst new file mode 100644 index 0000000..dbd1596 --- /dev/null +++ b/doc/index.rst @@ -0,0 +1,47 @@ +Welcome! +======== + +This is the home of Pygments. It is a generic syntax highlighter suitable for +use in code hosting, forums, wikis or other applications that need to prettify +source code. Highlights are: + +* a wide range of |language_count| languages and other text formats is supported +* special attention is paid to details that increase highlighting quality +* support for new languages and formats are added easily; most languages use a + simple regex-based lexing mechanism +* a number of output formats is available, among them HTML, RTF, LaTeX and ANSI + sequences +* it is usable as a command-line tool and as a library + +Read more in the :doc:`FAQ list <faq>` or the :doc:`documentation <docs/index>`, +or `download the latest release <https://pypi.python.org/pypi/Pygments>`_. + +.. _contribute: + +Contribute +---------- + +Like every open-source project, we are always looking for volunteers to help us +with programming. Python knowledge is required, but don't fear: Python is a very +clear and easy to learn language. + +Development takes place on `GitHub <https://github.com/pygments/pygments>`_. + +If you found a bug, just open a ticket in the GitHub tracker. Be sure to log +in to be notified when the issue is fixed -- development is not fast-paced as +the library is quite stable. You can also send an e-mail to the developers, see +below. 
+ +The authors +----------- + +Pygments is maintained by **Georg Brandl**, e-mail address *georg*\ *@*\ *python.org*, **Matthäus Chajdas** and **Jean Abou-Samra**. + +Many lexers and fixes have been contributed by **Armin Ronacher**, the rest of +the `Pocoo <https://dev.pocoo.org/>`_ team and **Tim Hatch**. + +.. toctree:: + :maxdepth: 1 + :hidden: + + docs/index diff --git a/doc/languages.rst b/doc/languages.rst new file mode 100644 index 0000000..8136442 --- /dev/null +++ b/doc/languages.rst @@ -0,0 +1,18 @@ +:orphan: + +Languages +========= + +.. pygmentsdoc:: lexers_overview + +... that's all? +--------------- + +Well, why not write your own? Contributing to Pygments is easy and fun. Take a +look at the :doc:`docs on lexer development <docs/lexerdevelopment>`. Pull +requests are welcome on `GitHub <https://github.com/pygments/pygments>`_. + +.. note:: + + The languages listed here are supported in the development version. The + latest release may lack a few of them. diff --git a/doc/make.bat b/doc/make.bat new file mode 100644 index 0000000..8803c98 --- /dev/null +++ b/doc/make.bat @@ -0,0 +1,190 @@ +@ECHO OFF + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set BUILDDIR=_build +set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . +set I18NSPHINXOPTS=%SPHINXOPTS% . +if NOT "%PAPER%" == "" ( + set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% + set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% +) + +if "%1" == "" goto help + +if "%1" == "help" ( + :help + echo.Please use `make ^<target^>` where ^<target^> is one of + echo. html to make standalone HTML files + echo. dirhtml to make HTML files named index.html in directories + echo. singlehtml to make a single large HTML file + echo. pickle to make pickle files + echo. json to make JSON files + echo. htmlhelp to make HTML files and a HTML help project + echo. qthelp to make HTML files and a qthelp project + echo. 
devhelp to make HTML files and a Devhelp project + echo. epub to make an epub + echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter + echo. text to make text files + echo. man to make manual pages + echo. texinfo to make Texinfo files + echo. gettext to make PO message catalogs + echo. changes to make an overview over all changed/added/deprecated items + echo. linkcheck to check all external links for integrity + echo. doctest to run all doctests embedded in the documentation if enabled + goto end +) + +if "%1" == "clean" ( + for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i + del /q /s %BUILDDIR%\* + goto end +) + +if "%1" == "html" ( + %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/html. + goto end +) + +if "%1" == "dirhtml" ( + %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. + goto end +) + +if "%1" == "singlehtml" ( + %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. + goto end +) + +if "%1" == "pickle" ( + %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the pickle files. + goto end +) + +if "%1" == "json" ( + %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the JSON files. + goto end +) + +if "%1" == "htmlhelp" ( + %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run HTML Help Workshop with the ^ +.hhp project file in %BUILDDIR%/htmlhelp. 
+ goto end +) + +if "%1" == "qthelp" ( + %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run "qcollectiongenerator" with the ^ +.qhcp project file in %BUILDDIR%/qthelp, like this: + echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Pygments.qhcp + echo.To view the help file: + echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Pygments.ghc + goto end +) + +if "%1" == "devhelp" ( + %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. + goto end +) + +if "%1" == "epub" ( + %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The epub file is in %BUILDDIR%/epub. + goto end +) + +if "%1" == "latex" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "text" ( + %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The text files are in %BUILDDIR%/text. + goto end +) + +if "%1" == "man" ( + %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The manual pages are in %BUILDDIR%/man. + goto end +) + +if "%1" == "texinfo" ( + %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. + goto end +) + +if "%1" == "gettext" ( + %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The message catalogs are in %BUILDDIR%/locale. + goto end +) + +if "%1" == "changes" ( + %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes + if errorlevel 1 exit /b 1 + echo. + echo.The overview file is in %BUILDDIR%/changes. 
+ goto end +) + +if "%1" == "linkcheck" ( + %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck + if errorlevel 1 exit /b 1 + echo. + echo.Link check complete; look for any errors in the above output ^ +or in %BUILDDIR%/linkcheck/output.txt. + goto end +) + +if "%1" == "doctest" ( + %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest + if errorlevel 1 exit /b 1 + echo. + echo.Testing of doctests in the sources finished, look at the ^ +results in %BUILDDIR%/doctest/output.txt. + goto end +) + +:end diff --git a/doc/pygmentize.1 b/doc/pygmentize.1 new file mode 100644 index 0000000..5ac8fe6 --- /dev/null +++ b/doc/pygmentize.1 @@ -0,0 +1,112 @@ +.TH PYGMENTIZE 1 "January 20, 2021" + +.SH NAME +pygmentize \- highlights the input file + +.SH SYNOPSIS +.B \fBpygmentize\fP +.RI [-l\ \fI<lexer>\fP\ |\ -g]\ [-F\ \fI<filter>\fP[:\fI<options>\fP]]\ [-f\ \fI<formatter>\fP] +.RI [-O\ \fI<options>\fP]\ [-P\ \fI<option=value>\fP]\ [-o\ \fI<outfile>\fP]\ [\fI<infile>\fP] +.br +.B \fBpygmentize\fP +.RI -S\ \fI<style>\fP\ -f\ \fI<formatter>\fP\ [-a\ \fI<arg>\fP]\ [-O\ \fI<options>\fP]\ [-P\ \fI<option=value>\fP] +.br +.B \fBpygmentize\fP +.RI -L\ [\fI<which>\fP\ ...] +.br +.B \fBpygmentize\fP +.RI -N\ \fI<filename>\fP +.br +.B \fBpygmentize\fP +.RI -C +.br +.B \fBpygmentize\fP +.RI -H\ \fI<type>\fP\ \fI<name>\fP +.br +.B \fBpygmentize\fP +.RI -h\ |\ -V + +.SH DESCRIPTION +Pygments is a generic syntax highlighter for general use in all kinds +of software such as forum systems, wikis or other applications that need to +prettify source code. +.PP +Its highlights are: + * a wide range of common languages and markup formats is supported + * special attention is paid to details, increasing quality by a fair amount + * support for new languages and formats are added easily + * a number of output formats, presently HTML, LaTeX and ANSI sequences + * it is usable as a command-line tool and as a library + * ... and it highlights even Brainfuck! 
+.PP +\fBpygmentize\fP is a command that uses Pygments to highlight the input file and +write the result to \fI<outfile>\fP. If no \fI<infile>\fP is given, stdin is used. +.SH OPTIONS +A summary of options is included below. +.TP +.B \-l \fI<lexer>\fP +Set the lexer name. If not given, the lexer is guessed from the extension of the +input file name (this obviously doesn't work if the input is stdin). +.TP +.B \-g +Attempt to guess the lexer from the file contents, or pass through as plain text +if this fails (this option works for highlighting standard input). +.TP +.B \-F \fI<filter>\fP[:\fI<options>\fP] +Add a filter to the token stream. You can give options in the same way as for +-O after a colon (note: there must not be spaces around the colon). +This option can be given multiple times. +.TP +.B \-f \fI<formatter>\fP +Set the formatter name. If not given, it will be guessed from the extension of +the output file name. If no output file is given, the terminal formatter will be +used by default. +.TP +.B \-o \fI<outfile>\fP +Set output file. If not given, stdout is used. +.TP +.B \-O \fI<options>\fP +With this option, you can give the lexer and formatter a comma-separated list of +options, e.g. "-O bg=light,python=cool". Which options are valid for which +lexers and formatters can be found in the documentation. +This option can be given multiple times. +.TP +.B \-P \fI<option=value>\fP +This option adds lexer and formatter options like the -O option, but +you can only give one option per -P. That way, the option value may contain +commas and equals signs, which it can't with -O. +.TP +.B \-S \fI<style>\fP +Print out style definitions for style \fI<style>\fP and for formatter \fI<formatter>\fP. +The meaning of the argument given by +.B \-a \fI<arg>\fP +is formatter dependent and can be found in the documentation. +.TP +.B \-L [\fI<which>\fP ...] +List lexers, formatters, styles or filters. Set \fI<which>\fP to the thing you want +to list (e.g. 
"styles"), or omit it to list everything. +.TP +.B \-N \fI<filename>\fP +Guess and print out a lexer name based solely on the given filename. Does not +take input or highlight anything. If no specific lexer can be found, "text" +is printed. +.TP +.B \-C +Like \fI-N\fP, but guess a lexer based on content read from standard input. +.TP +.B \-H \fI<type>\fP \fI<name>\fP +Print detailed help for the object \fI<name>\fP of type \fI<type>\fP, where \fI<type>\fP is one +of "lexer", "formatter" or "filter". +.TP +.B \-h +Show help screen. +.TP +.B \-V +Show version of the Pygments package. +.SH SEE ALSO +/usr/share/doc/python-pygments/index.html +.SH AUTHOR +pygmentize was written by Georg Brandl <g.brandl@gmx.net>. +.PP +This manual page was written by Piotr Ozarowski <ozarow@gmail.com>, +for the Debian project (but may be used by others). diff --git a/doc/pyodide/Dockerfile b/doc/pyodide/Dockerfile new file mode 100644 index 0000000..969651c --- /dev/null +++ b/doc/pyodide/Dockerfile @@ -0,0 +1,20 @@ +# Dockerfile for building Pyodide with a Pygmenets version from the current checkout. +# For an example of how to use this image, see the `pyodide` target in the documentation's Makefile. +FROM ghcr.io/pyodide/pyodide:0.20.0 AS build-stage + +WORKDIR pyodide + +# Copy new meta with path to local Pygments instead of pypi url. +COPY doc/pyodide/meta.yaml packages/Pygments/ + +COPY . /pygments + +# Add Pygments to the Pyodide build. +ENV PYODIDE_PACKAGES=Pygments + +# Build Pyodide. 
+RUN make + +FROM scratch AS export-stage + +COPY --from=build-stage /src/pyodide/build / diff --git a/doc/pyodide/meta.yaml b/doc/pyodide/meta.yaml new file mode 100644 index 0000000..d58e1d5 --- /dev/null +++ b/doc/pyodide/meta.yaml @@ -0,0 +1,8 @@ +package: + name: Pygments + version: '2.99' +source: + path: /pygments +test: + imports: + - pygments diff --git a/doc/styles.rst b/doc/styles.rst new file mode 100644 index 0000000..a1bb019 --- /dev/null +++ b/doc/styles.rst @@ -0,0 +1,5 @@ +:orphan: + +This file is overridden by _templates/styles.html and just exists to allow the +Styles gallery to be reliably linked from the documentation +(since its location varies between `make html` and `make dirhtml`). diff --git a/external/autopygmentize b/external/autopygmentize new file mode 100755 index 0000000..85d2366 --- /dev/null +++ b/external/autopygmentize @@ -0,0 +1,145 @@ +#!/bin/bash +# Best effort auto-pygmentization with transparent decompression +# by Reuben Thomas 2008-2022 +# This program is in the public domain. + +# Strategy: first see if pygmentize can find a lexer; if not, ask file; if that finds nothing, fail +# Set the environment variable PYGMENTIZE_OPTS or pass options before the file path to configure pygments. 
+ +# This program can be used as a .lessfilter for the less pager to auto-color less's output + +file="${!#}" # last argument +options=${@:1:$(($#-1))} # handle others args as options to pass to pygmentize + +file_common_opts="--brief --dereference" + +case $(file --mime-type --uncompress $file_common_opts "$file") in + application/xml|image/svg+xml) lexer=xml;; + application/javascript) lexer=javascript;; + application/json) lexer=json;; + text/html) lexer=html;; + text/troff) lexer=nroff;; + text/x-asm) lexer=nasm;; + text/x-awk) lexer=awk;; + text/x-c) lexer=c;; + text/x-c++) lexer=cpp;; + text/x-clojure) lexer=clojure;; + text/x-crystal) lexer=crystal;; + text/x-diff) lexer=diff;; + text/x-execline) lexer=execline;; + text/x-forth) lexer=forth;; + text/x-fortran) lexer=fortran;; + text/x-gawk) lexer=gawk;; + text/x-java) lexer=java;; + text/x-lisp) lexer=common-lisp;; + text/x-lua|text/x-luatex) lexer=lua;; + text/x-makefile) lexer=make;; + text/x-msdos-batch) lexer=bat;; + text/x-nawk) lexer=nawk;; + text/x-objective-c) lexer=objective-c;; + text/x-pascal) lexer=pascal;; + text/x-perl) lexer=perl;; + text/x-php) lexer=php;; + text/x-po) lexer=po;; + text/x-python) lexer=python;; + text/x-ruby) lexer=ruby;; + text/x-script.python) lexer=python;; + text/x-shellscript) lexer=sh;; + text/x-tcl) lexer=tcl;; + text/x-tex|text/x-texinfo) lexer=latex;; # FIXME: texinfo really needs its own lexer + text/xml) lexer=xml;; + text/vnd.graphviz) lexer=graphviz;; + + # Types that file outputs which pygmentize didn't support as of file 5.41, pygments 2.11.2 + # text/binary + # text/calendar + # text/PGP + # text/prs.lines.tag + # text/rtf + # text/spreadsheet + # text/texmacs + # text/vcard + # text/vnd.sosi + # text/x-Algol68 + # text/x-bcpl + # text/x-dmtf-mif + # text/x-gimp-curve + # text/x-gimp-ggr + # text/x-gimp-gpl + # text/x-info + # text/x-installshield-lid + # text/x-m4 + # text/x-modulefile + # text/x-ms-adm + # text/x-ms-cpx + # text/x-ms-regedirt + # 
text/x-ms-tag + # text/x-systemtap + # text/x-vcard + # text/x-wine-extension-reg + # text/x-xmcd + + text/plain) # special filenames. TODO: insert more + case $(basename "$file") in + .zshrc) lexer=sh;; + esac + # pygmentize -N is much cheaper than file, but makes some bad guesses (e.g. + # it guesses ".pl" is Prolog, not Perl) + lexer=$(pygmentize -N "$file") + ;; +esac + +# Find a concatenator for compressed files +concat= +concat_opts= +case $(file $file_common_opts --mime-type "$file") in + # TODO: add support + # application/x-rzip (does not decompress to stdout) + # application/x-dzip (Windows only) + application/gzip|application/x-gzip) concat=zcat;; + application/x-bzip) concat=bzip; concat_opts=-dc;; + application/x-bzip2) concat=bzcat;; + application/x-lz4) concat=lz4; concat_opts=-dc;; + application/x-lzh-compressed) concat=p7zip; concat_opts=-dc;; + application/x-lzma) concat=lzcat;; + application/x-lzip) concat=lzip; concat_opts=-dc;; + application/x-xz) concat=xzcat;; + application/x-zoo) concat=zoo; concat_opts=fu;; +esac +# If concat is unset or doesn't exist, use cat instead +if [[ "$concat" == "" ]] || ! 
command -v "$concat"; then + concat=cat + concat_opts= +fi + +# Find a suitable reader, preceded by a hex dump for binary files, +# or fmt for text with very long lines +prereader="" +reader=cat +encoding=$(file --mime-encoding --uncompress $file_common_opts "$file") +# FIXME: need a way to switch between hex and text view, as file often +# misdiagnoses files when they contain a few control characters +# if [[ $encoding == "binary" ]]; then +# prereader="od -x" # POSIX fallback +# if [[ -n $(which hd) ]]; then +# prereader=hd # preferred +# fi +# lexer=hexdump +# encoding=latin1 +#el +# FIXME: Using fmt does not work well for system logs +# if [[ "$lexer" == "text" ]]; then +# if file "$file" | grep -ql "text, with very long lines"; then +# reader=fmt +# fi +# fi +if [[ "$lexer" != "text" ]]; then + reader="pygmentize -O inencoding=$encoding $PYGMENTIZE_OPTS $options -l $lexer" +fi + +# Run the reader +if [[ -n "$prereader" ]]; then + exec $concat "$file" | $prereader | $reader +else + exec $concat "$file" | $reader +fi diff --git a/external/lasso-builtins-generator-9.lasso b/external/lasso-builtins-generator-9.lasso new file mode 100755 index 0000000..0156299 --- /dev/null +++ b/external/lasso-builtins-generator-9.lasso @@ -0,0 +1,162 @@ +#!/usr/bin/lasso9 + +/* + Builtins Generator for Lasso 9 + + This is the shell script that was used to extract Lasso 9's built-in keywords + and generate most of the _lasso_builtins.py file. When run, it creates a file + containing the types, traits, methods, and members of the currently-installed + version of Lasso 9. 
+ + A list of tags in Lasso 8 can be generated with this code: + + <?LassoScript + local('l8tags' = list, + 'l8libs' = array('Cache','ChartFX','Client','Database','File','HTTP', + 'iCal','Lasso','Link','List','PDF','Response','Stock','String', + 'Thread','Valid','WAP','XML')); + iterate(#l8libs, local('library')); + local('result' = namespace_load(#library)); + /iterate; + iterate(tags_list, local('i')); + #l8tags->insert(string_removeleading(#i, -pattern='_global_')); + /iterate; + #l8tags->sort; + iterate(#l8tags, local('i')); + string_lowercase(#i)+"<br>"; + /iterate; + +*/ + +output("This output statement is required for a complete list of methods.") +local(f) = file("_lasso_builtins-9.py") +#f->doWithClose => { + +#f->openTruncate +#f->writeString('# -*- coding: utf-8 -*- +""" + pygments.lexers._lasso_builtins + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Built-in Lasso types, traits, methods, and members. + + :copyright: Copyright 2006-'+date->year+' by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. 
+""" + +') + +// Load and register contents of $LASSO9_MASTER_HOME/LassoModules/ +database_initialize + +// Load all of the libraries from builtins and lassoserver +// This forces all possible available types and methods to be registered +local(srcs = + (: + dir(sys_masterHomePath + '/LassoLibraries/builtins/')->eachFilePath, + dir(sys_masterHomePath + '/LassoLibraries/lassoserver/')->eachFilePath + ) +) + +with topLevelDir in delve(#srcs) +where not #topLevelDir->lastComponent->beginsWith('.') +do protect => { + handle_error => { + stdoutnl('Unable to load: ' + #topLevelDir + ' ' + error_msg) + } + library_thread_loader->loadLibrary(#topLevelDir) + stdoutnl('Loaded: ' + #topLevelDir) +} + +email_initialize +log_initialize +session_initialize + +local( + typesList = set(), + traitsList = set(), + unboundMethodsList = set(), + memberMethodsList = set() +) + +// types +with type in sys_listTypes +where not #type->asString->endsWith('$') // skip threads +do { + #typesList->insert(#type) +} + +// traits +with trait in sys_listTraits +where not #trait->asString->beginsWith('$') // skip combined traits +do { + #traitsList->insert(#trait) +} + +// member methods +with type in #typesList +do { + with method in #type->getType->listMethods + where #method->typeName == #type // skip inherited methods + let name = #method->methodName + where not #name->asString->endsWith('=') // skip setter methods + where #name->asString->isAlpha(1) // skip unpublished methods + do { + #memberMethodsList->insert(#name) + } +} +with trait in #traitsList +do { + with method in #trait->getType->provides + where #method->typeName == #trait // skip inherited methods + let name = #method->methodName + where not #name->asString->endsWith('=') // skip setter methods + where #name->asString->isAlpha(1) // skip unpublished methods + do { + #memberMethodsList->insert(#name) + } +} + +// unbound methods +with method in sys_listUnboundMethods +let name = #method->methodName +where not 
#name->asString->endsWith('=') // skip setter methods +where #name->asString->isAlpha(1) // skip unpublished methods +where #typesList !>> #name +where #traitsList !>> #name +do { + #unboundMethodsList->insert(#name) +} + +// write to file +with i in (: + pair(#typesList, "BUILTINS = { + 'Types': ( +"), + pair(#traitsList, " ), + 'Traits': ( +"), + pair(#unboundMethodsList, " ), + 'Unbound Methods': ( +"), + pair(#memberMethodsList, " ) +} +MEMBERS = { + 'Member Methods': ( +") +) +do { + #f->writeString(#i->second) + with t in (#i->first) + let ts = #t->asString + order by #ts + do { + #f->writeString(" '"+#ts->lowercase&asString+"',\n") + } +} + +#f->writeString(" ) +} +") + +} diff --git a/external/lilypond-builtins-generator.ly b/external/lilypond-builtins-generator.ly new file mode 100644 index 0000000..983b4c3 --- /dev/null +++ b/external/lilypond-builtins-generator.ly @@ -0,0 +1,391 @@ +%% Autogenerate a list of LilyPond keywords + +\version "2.23.6" + +#(use-modules (ice-9 receive) + (ice-9 regex)) + +#(define port (open-output-file "../pygments/lexers/_lilypond_builtins.py")) + +#(define output-preamble + "\"\"\" + pygments.lexers._lilypond_builtins + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + LilyPond builtins. + + :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +\"\"\" + +# Contents generated by the script lilypond-builtins-generator.ly +# found in the external/ directory of the source tree. + +") + +#(format port "~a" output-preamble) + +#(define (dump-py-list name vals) + (let* ((string-vals + (map symbol->string vals)) + (fixed-vals + (filter-map + (lambda (str) + ; To avoid conflicts with Scheme builtins, + ; a leading backslash is prepended to \<, + ; \= and a few others. The lexer finds it + ; itself, so remove it here. + (cond + ((equal? 
str "\\\\") + #f) + ((string-startswith str "\\") + (string-drop str 1)) + (else + str))) + string-vals)) + (sorted-vals ; reproducibility + ; Avoid duplicates (e.g., identical pitches + ; in different languages) + (uniq-list + (sort fixed-vals string<?))) + (formatted-vals + (map + (lambda (val) + (format #f " \"~a\"," val name)) + sorted-vals)) + (joint-vals + (string-join formatted-vals "\n"))) + (format port + "~a = [ +~a +] + +" + name + joint-vals))) + + +%% KEYWORDS + +#(define keywords + '( + ; Lexical modes. + notemode + lyricmode + lyricsto + addlyrics + chordmode + chords + figuremode + figures + drummode + drums + ; Output definitions. + header + layout + midi + paper + ; Context definitions. + ;; \context is also used in music. We take it as + ;; a keyword in both cases. + context + with + name + type + accepts + denies + alias + defaultchild + consists + remove + description + ;; Not strictly a keyword, but can be viewed so. + inherit-acceptability + ; Blocks. + book + bookpart + score + ; Other. + new + etc + include + language + version)) + +#(dump-py-list 'keywords keywords) + +%% CLEFS + +#(define all-clefs + (map string->symbol (map car supported-clefs))) + +#(dump-py-list 'clefs all-clefs) + +%% SCALES + +#(define all-scales + '(major + minor + ionian + locrian + aeolian + mixolydian + lydian + phrygian + dorian)) + +#(dump-py-list 'scales all-scales) + +%% REPEAT TYPES + +#(define all-repeat-types + '(volta percent unfold segno)) + +#(dump-py-list 'repeat_types all-repeat-types) + +%% UNITS + +#(define all-units + '(mm cm in pt staff-space)) + +#(dump-py-list 'units all-units) + +%% CHORD MODIFIERS + +#(define all-chord-modifiers + '(m dim aug maj)) + +#(dump-py-list 'chord_modifiers all-chord-modifiers) + +%% PITCHES + +#(define all-pitch-language-names + (map car language-pitch-names)) + +#(dump-py-list 'pitch_language_names all-pitch-language-names) + +#(define all-pitch-names + (append + ; We highlight rests just like pitches. 
+ '(r R) + (map car (append-map cdr language-pitch-names)) + ; Drum note names. + (map car drumPitchNames))) + +#(dump-py-list 'pitches all-pitch-names) + +%% MUSIC FUNCTIONS AND SHORTCUTS + +% View these as music functions. +#(define extra-music-functions + '(set + unset + override + revert + tweak + once + undo + temporary + repeat + alternative + tempo + change)) + +#(let* ((module (current-module)) + (module-alist (ly:module->alist module)) + (all-music-functions + (filter + (lambda (entry) + (ly:music-function? (cdr entry))) + module-alist)) + (all-predefined-music-objects + (filter + (lambda (entry) + (ly:music? (cdr entry))) + module-alist))) + (receive (articulations non-articulations) + (partition + (lambda (entry) + (ly:event? (cdr entry))) + all-predefined-music-objects) + (receive (dynamics non-dynamic-articulations) + (partition + (lambda (entry) + (any + (lambda (type) + (music-is-of-type? (cdr entry) + type)) + '(dynamic-event crescendo-event decrescendo-event))) + articulations) + (dump-py-list 'music_functions + (append extra-music-functions + (map car all-music-functions))) + (dump-py-list 'dynamics (map car dynamics)) + (dump-py-list 'articulations (map car non-dynamic-articulations)) + (dump-py-list 'music_commands (map car non-articulations))))) + +%% MARKUP COMMANDS + +#(let* ((markup-name-regexp + (make-regexp "(.*)-markup(-list)?")) + (modules + (cons (current-module) + (map resolve-module '((lily) (lily accreg))))) + (alist + (apply append + (map ly:module->alist modules))) + (markup-commands + (filter + (lambda (entry) + (or (markup-function? (cdr entry)) + (markup-list-function? 
(cdr entry)))) + alist)) + (markup-command-names + (map + (lambda (entry) + (let* ((string-name (symbol->string (car entry))) + (match (regexp-exec markup-name-regexp string-name))) + (string->symbol (match:substring match 1)))) + markup-commands)) + (markup-words + (append '(markup markuplist) + markup-command-names))) + (dump-py-list 'markup_commands markup-words)) + +%% GROBS + +#(let ((grob-names (map car all-grob-descriptions))) + (dump-py-list 'grobs grob-names)) + +%% CONTEXTS + +#(let* ((layout-module + (ly:output-def-scope $defaultlayout)) + (layout-alist + (ly:module->alist layout-module)) + (all-context-defs + (filter + (lambda (entry) + (ly:context-def? (cdr entry))) + layout-alist)) + (context-def-names + (map car all-context-defs))) + (dump-py-list 'contexts context-def-names)) + +%% TRANSLATORS + +#(let* ((all-translators + (ly:get-all-translators)) + (translator-names + (map ly:translator-name all-translators))) + (dump-py-list 'translators translator-names)) + +%% SCHEME FUNCTIONS + +#(let* ((module (resolve-module '(lily))) + (module-alist (ly:module->alist module)) + (all-functions + (filter + (lambda (entry) + (or (procedure? (cdr entry)) + (macro? 
(cdr entry)))) + module-alist)) + (all-function-names + (map car all-functions))) + (dump-py-list 'scheme_functions all-function-names)) + +%% PROPERTIES + +#(dump-py-list 'context_properties all-translation-properties) +#(dump-py-list 'grob_properties all-backend-properties) + +%% PAPER VARIABLES + +% Reference: https://lilypond.org/doc/v2.22/Documentation/notation/page-layout +#(define all-paper-variables + '(paper-height + top-margin + bottom-margin + ragged-bottom + ragged-last-bottom + markup-system-spacing + score-markup-spacing + score-system-spacing + system-system-spacing + markup-markup-spacing + last-bottom-spacing + top-system-spacing + top-markup-spacing + paper-width + line-width + left-margin + right-margin + check-consistency + ragged-right + ragged-last + two-sided + inner-margin + outer-margin + binding-offset + horizontal-shift + indent + short-indent + max-systems-per-page + min-systems-per-page + systems-per-page + system-count + page-breaking + page-breaking-system-system-spacing + page-count + blank-page-penalty + blank-last-page-penalty + auto-first-page-number + first-page-number + print-first-page-number + page-number-type + page-spacing-weight + print-all-headers + system-separator-markup + footnote-separator-markup + ;; Let's view these four as \paper variables. + basic-distance + minimum-distance + padding + stretchability + ;; These were forgotten in the documentation. 
+ evenHeaderMarkup + oddHeaderMarkup + evenFooterMarkup + oddFooterMarkup + bookTitleMarkup + scoreTitleMarkup + )) + +#(dump-py-list 'paper_variables all-paper-variables) + +%% HEADER VARIABLES + +% Reference: https://lilypond.org/doc/v2.22/Documentation/notation/creating-titles-headers-and-footers.html#default-layout-of-bookpart-and-score-titles +#(define all-header-variables + '(dedication + title + subtitle + subsubtitle + instrument + poet + composer + meter + arranger + tagline + copyright + piece + opus + ; The following are used in LSR snippets and regression tests. + lsrtags + doctitle + texidoc)) + +#(dump-py-list 'header_variables all-header-variables) + + +#(close-port port) diff --git a/external/markdown-processor.py b/external/markdown-processor.py new file mode 100644 index 0000000..d72012f --- /dev/null +++ b/external/markdown-processor.py @@ -0,0 +1,66 @@ +""" + The Pygments Markdown Preprocessor + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + This fragment is a Markdown_ preprocessor that renders source code + to HTML via Pygments. To use it, invoke Markdown like so:: + + import markdown + + html = markdown.markdown(someText, extensions=[CodeBlockExtension()]) + + This uses CSS classes by default, so use + ``pygmentize -S <some style> -f html > pygments.css`` + to create a stylesheet to be added to the website. + + You can then highlight source code in your markdown markup:: + + [sourcecode:lexer] + some code + [/sourcecode] + + .. _Markdown: https://pypi.python.org/pypi/Markdown + + :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. 
+""" + +# Options +# ~~~~~~~ + +# Set to True if you want inline CSS styles instead of classes +INLINESTYLES = False + + +import re + +from markdown.preprocessors import Preprocessor +from markdown.extensions import Extension + +from pygments import highlight +from pygments.formatters import HtmlFormatter +from pygments.lexers import get_lexer_by_name, TextLexer + + +class CodeBlockPreprocessor(Preprocessor): + + pattern = re.compile(r'\[sourcecode:(.+?)\](.+?)\[/sourcecode\]', re.S) + + formatter = HtmlFormatter(noclasses=INLINESTYLES) + + def run(self, lines): + def repl(m): + try: + lexer = get_lexer_by_name(m.group(1)) + except ValueError: + lexer = TextLexer() + code = highlight(m.group(2), lexer, self.formatter) + code = code.replace('\n\n', '\n \n').replace('\n', '<br />') + return '\n\n<div class="code">%s</div>\n\n' % code + joined_lines = "\n".join(lines) + joined_lines = self.pattern.sub(repl, joined_lines) + return joined_lines.split("\n") + +class CodeBlockExtension(Extension): + def extendMarkdown(self, md, md_globals): + md.preprocessors.add('CodeBlockPreprocessor', CodeBlockPreprocessor(), '_begin') diff --git a/external/moin-parser.py b/external/moin-parser.py new file mode 100644 index 0000000..562b76f --- /dev/null +++ b/external/moin-parser.py @@ -0,0 +1,111 @@ +""" + The Pygments MoinMoin Parser + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + This is a MoinMoin parser plugin that renders source code to HTML via + Pygments; you need Pygments 0.7 or newer for this parser to work. + + To use it, set the options below to match your setup and put this file in + the data/plugin/parser subdirectory of your Moin instance, and give it the + name that the parser directive should have. For example, if you name the + file ``code.py``, you can get a highlighted Python code sample with this + Wiki markup:: + + {{{ + #!code python + [...] 
+ }}} + + Additionally, if you set ATTACHMENTS below to True, Pygments will also be + called for all attachments for whose filenames there is no other parser + registered. + + You are responsible for including CSS rules that will map the Pygments CSS + classes to colors. You can output a stylesheet file with `pygmentize`, put + it into the `htdocs` directory of your Moin instance and then include it in + the `stylesheets` configuration option in the Moin config, e.g.:: + + stylesheets = [('screen', '/htdocs/pygments.css')] + + If you do not want to do that and are willing to accept larger HTML + output, you can set the INLINESTYLES option below to True. + + :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +# Options +# ~~~~~~~ + +# Set to True if you want to highlight attachments, in addition to +# {{{ }}} blocks. +ATTACHMENTS = True + +# Set to True if you want inline CSS styles instead of classes +INLINESTYLES = False + + +import sys + +from pygments import highlight +from pygments.lexers import get_lexer_by_name, get_lexer_for_filename, TextLexer +from pygments.formatters import HtmlFormatter +from pygments.util import ClassNotFound + + +# wrap lines in <span>s so that the Moin-generated line numbers work +class MoinHtmlFormatter(HtmlFormatter): + def wrap(self, source, outfile): + for line in source: + yield 1, '<span class="line">' + line[1] + '</span>' + +htmlformatter = MoinHtmlFormatter(noclasses=INLINESTYLES) +textlexer = TextLexer() +codeid = [0] + + +class Parser: + """ + MoinMoin Pygments parser. 
+ """ + if ATTACHMENTS: + extensions = '*' + else: + extensions = [] + + Dependencies = [] + + def __init__(self, raw, request, **kw): + self.raw = raw + self.req = request + if "format_args" in kw: + # called from a {{{ }}} block + try: + self.lexer = get_lexer_by_name(kw['format_args'].strip()) + except ClassNotFound: + self.lexer = textlexer + return + if "filename" in kw: + # called for an attachment + filename = kw['filename'] + else: + # called for an attachment by an older moin + # HACK: find out the filename by peeking into the execution + # frame which might not always work + try: + frame = sys._getframe(1) + filename = frame.f_locals['filename'] + except: + filename = 'x.txt' + try: + self.lexer = get_lexer_for_filename(filename) + except ClassNotFound: + self.lexer = textlexer + + def format(self, formatter): + codeid[0] += 1 + id = "pygments_%s" % codeid[0] + w = self.req.write + w(formatter.code_area(1, id, start=1, step=1)) + w(formatter.rawHTML(highlight(self.raw, self.lexer, htmlformatter))) + w(formatter.code_area(0, id)) diff --git a/external/pygments.bashcomp b/external/pygments.bashcomp new file mode 100644 index 0000000..1299fdb --- /dev/null +++ b/external/pygments.bashcomp @@ -0,0 +1,38 @@ +#!bash +# +# Bash completion support for Pygments (the 'pygmentize' command). 
+# + +_pygmentize() +{ + local cur prev + + COMPREPLY=() + cur=`_get_cword` + prev=${COMP_WORDS[COMP_CWORD-1]} + + case "$prev" in + -f) + FORMATTERS=`pygmentize -L formatters | grep '* ' | cut -c3- | sed -e 's/,//g' -e 's/:$//'` + COMPREPLY=( $( compgen -W '$FORMATTERS' -- "$cur" ) ) + return 0 + ;; + -l) + LEXERS=`pygmentize -L lexers | grep '* ' | cut -c3- | sed -e 's/,//g' -e 's/:$//'` + COMPREPLY=( $( compgen -W '$LEXERS' -- "$cur" ) ) + return 0 + ;; + -S) + STYLES=`pygmentize -L styles | grep '* ' | cut -c3- | sed s/:$//` + COMPREPLY=( $( compgen -W '$STYLES' -- "$cur" ) ) + return 0 + ;; + esac + + if [[ "$cur" == -* ]]; then + COMPREPLY=( $( compgen -W '-f -l -S -L -g -O -P -F \ + -N -H -h -V -o' -- "$cur" ) ) + return 0 + fi +} +complete -F _pygmentize -o default pygmentize diff --git a/external/rst-directive.py b/external/rst-directive.py new file mode 100644 index 0000000..5872185 --- /dev/null +++ b/external/rst-directive.py @@ -0,0 +1,81 @@ +""" + The Pygments reStructuredText directive + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + This fragment is a Docutils_ 0.5 directive that renders source code + (to HTML only, currently) via Pygments. + + To use it, adjust the options below and copy the code into a module + that you import on initialization. The code then automatically + registers a ``sourcecode`` directive that you can use instead of + normal code blocks like this:: + + .. sourcecode:: python + + My code goes here. + + If you want to have different code styles, e.g. one with line numbers + and one without, add formatters with their names in the VARIANTS dict + below. You can invoke them instead of the DEFAULT one by using a + directive option:: + + .. sourcecode:: python + :linenos: + + My code goes here. + + Look at the `directive documentation`_ to get all the gory details. + + .. _Docutils: https://docutils.sourceforge.io/ + .. 
_directive documentation: + https://docutils.sourceforge.io/docs/howto/rst-directives.html + + :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +# Options +# ~~~~~~~ + +# Set to True if you want inline CSS styles instead of classes +INLINESTYLES = False + +from pygments.formatters import HtmlFormatter + +# The default formatter +DEFAULT = HtmlFormatter(noclasses=INLINESTYLES) + +# Add name -> formatter pairs for every variant you want to use +VARIANTS = { + # 'linenos': HtmlFormatter(noclasses=INLINESTYLES, linenos=True), +} + + +from docutils import nodes +from docutils.parsers.rst import directives, Directive + +from pygments import highlight +from pygments.lexers import get_lexer_by_name, TextLexer + +class Pygments(Directive): + """ Source code syntax highlighting. + """ + required_arguments = 1 + optional_arguments = 0 + final_argument_whitespace = True + option_spec = {key: directives.flag for key in VARIANTS} + has_content = True + + def run(self): + self.assert_has_content() + try: + lexer = get_lexer_by_name(self.arguments[0]) + except ValueError: + # no lexer found - use the text one instead of an exception + lexer = TextLexer() + # take an arbitrary option if more than one is given + formatter = self.options and VARIANTS[list(self.options)[0]] or DEFAULT + parsed = highlight('\n'.join(self.content), lexer, formatter) + return [nodes.raw('', parsed, format='html')] + +directives.register_directive('sourcecode', Pygments) diff --git a/external/scheme-builtins-generator.scm b/external/scheme-builtins-generator.scm new file mode 100644 index 0000000..5c260b8 --- /dev/null +++ b/external/scheme-builtins-generator.scm @@ -0,0 +1,116 @@ +;; Autogenerate a list of Scheme keywords (i.e., macros) and built-in +;; functions. This is written for the Guile implementation. 
The +;; principle of autogenerating this has the advantage of catching many +;; builtins that would be tedious to maintain by hand, and the +;; disadvantage that some builtins very specific to Guile and not +;; relevant to other implementations are caught as well. However, +;; since Scheme builtin function names tend to be rather specific, +;; this should not be a significant problem. + +(define port (open-output-file "../pygments/lexers/_scheme_builtins.py")) + +(display + "\"\"\" + pygments.lexers._scheme_builtins + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Scheme builtins. + + :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +\"\"\" +" + port) + +(format port +"\n# Autogenerated by external/scheme-builtins-generator.scm\n\ +# using Guile ~a.\n\n" + (version)) + +(use-modules (srfi srfi-1) + (ice-9 match)) + +(define relevant-modules + ;; This is a nightmare. Scheme builtins are split in + ;; gazillions of standards, SRFIs and implementation + ;; extensions. With so many sources, it's hard to define + ;; what is really a Scheme builtin. This is a rather + ;; conservative list of Guile modules that might be used + ;; the most frequently (somewhat subjective, admittedly). + '( + ;; The real builtins. + (guile) + ;; Let's include the fundamental list library. + (srfi srfi-1) + ;; define-record-type + (srfi srfi-9) + ;; let-values, let*-values + (srfi srfi-11) + ;; case-lambda + (srfi srfi-16) + ;; Pattern matching + (ice-9 match) + ;; Included for compatibility with files written for R5RS + (rnrs r5rs))) + +(define (get-all-bindings module) + ;; Need to recurse to find all public bindings. module-map + ;; only considers the module's own bindings. + (let* ((own (module-map cons module)) + (uses (module-uses module))) + (append own (append-map get-all-bindings uses)))) + +(define all-bindings + (append-map + ;; Need to use module-public-interface to restrict to + ;; public bindings. 
Note that module-uses already + ;; returns public interfaces. + (lambda (mod-path) + (let* ((mod-object (resolve-module mod-path)) + (iface (module-public-interface mod-object))) + (get-all-bindings iface))) + relevant-modules)) + +(define (filter-for pred) + (filter-map + (match-lambda + ((key . variable) + (and (variable-bound? variable) + (let ((value (variable-ref variable))) + (and (pred value) + key))))) + all-bindings)) + +(define (sort-and-uniq lst pred) + (let loop ((lst (sort lst pred)) + (acc '())) + (match lst + (() (reverse! acc)) + ((one . rest) + (loop (drop-while (lambda (elt) + (equal? elt one)) + rest) + (cons one acc)))))) + +(define (dump-py-list lst) + (string-join + (map + (lambda (name) + (format #f " \"~a\"," name)) + (sort-and-uniq + (map symbol->string lst) + string<?)) + "\n")) + +(define (dump-builtins name pred extra) + (format port + "~a = {\n~a\n}\n\n" + name + (dump-py-list (append extra (filter-for pred))))) + +(define extra-procedures + ;; These are found in RnRS but not implemented by Guile. + '(load transcript-off transcript-on)) + +(dump-builtins 'scheme_keywords macro? '()) +(dump-builtins 'scheme_builtins procedure? extra-procedures) diff --git a/pygments/__init__.py b/pygments/__init__.py new file mode 100644 index 0000000..9cb60d1 --- /dev/null +++ b/pygments/__init__.py @@ -0,0 +1,82 @@ +""" + Pygments + ~~~~~~~~ + + Pygments is a syntax highlighting package written in Python. + + It is a generic syntax highlighter for general use in all kinds of software + such as forum systems, wikis or other applications that need to prettify + source code. 
"""
    Pygments
    ~~~~~~~~

    Pygments is a syntax highlighting package written in Python.

    It is a generic syntax highlighter for general use in all kinds of software
    such as forum systems, wikis or other applications that need to prettify
    source code.  Highlights are:

    * a wide range of common languages and markup formats is supported
    * special attention is paid to details, increasing quality by a fair amount
    * support for new languages and formats are added easily
    * a number of output formats, presently HTML, LaTeX, RTF, SVG, all image
      formats that PIL supports, and ANSI sequences
    * it is usable as a command-line tool and as a library
    * ... and it highlights even Brainfuck!

    The `Pygments master branch`_ is installable with ``easy_install Pygments==dev``.

    .. _Pygments master branch:
       https://github.com/pygments/pygments/archive/master.zip#egg=Pygments-dev

    :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""
from io import StringIO, BytesIO

__version__ = '2.14.0'
__docformat__ = 'restructuredtext'

__all__ = ['lex', 'format', 'highlight']


def lex(code, lexer):
    """
    Lex ``code`` with ``lexer`` and return an iterable of tokens.

    Raises a helpful ``TypeError`` if ``lexer`` is a ``RegexLexer``
    *class* rather than an instance -- a common caller mistake.
    """
    try:
        return lexer.get_tokens(code)
    except TypeError:
        # Heuristic to catch a common mistake: passing the class itself.
        from pygments.lexer import RegexLexer
        if isinstance(lexer, type) and issubclass(lexer, RegexLexer):
            raise TypeError('lex() argument must be a lexer instance, '
                            'not a class')
        raise


def format(tokens, formatter, outfile=None):  # pylint: disable=redefined-builtin
    """
    Format a tokenlist ``tokens`` with the formatter ``formatter``.

    If ``outfile`` is given and a valid file object (an object with a
    ``write`` method), the result will be written to it, otherwise it is
    returned as a string (``bytes`` if the formatter declares a truthy
    ``encoding`` attribute, else ``str``).
    """
    try:
        if outfile:
            formatter.format(tokens, outfile)
            return None
        # A formatter with an encoding writes encoded bytes, so buffer
        # in BytesIO; otherwise collect text in StringIO.  (Explicit
        # branch instead of the fragile ``x and a or b`` idiom.)
        if getattr(formatter, 'encoding', None):
            realoutfile = BytesIO()
        else:
            realoutfile = StringIO()
        formatter.format(tokens, realoutfile)
        return realoutfile.getvalue()
    except TypeError:
        # Heuristic to catch a common mistake: passing the class itself.
        from pygments.formatter import Formatter
        if isinstance(formatter, type) and issubclass(formatter, Formatter):
            raise TypeError('format() argument must be a formatter instance, '
                            'not a class')
        raise


def highlight(code, lexer, formatter, outfile=None):
    """
    Lex ``code`` with ``lexer`` and format it with the formatter ``formatter``.

    If ``outfile`` is given and a valid file object (an object with a
    ``write`` method), the result will be written to it, otherwise it is
    returned as a string.
    """
    return format(lex(code, lexer), formatter, outfile)
+""" + +import os +import sys +import shutil +import argparse +from textwrap import dedent + +from pygments import __version__, highlight +from pygments.util import ClassNotFound, OptionError, docstring_headline, \ + guess_decode, guess_decode_from_terminal, terminal_encoding, \ + UnclosingTextIOWrapper +from pygments.lexers import get_all_lexers, get_lexer_by_name, guess_lexer, \ + load_lexer_from_file, get_lexer_for_filename, find_lexer_class_for_filename +from pygments.lexers.special import TextLexer +from pygments.formatters.latex import LatexEmbeddedLexer, LatexFormatter +from pygments.formatters import get_all_formatters, get_formatter_by_name, \ + load_formatter_from_file, get_formatter_for_filename, find_formatter_class +from pygments.formatters.terminal import TerminalFormatter +from pygments.formatters.terminal256 import Terminal256Formatter, TerminalTrueColorFormatter +from pygments.filters import get_all_filters, find_filter_class +from pygments.styles import get_all_styles, get_style_by_name + + +def _parse_options(o_strs): + opts = {} + if not o_strs: + return opts + for o_str in o_strs: + if not o_str.strip(): + continue + o_args = o_str.split(',') + for o_arg in o_args: + o_arg = o_arg.strip() + try: + o_key, o_val = o_arg.split('=', 1) + o_key = o_key.strip() + o_val = o_val.strip() + except ValueError: + opts[o_arg] = True + else: + opts[o_key] = o_val + return opts + + +def _parse_filters(f_strs): + filters = [] + if not f_strs: + return filters + for f_str in f_strs: + if ':' in f_str: + fname, fopts = f_str.split(':', 1) + filters.append((fname, _parse_options([fopts]))) + else: + filters.append((f_str, {})) + return filters + + +def _print_help(what, name): + try: + if what == 'lexer': + cls = get_lexer_by_name(name) + print("Help on the %s lexer:" % cls.name) + print(dedent(cls.__doc__)) + elif what == 'formatter': + cls = find_formatter_class(name) + print("Help on the %s formatter:" % cls.name) + print(dedent(cls.__doc__)) + elif what == 
def _print_list(what):
    """Print a human-readable listing of all known lexers, formatters,
    filters or styles (selected by ``what``) to stdout.
    """
    if what == 'lexer':
        print()
        print("Lexers:")
        print("~~~~~~~")

        entries = []
        for fullname, names, exts, _ in get_all_lexers():
            filenames = '(filenames ' + ', '.join(exts) + ')' if exts else ''
            entries.append((', '.join(names) + ':', fullname, filenames))
        for entry in sorted(entries):
            print('* %s\n    %s %s' % entry)

    elif what == 'formatter':
        print()
        print("Formatters:")
        print("~~~~~~~~~~~")

        entries = []
        for cls in get_all_formatters():
            filenames = ('(filenames ' + ', '.join(cls.filenames) + ')'
                         if cls.filenames else '')
            entries.append((', '.join(cls.aliases) + ':',
                            docstring_headline(cls), filenames))
        for entry in sorted(entries):
            print('* %s\n    %s %s' % entry)

    elif what == 'filter':
        print()
        print("Filters:")
        print("~~~~~~~~")

        for name in get_all_filters():
            print("* " + name + ':')
            print("    %s" % docstring_headline(find_filter_class(name)))

    elif what == 'style':
        print()
        print("Styles:")
        print("~~~~~~~")

        for name in get_all_styles():
            print("* " + name + ':')
            print("    %s" % docstring_headline(get_style_by_name(name)))


def _print_list_as_json(requested_items):
    """Dump machine-readable metadata for the requested item kinds
    ('lexer', 'formatter', 'filter', 'style') as JSON on stdout.
    """
    import json
    result = {}

    if 'lexer' in requested_items:
        result['lexers'] = {
            fullname: {
                'aliases': names,
                'filenames': filenames,
                'mimetypes': mimetypes,
            }
            for fullname, names, filenames, mimetypes in get_all_lexers()
        }

    if 'formatter' in requested_items:
        result['formatters'] = {
            cls.name: {
                'aliases': cls.aliases,
                'filenames': cls.filenames,
                'doc': docstring_headline(cls),
            }
            for cls in get_all_formatters()
        }

    if 'filter' in requested_items:
        result['filters'] = {
            name: {'doc': docstring_headline(find_filter_class(name))}
            for name in get_all_filters()
        }

    if 'style' in requested_items:
        result['styles'] = {
            name: {'doc': docstring_headline(get_style_by_name(name))}
            for name in get_all_styles()
        }

    json.dump(result, sys.stdout)
handle ``pygmentize -N`` + if argns.N: + lexer = find_lexer_class_for_filename(argns.N) + if lexer is None: + lexer = TextLexer + + print(lexer.aliases[0]) + return 0 + + # handle ``pygmentize -C`` + if argns.C: + inp = sys.stdin.buffer.read() + try: + lexer = guess_lexer(inp, inencoding=inencoding) + except ClassNotFound: + lexer = TextLexer + + print(lexer.aliases[0]) + return 0 + + # handle ``pygmentize -S`` + S_opt = argns.S + a_opt = argns.a + if S_opt is not None: + f_opt = argns.f + if not f_opt: + parser.print_help(sys.stderr) + return 2 + if argns.l or argns.INPUTFILE: + parser.print_help(sys.stderr) + return 2 + + try: + parsed_opts['style'] = S_opt + fmter = get_formatter_by_name(f_opt, **parsed_opts) + except ClassNotFound as err: + print(err, file=sys.stderr) + return 1 + + print(fmter.get_style_defs(a_opt or '')) + return 0 + + # if no -S is given, -a is not allowed + if argns.a is not None: + parser.print_help(sys.stderr) + return 2 + + # parse -F options + F_opts = _parse_filters(argns.F or []) + + # -x: allow custom (eXternal) lexers and formatters + allow_custom_lexer_formatter = bool(argns.x) + + # select lexer + lexer = None + + # given by name? + lexername = argns.l + if lexername: + # custom lexer, located relative to user's cwd + if allow_custom_lexer_formatter and '.py' in lexername: + try: + filename = None + name = None + if ':' in lexernam |