diff options
Diffstat (limited to 'doc')
59 files changed, 5234 insertions, 0 deletions
diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 0000000..a0dcaaa --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,163 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = PYTHONPATH=.. sphinx-build +PAPER = +BUILDDIR = _build + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . + +.PHONY: help clean pyodide html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man texinfo info changes linkcheck doctest gettext + +help: + @echo "Please use \`make <target>' where <target> is one of" + @echo " pyodide to make Pyodide with currently checked out Pygments" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO message catalogs" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + +clean: + -rm
-rf $(BUILDDIR)/* + +pyodide: + $(if $(shell command -v docker 2>/dev/null),,$(error Could not find Docker. Please install that before continuing.)) + # Enable the BuildKit backend to use the --output option. + DOCKER_BUILDKIT=1 docker build --file pyodide/Dockerfile --output $(BUILDDIR)/pyodide/pyodide .. + @echo + @echo "Pyodide build finished. The Pyodide artifacts are in $(BUILDDIR)/pyodide." + +html: + $(if $(WEBSITE_BUILD), $(MAKE) pyodide) + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +dirhtml: + $(if $(WEBSITE_BUILD), $(MAKE) pyodide) + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." + +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." + +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Pygments.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Pygments.qhc" + +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished."
+ @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/Pygments" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Pygments" + @echo "# devhelp" + +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." + +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +info: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + $(MAKE) -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes."
+ +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." diff --git a/doc/_static/demo-worker.js b/doc/_static/demo-worker.js new file mode 100644 index 0000000..22b8b3d --- /dev/null +++ b/doc/_static/demo-worker.js @@ -0,0 +1,74 @@ +importScripts('/_static/pyodide/pyodide.js'); + +async function loadPyodideAndPygments() { + self.pyodide = await loadPyodide(); + await self.pyodide.loadPackage(["Pygments"]); + const styles = self.pyodide.runPython(` + from pygments.formatters.html import HtmlFormatter + from pygments.styles import STYLE_MAP + {s: HtmlFormatter(style=s).get_style_defs('.demo-highlight') for s in STYLE_MAP} + `).toJs(); + self.postMessage({loaded: {styles}}) +} +let pyodideReadyPromise = loadPyodideAndPygments(); + +self.onmessage = async (event) => { + // Make sure loading is done. 
+ await pyodideReadyPromise; + if (event.data.highlight) { + self.pyodide.globals.set('code', event.data.highlight.code); + self.pyodide.globals.set('lexer_name', event.data.highlight.lexer); + + self.pyodide.runPython(` + import pygments.lexers + + lexer = pygments.lexers.get_lexer_by_name(lexer_name) + if type(code) == memoryview: + code = bytes(code) + tokens = lexer.get_tokens(code) + `); + + const formatter = event.data.highlight.formatter; + if (formatter == 'html') { + + const html = self.pyodide.runPython(` + import io + from pygments.formatters.html import HtmlFormatter + + fmter = HtmlFormatter(cssclass='demo-highlight') + buf = io.StringIO() + fmter.format(tokens, buf) + buf.getvalue() + `); + self.postMessage({html}); + } else if (formatter == 'tokens') { + const tokens = self.pyodide.runPython('list(tokens)').toJs(); + self.postMessage({tokens}); + } else { + console.warn('unknown formatter:', formatter); + } + } else if (event.data.guess_lexer) { + self.pyodide.globals.set('code', event.data.guess_lexer.code); + self.pyodide.globals.set('filename', event.data.guess_lexer.filename); + const lexer = self.pyodide.runPython(` + import sys + sys.setrecursionlimit(1000) + # TODO: remove after upgrading to Pyodide 0.19 + + import pygments.lexers + import pygments.util + + if type(code) == memoryview: + code = bytes(code) + + if filename: + lexer = pygments.lexers.guess_lexer_for_filename(filename, code) + else: + lexer = pygments.lexers.guess_lexer(code) + lexer.aliases[0] + `); + self.postMessage({lexer}); + } else { + console.warn('unknown command: expected highlight or guess_lexer but received ', event.data); + } +} diff --git a/doc/_static/demo.css b/doc/_static/demo.css new file mode 100644 index 0000000..eaa4410 --- /dev/null +++ b/doc/_static/demo.css @@ -0,0 +1,89 @@ +#try { + background-color: #f6f6f6; + border-radius: 0; + border: 1px solid #ccc; + margin-top: 15px; + margin-bottom: 10px; + padding: 10px 15px 5px 10px; + position: relative; +} + 
+#try h2 { + margin-top: 0; +} + +#try textarea { + border: 1px solid #999; + padding: 2px; + width: 100%; + min-height: 150px; + resize: vertical; +} + +#hlcode { + margin: 10px 0; + max-height: 500px; + overflow: auto; + border: 1px solid #ccc; +} + +#hlcode:empty { + display: none; +} + +#hlcode pre { + background-color: transparent; + border: 0; + margin: 0; +} +#hlcode table { + /* unset negative margin from pygments14.css */ + margin: unset; +} + +#code-header:not([hidden]) { + display: flex; + gap: 1em; + padding: 0 15px; +} +.flex-grow-1 { + flex-grow: 1; +} +#lexer { + margin-right: 0.5em; +} +#guessed-lexer:not(:empty):before { + content: '(guessed '; +} +#guessed-lexer:not(:empty):after { + content: ')'; +} + +#loading[hidden] { + visibility: hidden; + display: flex; +} + +#loading { + display: flex; + align-items: center; + gap: 1em; +} + +#format-settings { + display: flex; + gap: 1em; + border-top: 1px solid #ccc; + padding-top: 0.5em; + margin-top: 0.5em; +} + +.tokens code { + /* make whitespace visible */ + white-space: pre; + background: #d9d9d9; +} + +#contrast-warning { + color: darkred; +} diff --git a/doc/_static/demo.js b/doc/_static/demo.js new file mode 100644 index 0000000..b193d10 --- /dev/null +++ b/doc/_static/demo.js @@ -0,0 +1,200 @@ +const loadingDiv = document.getElementById("loading"); +const langSelect = document.getElementById("lang"); +const styleSelect = document.getElementById("style"); +const formatterSelect = document.getElementById("formatter"); +const outputDiv = document.getElementById("hlcode"); +const codeHeader = document.getElementById("code-header"); +const copyLink = document.getElementById("copylink"); +const style = document.getElementById("css-style"); +const textarea = document.getElementById("code"); +const uriTooLongMsg = document.getElementById('uri-too-long'); +const contrastWarning = document.getElementById('contrast-warning'); +const fileInput = document.getElementById("file"); +const fileInputResetButton 
= document.getElementById('reset-file'); + +const qvars = Object.fromEntries(new URLSearchParams(window.location.search)); +if (qvars.lexer) { + langSelect.value = qvars.lexer; +} +if (qvars.code !== undefined) { + textarea.value = qvars.code; + loadingDiv.hidden = false; +} +if (qvars.style !== undefined) { + styleSelect.value = qvars.style; + updateContrastWarning(); +} +if (qvars.formatter !== undefined) { + formatterSelect.value = qvars.formatter; +} + +styleSelect.addEventListener('change', () => { + if (!styles) + // Worker has not loaded yet. + return; + style.textContent = styles.get(styleSelect.value); + updateCopyLink(); + updateContrastWarning(); +}); + +function updateContrastWarning() { + contrastWarning.hidden = styleSelect.selectedOptions[0].dataset.wcag == 'aa'; +} + +function debounce(func, timeout) { + let timer; + return (...args) => { + clearTimeout(timer); + timer = setTimeout(() => func.apply(this, args), timeout); + }; +} + +const highlightShortDebounce = debounce(highlight, 50); +const highlightLongDebounce = debounce(highlight, 500); + +function debouncedUpdate() { + if (fileInput.files.length > 0) + return; + + if (textarea.value.length < 1000) { + highlightShortDebounce(); + } else { + highlightLongDebounce(); + } +} + +langSelect.addEventListener('change', debouncedUpdate); +textarea.addEventListener('input', debouncedUpdate); +formatterSelect.addEventListener('change', debouncedUpdate); +fileInput.addEventListener('change', () => { + fileInputResetButton.hidden = false; + highlight(); +}); +fileInputResetButton.hidden = fileInput.files.length == 0; +fileInputResetButton.addEventListener('click', () => { + fileInputResetButton.hidden = true; + fileInput.value = ''; + highlight(); +}); + +let styles; + +const highlightWorker = new Worker("/_static/demo-worker.js"); +highlightWorker.onmessage = (msg) => { + if (msg.data.loaded) { + styles = msg.data.loaded.styles; + + if (qvars.code !== undefined || textarea.value) { + loadingDiv.hidden = 
true; + highlight(); + } + } else if (msg.data.html) { + outputDiv.innerHTML = msg.data.html; + codeHeader.hidden = false; + loadingDiv.hidden = true; + style.textContent = styles.get(styleSelect.value); + } else if (msg.data.tokens) { + const table = document.createElement('table'); + table.className = 'tokens'; + for (const [tokenType, value] of msg.data.tokens) { + const tr = document.createElement('tr'); + const td1 = document.createElement('td'); + td1.textContent = tokenType.join('.'); + const td2 = document.createElement('td'); + const inlineCode = document.createElement('code'); + inlineCode.textContent = value; + td2.appendChild(inlineCode); + tr.appendChild(td1); + tr.appendChild(td2); + table.appendChild(tr); + } + outputDiv.innerHTML = ''; + outputDiv.appendChild(table); + + codeHeader.hidden = false; + loadingDiv.hidden = true; + } else if (msg.data.lexer) { + highlight(msg.data.lexer); + } else { + console.warn('unexpected message from highlight worker', msg); + } +}; + +function updateCopyLink() { + var url = document.location.origin + document.location.pathname + + "?" 
+ new URLSearchParams({ + lexer: langSelect.value, + style: styleSelect.value, + formatter: formatterSelect.value, + code: textarea.value, + }).toString() + if (url.length > 8201) { + // pygments.org is hosted on GitHub pages which does not support URIs longer than 8201 + copyLink.hidden = true; + uriTooLongMsg.hidden = false; + } else { + copyLink.href = url; + copyLink.textContent = 'Copy link'; + copyLink.hidden = false; + uriTooLongMsg.hidden = true; + } +} + +async function highlight(guessedLexer) { + var lexer = langSelect.value || guessedLexer; + var file = fileInput.files[0]; + + let code; + if (file) { + code = await file.arrayBuffer(); + } else { + code = textarea.value; + } + + loadingDiv.hidden = false; + + if (!lexer) { + const guess_lexer = {code}; + if (file) + guess_lexer.filename = file.name; + highlightWorker.postMessage({guess_lexer}); + document.getElementById('loading-text').textContent = 'guessing lexer...'; + return; + } + + document.getElementById('loading-text').textContent = 'highlighting code...'; + + document.getElementById('guessed-lexer').textContent = guessedLexer; + + highlightWorker.postMessage({highlight: {code, lexer, formatter: formatterSelect.value}}); + + if (code instanceof ArrayBuffer) { + copyLink.hidden = true; + uriTooLongMsg.hidden = true; + } else { + updateCopyLink(); + } +} + +copyLink.addEventListener('click', async (e) => { + e.preventDefault(); + await navigator.clipboard.writeText(e.target.href); +}); + +function download_code() { + var filename = "highlighted.html"; + var hlcode = document.getElementById("hlcode").innerHTML + style.outerHTML; + var blob = new Blob([hlcode], {type: 'text/html'}); + if (window.navigator.msSaveOrOpenBlob) { + window.navigator.msSaveBlob(blob, filename); + } + else{ + var elem = window.document.createElement('a'); + elem.href = window.URL.createObjectURL(blob); + elem.download = filename; + document.body.appendChild(elem); + elem.click(); + document.body.removeChild(elem); + 
window.URL.revokeObjectURL(elem.href); + } +} diff --git a/doc/_static/favicon.ico b/doc/_static/favicon.ico Binary files differnew file mode 100644 index 0000000..777f617 --- /dev/null +++ b/doc/_static/favicon.ico diff --git a/doc/_static/github.png b/doc/_static/github.png Binary files differnew file mode 100644 index 0000000..5d146ad --- /dev/null +++ b/doc/_static/github.png diff --git a/doc/_static/logo_new.png b/doc/_static/logo_new.png Binary files differnew file mode 100644 index 0000000..0ae4b20 --- /dev/null +++ b/doc/_static/logo_new.png diff --git a/doc/_static/logo_only.png b/doc/_static/logo_only.png Binary files differnew file mode 100644 index 0000000..fdebcc4 --- /dev/null +++ b/doc/_static/logo_only.png diff --git a/doc/_static/spinner.gif b/doc/_static/spinner.gif Binary files differnew file mode 100644 index 0000000..2212db9 --- /dev/null +++ b/doc/_static/spinner.gif diff --git a/doc/_templates/demo.html b/doc/_templates/demo.html new file mode 100644 index 0000000..8e2a7c6 --- /dev/null +++ b/doc/_templates/demo.html @@ -0,0 +1,97 @@ +{% extends "layout.html" %} +{% set sidebars = sidebars + ["demo_sidebar.html"] %} + +{% block extrahead %} +{{ super() }} +<link rel="stylesheet" type="text/css" href="{{ pathto("_static/demo.css", 1) }}"> +{% endblock %} + +{% block htmltitle %}<title>Demo{{ titlesuffix }}</title>{% endblock %} + +{% block body %} +{{ body }} + +<h1>Try out Pygments!</h1> + +<noscript> + <h2>This website requires JavaScript (and WebAssembly)</h2> + + You can also try out pygments locally by running <code>pip install pygments</code>. + Then you can use <a href="{{pathto('docs/cmdline')}}">the command-line interface</a>. 
+</noscript> + +<div id="try"> + <p> + <label>Language + <select id="lang" autofocus> + <option value="">guess the language</option> + {% for name, info, _, _ in lexers %} + <option value="{{info.0}}">{{name}}</option> + {% endfor %} + </select> + </label> + <span id=guessed-lexer></span> + </p> + <p> + <label> + Enter some code: + <textarea id="code" rows="1" cols="60" spellcheck="false"></textarea> + </label> + </p> + <p> + <label> + Alternatively you can upload a file: + <input type="file" id="file"> + </label> + <button id="reset-file">Reset</button> + </p> + <div id="format-settings"> + <label> + Formatter + <select id=formatter> + <option value=html>HTML</option> + <option value=tokens>tokens</option> + </select> + </label> + <label>Style + <select id="style"> + <optgroup label="Good contrast"> + {% for style in styles_aa %} + <option data-wcag=aa>{{style.name}}</option> + {% endfor %} + </optgroup> + <optgroup label="Suboptimal contrast"> + {% for style in styles_sub_aa %} + <option>{{style.name}}</option> + {% endfor %} + </optgroup> + </select> + </label> + <span id=contrast-warning hidden>style may have poor contrast</span> + </div> +</div> + +<div id="loading" hidden> + <img src="{{ pathto("_static/spinner.gif", 1) }}" width="20"> + <span id="loading-text">loading Python...</span> +</div> + +<style id=css-style></style> + +<div id="hlcode"></div> + +<div id="code-header" hidden> + <div class=flex-grow-1></div> + <button onclick="download_code()">Download</button> + <a id="copylink" role="button">Copy link</a> + <span hidden id="uri-too-long">(Copy link unavailable because code too long)</span> +</div> + +<p>The highlighting here is performed in-browser using + a WebAssembly translation of the latest Pygments master branch, courtesy of + <a href="https://github.com/iodide-project/pyodide">Pyodide</a>.</p> +<p>Your content is neither sent over the web nor stored anywhere.</p> + +<script type="text/javascript" src="{{ pathto("_static/demo.js", 1)
}}"></script> +{% endblock %} diff --git a/doc/_templates/demo_sidebar.html b/doc/_templates/demo_sidebar.html new file mode 100644 index 0000000..3f2a86c --- /dev/null +++ b/doc/_templates/demo_sidebar.html @@ -0,0 +1 @@ +<p><a href="#try">Back to top</a></p> diff --git a/doc/_templates/docssidebar.html b/doc/_templates/docssidebar.html new file mode 100644 index 0000000..913acaa --- /dev/null +++ b/doc/_templates/docssidebar.html @@ -0,0 +1,3 @@ +{% if pagename != 'docs/index' %} +<strong>« <a href="{{ pathto('docs/index') }}">Back to docs index</a></strong> +{% endif %} diff --git a/doc/_templates/index_with_try.html b/doc/_templates/index_with_try.html new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/doc/_templates/index_with_try.html diff --git a/doc/_templates/indexsidebar.html b/doc/_templates/indexsidebar.html new file mode 100644 index 0000000..5aa5019 --- /dev/null +++ b/doc/_templates/indexsidebar.html @@ -0,0 +1,18 @@ +<section> +<h3>Download</h3> +<p>Current version: <b>{{ version }}</b><br><a href="{{ pathto('docs/changelog') }}">Changelog</a></p> +<p>Get Pygments from the <a href="https://pypi.python.org/pypi/Pygments">Python Package + Index</a>, or install it with:</p> +<pre>pip install Pygments</pre> +</section> +<section> +<h3>Questions? 
Suggestions?</h3> + +<p><img src="{{ pathto("_static/github.png", 1) }}" width="24" /> + Clone at <a href="https://github.com/pygments/pygments">GitHub</a>.</p> +<p>You can also open an issue at the + <a href="https://github.com/pygments/pygments/issues">tracker</a>.</p> +</section> + +<p class="logo">A <a href="https://www.pocoo.org/"> + <img src="{{ pathto("_static/pocoo.png", 1) }}" /></a> project</p> diff --git a/doc/_templates/styles.html b/doc/_templates/styles.html new file mode 100644 index 0000000..137fa24 --- /dev/null +++ b/doc/_templates/styles.html @@ -0,0 +1,55 @@ +{% extends "layout.html" %} + +{% block htmltitle %}<title>Styles{{ titlesuffix }}</title>{% endblock %} + +{% block body %} +<style> +.style-gallery { + display: flex; + flex-wrap: wrap; + justify-content: space-around; +} +h2 { + margin-top: 2em; +} +.style-gallery h3 { + margin-bottom: 0.1em; +} +.style-gallery pre { + background-color: inherit; +} +</style> +{{ body }} + +<h1>Styles</h1> + +<p>Pygments comes with the following builtin styles. +For more information about styles refer to <a href="{{ pathto('docs/styles') }}">the documentation</a>. +</p> + +<div class=style-gallery> +{% for style in styles_aa %} + <div> + <h3 id="{{style.name}}">{{style.name}}</h3> + {{style.html|safe}} + </div> +{% endfor %} +</div> + +<h2>Styles with a lower contrast</h2> +<p> +The following styles do not meet the <a href="https://www.w3.org/WAI/WCAG21/Understanding/contrast-minimum.html">WCAG 2.1 AA contrast minimum</a>, +so they might be difficult to read for people with suboptimal vision. +If you want your highlighted code to be well readable for other people, you +should use one of the earlier styles instead.
+</p> +<div class=style-gallery> + {% for style in styles_sub_aa %} + <div> + <h3 id="{{style.name}}">{{style.name}}</h3> + {{style.html|safe}} + </div> + {% endfor %} +</div> + +{% endblock %} diff --git a/doc/_themes/pygments14/layout.html b/doc/_themes/pygments14/layout.html new file mode 100644 index 0000000..34e86ef --- /dev/null +++ b/doc/_themes/pygments14/layout.html @@ -0,0 +1,101 @@ +{# + sphinxdoc/layout.html + ~~~~~~~~~~~~~~~~~~~~~ + + Sphinx layout template for the sphinxdoc theme. + + :copyright: Copyright 2007-2013 by the Sphinx team, see AUTHORS. + :license: BSD, see LICENSE for details. +#} +{%- extends "basic/layout.html" %} + +{# put the sidebar before the body #} +{% block sidebar1 %}{{ sidebar() }}{% endblock %} +{% block sidebar2 %}{% endblock %} + +{% block relbar1 %}{% endblock %} +{% block relbar2 %}{% endblock %} + +{% block extrahead %} + <link href='https://fonts.googleapis.com/css?family={{ theme_font|replace(' ', '+') }}:300,400,700' + rel='stylesheet' type='text/css'> +{{ super() }} +{%- if not embedded %} + <style type="text/css"> + table.right { float: right; margin-left: 20px; } + table.right td { border: 1px solid #ccc; } + {% if pagename == 'index' %} + .related { display: none; } + {% endif %} + </style> + <script type="text/javascript"> + // intelligent scrolling of the sidebar content + $(window).scroll(function() { + var sb = $('.sphinxsidebarwrapper'); + var win = $(window); + var sbh = sb.height(); + var offset = $('.sphinxsidebar').position()['top']; + var wintop = win.scrollTop(); + var winbot = wintop + win.innerHeight(); + var curtop = sb.position()['top']; + var curbot = curtop + sbh; + // does sidebar fit in window? 
+ if (sbh < win.innerHeight()) { + // yes: easy case -- always keep at the top + sb.css('top', $u.min([$u.max([0, wintop - offset - 10]), + $(document).height() - sbh - 200])); + } else { + // no: only scroll if top/bottom edge of sidebar is at + // top/bottom edge of window + if (curtop > wintop && curbot > winbot) { + sb.css('top', $u.max([wintop - offset - 10, 0])); + } else if (curtop < wintop && curbot < winbot) { + sb.css('top', $u.min([winbot - sbh - offset - 20, + $(document).height() - sbh - 200])); + } + } + }); + </script> +{%- endif %} +{% endblock %} + +{% block header %} +<div class="outerwrapper"> +<div class="pageheader"> + <ul> + <li><a href="{{ pathto('index') }}">Home</a></li> + {% if demo_active %} + <li><a href="{{ pathto('demo') }}">Demo</a></li> + {% endif %} + <li><a href="{{ pathto('languages') }}">Languages</a></li> + <li><a href="{{ pathto('styles') }}">Styles</a></li> + <li><a href="{{ pathto('faq') }}">FAQ</a></li> + <li><a href="{{ pathto('download') }}">Get it</a></li> + <li><a href="{{ pathto('docs/index') }}">Docs</a></li> + </ul> + <div> + <a href="{{ pathto('index') }}"> + <img src="{{ pathto('_static/logo.png', 1) }}" alt="Pygments logo" /> + </a> + </div> +</div> +<div class="flexwrapper"> +{% endblock %} + +{% block footer %} + </div> {# closes "flexwrapper" div #} + <div class="footer" role="contentinfo"> + © Copyright 2006-2022, Georg Brandl and Pygments contributors. + Created using <a href="https://sphinx-doc.org/">Sphinx</a> {{ + sphinx_version }}. <br/> + Pygments logo created by <a href="https://joelunger.com">Joel Unger</a>. + Backgrounds from <a href="https://subtlepatterns.com">subtlepatterns.com</a>. 
+ </div> + </div> {# closes "outerwrapper" div #} +{% endblock %} + +{% block sidebarrel %} +{% endblock %} + +{% block sidebarsourcelink %} +{% endblock %} diff --git a/doc/_themes/pygments14/localtoc.html b/doc/_themes/pygments14/localtoc.html new file mode 100644 index 0000000..c0e2de0 --- /dev/null +++ b/doc/_themes/pygments14/localtoc.html @@ -0,0 +1,17 @@ +{# + basic/localtoc.html + ~~~~~~~~~~~~~~~~~~~ + + Sphinx sidebar template: local table of contents. + + This file can be removed once https://github.com/sphinx-doc/sphinx/pull/9815 has landed. + + :copyright: Copyright 2007-2021 by the Sphinx team, see AUTHORS. + :license: BSD, see LICENSE for details. +#} +{%- if display_toc %} + <div> + <h3><a href="{{ pathto(root_doc)|e }}">{{ _('Table of Contents') }}</a></h3> + {{ toc }} + </div> +{%- endif %} diff --git a/doc/_themes/pygments14/relations.html b/doc/_themes/pygments14/relations.html new file mode 100644 index 0000000..372894d --- /dev/null +++ b/doc/_themes/pygments14/relations.html @@ -0,0 +1,25 @@ +{# + basic/relations.html + ~~~~~~~~~~~~~~~~~~~~ + + Sphinx sidebar template: relation links. + + This file can be removed once https://github.com/sphinx-doc/sphinx/pull/9815 has landed. + + :copyright: Copyright 2007-2021 by the Sphinx team, see AUTHORS. + :license: BSD, see LICENSE for details. 
+#} +{%- if prev %} +<div> + <h4>{{ _('Previous topic') }}</h4> + <p class="topless"><a href="{{ prev.link|e }}" + title="{{ _('previous chapter') }}">{{ prev.title }}</a></p> +</div> +{%- endif %} +{%- if next %} +<div> + <h4>{{ _('Next topic') }}</h4> + <p class="topless"><a href="{{ next.link|e }}" + title="{{ _('next chapter') }}">{{ next.title }}</a></p> +</div> +{%- endif %} diff --git a/doc/_themes/pygments14/static/bodybg.png b/doc/_themes/pygments14/static/bodybg.png Binary files differnew file mode 100644 index 0000000..46892b8 --- /dev/null +++ b/doc/_themes/pygments14/static/bodybg.png diff --git a/doc/_themes/pygments14/static/docbg.png b/doc/_themes/pygments14/static/docbg.png Binary files differnew file mode 100644 index 0000000..13e61f3 --- /dev/null +++ b/doc/_themes/pygments14/static/docbg.png diff --git a/doc/_themes/pygments14/static/listitem.png b/doc/_themes/pygments14/static/listitem.png Binary files differnew file mode 100644 index 0000000..e45715f --- /dev/null +++ b/doc/_themes/pygments14/static/listitem.png diff --git a/doc/_themes/pygments14/static/logo.png b/doc/_themes/pygments14/static/logo.png Binary files differnew file mode 100644 index 0000000..2c1a24d --- /dev/null +++ b/doc/_themes/pygments14/static/logo.png diff --git a/doc/_themes/pygments14/static/pocoo.png b/doc/_themes/pygments14/static/pocoo.png Binary files differnew file mode 100644 index 0000000..4174149 --- /dev/null +++ b/doc/_themes/pygments14/static/pocoo.png diff --git a/doc/_themes/pygments14/static/pygments14.css_t b/doc/_themes/pygments14/static/pygments14.css_t new file mode 100644 index 0000000..4355074 --- /dev/null +++ b/doc/_themes/pygments14/static/pygments14.css_t @@ -0,0 +1,422 @@ +/* + * pygments14.css + * ~~~~~~~~~~~~~~ + * + * Sphinx stylesheet -- pygments14 theme. Heavily copied from sphinx13. + * + * :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS. + * :license: BSD, see LICENSE for details. 
+ * + */ + +@import url("basic.css"); + +/* -- page layout ----------------------------------------------------------- */ + +body { + font-family: {{ theme_font }}, 'Lucida Grande', 'Lucida Sans Unicode', 'Geneva', + 'Verdana', sans-serif; + font-size: 14px; + text-align: center; + background-image: url(bodybg.png); + background-color: {{ theme_background }}; + color: black; + padding: 0; + /* + border-right: 1px solid {{ theme_border }}; + border-left: 1px solid {{ theme_border }}; + */ + + margin: 0 auto; + max-width: 1080px; +} + +.outerwrapper { + background-image: url(docbg.png); + background-attachment: fixed; +} + +.pageheader { + text-align: left; + padding: 10px 15px; +} + +.pageheader ul { + float: right; + color: white; + list-style-type: none; + padding-left: 0; + margin-top: 40px; + margin-right: 10px; +} + +.pageheader li { + float: left; + margin: 0 0 0 10px; +} + +.pageheader li a { + border-radius: 3px; + padding: 8px 12px; + color: {{ theme_darkgray }}; + text-shadow: 0 0 5px rgba(0, 0, 0, 0.2); +} + +.pageheader li a:hover { + background-color: {{ theme_yellow }}; + color: black; + text-shadow: none; +} + +div.document { + width: 700px; + flex-grow: 100; + text-align: left; + /*border-left: 1em solid {{ theme_lightyellow }};*/ + min-width: 500px; +} + +@media screen and (max-width: 550px) { + div.document { + min-width: inherit; + } +} + +div.bodywrapper { + background-color: white; +/* border-right: 1px solid {{ theme_border }}; */ +} + +.flexwrapper { + display: flex; + gap: 15px; + flex-wrap: wrap; + padding-right: 12px; +} + +div.body { + margin: 0; + padding: 0.5em 20px 20px 20px; + width: 100%; + box-sizing: border-box; +} + +div.related { + font-size: 1em; + color: {{ theme_darkgray }}; +} + +div.related ul { + background-image: url(relbg.png); + background-repeat: repeat-y; + background-color: {{ theme_yellow }}; + height: 1.9em; + /* + border-top: 1px solid {{ theme_border }}; + border-bottom: 1px solid {{ theme_border }}; + */ +} + 
+div.related ul li { + margin: 0 5px 0 0; + padding: 0; + float: left; +} + +div.related ul li.right { + float: right; + margin-right: 5px; +} + +div.related ul li a { + margin: 0; + padding: 0 5px 0 5px; + line-height: 1.75em; + color: {{ theme_darkgray }}; + /*text-shadow: 0px 0px 1px rgba(0, 0, 0, 0.5);*/ +} + +div.related ul li a:hover { + text-decoration: underline; + text-shadow: 0px 0px 1px rgba(255, 255, 255, 0.5); +} + +div.sphinxsidebar { + margin: 0; + padding: 0 0px 15px 15px; + width: 210px; + float: none; + font-size: 1em; + text-align: left; + flex-grow: 1; +} + +.sphinxsidebarwrapper > * { + flex: 1 1 0px; + min-width: 200px; +} + +div.sphinxsidebar .logo { + font-size: 1.8em; + color: #666; + font-weight: 300; + text-align: center; +} + +div.sphinxsidebar .logo img { + vertical-align: middle; +} + +div.sphinxsidebar input { + border: 1px solid #aaa; + font-family: {{ theme_font }}, 'Lucida Grande', 'Lucida Sans Unicode', 'Geneva', + 'Verdana', sans-serif; + font-size: 1em; +} + +div.sphinxsidebar h3 { + font-size: 1.5em; + /* border-top: 1px solid {{ theme_border }}; */ + margin-top: 0; + margin-bottom: 0.5em; + padding-top: 0.5em; +} + +div.sphinxsidebar h4 { + font-size: 1.2em; + margin-bottom: 0; +} + +div.sphinxsidebar h3, div.sphinxsidebar h4 { + margin-left: -15px; + padding-right: 14px; + padding-left: 14px; + color: #333; + font-weight: 300; + /*text-shadow: 0px 0px 0.5px rgba(0, 0, 0, 0.4);*/ +} + +div.sphinxsidebarwrapper { + padding: 0; + display: flex; + flex-wrap: wrap; + gap: 15px; +} + +div.sphinxsidebarwrapper > h3:first-child { + margin-top: 0.5em; + border: none; +} + +div.sphinxsidebar h3 a { + color: #333; +} + +div.sphinxsidebar ul { + color: #444; + margin-top: 7px; + padding: 0; + line-height: 130%; +} + +div.sphinxsidebar ul ul { + margin-left: 20px; + list-style-image: url(listitem.png); +} + +div.footer { + color: {{ theme_darkgray }}; + text-shadow: 0 0 .2px rgba(255, 255, 255, 0.8); + padding: 2em; + text-align: center; 
+ clear: both; + font-size: 0.8em; +} + +/* -- body styles ----------------------------------------------------------- */ + +p { + margin: 0.8em 0 0.5em 0; +} + +a { + color: {{ theme_darkgreen }}; + text-decoration: none; +} + +a:hover { + color: {{ theme_darkyellow }}; +} + +div.body a { + text-decoration: underline; +} + +h1 { + margin: 10px 0 0 0; + font-size: 2.4em; + color: {{ theme_darkgray }}; + font-weight: 300; +} + +h2 { + margin: 1em 0 0.2em 0; + font-size: 1.5em; + font-weight: 300; + padding: 0; + color: {{ theme_darkgreen }}; +} + +h3 { + margin: 1em 0 -0.3em 0; + font-size: 1.3em; + font-weight: 300; +} + +div.body h1 a, div.body h2 a, div.body h3 a, div.body h4 a, div.body h5 a, div.body h6 a { + text-decoration: none; +} + +div.body h1 a tt, div.body h2 a tt, div.body h3 a tt, div.body h4 a tt, div.body h5 a tt, div.body h6 a tt { + color: {{ theme_darkgreen }} !important; + font-size: inherit !important; +} + +a.headerlink { + color: {{ theme_green }} !important; + font-size: 12px; + margin-left: 6px; + padding: 0 4px 0 4px; + text-decoration: none !important; + float: right; +} + +a.headerlink:hover { + background-color: #ccc; + color: white!important; +} + +cite, code, tt { + font-family: 'Consolas', 'DejaVu Sans Mono', + 'Bitstream Vera Sans Mono', monospace; + font-size: 14px; + letter-spacing: -0.02em; +} + +tt { + background-color: #f2f2f2; + border: 1px solid #ddd; + border-radius: 2px; + color: #333; + padding: 1px; +} + +tt.descname, tt.descclassname, tt.xref { + border: 0; +} + +hr { + border: 1px solid #abc; + margin: 2em; +} + +a tt { + border: 0; + color: {{ theme_darkgreen }}; +} + +a tt:hover { + color: {{ theme_darkyellow }}; +} + +pre { + font-family: 'Consolas', 'DejaVu Sans Mono', + 'Bitstream Vera Sans Mono', monospace; + font-size: 13px; + letter-spacing: 0.015em; + line-height: 120%; + padding: 0.5em; + border: 1px solid #ccc; + border-radius: 2px; + background-color: #f8f8f8; +} + +pre a { + color: inherit; + 
text-decoration: underline; +} + +td.linenos pre { + padding: 0.5em 0; +} + +div.quotebar { + background-color: #f8f8f8; + max-width: 250px; + float: right; + padding: 0px 7px; + border: 1px solid #ccc; + margin-left: 1em; +} + +div.topic { + background-color: #f8f8f8; +} + +table { + border-collapse: collapse; + margin: 0 -0.5em 0 -0.5em; +} + +table td, table th { + padding: 0.2em 0.5em 0.2em 0.5em; +} + +div.admonition, div.warning { + font-size: 0.9em; + margin: 1em 0 1em 0; + border: 1px solid #86989B; + border-radius: 2px; + background-color: #f7f7f7; + padding: 0; + padding-bottom: 0.5rem; +} + +div.admonition p, div.warning p { + margin: 0.5em 1em 0.5em 1em; + padding: 0; +} + +div.admonition pre, div.warning pre { + margin: 0.4em 1em 0.4em 1em; +} + +div.admonition p.admonition-title, +div.warning p.admonition-title { + font-weight: bold; +} + +div.warning { + border: 1px solid #940000; +/* background-color: #FFCCCF;*/ +} + +div.warning p.admonition-title { +} + +div.admonition ul, div.admonition ol, +div.warning ul, div.warning ol { + margin: 0.1em 0.5em 0.5em 3em; + padding: 0; +} + +.viewcode-back { + font-family: {{ theme_font }}, 'Lucida Grande', 'Lucida Sans Unicode', 'Geneva', + 'Verdana', sans-serif; +} + +div.viewcode-block:target { + background-color: #f4debf; + border-top: 1px solid #ac9; + border-bottom: 1px solid #ac9; +} diff --git a/doc/_themes/pygments14/theme.conf b/doc/_themes/pygments14/theme.conf new file mode 100644 index 0000000..8d2988f --- /dev/null +++ b/doc/_themes/pygments14/theme.conf @@ -0,0 +1,17 @@ +[theme] +inherit = basic +stylesheet = pygments14.css +pygments_style = friendly + +[options] +body_min_width = inherit +body_max_width = inherit +green = #66b55e +darkgreen = #36852e +darkgray = #666666 +border = #66b55e +yellow = #f4cd00 +darkyellow = #d4ad00 +lightyellow = #fffbe3 +background = #f9f9f9 +font = PT Sans diff --git a/doc/conf.py b/doc/conf.py new file mode 100644 index 0000000..f42c355 --- /dev/null +++ 
b/doc/conf.py @@ -0,0 +1,291 @@ +# +# Pygments documentation build configuration file +# + +import re, sys, os, itertools + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +sys.path.insert(0, os.path.abspath('..')) + +import pygments +import pygments.formatters +import pygments.lexers +import pygments.styles +import tests.contrast.test_contrasts as test_contrasts + +# -- General configuration ----------------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be extensions +# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx', 'pygments.sphinxext'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = 'Pygments' +copyright = '2006-2022, Georg Brandl and Pygments contributors' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = pygments.__version__ +# The full version, including alpha/beta/rc tags. +release = version + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. 
+#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['_build'] + +# The reST default role (used for this markup: `text`) to use for all documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +#pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + + +# -- Options for HTML output --------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'pygments14' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +html_theme_path = ['_themes'] + +# The name for this set of Sphinx documents. If None, it defaults to +# "<project> v<release> documentation". +html_title = 'Pygments' + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. 
+#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +html_favicon = '_static/favicon.ico' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +html_sidebars = {'index': ['indexsidebar.html', 'searchbox.html']} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +html_additional_pages = { + 'styles': 'styles.html', + } + +if os.environ.get('WEBSITE_BUILD'): + html_additional_pages['demo'] = 'demo.html' + html_static_path.append('_build/pyodide') + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a <link> tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. 
+#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Output file base name for HTML help builder. +htmlhelp_basename = 'Pygments' + + +# -- Options for LaTeX output -------------------------------------------------- + +latex_elements = { +# The paper size ('letterpaper' or 'a4paper'). +#'papersize': 'letterpaper', + +# The font size ('10pt', '11pt' or '12pt'). +#'pointsize': '10pt', + +# Additional stuff for the LaTeX preamble. +#'preamble': '', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, documentclass [howto/manual]). +latex_documents = [ + ('docs/index', 'Pygments.tex', 'Pygments Documentation', + 'Pygments authors', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output -------------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ('docs/index', 'pygments', 'Pygments Documentation', + ['Pygments authors'], 1) +] + +# If true, show URL addresses after external links. +#man_show_urls = False + + +# Example configuration for intersphinx: refer to the Python standard library. +#intersphinx_mapping = {'http://docs.python.org/': None} + +rst_prolog = '.. 
|language_count| replace:: {}'.format(len(list(pygments.lexers.get_all_lexers()))) + +def pg_context(app, pagename, templatename, ctx, event_arg): + ctx['demo_active'] = bool(os.environ.get('WEBSITE_BUILD')) + + if pagename == 'demo': + ctx['lexers'] = sorted(pygments.lexers.get_all_lexers(plugins=False), key=lambda x: x[0].lower()) + + if pagename in ('styles', 'demo'): + with open('examples/example.py') as f: + html = f.read() + lexer = pygments.lexers.get_lexer_for_filename('example.py') + min_contrasts = test_contrasts.min_contrasts() + ctx['styles_aa'] = [] + ctx['styles_sub_aa'] = [] + # Use STYLE_MAP directly so we don't get plugins as with get_all_styles(). + for style in pygments.styles.STYLE_MAP: + if not pygments.styles.get_style_by_name(style).web_style_gallery_exclude: + aa = min_contrasts[style] >= test_contrasts.WCAG_AA_CONTRAST + bg_r, bg_g, bg_b = test_contrasts.hex2rgb(pygments.styles.get_style_by_name(style).background_color) + ctx['styles_aa' if aa else 'styles_sub_aa'].append( + dict( + name=style, + html=pygments.highlight( + html, + lexer, + pygments.formatters.HtmlFormatter(noclasses=True, style=style), + ), + # from https://en.wikipedia.org/wiki/Relative_luminance + bg_luminance=(0.2126*bg_r + 0.7152*bg_g + 0.0722*bg_b) + ) + ) + + # sort styles according to their background luminance (light styles first) + # if styles have the same background luminance sort them by their name + sortkey = lambda s: (-s['bg_luminance'], s['name']) + # the default style is always displayed first + default_style = ctx['styles_aa'].pop(0) + ctx['styles_aa'].sort(key=sortkey) + ctx['styles_aa'].insert(0, default_style) + ctx['styles_sub_aa'].sort(key=sortkey) + + +def source_read(app, docname, source): + # linkify issue / PR numbers in changelog + if docname == 'docs/changelog': + with open('../CHANGES') as f: + changelog = f.read() + + idx = changelog.find('\nVersion 2.4.2\n') + + def linkify(match): + url = 'https://github.com/pygments/pygments/issues/' + 
match[1] + return '`{} <{}>`_'.format(match[0], url) + + linkified = re.sub(r'(?:PR)?#([0-9]+)\b', linkify, changelog[:idx]) + source[0] = linkified + changelog[idx:] + + +def setup(app): + app.connect('html-page-context', pg_context) + app.connect('source-read', source_read) diff --git a/doc/docs/api.rst b/doc/docs/api.rst new file mode 100644 index 0000000..4d330bf --- /dev/null +++ b/doc/docs/api.rst @@ -0,0 +1,360 @@ +.. -*- mode: rst -*- + +===================== +The full Pygments API +===================== + +This page describes the Pygments API. + +High-level API +============== + +.. module:: pygments + +Functions from the :mod:`pygments` module: + +.. function:: lex(code, lexer) + + Lex `code` with the `lexer` (must be a `Lexer` instance) + and return an iterable of tokens. Currently, this only calls + `lexer.get_tokens()`. + +.. function:: format(tokens, formatter, outfile=None) + + Format a token stream (iterable of tokens) `tokens` with the + `formatter` (must be a `Formatter` instance). The result is + written to `outfile`, or if that is ``None``, returned as a + string. + +.. function:: highlight(code, lexer, formatter, outfile=None) + + This is the most high-level highlighting function. + It combines `lex` and `format` in one function. + + +.. module:: pygments.lexers + +Functions from :mod:`pygments.lexers`: + +.. function:: get_lexer_by_name(alias, **options) + + Return an instance of a `Lexer` subclass that has `alias` in its + aliases list. The lexer is given the `options` at its + instantiation. + + Will raise :exc:`pygments.util.ClassNotFound` if no lexer with that alias is + found. + +.. function:: get_lexer_for_filename(fn, **options) + + Return a `Lexer` subclass instance that has a filename pattern + matching `fn`. The lexer is given the `options` at its + instantiation. + + Will raise :exc:`pygments.util.ClassNotFound` if no lexer for that filename + is found. + +.. 
function:: get_lexer_for_mimetype(mime, **options) + + Return a `Lexer` subclass instance that has `mime` in its mimetype + list. The lexer is given the `options` at its instantiation. + + Will raise :exc:`pygments.util.ClassNotFound` if no lexer for that mimetype + is found. + +.. function:: load_lexer_from_file(filename, lexername="CustomLexer", **options) + + Return a `Lexer` subclass instance loaded from the provided file, relative + to the current directory. The file is expected to contain a Lexer class + named `lexername` (by default, CustomLexer). Users should be very careful with + the input, because this method is equivalent to running eval on the input file. + The lexer is given the `options` at its instantiation. + + :exc:`ClassNotFound` is raised if there are any errors loading the Lexer + + .. versionadded:: 2.2 + +.. function:: guess_lexer(text, **options) + + Return a `Lexer` subclass instance that's guessed from the text in + `text`. For that, the :meth:`.analyse_text()` method of every known lexer + class is called with the text as argument, and the lexer which returned the + highest value will be instantiated and returned. + + :exc:`pygments.util.ClassNotFound` is raised if no lexer thinks it can + handle the content. + +.. function:: guess_lexer_for_filename(filename, text, **options) + + As :func:`guess_lexer()`, but only lexers which have a pattern in `filenames` + or `alias_filenames` that matches `filename` are taken into consideration. + + :exc:`pygments.util.ClassNotFound` is raised if no lexer thinks it can + handle the content. + +.. function:: get_all_lexers() + + Return an iterable over all registered lexers, yielding tuples in the + format:: + + (longname, tuple of aliases, tuple of filename patterns, tuple of mimetypes) + + .. versionadded:: 0.6 + +.. function:: find_lexer_class_by_name(alias) + + Return the `Lexer` subclass that has `alias` in its aliases list, without + instantiating it. 
+ + Will raise :exc:`pygments.util.ClassNotFound` if no lexer with that alias is + found. + + .. versionadded:: 2.2 + +.. function:: find_lexer_class(name) + + Return the `Lexer` subclass that has the *name* attribute as given by + the *name* argument. + + +.. module:: pygments.formatters + +Functions from :mod:`pygments.formatters`: + +.. function:: get_formatter_by_name(alias, **options) + + Return an instance of a :class:`.Formatter` subclass that has `alias` in its + aliases list. The formatter is given the `options` at its instantiation. + + Will raise :exc:`pygments.util.ClassNotFound` if no formatter with that + alias is found. + +.. function:: get_formatter_for_filename(fn, **options) + + Return a :class:`.Formatter` subclass instance that has a filename pattern + matching `fn`. The formatter is given the `options` at its instantiation. + + Will raise :exc:`pygments.util.ClassNotFound` if no formatter for that filename + is found. + +.. function:: load_formatter_from_file(filename, formattername="CustomFormatter", **options) + + Return a `Formatter` subclass instance loaded from the provided file, relative + to the current directory. The file is expected to contain a Formatter class + named ``formattername`` (by default, CustomFormatter). Users should be very + careful with the input, because this method is equivalent to running eval + on the input file. The formatter is given the `options` at its instantiation. + + :exc:`ClassNotFound` is raised if there are any errors loading the Formatter + + .. versionadded:: 2.2 + +.. module:: pygments.styles + +Functions from :mod:`pygments.styles`: + +.. function:: get_style_by_name(name) + + Return a style class by its short name. The names of the builtin styles + are listed in :data:`pygments.styles.STYLE_MAP`. + + Will raise :exc:`pygments.util.ClassNotFound` if no style of that name is + found. + +.. function:: get_all_styles() + + Return an iterable over all registered styles, yielding their names. + + .. 
versionadded:: 0.6 + + +.. module:: pygments.lexer + +Lexers +====== + +The base lexer class from which all lexers are derived is: + +.. class:: Lexer(**options) + + The constructor takes a \*\*keywords dictionary of options. + Every subclass must first process its own options and then call + the `Lexer` constructor, since it processes the `stripnl`, + `stripall` and `tabsize` options. + + An example looks like this: + + .. sourcecode:: python + + def __init__(self, **options): + self.compress = options.get('compress', '') + Lexer.__init__(self, **options) + + As these options must all be specifiable as strings (due to the + command line usage), there are various utility functions + available to help with that, see `Option processing`_. + + .. method:: get_tokens(text) + + This method is the basic interface of a lexer. It is called by + the `highlight()` function. It must process the text and return an + iterable of ``(tokentype, value)`` pairs from `text`. + + Normally, you don't need to override this method. The default + implementation processes the `stripnl`, `stripall` and `tabsize` + options and then yields all tokens from `get_tokens_unprocessed()`, + with the ``index`` dropped. + + .. method:: get_tokens_unprocessed(text) + + This method should process the text and return an iterable of + ``(index, tokentype, value)`` tuples where ``index`` is the starting + position of the token within the input text. + + This method must be overridden by subclasses. + + .. staticmethod:: analyse_text(text) + + A static method which is called for lexer guessing. It should analyse + the text and return a float in the range from ``0.0`` to ``1.0``. + If it returns ``0.0``, the lexer will not be selected as the most + probable one, if it returns ``1.0``, it will be selected immediately. + + .. note:: You don't have to add ``@staticmethod`` to the definition of + this method, this will be taken care of by the Lexer's metaclass. 
+ + For a list of known tokens have a look at the :doc:`tokens` page. + + A lexer also can have the following attributes (in fact, they are mandatory + except `alias_filenames`) that are used by the builtin lookup mechanism. + + .. attribute:: name + + Full name for the lexer, in human-readable form. + + .. attribute:: aliases + + A list of short, unique identifiers that can be used to lookup + the lexer from a list, e.g. using `get_lexer_by_name()`. + + .. attribute:: filenames + + A list of `fnmatch` patterns that match filenames which contain + content for this lexer. The patterns in this list should be unique among + all lexers. + + .. attribute:: alias_filenames + + A list of `fnmatch` patterns that match filenames which may or may not + contain content for this lexer. This list is used by the + :func:`.guess_lexer_for_filename()` function, to determine which lexers + are then included in guessing the correct one. That means that + e.g. every lexer for HTML and a template language should include + ``\*.html`` in this list. + + .. attribute:: mimetypes + + A list of MIME types for content that can be lexed with this + lexer. + + +.. module:: pygments.formatter + +Formatters +========== + +A formatter is derived from this class: + + +.. class:: Formatter(**options) + + As with lexers, this constructor processes options and then must call the + base class :meth:`__init__`. + + The :class:`Formatter` class recognizes the options `style`, `full` and + `title`. It is up to the formatter class whether it uses them. + + .. method:: get_style_defs(arg='') + + This method must return statements or declarations suitable to define + the current style for subsequent highlighted text (e.g. CSS classes + in the `HTMLFormatter`). + + The optional argument `arg` can be used to modify the generation and + is formatter dependent (it is standardized because it can be given on + the command line). 
+ + This method is called by the ``-S`` :doc:`command-line option <cmdline>`, + the `arg` is then given by the ``-a`` option. + + .. method:: format(tokensource, outfile) + + This method must format the tokens from the `tokensource` iterable and + write the formatted version to the file object `outfile`. + + Formatter options can control how exactly the tokens are converted. + + .. versionadded:: 0.7 + A formatter must have the following attributes that are used by the + builtin lookup mechanism. + + .. attribute:: name + + Full name for the formatter, in human-readable form. + + .. attribute:: aliases + + A list of short, unique identifiers that can be used to lookup + the formatter from a list, e.g. using :func:`.get_formatter_by_name()`. + + .. attribute:: filenames + + A list of :mod:`fnmatch` patterns that match filenames for which this + formatter can produce output. The patterns in this list should be unique + among all formatters. + + +.. module:: pygments.util + +Option processing +================= + +The :mod:`pygments.util` module has some utility functions usable for processing +command line options. All of the following functions get values from a +dictionary of options. If the value is already in the type expected by the +option, it is returned as-is. Otherwise, if the value is a string, it is first +converted to the expected type if possible. + +.. exception:: OptionError + + This exception will be raised by all option processing functions if + the type or value of the argument is not correct. + +.. function:: get_bool_opt(options, optname, default=None) + + Intuitively, this is `options.get(optname, default)`, but restricted to + Boolean value. The Booleans can be represented as string, in order to accept + Boolean value from the command line arguments. If the key `optname` is + present in the dictionary `options` and is not associated with a Boolean, + raise an `OptionError`. If it is absent, `default` is returned instead. 
+ + The valid string values for ``True`` are ``1``, ``yes``, ``true`` and + ``on``, the ones for ``False`` are ``0``, ``no``, ``false`` and ``off`` + (matched case-insensitively). + +.. function:: get_int_opt(options, optname, default=None) + + As :func:`get_bool_opt`, but interpret the value as an integer. + +.. function:: get_list_opt(options, optname, default=None) + + If the key `optname` from the dictionary `options` is a string, + split it at whitespace and return it. If it is already a list + or a tuple, it is returned as a list. + +.. function:: get_choice_opt(options, optname, allowed, default=None) + + If the key `optname` from the dictionary is not in the sequence + `allowed`, raise an error, otherwise return it. + + .. versionadded:: 0.8 diff --git a/doc/docs/authors.rst b/doc/docs/authors.rst new file mode 100644 index 0000000..f8373f0 --- /dev/null +++ b/doc/docs/authors.rst @@ -0,0 +1,4 @@ +Full contributor list +===================== + +.. include:: ../../AUTHORS diff --git a/doc/docs/changelog.rst b/doc/docs/changelog.rst new file mode 100644 index 0000000..f264cab --- /dev/null +++ b/doc/docs/changelog.rst @@ -0,0 +1 @@ +.. include:: ../../CHANGES diff --git a/doc/docs/cmdline.rst b/doc/docs/cmdline.rst new file mode 100644 index 0000000..b07b3e4 --- /dev/null +++ b/doc/docs/cmdline.rst @@ -0,0 +1,218 @@ +.. -*- mode: rst -*- + +====================== +Command Line Interface +====================== + +You can use Pygments from the shell, provided you installed the +:program:`pygmentize` script:: + + $ pygmentize test.py + print "Hello World" + +will print the file test.py to standard output, using the Python lexer +(inferred from the file name extension) and the terminal formatter (because +you didn't give an explicit formatter name). +:program:`pygmentize` attempts to +detect the maximum number of colors that the terminal supports. 
The difference +between color formatters for 16 and 256 colors is immense, but there is a less +noticeable difference between color formatters for 256 and 16 million colors. + +Here's the process of how it detects the maximum number of colors +supported by your terminal. If the ``COLORTERM`` environment variable is set to +either ``truecolor`` or ``24bit``, it will use a 16 million color representation +(like ``terminal16m``). Next, it will check whether ``256`` appears anywhere in the +environment variable ``TERM``, in which case it will use a 256-color representation +(such as ``terminal256``). When neither of those is found, it falls back to +the 16 color representation (like ``terminal``). + +If you want HTML output:: + + $ pygmentize -f html -l python -o test.html test.py + +As you can see, the ``-l`` option explicitly selects a lexer. As seen above, if you +give an input file name and it has an extension that Pygments recognizes, you can +omit this option. + +The ``-o`` option gives an output file name. If it is not given, output is +written to stdout. + +The ``-f`` option selects a formatter (as with ``-l``, it can also be omitted +if an output file name is given and has a supported extension). +If no output file name is given and ``-f`` is omitted, the +:class:`.TerminalFormatter` is used. + +The above command could therefore also be given as:: + + $ pygmentize -o test.html test.py + +To create a full HTML document, including line numbers and stylesheet (using the +"emacs" style), highlighting the Python file ``test.py`` to ``test.html``:: + + $ pygmentize -O full,style=emacs,linenos=1 -o test.html test.py + + +Options and filters +------------------- + +Lexer and formatter options can be given using the ``-O`` option:: + + $ pygmentize -f html -O style=colorful,linenos=1 -l python test.py + +Be sure to enclose the option string in quotes if it contains any special shell +characters, such as spaces or expansion wildcards like ``*``. 
If an option +expects a list value, separate the list entries with spaces (you'll have to +quote the option value in this case too, so that the shell doesn't split it). + +Since the ``-O`` option argument is split at commas and expects the split values +to be of the form ``name=value``, you can't give an option value that contains +commas or equals signs. Therefore, an option ``-P`` is provided (as of Pygments +0.9) that works like ``-O`` but can only pass one option per ``-P``. Its value +can then contain all characters:: + + $ pygmentize -P "heading=Pygments, the Python highlighter" ... + +Filters are added to the token stream using the ``-F`` option:: + + $ pygmentize -f html -l pascal -F keywordcase:case=upper main.pas + +As you see, options for the filter are given after a colon. As for ``-O``, the +filter name and options must be one shell word, so there may not be any spaces +around the colon. + + +Generating styles +----------------- + +Formatters normally don't output full style information. For example, the HTML +formatter by default only outputs ``<span>`` tags with ``class`` attributes. +Therefore, there's a special ``-S`` option for generating style definitions. +Usage is as follows:: + + $ pygmentize -f html -S colorful -a .syntax + +generates a CSS style sheet (because you selected the HTML formatter) for +the "colorful" style prepending a ".syntax" selector to all style rules. + +For an explanation what ``-a`` means for :doc:`a particular formatter +<formatters>`, look for the `arg` argument for the formatter's +:meth:`.get_style_defs()` method. + + +Getting lexer names +------------------- + +.. versionadded:: 1.0 + +The ``-N`` option guesses a lexer name for a given filename, so that :: + + $ pygmentize -N setup.py + +will print out ``python``. It won't highlight anything yet. If no specific +lexer is known for that filename, ``text`` is printed. 
+ +Additionally, there is the ``-C`` option, which is just like ``-N``, except +that it prints out a lexer name based solely on the content given on standard +input. + + +Guessing the lexer from the file contents +----------------------------------------- + +The ``-g`` option will try to guess the correct lexer from the file contents, +or pass through as plain text if nothing can be guessed. This option also looks +for Vim modelines in the text, and for *some* languages, shebangs. Usage is as +follows:: + + $ pygmentize -g setup.py + +Note though, that this option is not very reliable, and probably should be +used only if Pygments is not able to guess the correct lexer from the file's +extension. + + +Highlighting stdin until EOF +---------------------------- + +The ``-s`` option processes lines one at a time until EOF, rather than waiting +to process the entire file. This only works for stdin, only for lexers with no +line-spanning constructs, and is intended for streaming input such as you get +from ``tail -f``. Usage is as follows:: + + $ tail -f sql.log | pygmentize -s -l sql + + +Custom Lexers and Formatters +---------------------------- + +.. versionadded:: 2.2 + +The ``-x`` flag enables custom lexers and formatters to be loaded +from files relative to the current directory. Create a file with a class named +CustomLexer or CustomFormatter, then specify it on the command line:: + + $ pygmentize -l your_lexer.py -f your_formatter.py -x + +You can also specify the name of your class with a colon:: + + $ pygmentize -l your_lexer.py:SomeLexer -x + +For more information, see :doc:`the Pygments documentation on Lexer development +<lexerdevelopment>`. + + +Getting help +------------ + +The ``-L`` option lists lexers, formatters, along with their short +names and supported file name extensions, styles and filters. If you want to see +only one category, give it as an argument:: + + $ pygmentize -L filters + +will list only all installed filters. + +.. 
versionadded:: 2.11 + +The ``--json`` option can be used in conjunction with the ``-L`` option to +output its contents as JSON. Thus, to print all the installed styles and their +description in JSON, use the command:: + + $ pygmentize -L styles --json + +The ``-H`` option will give you detailed information (the same that can be found +in this documentation) about a lexer, formatter or filter. Usage is as follows:: + + $ pygmentize -H formatter html + +will print the help for the HTML formatter, while :: + + $ pygmentize -H lexer python + +will print the help for the Python lexer, etc. + + +A note on encodings +------------------- + +.. versionadded:: 0.9 + +Pygments tries to be smart regarding encodings in the formatting process: + +* If you give an ``encoding`` option, it will be used as the input and + output encoding. + +* If you give an ``outencoding`` option, it will override ``encoding`` + as the output encoding. + +* If you give an ``inencoding`` option, it will override ``encoding`` + as the input encoding. + +* If you don't give an encoding and have given an output file, the default + encoding for lexer and formatter is the terminal encoding or the default + locale encoding of the system. As a last resort, ``latin1`` is used (which + will pass through all non-ASCII characters). + +* If you don't give an encoding and haven't given an output file (that means + output is written to the console), the default encoding for lexer and + formatter is the terminal encoding (``sys.stdout.encoding``). diff --git a/doc/docs/filterdevelopment.rst b/doc/docs/filterdevelopment.rst new file mode 100644 index 0000000..004919e --- /dev/null +++ b/doc/docs/filterdevelopment.rst @@ -0,0 +1,75 @@ +.. -*- mode: rst -*- + +===================== +Write your own filter +===================== + +.. versionadded:: 0.7 + +Writing your own filters is very easy. All you have to do is to subclass +the `Filter` class and override the `filter` method.
Additionally a +filter is instantiated with some keyword arguments you can use to +adjust the behavior of your filter. + + +Subclassing Filters +=================== + +As an example, we write a filter that converts all `Name.Function` tokens +to normal `Name` tokens to make the output less colorful. + +.. sourcecode:: python + + from pygments.util import get_bool_opt + from pygments.token import Name + from pygments.filter import Filter + + class UncolorFilter(Filter): + + def __init__(self, **options): + Filter.__init__(self, **options) + self.class_too = get_bool_opt(options, 'classtoo') + + def filter(self, lexer, stream): + for ttype, value in stream: + if ttype is Name.Function or (self.class_too and + ttype is Name.Class): + ttype = Name + yield ttype, value + +Some notes on the `lexer` argument: that can be quite confusing since it doesn't +need to be a lexer instance. If a filter was added by using the `add_filter()` +function of lexers, that lexer is registered for the filter. In that case +`lexer` will refer to the lexer that has registered the filter. It *can* be used +to access options passed to a lexer. Because it could be `None` you always have +to check for that case if you access it. + + +Using a decorator +================= + +You can also use the `simplefilter` decorator from the `pygments.filter` module: + +.. sourcecode:: python + + from pygments.util import get_bool_opt + from pygments.token import Name + from pygments.filter import simplefilter + + + @simplefilter + def uncolor(self, lexer, stream, options): + class_too = get_bool_opt(options, 'classtoo') + for ttype, value in stream: + if ttype is Name.Function or (class_too and + ttype is Name.Class): + ttype = Name + yield ttype, value + + +You can instantiate this filter by calling `uncolor(classtoo=True)`, the same +way that you would have instantiated the previous filter by calling +`UncolorFilter(classtoo=True)`. 
Indeed, the decorator automatically ensures that +`uncolor` is a class which subclasses an internal filter class. The class +`uncolor` uses the decorated function as a method for filtering. (That's why +there is a `self` argument that you probably won't end up using in the method.) diff --git a/doc/docs/filters.rst b/doc/docs/filters.rst new file mode 100644 index 0000000..5cdcb4c --- /dev/null +++ b/doc/docs/filters.rst @@ -0,0 +1,48 @@ +.. -*- mode: rst -*- + +======= +Filters +======= + +.. versionadded:: 0.7 + +Transforming a stream of tokens into another stream is called "filtering" and is +done by filters. The most common examples of filters transform each token by +applying simple rules such as highlighting the token if it is a TODO or +another special word, or converting keywords to uppercase to enforce a style +guide. More complex filters can transform the stream of tokens, such as removing +the line indentation or merging tokens together. It should be noted that pygments +filters are entirely unrelated to Python's `filter +<https://docs.python.org/3/library/functions.html#filter>`_. + +An arbitrary number of filters can be applied to token streams coming from +lexers to improve or annotate the output. To apply a filter, you can use the +`add_filter()` method of a lexer: + +.. sourcecode:: pycon + + >>> from pygments.lexers import PythonLexer + >>> l = PythonLexer() + >>> # add a filter given by a string and options + >>> l.add_filter('codetagify', case='lower') + >>> l.filters + [<pygments.filters.CodeTagFilter object at 0xb785decc>] + >>> from pygments.filters import KeywordCaseFilter + >>> # or give an instance + >>> l.add_filter(KeywordCaseFilter(case='lower')) + +The `add_filter()` method takes keyword arguments which are forwarded to +the constructor of the filter. + +To get a list of all registered filters by name, you can use the +`get_all_filters()` function from the `pygments.filters` module that returns an +iterable for all known filters.
+ +If you want to write your own filter, have a look at :doc:`Write your own filter +<filterdevelopment>`. + + +Builtin Filters +=============== + +.. pygmentsdoc:: filters diff --git a/doc/docs/formatterdevelopment.rst b/doc/docs/formatterdevelopment.rst new file mode 100644 index 0000000..2bfac05 --- /dev/null +++ b/doc/docs/formatterdevelopment.rst @@ -0,0 +1,169 @@ +.. -*- mode: rst -*- + +======================== +Write your own formatter +======================== + +As well as creating :doc:`your own lexer <lexerdevelopment>`, writing a new +formatter for Pygments is easy and straightforward. + +A formatter is a class that is initialized with some keyword arguments (the +formatter options) and that must provide a `format()` method. +Additionally a formatter should provide a `get_style_defs()` method that +returns the style definitions from the style in a form usable for the +formatter's output format. + + +Quickstart +========== + +The most basic formatter shipped with Pygments is the `NullFormatter`. It just +sends the value of a token to the output stream: + +.. sourcecode:: python + + from pygments.formatter import Formatter + + class NullFormatter(Formatter): + def format(self, tokensource, outfile): + for ttype, value in tokensource: + outfile.write(value) + +As you can see, the `format()` method is passed two parameters: `tokensource` +and `outfile`. The first is an iterable of ``(token_type, value)`` tuples, +the latter a file-like object with a `write()` method. + +Because the formatter is that basic it doesn't override the `get_style_defs()` +method. + + +Styles +====== + +Styles aren't instantiated but their metaclass provides some class functions +so that you can access the style definitions easily. + +Styles are iterable and yield tuples in the form ``(ttype, d)`` where `ttype` +is a token and `d` is a dict with the following keys: + +``'color'`` + Hexadecimal color value (eg: ``'ff0000'`` for red) or `None` if not + defined.
+ +``'bold'`` + `True` if the value should be bold + +``'italic'`` + `True` if the value should be italic + +``'underline'`` + `True` if the value should be underlined + +``'bgcolor'`` + Hexadecimal color value for the background (eg: ``'eeeeee'`` for light + gray) or `None` if not defined. + +``'border'`` + Hexadecimal color value for the border (eg: ``'0000aa'`` for a dark + blue) or `None` for no border. + +Additional keys might appear in the future; formatters should ignore all keys +they don't support. + + +HTML 3.2 Formatter +================== + +For a more complex example, let's implement an HTML 3.2 Formatter. We don't +use CSS but inline markup (``<u>``, ``<font>``, etc). Because this isn't good +style this formatter isn't in the standard library ;-) + +.. sourcecode:: python + + from pygments.formatter import Formatter + + class OldHtmlFormatter(Formatter): + + def __init__(self, **options): + Formatter.__init__(self, **options) + + # create a dict of (start, end) tuples that wrap the + # value of a token so that we can use it in the format + # method later + self.styles = {} + + # we iterate over the `_styles` attribute of a style item + # that contains the parsed style values. + for token, style in self.style: + start = end = '' + # a style item is a tuple in the following form: + # colors are readily specified in hex: 'RRGGBB' + if style['color']: + start += '<font color="#%s">' % style['color'] + end = '</font>' + end + if style['bold']: + start += '<b>' + end = '</b>' + end + if style['italic']: + start += '<i>' + end = '</i>' + end + if style['underline']: + start += '<u>' + end = '</u>' + end + self.styles[token] = (start, end) + + def format(self, tokensource, outfile): + # lastval is a string we use for caching + # because it's possible that a lexer yields a number + # of consecutive tokens with the same token type.
+ # to minimize the size of the generated html markup we + # try to join the values of same-type tokens here + lastval = '' + lasttype = None + + # wrap the whole output with <pre> + outfile.write('<pre>') + + for ttype, value in tokensource: + # if the token type doesn't exist in the stylemap + # we try it with the parent of the token type + # eg: parent of Token.Literal.String.Double is + # Token.Literal.String + while ttype not in self.styles: + ttype = ttype.parent + if ttype == lasttype: + # the current token type is the same of the last + # iteration. cache it + lastval += value + else: + # not the same token as last iteration, but we + # have some data in the buffer. wrap it with the + # defined style and write it to the output file + if lastval: + stylebegin, styleend = self.styles[lasttype] + outfile.write(stylebegin + lastval + styleend) + # set lastval/lasttype to current values + lastval = value + lasttype = ttype + + # if something is left in the buffer, write it to the + # output file, then close the opened <pre> tag + if lastval: + stylebegin, styleend = self.styles[lasttype] + outfile.write(stylebegin + lastval + styleend) + outfile.write('</pre>\n') + +The comments should explain it. Again, this formatter doesn't override the +`get_style_defs()` method. If we would have used CSS classes instead of +inline HTML markup, we would need to generate the CSS first. For that +purpose the `get_style_defs()` method exists: + + +Generating Style Definitions +============================ + +Some formatters like the `LatexFormatter` and the `HtmlFormatter` don't +output inline markup but reference either macros or css classes. Because +the definitions of those are not part of the output, the `get_style_defs()` +method exists. It is passed one parameter (if it's used and how it's used +is up to the formatter) and has to return a string or ``None``. 
diff --git a/doc/docs/formatters.rst b/doc/docs/formatters.rst new file mode 100644 index 0000000..9e7074e --- /dev/null +++ b/doc/docs/formatters.rst @@ -0,0 +1,48 @@ +.. -*- mode: rst -*- + +==================== +Available formatters +==================== + +This page lists all builtin formatters. + +Common options +============== + +All formatters support these options: + +`encoding` + If given, must be an encoding name (such as ``"utf-8"``). This will + be used to convert the token strings (which are Unicode strings) + to byte strings in the output (default: ``None``). + It will also be written in an encoding declaration suitable for the + document format if the `full` option is given (e.g. a ``meta + content-type`` directive in HTML or an invocation of the `inputenc` + package in LaTeX). + + If this is ``""`` or ``None``, Unicode strings will be written + to the output file, which most file-like objects do not support. + For example, `pygments.highlight()` will return a Unicode string if + called with no `outfile` argument and a formatter that has `encoding` + set to ``None`` because it uses a `StringIO.StringIO` object that + supports Unicode arguments to `write()`. Using a regular file object + wouldn't work. + + .. versionadded:: 0.6 + +`outencoding` + When using Pygments from the command line, any `encoding` option given is + passed to the lexer and the formatter. This is sometimes not desirable, + for example if you want to set the input encoding to ``"guess"``. + Therefore, `outencoding` has been introduced which overrides `encoding` + for the formatter if given. + + .. versionadded:: 0.7 + + +Formatter classes +================= + +All these classes are importable from :mod:`pygments.formatters`. + +.. 
pygmentsdoc:: formatters diff --git a/doc/docs/index.rst b/doc/docs/index.rst new file mode 100644 index 0000000..d35fe6f --- /dev/null +++ b/doc/docs/index.rst @@ -0,0 +1,64 @@ +Pygments documentation +====================== + +**Starting with Pygments** + +.. toctree:: + :maxdepth: 1 + + ../download + quickstart + cmdline + +**Builtin components** + +.. toctree:: + :maxdepth: 1 + + lexers + filters + formatters + styles + +**Reference** + +.. toctree:: + :maxdepth: 1 + + unicode + tokens + api + terminal-sessions + +**Hacking for Pygments** + +.. toctree:: + :maxdepth: 1 + + lexerdevelopment + formatterdevelopment + filterdevelopment + styledevelopment + plugins + +**Hints and tricks** + +.. toctree:: + :maxdepth: 1 + + rstdirective + moinmoin + java + integrate + +**About Pygments** + +.. toctree:: + :maxdepth: 1 + + changelog + authors + security + +If you find bugs or have suggestions for the documentation, please submit them +on `GitHub <https://github.com/pygments/pygments>`_. diff --git a/doc/docs/integrate.rst b/doc/docs/integrate.rst new file mode 100644 index 0000000..2a030b7 --- /dev/null +++ b/doc/docs/integrate.rst @@ -0,0 +1,40 @@ +.. -*- mode: rst -*- + +=================================== +Using Pygments in various scenarios +=================================== + +Markdown +-------- + +Since Pygments 0.9, the distribution ships Markdown_ preprocessor sample code +that uses Pygments to render source code in +:file:`external/markdown-processor.py`. You can copy and adapt it to your +liking. + +.. _Markdown: https://pypi.org/project/Markdown/ + +TextMate +-------- + +Antonio Cangiano has created a Pygments bundle for TextMate that allows to +colorize code via a simple menu option. It can be found here_. + +.. _here: https://programmingzen.com/pygments-textmate-bundle/ + +Bash completion +--------------- + +The source distribution contains a file ``external/pygments.bashcomp`` that +sets up completion for the ``pygmentize`` command in bash. 
+ +Wrappers for other languages +---------------------------- + +These libraries provide Pygments highlighting for users of other languages +than Python: + +* `pygments.rb <https://github.com/pygments/pygments.rb>`_, a pygments wrapper for Ruby +* `Clygments <https://github.com/bfontaine/clygments>`_, a pygments wrapper for + Clojure +* `PHPygments <https://github.com/capynet/PHPygments>`_, a pygments wrapper for PHP diff --git a/doc/docs/java.rst b/doc/docs/java.rst new file mode 100644 index 0000000..a8a5beb --- /dev/null +++ b/doc/docs/java.rst @@ -0,0 +1,70 @@ +===================== +Use Pygments in Java +===================== + +Thanks to `Jython <https://www.jython.org/>`_ it is possible to use Pygments in +Java. + +This page is a simple tutorial to get an idea of how this works. You can +then look at the `Jython documentation <https://jython.readthedocs.io/en/latest/>`_ for more +advanced uses. + +Since version 1.5, Pygments is deployed on `Maven Central +<https://repo1.maven.org/maven2/org/pygments/pygments/>`_ as a JAR, as is Jython +which makes it a lot easier to create a Java project. + +Here is an example of a `Maven <https://maven.apache.org/>`_ ``pom.xml`` file for a +project running Pygments: + +.. 
sourcecode:: xml + + <?xml version="1.0" encoding="UTF-8"?> + + <project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 + http://maven.apache.org/maven-v4_0_0.xsd"> + <modelVersion>4.0.0</modelVersion> + <groupId>example</groupId> + <artifactId>example</artifactId> + <version>1.0-SNAPSHOT</version> + <dependencies> + <dependency> + <groupId>org.python</groupId> + <artifactId>jython-standalone</artifactId> + <version>2.5.3</version> + </dependency> + <dependency> + <groupId>org.pygments</groupId> + <artifactId>pygments</artifactId> + <version>1.5</version> + <scope>runtime</scope> + </dependency> + </dependencies> + </project> + +The following Java example: + +.. sourcecode:: java + + PythonInterpreter interpreter = new PythonInterpreter(); + + // Set a variable with the content you want to work with + interpreter.set("code", code); + + // Simple use Pygments as you would in Python + interpreter.exec("from pygments import highlight\n" + + "from pygments.lexers import PythonLexer\n" + + "from pygments.formatters import HtmlFormatter\n" + + "\nresult = highlight(code, PythonLexer(), HtmlFormatter())"); + + // Get the result that has been set in a variable + System.out.println(interpreter.get("result", String.class)); + +will print something like: + +.. sourcecode:: html + + <div class="highlight"> + <pre><span class="k">print</span> <span class="s">"Hello World"</span></pre> + </div> diff --git a/doc/docs/lexerdevelopment.rst b/doc/docs/lexerdevelopment.rst new file mode 100644 index 0000000..354b1d4 --- /dev/null +++ b/doc/docs/lexerdevelopment.rst @@ -0,0 +1,748 @@ +.. -*- mode: rst -*- + +.. highlight:: python + +==================== +Write your own lexer +==================== + +If a lexer for your favorite language is missing in the Pygments package, you +can easily write your own and extend Pygments. 
+ +All you need can be found inside the :mod:`pygments.lexer` module. As you can +read in the :doc:`API documentation <api>`, a lexer is a class that is +initialized with some keyword arguments (the lexer options) and that provides a +:meth:`.get_tokens_unprocessed()` method which is given a string or unicode +object with the data to lex. + +The :meth:`.get_tokens_unprocessed()` method must return an iterator or iterable +containing tuples in the form ``(index, token, value)``. Normally you don't +need to do this since there are base lexers that do most of the work and that +you can subclass. + +RegexLexer +========== + +The lexer base class used by almost all of Pygments' lexers is the +:class:`RegexLexer`. This class allows you to define lexing rules in terms of +*regular expressions* for different *states*. + +States are groups of regular expressions that are matched against the input +string at the *current position*. If one of these expressions matches, a +corresponding action is performed (such as yielding a token with a specific +type, or changing state), the current position is set to where the last match +ended and the matching process continues with the first regex of the current +state. + +Lexer states are kept on a stack: each time a new state is entered, the new +state is pushed onto the stack. The most basic lexers (like the `DiffLexer`) +just need one state. + +Each state is defined as a list of tuples in the form (`regex`, `action`, +`new_state`) where the last item is optional. In the most basic form, `action` +is a token type (like `Name.Builtin`). That means: When `regex` matches, emit a +token with the match text and type `tokentype` and push `new_state` on the state +stack. If the new state is ``'#pop'``, the topmost state is popped from the +stack instead. To pop more than one state, use ``'#pop:2'`` and so on. +``'#push'`` is a synonym for pushing a second time the current state on top of +the stack. 
+ +The following example shows the `DiffLexer` from the builtin lexers. Note that +it contains some additional attributes `name`, `aliases` and `filenames` which +aren't required for a lexer. They are used by the builtin lexer lookup +functions. :: + + from pygments.lexer import RegexLexer + from pygments.token import * + + class DiffLexer(RegexLexer): + name = 'Diff' + aliases = ['diff'] + filenames = ['*.diff'] + + tokens = { + 'root': [ + (r' .*\n', Text), + (r'\+.*\n', Generic.Inserted), + (r'-.*\n', Generic.Deleted), + (r'@.*\n', Generic.Subheading), + (r'Index.*\n', Generic.Heading), + (r'=.*\n', Generic.Heading), + (r'.*\n', Text), + ] + } + +As you can see this lexer only uses one state. When the lexer starts scanning +the text, it first checks if the current character is a space. If this is true +it scans everything until newline and returns the data as a `Text` token (which +is the "no special highlighting" token). + +If this rule doesn't match, it checks if the current char is a plus sign. And +so on. + +If no rule matches at the current position, the current char is emitted as an +`Error` token that indicates a lexing error, and the position is increased by +one. + + +Adding and testing a new lexer +============================== + +The easiest way to use a new lexer is to use Pygments' support for loading +the lexer from a file relative to your current directory. + +First, change the name of your lexer class to CustomLexer: + +.. code-block:: python + + from pygments.lexer import RegexLexer + from pygments.token import * + + class CustomLexer(RegexLexer): + """All your lexer code goes here!""" + +Then you can load and test the lexer from the command line with the additional +flag ``-x``: + +.. code-block:: console + + $ python -m pygments -x -l your_lexer_file.py <inputfile> + +To specify a class name other than CustomLexer, append it with a colon: + +.. 
code-block:: console + + $ python -m pygments -x -l your_lexer.py:SomeLexer <inputfile> + +Or, using the Python API: + +.. code-block:: python + + # For a lexer named CustomLexer + your_lexer = load_lexer_from_file(filename, **options) + + # For a lexer named MyNewLexer + your_named_lexer = load_lexer_from_file(filename, "MyNewLexer", **options) + +When loading custom lexers and formatters, be extremely careful to use only +trusted files; Pygments will perform the equivalent of ``eval`` on them. + +If you only want to use your lexer with the Pygments API, you can import and +instantiate the lexer yourself, then pass it to :func:`pygments.highlight`. + +Use the ``-f`` flag to select a different output format than terminal +escape sequences. The :class:`pygments.formatters.html.HtmlFormatter` helps +you with debugging your lexer. You can use the ``debug_token_types`` option +to display the token types assigned to each part of your input file: + +.. code-block:: console + + $ python -m pygments -x -f html -Ofull,debug_token_types -l your_lexer.py:SomeLexer <inputfile> + +Hover over each token to see the token type displayed as a tooltip. + +To prepare your new lexer for inclusion in the Pygments distribution, so that it +will be found when passing filenames or lexer aliases from the command line, you +have to perform the following steps. + +First, change to the current directory containing the Pygments source code. You +will need to have either an unpacked source tarball, or (preferably) a copy +cloned from GitHub. + +.. code-block:: console + + $ cd pygments + +Select a matching module under ``pygments/lexers``, or create a new module for +your lexer class. + +.. note:: + + We encourage you to put your lexer class into its own module, unless it's a + very small derivative of an already existing lexer. + +Next, make sure the lexer is known from outside of the module. All modules in +the ``pygments.lexers`` package specify ``__all__``. 
For example, +``esoteric.py`` sets:: + + __all__ = ['BrainfuckLexer', 'BefungeLexer', ...] + +Add the name of your lexer class to this list (or create the list if your lexer +is the only class in the module). + +Finally the lexer can be made publicly known by rebuilding the lexer mapping. +In the root directory of the source (where the ``Makefile`` is located), run: + +.. code-block:: console + + $ make mapfiles + +To test the new lexer, store an example file in +``tests/examplefiles/<alias>``. For example, to test your +``DiffLexer``, add a ``tests/examplefiles/diff/example.diff`` containing a +sample diff output. To (re)generate the lexer output which the file is checked +against, use the command ``pytest tests/examplefiles/diff --update-goldens``. + +Now you can use ``python -m pygments`` from the current root of the checkout to +render your example to HTML: + +.. code-block:: console + + $ python -m pygments -O full -f html -o /tmp/example.html tests/examplefiles/diff/example.diff + +Note that this explicitly calls the ``pygments`` module in the current +directory. This ensures your modifications are used. Otherwise a possibly +already installed, unmodified version without your new lexer would have been +called from the system search path (``$PATH``). + +To view the result, open ``/tmp/example.html`` in your browser. + +Once the example renders as expected, you should run the complete test suite: + +.. code-block:: console + + $ make test + +It also tests that your lexer fulfills the lexer API and certain invariants, +such as that the concatenation of all token text is the same as the input text. + + +Regex Flags +=========== + +You can either define regex flags locally in the regex (``r'(?x)foo bar'``) or +globally by adding a `flags` attribute to your lexer class. If no attribute is +defined, it defaults to `re.MULTILINE`. For more information about regular +expression flags see the page about `regular expressions`_ in the Python +documentation. + +.. 
_regular expressions: https://docs.python.org/library/re.html#regular-expression-syntax + + +Scanning multiple tokens at once +================================ + +So far, the `action` element in the rule tuple of regex, action and state has +been a single token type. Now we look at the first of several other possible +values. + +Here is a more complex lexer that highlights INI files. INI files consist of +sections, comments and ``key = value`` pairs:: + + from pygments.lexer import RegexLexer, bygroups + from pygments.token import * + + class IniLexer(RegexLexer): + name = 'INI' + aliases = ['ini', 'cfg'] + filenames = ['*.ini', '*.cfg'] + + tokens = { + 'root': [ + (r'\s+', Text), + (r';.*?$', Comment), + (r'\[.*?\]$', Keyword), + (r'(.*?)(\s*)(=)(\s*)(.*?)$', + bygroups(Name.Attribute, Text, Operator, Text, String)) + ] + } + +The lexer first looks for whitespace, comments and section names. Later it +looks for a line that looks like a key, value pair, separated by an ``'='`` +sign, and optional whitespace. + +The `bygroups` helper yields each capturing group in the regex with a different +token type. First the `Name.Attribute` token, then a `Text` token for the +optional whitespace, after that an `Operator` token for the equals sign. Then a +`Text` token for the whitespace again. The rest of the line is returned as +`String`. + +Note that for this to work, every part of the match must be inside a capturing +group (a ``(...)``), and there must not be any nested capturing groups. If you +nevertheless need a group, use a non-capturing group defined using this syntax: +``(?:some|words|here)`` (note the ``?:`` after the beginning parenthesis). + +If you find yourself needing a capturing group inside the regex which shouldn't +be part of the output but is used in the regular expressions for backreferencing +(eg: ``r'(<(foo|bar)>)(.*?)(</\2>)'``), you can pass `None` to the bygroups +function and that group will be skipped in the output.
+ + +Changing states +=============== + +Many lexers need multiple states to work as expected. For example, some +languages allow multiline comments to be nested. Since this is a recursive +pattern it's impossible to lex just using regular expressions. + +Here is a lexer that recognizes C++ style comments (multi-line with ``/* */`` +and single-line with ``//`` until end of line):: + + from pygments.lexer import RegexLexer + from pygments.token import * + + class CppCommentLexer(RegexLexer): + name = 'Example Lexer with states' + + tokens = { + 'root': [ + (r'[^/]+', Text), + (r'/\*', Comment.Multiline, 'comment'), + (r'//.*?$', Comment.Singleline), + (r'/', Text) + ], + 'comment': [ + (r'[^*/]+', Comment.Multiline), + (r'/\*', Comment.Multiline, '#push'), + (r'\*/', Comment.Multiline, '#pop'), + (r'[*/]', Comment.Multiline) + ] + } + +This lexer starts lexing in the ``'root'`` state. It tries to match as much as +possible until it finds a slash (``'/'``). If the next character after the slash +is an asterisk (``'*'``) the `RegexLexer` sends those two characters to the +output stream marked as `Comment.Multiline` and continues lexing with the rules +defined in the ``'comment'`` state. + +If there wasn't an asterisk after the slash, the `RegexLexer` checks if it's a +Singleline comment (i.e. followed by a second slash). If this also wasn't the +case it must be a single slash, which is not a comment starter (the separate +regex for a single slash must also be given, else the slash would be marked as +an error token). + +Inside the ``'comment'`` state, we do the same thing again. Scan until the +lexer finds a star or slash. If it's the opening of a multiline comment, push +the ``'comment'`` state on the stack and continue scanning, again in the +``'comment'`` state. Else, check if it's the end of the multiline comment. If +yes, pop one state from the stack. + +Note: If you pop from an empty stack you'll get an `IndexError`. 
(There is an +easy way to prevent this from happening: don't ``'#pop'`` in the root state). + +If the `RegexLexer` encounters a newline that is flagged as an error token, the +stack is emptied and the lexer continues scanning in the ``'root'`` state. This +can help producing error-tolerant highlighting for erroneous input, e.g. when a +single-line string is not closed. + + +Advanced state tricks +===================== + +There are a few more things you can do with states: + +- You can push multiple states onto the stack if you give a tuple instead of a + simple string as the third item in a rule tuple. For example, if you want to + match a comment containing a directive, something like: + + .. code-block:: text + + /* <processing directive> rest of comment */ + + you can use this rule:: + + tokens = { + 'root': [ + (r'/\* <', Comment, ('comment', 'directive')), + ... + ], + 'directive': [ + (r'[^>]+', Comment.Directive), + (r'>', Comment, '#pop'), + ], + 'comment': [ + (r'[^*]+', Comment), + (r'\*/', Comment, '#pop'), + (r'\*', Comment), + ] + } + + When this encounters the above sample, first ``'comment'`` and ``'directive'`` + are pushed onto the stack, then the lexer continues in the directive state + until it finds the closing ``>``, then it continues in the comment state until + the closing ``*/``. Then, both states are popped from the stack again and + lexing continues in the root state. + + .. versionadded:: 0.9 + The tuple can contain the special ``'#push'`` and ``'#pop'`` (but not + ``'#pop:n'``) directives. + + +- You can include the rules of a state in the definition of another. 
This is + done by using `include` from `pygments.lexer`:: + + from pygments.lexer import RegexLexer, bygroups, include + from pygments.token import * + + class ExampleLexer(RegexLexer): + tokens = { + 'comments': [ + (r'(?s)/\*.*?\*/', Comment), + (r'//.*?\n', Comment), + ], + 'root': [ + include('comments'), + (r'(function)( )(\w+)( )({)', + bygroups(Keyword, Whitespace, Name, Whitespace, Punctuation), 'function'), + (r'.*\n', Text), + ], + 'function': [ + (r'[^}/]+', Text), + include('comments'), + (r'/', Text), + (r'\}', Punctuation, '#pop'), + ] + } + + This is a hypothetical lexer for a language that consists of functions and + comments. Because comments can occur at toplevel and in functions, we need + rules for comments in both states. As you can see, the `include` helper saves + repeating rules that occur more than once (in this example, the state + ``'comments'`` will never be entered by the lexer, as it's only there to be + included in ``'root'`` and ``'function'``). + +- Sometimes, you may want to "combine" a state from existing ones. This is + possible with the `combined` helper from `pygments.lexer`. + + If you, instead of a new state, write ``combined('state1', 'state2')`` as the + third item of a rule tuple, a new anonymous state will be formed from state1 + and state2 and if the rule matches, the lexer will enter this state. + + This is not used very often, but can be helpful in some cases, such as the + `PythonLexer`'s string literal processing. + +- If you want your lexer to start lexing in a different state you can modify the + stack by overriding the `get_tokens_unprocessed()` method:: + + from pygments.lexer import RegexLexer + + class ExampleLexer(RegexLexer): + tokens = {...} + + def get_tokens_unprocessed(self, text, stack=('root', 'otherstate')): + for item in RegexLexer.get_tokens_unprocessed(self, text, stack): + yield item + + Some lexers like the `PhpLexer` use this to make the leading ``<?php`` + preprocessor comments optional.
Note that you can crash the lexer easily by + putting values into the stack that don't exist in the token map. Also + removing ``'root'`` from the stack can result in strange errors! + +- In some lexers, a state should be popped if anything is encountered that isn't + matched by a rule in the state. You could use an empty regex at the end of + the state list, but Pygments provides a more obvious way of spelling that: + ``default('#pop')`` is equivalent to ``('', Text, '#pop')``. + + .. versionadded:: 2.0 + + +Subclassing lexers derived from RegexLexer +========================================== + +.. versionadded:: 1.6 + +Sometimes multiple languages are very similar, but should still be lexed by +different lexer classes. + +When subclassing a lexer derived from RegexLexer, the ``tokens`` dictionaries +defined in the parent and child class are merged. For example:: + + from pygments.lexer import RegexLexer, inherit + from pygments.token import * + + class BaseLexer(RegexLexer): + tokens = { + 'root': [ + ('[a-z]+', Name), + (r'/\*', Comment, 'comment'), + ('"', String, 'string'), + (r'\s+', Text), + ], + 'string': [ + ('[^"]+', String), + ('"', String, '#pop'), + ], + 'comment': [ + ... + ], + } + + class DerivedLexer(BaseLexer): + tokens = { + 'root': [ + ('[0-9]+', Number), + inherit, + ], + 'string': [ + (r'[^"\\]+', String), + (r'\\.', String.Escape), + ('"', String, '#pop'), + ], + } + +The `BaseLexer` defines three states, lexing names, strings and comments. The +`DerivedLexer` defines its own tokens dictionary, which extends the definitions +of the base lexer: + +* The "root" state has an additional rule and then the special object `inherit`, + which tells Pygments to insert the token definitions of the parent class at + that point. + +* The "string" state is replaced entirely, since there is no `inherit` rule in it. + +* The "comment" state is inherited entirely. + + +Using multiple lexers +===================== + +Using multiple lexers for the same input can be tricky.
One of the easiest +combination techniques is shown here: You can replace the action entry in a rule +tuple with a lexer class. The matched text will then be lexed with that lexer, +and the resulting tokens will be yielded. + +For example, look at this stripped-down HTML lexer:: + + from pygments.lexer import RegexLexer, bygroups, using + from pygments.token import * + from pygments.lexers.javascript import JavascriptLexer + + class HtmlLexer(RegexLexer): + name = 'HTML' + aliases = ['html'] + filenames = ['*.html', '*.htm'] + + flags = re.IGNORECASE | re.DOTALL + tokens = { + 'root': [ + ('[^<&]+', Text), + ('&.*?;', Name.Entity), + (r'<\s*script\s*', Name.Tag, ('script-content', 'tag')), + (r'<\s*[a-zA-Z0-9:]+', Name.Tag, 'tag'), + (r'<\s*/\s*[a-zA-Z0-9:]+\s*>', Name.Tag), + ], + 'script-content': [ + (r'(.+?)(<\s*/\s*script\s*>)', + bygroups(using(JavascriptLexer), Name.Tag), + '#pop'), + ] + } + +Here the content of a ``<script>`` tag is passed to a newly created instance of +a `JavascriptLexer` and not processed by the `HtmlLexer`. This is done using +the `using` helper that takes the other lexer class as its parameter. + +Note the combination of `bygroups` and `using`. This makes sure that the +content up to the ``</script>`` end tag is processed by the `JavascriptLexer`, +while the end tag is yielded as a normal token with the `Name.Tag` type. + +Also note the ``(r'<\s*script\s*', Name.Tag, ('script-content', 'tag'))`` rule. +Here, two states are pushed onto the state stack, ``'script-content'`` and +``'tag'``. That means that first ``'tag'`` is processed, which will lex +attributes and the closing ``>``, then the ``'tag'`` state is popped and the +next state on top of the stack will be ``'script-content'``. + +Since you cannot refer to the class currently being defined, use `this` +(imported from `pygments.lexer`) to refer to the current lexer class, i.e. +``using(this)``. 
This construct may seem unnecessary, but this is often the +most obvious way of lexing arbitrary syntax between fixed delimiters without +introducing deeply nested states. + +The `using()` helper has a special keyword argument, `state`, which works as +follows: if given, the lexer to use initially is not in the ``"root"`` state, +but in the state given by this argument. This does not work with advanced +`RegexLexer` subclasses such as `ExtendedRegexLexer` (see below). + +Any other keyword arguments passed to `using()` are added to the keyword +arguments used to create the lexer. + + +Delegating Lexer +================ + +Another approach for nested lexers is the `DelegatingLexer` which is for example +used for the template engine lexers. It takes two lexers as arguments on +initialisation: a `root_lexer` and a `language_lexer`. + +The input is processed as follows: First, the whole text is lexed with the +`language_lexer`. All tokens yielded with the special type of ``Other`` are +then concatenated and given to the `root_lexer`. The language tokens of the +`language_lexer` are then inserted into the `root_lexer`'s token stream at the +appropriate positions. :: + + from pygments.lexer import DelegatingLexer + from pygments.lexers.web import HtmlLexer, PhpLexer + + class HtmlPhpLexer(DelegatingLexer): + def __init__(self, **options): + super().__init__(HtmlLexer, PhpLexer, **options) + +This procedure ensures that e.g. HTML with template tags in it is highlighted +correctly even if the template tags are put into HTML tags or attributes. + +If you want to change the needle token ``Other`` to something else, you can give +the lexer another token type as the third parameter:: + + super().__init__(MyLexer, OtherLexer, Text, **options) + + +Callbacks +========= + +Sometimes the grammar of a language is so complex that a lexer would be unable +to process it just by using regular expressions and stacks.
+ +For this, the `RegexLexer` allows callbacks to be given in rule tuples, instead +of token types (`bygroups` and `using` are nothing else but preimplemented +callbacks). The callback must be a function taking two arguments: + +* the lexer itself +* the match object for the last matched rule + +The callback must then return an iterable of (or simply yield) ``(index, +tokentype, value)`` tuples, which are then just passed through by +`get_tokens_unprocessed()`. The ``index`` here is the position of the token in +the input string, ``tokentype`` is the normal token type (like `Name.Builtin`), +and ``value`` the associated part of the input string. + +You can see an example here:: + + from pygments.lexer import RegexLexer + from pygments.token import Generic + + class HypotheticLexer(RegexLexer): + + def headline_callback(lexer, match): + equal_signs = match.group(1) + text = match.group(2) + yield match.start(), Generic.Headline, equal_signs + text + equal_signs + + tokens = { + 'root': [ + (r'(=+)(.*?)(\1)', headline_callback) + ] + } + +If the regex for the `headline_callback` matches, the function is called with +the match object. Note that after the callback is done, processing continues +normally, that is, after the end of the previous match. The callback has no +possibility to influence the position. + +There are not really any simple examples for lexer callbacks, but you can see +them in action e.g. in the `SMLLexer` class in `ml.py`_. + +.. _ml.py: https://github.com/pygments/pygments/blob/master/pygments/lexers/ml.py + + +The ExtendedRegexLexer class +============================ + +The `RegexLexer`, even with callbacks, unfortunately isn't powerful enough for +the funky syntax rules of languages such as Ruby. + +But fear not; even then you don't have to abandon the regular expression +approach: Pygments has a subclass of `RegexLexer`, the `ExtendedRegexLexer`. 
+All features known from RegexLexers are available here too, and the tokens are +specified in exactly the same way, *except* for one detail: + +The `get_tokens_unprocessed()` method holds its internal state data not as local +variables, but in an instance of the `pygments.lexer.LexerContext` class, and +that instance is passed to callbacks as a third argument. This means that you +can modify the lexer state in callbacks. + +The `LexerContext` class has the following members: + +* `text` -- the input text +* `pos` -- the current starting position that is used for matching regexes +* `stack` -- a list containing the state stack +* `end` -- the maximum position to which regexes are matched, this defaults to + the length of `text` + +Additionally, the `get_tokens_unprocessed()` method can be given a +`LexerContext` instead of a string and will then process this context instead of +creating a new one for the string argument. + +Note that because you can set the current position to anything in the callback, +it won't automatically be set by the caller after the callback is finished. +For example, this is how the hypothetical lexer above would be written with the +`ExtendedRegexLexer`:: + + from pygments.lexer import ExtendedRegexLexer + from pygments.token import Generic + + class ExHypotheticLexer(ExtendedRegexLexer): + + def headline_callback(lexer, match, ctx): + equal_signs = match.group(1) + text = match.group(2) + yield match.start(), Generic.Headline, equal_signs + text + equal_signs + ctx.pos = match.end() + + tokens = { + 'root': [ + (r'(=+)(.*?)(\1)', headline_callback) + ] + } + +This might sound confusing (and it can really be). But it is needed, and for an +example look at the Ruby lexer in `ruby.py`_. + +..
_ruby.py: https://github.com/pygments/pygments/blob/master/pygments/lexers/ruby.py + + +Handling Lists of Keywords +========================== + +For a relatively short list (hundreds) you can construct an optimized regular +expression directly using ``words()`` (longer lists, see next section). This +function handles a few things for you automatically, including escaping +metacharacters and Python's first-match rather than longest-match in +alternations. Feel free to put the lists themselves in +``pygments/lexers/_$lang_builtins.py`` (see examples there), and to generate +them by code if possible. + +An example of using ``words()`` is something like:: + + from pygments.lexer import RegexLexer, words + from pygments.token import Name + + class MyLexer(RegexLexer): + + tokens = { + 'root': [ + (words(('else', 'elseif'), suffix=r'\b'), Name.Builtin), + (r'\w+', Name), + ], + } + +As you can see, you can add ``prefix`` and ``suffix`` parts to the constructed +regex. + + +Modifying Token Streams +======================= + +Some languages ship a lot of builtin functions (for example PHP). The total +amount of those functions differs from system to system because not everybody +has every extension installed. In the case of PHP there are over 3000 builtin +functions. That's an incredibly huge amount of functions, much more than you +want to put into a regular expression. + +But because only `Name` tokens can be function names this is solvable by +overriding the ``get_tokens_unprocessed()`` method.
The following lexer +subclasses the `PythonLexer` so that it highlights some additional names as +pseudo keywords:: + + from pygments.lexers.python import PythonLexer + from pygments.token import Name, Keyword + + class MyPythonLexer(PythonLexer): + EXTRA_KEYWORDS = set(('foo', 'bar', 'foobar', 'barfoo', 'spam', 'eggs')) + + def get_tokens_unprocessed(self, text): + for index, token, value in PythonLexer.get_tokens_unprocessed(self, text): + if token is Name and value in self.EXTRA_KEYWORDS: + yield index, Keyword.Pseudo, value + else: + yield index, token, value + +The `PhpLexer` and `LuaLexer` use this method to resolve builtin functions. diff --git a/doc/docs/lexers.rst b/doc/docs/lexers.rst new file mode 100644 index 0000000..446c5a9 --- /dev/null +++ b/doc/docs/lexers.rst @@ -0,0 +1,69 @@ +.. -*- mode: rst -*- + +================ +Available lexers +================ + +This page lists all available builtin lexers and the options they take. + +Currently, **all lexers** support these options: + +`stripnl` + Strip leading and trailing newlines from the input (default: ``True``) + +`stripall` + Strip all leading and trailing whitespace from the input (default: + ``False``). + +`ensurenl` + Make sure that the input ends with a newline (default: ``True``). This + is required for some lexers that consume input linewise. + + .. versionadded:: 1.3 + +`tabsize` + If given and greater than 0, expand tabs in the input (default: ``0``). + +`encoding` + If given, must be an encoding name (such as ``"utf-8"``). This encoding + will be used to convert the input string to Unicode (if it is not already + a Unicode string). The default is ``"guess"``. + + If this option is set to ``"guess"``, a simple UTF-8 vs. Latin-1 + detection is used, if it is set to ``"chardet"``, the + `chardet library <https://chardet.github.io/>`_ is used to + guess the encoding of the input. + + .. 
versionadded:: 0.6 + + +The "Short Names" field lists the identifiers that can be used with the +`get_lexer_by_name()` function. + +These lexers are builtin and can be imported from `pygments.lexers`: + +.. pygmentsdoc:: lexers + + +Iterating over all lexers +------------------------- + +.. versionadded:: 0.6 + +To get all lexers (both the builtin and the plugin ones), you can +use the `get_all_lexers()` function from the `pygments.lexers` +module: + +.. sourcecode:: pycon + + >>> from pygments.lexers import get_all_lexers + >>> i = get_all_lexers() + >>> next(i) + ('Diff', ('diff',), ('*.diff', '*.patch'), ('text/x-diff', 'text/x-patch')) + >>> next(i) + ('Delphi', ('delphi', 'objectpascal', 'pas', 'pascal'), ('*.pas',), ('text/x-pascal',)) + >>> next(i) + ('XML+Ruby', ('xml+ruby', 'xml+erb'), (), ()) + +As you can see, the return value is an iterator which yields tuples +in the form ``(name, aliases, filenames, mimetypes)``. diff --git a/doc/docs/moinmoin.rst b/doc/docs/moinmoin.rst new file mode 100644 index 0000000..80ed25c --- /dev/null +++ b/doc/docs/moinmoin.rst @@ -0,0 +1,39 @@ +.. -*- mode: rst -*- + +============================ +Using Pygments with MoinMoin +============================ + +From Pygments 0.7, the source distribution ships a `Moin`_ parser plugin that +can be used to get Pygments highlighting in Moin wiki pages. + +To use it, copy the file `external/moin-parser.py` from the Pygments +distribution to the `data/plugin/parser` subdirectory of your Moin instance. +Edit the options at the top of the file (currently ``ATTACHMENTS`` and +``INLINESTYLES``) and rename the file to the name that the parser directive +should have. For example, if you name the file ``code.py``, you can get a +highlighted Python code sample with this Wiki markup:: + + {{{ + #!code python + [...] + }}} + +where ``python`` is the Pygments name of the lexer to use.
+ +Additionally, if you set the ``ATTACHMENTS`` option to True, Pygments will also +be called for all attachments for whose filenames there is no other parser +registered. + +You are responsible for including CSS rules that will map the Pygments CSS +classes to colors. You can output a stylesheet file with `pygmentize`, put it +into the `htdocs` directory of your Moin instance and then include it in the +`stylesheets` configuration option in the Moin config, e.g.:: + + stylesheets = [('screen', '/htdocs/pygments.css')] + +If you do not want to do that and are willing to accept larger HTML output, you +can set the ``INLINESTYLES`` option to True. + + +.. _Moin: https://moinmo.in/ diff --git a/doc/docs/plugins.rst b/doc/docs/plugins.rst new file mode 100644 index 0000000..6738860 --- /dev/null +++ b/doc/docs/plugins.rst @@ -0,0 +1,122 @@ +======= +Plugins +======= + +If you want to extend Pygments without hacking the sources, but want to +use the lexer/formatter/style/filter lookup functions (`lexers.get_lexer_by_name` +et al.), you can use `setuptools`_ entrypoints to add new lexers, formatters +or styles as if they were in the Pygments core. + +.. _setuptools: https://pypi.org/project/setuptools/ + +That means you can use your highlighter modules with the `pygmentize` script, +which relies on the mentioned functions. + + +Plugin discovery +================ + +At runtime, discovering plugins is preferentially done using Python's +standard library module `importlib.metadata`_, available in Python 3.8 +and higher. In earlier Python versions, Pygments attempts to use the +`importlib_metadata`_ backport, if available. If not available, a +fallback is attempted on the older `pkg_resources`_ module. Finally, if +``pkg_resources`` is not available, no plugins will be loaded at +all. Note that ``pkg_resources`` is distributed with `setuptools`_, and +thus available on most Python environments. 
However, ``pkg_resources`` +is considerably slower than ``importlib.metadata`` or its +``importlib_metadata`` backport. For this reason, if you run Pygments +under Python older than 3.8, it is recommended to install +``importlib-metadata``. Pygments defines a ``plugins`` packaging extra, +so you can ensure it is installed with best plugin support (i.e., that +``importlib-metadata`` is also installed in case you are running Python +earlier than 3.8) by specifying ``pygments[plugins]`` as the +requirement, for example, with ``pip``: + +.. sourcecode:: shell + + $ python -m pip install --user pygments[plugins] + +.. _importlib.metadata: https://docs.python.org/3.10/library/importlib.metadata.html +.. _importlib_metadata: https://pypi.org/project/importlib-metadata +.. _pkg_resources: https://setuptools.pypa.io/en/latest/pkg_resources.html + + +Defining plugins through entrypoints +==================================== + +Here is a list of setuptools entrypoints that Pygments understands: + +`pygments.lexers` + + This entrypoint is used for adding new lexers to the Pygments core. + The name of the entrypoint values doesn't really matter, Pygments extracts + required metadata from the class definition: + + .. sourcecode:: ini + + [pygments.lexers] + yourlexer = yourmodule:YourLexer + + Note that you have to define ``name``, ``aliases`` and ``filenames`` + attributes so that you can use the highlighter from the command line: + + .. sourcecode:: python + + class YourLexer(...): + name = 'Name Of Your Lexer' + aliases = ['alias'] + filenames = ['*.ext'] + + +`pygments.formatters` + + You can use this entrypoint to add new formatters to Pygments. The + name of an entrypoint item is the name of the formatter. If you + prefix the name with a slash it's used as a filename pattern: + + ..
sourcecode:: ini + + [pygments.formatters] + yourformatter = yourmodule:YourFormatter + /.ext = yourmodule:YourFormatter + + +`pygments.styles` + + To add a new style you can use this entrypoint. The name of the entrypoint + is the name of the style: + + .. sourcecode:: ini + + [pygments.styles] + yourstyle = yourmodule:YourStyle + + +`pygments.filters` + + Use this entrypoint to register a new filter. The name of the + entrypoint is the name of the filter: + + .. sourcecode:: ini + + [pygments.filters] + yourfilter = yourmodule:YourFilter + + +How To Use Entrypoints +====================== + +This documentation doesn't explain how to use those entrypoints because this is +covered in the `setuptools documentation`_. That page should cover everything +you need to write a plugin. + +.. _setuptools documentation: https://setuptools.readthedocs.io/en/latest/ + + +Extending The Core +================== + +If you have written a Pygments plugin that is open source, please inform us +about that. There is a high chance that we'll add it to the Pygments +distribution. diff --git a/doc/docs/quickstart.rst b/doc/docs/quickstart.rst new file mode 100644 index 0000000..b2a9c29 --- /dev/null +++ b/doc/docs/quickstart.rst @@ -0,0 +1,205 @@ +.. -*- mode: rst -*- + +=========================== +Introduction and Quickstart +=========================== + + +Welcome to Pygments! This document explains the basic concepts and terms and +gives a few examples of how to use the library. + + +Architecture +============ + +There are four types of components that work together highlighting a piece of +code: + +* A **lexer** splits the source into tokens, fragments of the source that + have a token type that determines what the text represents semantically + (e.g., keyword, string, or comment). There is a lexer for every language + or markup format that Pygments supports. +* The token stream can be piped through **filters**, which usually modify + the token types or text fragments, e.g. 
uppercasing all keywords. +* A **formatter** then takes the token stream and writes it to an output + file, in a format such as HTML, LaTeX or RTF. +* While writing the output, a **style** determines how to highlight all the + different token types. It maps them to attributes like "red and bold". + + +Example +======= + +Here is a small example for highlighting Python code: + +.. sourcecode:: python + + from pygments import highlight + from pygments.lexers import PythonLexer + from pygments.formatters import HtmlFormatter + + code = 'print "Hello World"' + print(highlight(code, PythonLexer(), HtmlFormatter())) + +which prints something like this: + +.. sourcecode:: html + + <div class="highlight"> + <pre><span class="k">print</span> <span class="s">"Hello World"</span></pre> + </div> + +As you can see, Pygments uses CSS classes (by default, but you can change that) +instead of inline styles in order to avoid outputting redundant style information over +and over. A CSS stylesheet that contains all CSS classes possibly used in the output +can be produced by: + +.. sourcecode:: python + + print(HtmlFormatter().get_style_defs('.highlight')) + +The argument to :func:`get_style_defs` is used as an additional CSS selector: +the output may look like this: + +.. sourcecode:: css + + .highlight .k { color: #AA22FF; font-weight: bold } + .highlight .s { color: #BB4444 } + ... + + +Options +======= + +The :func:`highlight()` function supports a fourth argument called *outfile*, it +must be a file object if given. The formatted output will then be written to +this file instead of being returned as a string. + +Lexers and formatters both support options. They are given to them as keyword +arguments either to the class or to the lookup method: + +.. 
sourcecode:: python + + from pygments import highlight + from pygments.lexers import get_lexer_by_name + from pygments.formatters import HtmlFormatter + + lexer = get_lexer_by_name("python", stripall=True) + formatter = HtmlFormatter(linenos=True, cssclass="source") + result = highlight(code, lexer, formatter) + +This makes the lexer strip all leading and trailing whitespace from the input +(`stripall` option), lets the formatter output line numbers (`linenos` option), +and sets the wrapping ``<div>``'s class to ``source`` (instead of +``highlight``). + +Important options include: + +`encoding` : for lexers and formatters + Since Pygments uses Unicode strings internally, this determines which + encoding will be used to convert to or from byte strings. +`style` : for formatters + The name of the style to use when writing the output. + + +For an overview of builtin lexers and formatters and their options, visit the +:doc:`lexer <lexers>` and :doc:`formatters <formatters>` lists. + +For a documentation on filters, see :doc:`this page <filters>`. + + +Lexer and formatter lookup +========================== + +If you want to lookup a built-in lexer by its alias or a filename, you can use +one of the following methods: + +.. sourcecode:: pycon + + >>> from pygments.lexers import (get_lexer_by_name, + ... get_lexer_for_filename, get_lexer_for_mimetype) + + >>> get_lexer_by_name('python') + <pygments.lexers.PythonLexer> + + >>> get_lexer_for_filename('spam.rb') + <pygments.lexers.RubyLexer> + + >>> get_lexer_for_mimetype('text/x-perl') + <pygments.lexers.PerlLexer> + +All these functions accept keyword arguments; they will be passed to the lexer +as options. + +A similar API is available for formatters: use :func:`.get_formatter_by_name()` +and :func:`.get_formatter_for_filename()` from the :mod:`pygments.formatters` +module for this purpose. 
+ + +Guessing lexers +=============== + +If you don't know the content of the file, or you want to highlight a file +whose extension is ambiguous, such as ``.html`` (which could contain plain HTML +or some template tags), use these functions: + +.. sourcecode:: pycon + + >>> from pygments.lexers import guess_lexer, guess_lexer_for_filename + + >>> guess_lexer('#!/usr/bin/python\nprint "Hello World!"') + <pygments.lexers.PythonLexer> + + >>> guess_lexer_for_filename('test.py', 'print "Hello World!"') + <pygments.lexers.PythonLexer> + +:func:`.guess_lexer()` passes the given content to the lexer classes' +:meth:`analyse_text()` method and returns the one for which it returns the +highest number. + +All lexers have two different filename pattern lists: the primary and the +secondary one. The :func:`.get_lexer_for_filename()` function only uses the +primary list, whose entries are supposed to be unique among all lexers. +:func:`.guess_lexer_for_filename()`, however, will first loop through all lexers +and look at the primary and secondary filename patterns if the filename matches. +If only one lexer matches, it is returned, else the guessing mechanism of +:func:`.guess_lexer()` is used with the matching lexers. + +As usual, keyword arguments to these functions are given to the created lexer +as options. + + +Command line usage +================== + +You can use Pygments from the command line, using the :program:`pygmentize` +script:: + + $ pygmentize test.py + +will highlight the Python file test.py using ANSI escape sequences +(a.k.a. terminal colors) and print the result to standard output. + +To output HTML, use the ``-f`` option:: + + $ pygmentize -f html -o test.html test.py + +to write an HTML-highlighted version of test.py to the file test.html. 
+Note that it will only be a snippet of HTML, if you want a full HTML document, +use the "full" option:: + + $ pygmentize -f html -O full -o test.html test.py + +This will produce a full HTML document with included stylesheet. + +A style can be selected with ``-O style=<name>``. + +If you need a stylesheet for an existing HTML file using Pygments CSS classes, +it can be created with:: + + $ pygmentize -S default -f html > style.css + +where ``default`` is the style name. + +More options and tricks can be found in the :doc:`command line reference +<cmdline>`. diff --git a/doc/docs/rstdirective.rst b/doc/docs/rstdirective.rst new file mode 100644 index 0000000..edc117d --- /dev/null +++ b/doc/docs/rstdirective.rst @@ -0,0 +1,22 @@ +.. -*- mode: rst -*- + +================================ +Using Pygments in ReST documents +================================ + +Many Python people use `ReST`_ for documenting their source code, programs, +scripts et cetera. This also means that documentation often includes sourcecode +samples or snippets. + +You can easily enable Pygments support for your ReST texts using a custom +directive -- this is also how this documentation displays source code. + +From Pygments 0.9, the directive is shipped in the distribution as +`external/rst-directive.py`. You can copy and adapt this code to your liking. + +.. removed -- too confusing + *Loosely related note:* The ReST lexer now recognizes ``.. sourcecode::`` and + ``.. code::`` directives and highlights the contents in the specified language + if the `handlecodeblocks` option is true. + +.. _ReST: https://docutils.sourceforge.io/rst.html diff --git a/doc/docs/security.rst b/doc/docs/security.rst new file mode 100644 index 0000000..72f2d05 --- /dev/null +++ b/doc/docs/security.rst @@ -0,0 +1,31 @@ +Security considerations +----------------------- + +Pygments provides no guarantees on execution time, which needs to be taken +into consideration when using Pygments to process arbitrary user inputs.
For +example, if you have a web service which uses Pygments for highlighting, there +may be inputs which will cause the Pygments process to run "forever" and/or use +significant amounts of memory. This can subsequently be used to perform a +remote denial-of-service attack on the server if the processes are not +terminated quickly. + +Unfortunately, it's practically impossible to harden Pygments itself against +those issues: Some regular expressions can result in "catastrophic +backtracking", but other bugs like incorrect matchers can also +cause similar problems, and there is no way to find them in an automated fashion +(short of solving the halting problem.) Pygments has extensive unit tests, +automated randomized testing, and is also tested by `OSS-Fuzz <https://github.com/google/oss-fuzz/tree/master/projects/pygments>`_, +but we will never be able to eliminate all bugs in this area. + +Our recommendations are: + +* Ensure that the Pygments process is *terminated* after a reasonably short + timeout. In general Pygments should take seconds at most for reasonably-sized + input. +* *Limit* the number of concurrent Pygments processes to avoid oversubscription + of resources. + +The Pygments authors will treat any bug resulting in long processing times with +high priority -- it's one of those things that will be fixed in a patch release. +When reporting a bug where you suspect super-linear execution times, please make +sure to attach an input to reproduce it.
\ No newline at end of file diff --git a/doc/docs/styledevelopment.rst b/doc/docs/styledevelopment.rst new file mode 100644 index 0000000..8c4ec2d --- /dev/null +++ b/doc/docs/styledevelopment.rst @@ -0,0 +1,96 @@ +.. -*- mode: rst -*- + +.. _creating-own-styles: + +Creating Own Styles +=================== + +So, how to create a style? All you have to do is to subclass `Style` and +define some styles: + +.. sourcecode:: python + + from pygments.style import Style + from pygments.token import Token, Comment, Keyword, Name, String, \ + Error, Generic, Number, Operator + + + class YourStyle(Style): + + styles = { + Token: '', + Comment: 'italic #888', + Keyword: 'bold #005', + Name: '#f00', + Name.Class: 'bold #0f0', + Name.Function: '#0f0', + String: 'bg:#eee #111' + } + +That's it, save it as ``your.py``. There are just a few rules. When you define a style for `Name` +the style automatically also affects `Name.Function` and so on. If you +defined ``'bold'`` and you don't want boldface for a subtoken use ``'nobold'``. + +(Philosophy: the styles aren't written in CSS syntax since this way +they can be used for a variety of formatters.) + +``Token`` is the default style inherited by all token types. + +To make the style usable for Pygments, you must + +* either register it as a plugin (see :doc:`the plugin docs <plugins>`) +* or update the ``pygments.styles`` subpackage directory. For example: + + * add ``your.py`` file + * register the new style by adding a line to the ``__init__.py`` file: + + .. sourcecode:: python + + STYLE_MAP = { + ... + 'your': 'your::YourStyle', + +.. note:: + + You should *only* add it to the ``pygments.styles`` subdirectory if you are + working on a contribution to Pygments. You should not use that + method to extend an already existing copy of Pygments, use the plugins + mechanism for that. 
+ + +Style Rules +=========== + +Here is a small overview of all allowed styles: + +``bold`` + render text as bold +``nobold`` + don't render text as bold (to prevent subtokens being highlighted bold) +``italic`` + render text italic +``noitalic`` + don't render text as italic +``underline`` + render text underlined +``nounderline`` + don't render text underlined +``bg:`` + transparent background +``bg:#000000`` + background color (black) +``border:`` + no border +``border:#ffffff`` + border color (white) +``#ff0000`` + text color (red) +``noinherit`` + don't inherit styles from supertoken + +Note that there must not be a space between ``bg:`` and the color value +since the style definition string is split at whitespace. +Also, using named colors is not allowed since the supported color names +vary for different formatters. + +Furthermore, not all lexers might support every style. diff --git a/doc/docs/styles.rst b/doc/docs/styles.rst new file mode 100644 index 0000000..91689d3 --- /dev/null +++ b/doc/docs/styles.rst @@ -0,0 +1,157 @@ +.. -*- mode: rst -*- + +====== +Styles +====== + +Pygments comes with :doc:`some builtin styles </styles/>` that work for both the +HTML and LaTeX formatter. + +The builtin styles can be looked up with the `get_style_by_name` function: + +.. sourcecode:: pycon + + >>> from pygments.styles import get_style_by_name + >>> get_style_by_name('colorful') + <class 'pygments.styles.colorful.ColorfulStyle'> + +You can pass the name of a builtin style to a formatter as the `style` +option in the form of a string: + +.. sourcecode:: pycon + + >>> from pygments.styles import get_style_by_name + >>> from pygments.formatters import HtmlFormatter + >>> HtmlFormatter(style='colorful').style + <class 'pygments.styles.colorful.ColorfulStyle'> + +Or you can also import your own style (which must be a subclass of +`pygments.style.Style`) and pass it to the formatter: + +..
sourcecode:: pycon + + >>> from yourapp.yourmodule import YourStyle + >>> from pygments.formatters import HtmlFormatter + >>> HtmlFormatter(style=YourStyle).style + <class 'yourapp.yourmodule.YourStyle'> + + +Creating Own Styles +=================== + +See :ref:`creating-own-styles`. + + +Builtin Styles +============== + +Pygments ships some builtin styles which are maintained by the Pygments team. + +To get a list of known styles you can use this snippet: + +.. sourcecode:: pycon + + >>> from pygments.styles import STYLE_MAP + >>> STYLE_MAP.keys() + ['default', 'emacs', 'friendly', 'colorful'] + + +Getting a list of available styles +================================== + +.. versionadded:: 0.6 + +Because it could be that a plugin registered a style, there is +a way to iterate over all styles: + +.. sourcecode:: pycon + + >>> from pygments.styles import get_all_styles + >>> styles = list(get_all_styles()) + + +.. _AnsiTerminalStyle: + +Terminal Styles +=============== + +.. versionadded:: 2.2 + +Custom styles used with the 256-color terminal formatter can also map colors to +use the 8 default ANSI colors. To do so, use ``ansigreen``, ``ansibrightred`` or +any other colors defined in :attr:`pygments.style.ansicolors`. Foreground ANSI +colors will be mapped to the corresponding `escape codes 30 to 37 +<https://en.wikipedia.org/wiki/ANSI_escape_code#Colors>`_ thus respecting any +custom color mapping and themes provided by many terminal emulators. Light +variants are treated as foreground color with an added bold flag. +``bg:ansi<color>`` will also be respected, except the light variant will be the +same shade as their dark variant. + +See the following example where the color of the string ``"hello world"`` is +governed by the escape sequence ``\x1b[34;41;01m`` (Ansi bright blue, Bold, 41 being red +background) instead of an extended foreground & background color. + +.. 
sourcecode:: pycon + + >>> from pygments import highlight + >>> from pygments.style import Style + >>> from pygments.token import Token + >>> from pygments.lexers import Python3Lexer + >>> from pygments.formatters import Terminal256Formatter + + >>> class MyStyle(Style): + styles = { + Token.String: 'ansibrightblue bg:ansibrightred', + } + + >>> code = 'print("Hello World")' + >>> result = highlight(code, Python3Lexer(), Terminal256Formatter(style=MyStyle)) + >>> print(result.encode()) + b'\x1b[34;41;01m"\x1b[39;49;00m\x1b[34;41;01mHello World\x1b[39;49;00m\x1b[34;41;01m"\x1b[39;49;00m' + +Colors specified using ``ansi*`` are converted to a default set of RGB colors +when used with formatters other than the terminal-256 formatter. + +By definition of ANSI, the following colors are considered "light" colors, and +will be rendered by most terminals as bold: + +- "brightblack" (darkgrey), "brightred", "brightgreen", "brightyellow", "brightblue", + "brightmagenta", "brightcyan", "white" + +The following are considered "dark" colors and will be rendered as non-bold: + +- "black", "red", "green", "yellow", "blue", "magenta", "cyan", + "gray" + +Exact behavior might depend on the terminal emulator you are using, and its +settings. + +.. _new-ansi-color-names: + +.. versionchanged:: 2.4 + +The definition of the ANSI color names has changed. +New names are easier to understand and align to the colors used in other projects. 
+ +===================== ==================== +New names Pygments up to 2.3 +===================== ==================== +``ansiblack`` ``#ansiblack`` +``ansired`` ``#ansidarkred`` +``ansigreen`` ``#ansidarkgreen`` +``ansiyellow`` ``#ansibrown`` +``ansiblue`` ``#ansidarkblue`` +``ansimagenta`` ``#ansipurple`` +``ansicyan`` ``#ansiteal`` +``ansigray`` ``#ansilightgray`` +``ansibrightblack`` ``#ansidarkgray`` +``ansibrightred`` ``#ansired`` +``ansibrightgreen`` ``#ansigreen`` +``ansibrightyellow`` ``#ansiyellow`` +``ansibrightblue`` ``#ansiblue`` +``ansibrightmagenta`` ``#ansifuchsia`` +``ansibrightcyan`` ``#ansiturquoise`` +``ansiwhite`` ``#ansiwhite`` +===================== ==================== + +Old ANSI color names are deprecated but will still work. diff --git a/doc/docs/terminal-sessions.rst b/doc/docs/terminal-sessions.rst new file mode 100644 index 0000000..45af0eb --- /dev/null +++ b/doc/docs/terminal-sessions.rst @@ -0,0 +1,46 @@ +Interactive terminal/shell sessions +----------------------------------- + +To highlight an interactive terminal or shell session, prefix your code snippet +with a specially formatted prompt. + +Supported shells with examples are shown below. In each example, prompt parts in +brackets ``[any]`` represent optional parts of the prompt, and prompt parts +without brackets or in parentheses ``(any)`` represent required parts of the +prompt. + +* **Bash Session** (console, shell-session): + + .. code-block:: console + + [any@any]$ ls -lh + [any@any]# ls -lh + [any@any]% ls -lh + $ ls -lh + # ls -lh + % ls -lh + > ls -lh + +* **MSDOS Session** (doscon): + + .. code-block:: doscon + + [any]> dir + > dir + More? dir + +* **Tcsh Session** (tcshcon): + + .. code-block:: tcshcon + + (any)> ls -lh + ? ls -lh + +* **PowerShell Session** (ps1con): + + .. 
code-block:: ps1con + + PS[any]> Get-ChildItem + PS> Get-ChildItem + >> Get-ChildItem + diff --git a/doc/docs/tokens.rst b/doc/docs/tokens.rst new file mode 100644 index 0000000..0bc7586 --- /dev/null +++ b/doc/docs/tokens.rst @@ -0,0 +1,376 @@ +.. -*- mode: rst -*- + +============== +Builtin Tokens +============== + +.. module:: pygments.token + +In the :mod:`pygments.token` module, there is a special object called `Token` +that is used to create token types. + +You can create a new token type by accessing an attribute of `Token` whose +name starts with an uppercase letter: + +.. sourcecode:: pycon + + >>> from pygments.token import Token + >>> Token.String + Token.String + >>> Token.String is Token.String + True + +Note that tokens are singletons so you can use the ``is`` operator for comparing +token types. + +You can also use the ``in`` operator to perform set tests: + +.. sourcecode:: pycon + + >>> from pygments.token import Comment + >>> Comment.Single in Comment + True + >>> Comment in Comment.Multi + False + +This can be useful in :doc:`filters <filters>` and if you write lexers on your +own without using the base lexers. + +You can also split a token type into a hierarchy, and get the parent of it: + +.. sourcecode:: pycon + + >>> String.split() + [Token, Token.Literal, Token.Literal.String] + >>> String.parent + Token.Literal + +In principle, you can create an unlimited number of token types but nobody can +guarantee that a style would define style rules for a token type. Because of +that, Pygments proposes some global token types defined in the +`pygments.token.STANDARD_TYPES` dict. + +For some tokens aliases are already defined: + +.. 
sourcecode:: pycon + + >>> from pygments.token import String + >>> String + Token.Literal.String + +Inside the :mod:`pygments.token` module the following aliases are defined: + +============= ============================ ==================================== +`Text` `Token.Text` for any type of text data +`Whitespace` `Token.Text.Whitespace` for whitespace +`Error` `Token.Error` represents lexer errors +`Other` `Token.Other` special token for data not + matched by a parser (e.g. HTML + markup in PHP code) +`Keyword` `Token.Keyword` any kind of keywords +`Name` `Token.Name` variable/function names +`Literal` `Token.Literal` Any literals +`String` `Token.Literal.String` string literals +`Number` `Token.Literal.Number` number literals +`Operator` `Token.Operator` operators (``+``, ``not``...) +`Punctuation` `Token.Punctuation` punctuation (``[``, ``(``...) +`Comment` `Token.Comment` any kind of comments +`Generic` `Token.Generic` generic tokens (have a look at + the explanation below) +============= ============================ ==================================== + +Normally you just create token types using the already defined aliases. For each +of those token aliases, a number of subtypes exists (excluding the special tokens +`Token.Text`, `Token.Error` and `Token.Other`) + +It's also possible to convert strings to token types (for example +if you want to supply a token from the command line): + +.. sourcecode:: pycon + + >>> from pygments.token import String, string_to_tokentype + >>> string_to_tokentype("String") + Token.Literal.String + >>> string_to_tokentype("Token.Literal.String") + Token.Literal.String + >>> string_to_tokentype(String) + Token.Literal.String + + +Keyword Tokens +============== + +`Keyword` + For any kind of keyword (especially if it doesn't match any of the + subtypes of course). + +`Keyword.Constant` + For keywords that are constants (e.g. ``None`` in future Python versions). 
+ +`Keyword.Declaration` + For keywords used for variable declaration (e.g. ``var`` in some programming + languages like JavaScript). + +`Keyword.Namespace` + For keywords used for namespace declarations (e.g. ``import`` in Python and + Java and ``package`` in Java). + +`Keyword.Pseudo` + For keywords that aren't really keywords (e.g. ``None`` in old Python + versions). + +`Keyword.Reserved` + For reserved keywords. + +`Keyword.Type` + For builtin types that can't be used as identifiers (e.g. ``int``, + ``char`` etc. in C). + + +Name Tokens +=========== + +`Name` + For any name (variable names, function names, classes). + +`Name.Attribute` + For all attributes (e.g. in HTML tags). + +`Name.Builtin` + Builtin names; names that are available in the global namespace. + +`Name.Builtin.Pseudo` + Builtin names that are implicit (e.g. ``self`` in Ruby, ``this`` in Java). + +`Name.Class` + Class names. Because no lexer can know if a name is a class or a function + or something else this token is meant for class declarations. + +`Name.Constant` + Token type for constants. In some languages you can recognise a token by the + way it's defined (the value after a ``const`` keyword for example). In + other languages constants are uppercase by definition (Ruby). + +`Name.Decorator` + Token type for decorators. Decorators are syntactic elements in the Python + language. Similar syntax elements exist in C# and Java. + +`Name.Entity` + Token type for special entities. (e.g. ``&nbsp;`` in HTML). + +`Name.Exception` + Token type for exception names (e.g. ``RuntimeError`` in Python). Some languages + define exceptions in the function signature (Java). You can highlight + the name of that exception using this token then. + +`Name.Function` + Token type for function names. + +`Name.Function.Magic` + same as `Name.Function` but for special function names that have an implicit use + in a language (e.g. ``__init__`` method in Python). + +`Name.Label` + Token type for label names (e.g. 
in languages that support ``goto``). + +`Name.Namespace` + Token type for namespaces. (e.g. import paths in Java/Python), names following + the ``module``/``namespace`` keyword in other languages. + +`Name.Other` + Other names. Normally unused. + +`Name.Property` + Additional token type occasionally used for class attributes. + +`Name.Tag` + Tag names (in HTML/XML markup or configuration files). + +`Name.Variable` + Token type for variables. Some languages have prefixes for variable names + (PHP, Ruby, Perl). You can highlight them using this token. + +`Name.Variable.Class` + same as `Name.Variable` but for class variables (also static variables). + +`Name.Variable.Global` + same as `Name.Variable` but for global variables (used in Ruby, for + example). + +`Name.Variable.Instance` + same as `Name.Variable` but for instance variables. + +`Name.Variable.Magic` + same as `Name.Variable` but for special variable names that have an implicit use + in a language (e.g. ``__doc__`` in Python). + + +Literals +======== + +`Literal` + For any literal (if not further defined). + +`Literal.Date` + for date literals (e.g. ``42d`` in Boo). + + +`String` + For any string literal. + +`String.Affix` + Token type for affixes that further specify the type of the string they're + attached to (e.g. the prefixes ``r`` and ``u8`` in ``r"foo"`` and ``u8"foo"``). + +`String.Backtick` + Token type for strings enclosed in backticks. + +`String.Char` + Token type for single characters (e.g. Java, C). + +`String.Delimiter` + Token type for delimiting identifiers in "heredoc", raw and other similar + strings (e.g. the word ``END`` in Perl code ``print <<'END';``). + +`String.Doc` + Token type for documentation strings (for example Python). + +`String.Double` + Double quoted strings. + +`String.Escape` + Token type for escape sequences in strings. + +`String.Heredoc` + Token type for "heredoc" strings (e.g. in Ruby or Perl). + +`String.Interpol` + Token type for interpolated parts in strings (e.g. 
``#{foo}`` in Ruby). + +`String.Other` + Token type for any other strings (for example ``%q{foo}`` string constructs + in Ruby). + +`String.Regex` + Token type for regular expression literals (e.g. ``/foo/`` in JavaScript). + +`String.Single` + Token type for single quoted strings. + +`String.Symbol` + Token type for symbols (e.g. ``:foo`` in LISP or Ruby). + + +`Number` + Token type for any number literal. + +`Number.Bin` + Token type for binary literals (e.g. ``0b101010``). + +`Number.Float` + Token type for float literals (e.g. ``42.0``). + +`Number.Hex` + Token type for hexadecimal number literals (e.g. ``0xdeadbeef``). + +`Number.Integer` + Token type for integer literals (e.g. ``42``). + +`Number.Integer.Long` + Token type for long integer literals (e.g. ``42L`` in Python). + +`Number.Oct` + Token type for octal literals. + + +Operators +========= + +`Operator` + For any punctuation operator (e.g. ``+``, ``-``). + +`Operator.Word` + For any operator that is a word (e.g. ``not``). + + +Punctuation +=========== + +.. versionadded:: 0.7 + +`Punctuation` + For any punctuation which is not an operator (e.g. ``[``, ``(``...) + +`Punctuation.Marker` + For markers that point to a location (e.g., carets in Python + tracebacks for syntax errors). + + .. versionadded:: 2.10 + + +Comments +======== + +`Comment` + Token type for any comment. + +`Comment.Hashbang` + Token type for hashbang comments (i.e. first lines of files that start with + ``#!``). + +`Comment.Multiline` + Token type for multiline comments. + +`Comment.Preproc` + Token type for preprocessor comments (also ``<?php``/``<%`` constructs). + +`Comment.PreprocFile` + Token type for filenames in preprocessor comments, such as include files in C/C++. + +`Comment.Single` + Token type for comments that end at the end of a line (e.g. ``# foo``). + +`Comment.Special` + Special data in comments. For example code tags, author and license + information, etc. 
+ + +Generic Tokens +============== + +Generic tokens are for special lexers like the `DiffLexer` that doesn't really +highlight a programming language but a patch file. + + +`Generic` + A generic, unstyled token. Normally you don't use this token type. + +`Generic.Deleted` + Marks the token value as deleted. + +`Generic.Emph` + Marks the token value as emphasized. + +`Generic.Error` + Marks the token value as an error message. + +`Generic.Heading` + Marks the token value as headline. + +`Generic.Inserted` + Marks the token value as inserted. + +`Generic.Output` + Marks the token value as program output (e.g. for python cli lexer). + +`Generic.Prompt` + Marks the token value as command prompt (e.g. bash lexer). + +`Generic.Strong` + Marks the token value as bold (e.g. for rst lexer). + +`Generic.Subheading` + Marks the token value as subheadline. + +`Generic.Traceback` + Marks the token value as a part of an error traceback. diff --git a/doc/docs/unicode.rst b/doc/docs/unicode.rst new file mode 100644 index 0000000..dca9111 --- /dev/null +++ b/doc/docs/unicode.rst @@ -0,0 +1,58 @@ +===================== +Unicode and Encodings +===================== + +Since Pygments 0.6, all lexers use unicode strings internally. Because of that +you might encounter the occasional :exc:`UnicodeDecodeError` if you pass strings +with the wrong encoding. + +Per default all lexers have their input encoding set to `guess`. This means +that the following encodings are tried: + +* UTF-8 (including BOM handling) +* The locale encoding (i.e. the result of `locale.getpreferredencoding()`) +* As a last resort, `latin1` + +If you pass a lexer a byte string object (not unicode), it tries to decode the +data using this encoding. + +You can override the encoding using the `encoding` or `inencoding` lexer +options. If you have the `chardet`_ library installed and set the encoding to +``chardet`` it will analyse the text and use the encoding it thinks is the +right one automatically: + +.. 
sourcecode:: python + + from pygments.lexers import PythonLexer + lexer = PythonLexer(encoding='chardet') + +The best way is to pass Pygments unicode objects. In that case you can't get +unexpected output. + +The formatters now send Unicode objects to the stream if you don't set the +output encoding. You can do so by passing the formatters an `encoding` option: + +.. sourcecode:: python + + from pygments.formatters import HtmlFormatter + f = HtmlFormatter(encoding='utf-8') + +**You will have to set this option if you have non-ASCII characters in the +source and the output stream does not accept Unicode written to it!** +This is the case for all regular files and for terminals. + +Note: The Terminal formatter tries to be smart: if its output stream has an +`encoding` attribute, and you haven't set the option, it will encode any +Unicode string with this encoding before writing it. This is the case for +`sys.stdout`, for example. The other formatters don't have that behavior. + +Another note: If you call Pygments via the command line (`pygmentize`), +encoding is handled differently, see :doc:`the command line docs <cmdline>`. + +.. versionadded:: 0.7 + The formatters now also accept an `outencoding` option which will override + the `encoding` option if given. This makes it possible to use a single + options dict with lexers and formatters, and still have different input and + output encodings. + +.. _chardet: https://chardet.github.io/ diff --git a/doc/download.rst b/doc/download.rst new file mode 100644 index 0000000..7ac0868 --- /dev/null +++ b/doc/download.rst @@ -0,0 +1,39 @@ +Download and installation +========================= + +The current release is version |version|. + +Packaged versions +----------------- + +You can download it `from the Python Package Index +<https://pypi.python.org/pypi/Pygments>`_. For installation of packages from +PyPI, we recommend `Pip <https://www.pip-installer.org>`_, which works on all +major platforms. 
+ +Under Linux, most distributions include a package for Pygments, usually called +``pygments`` or ``python-pygments``. You can install it with the package +manager as usual. + +Development sources +------------------- + +We're using the Git version control system. You can get the development source +using this command:: + + git clone https://github.com/pygments/pygments + +Development takes place at `GitHub <https://github.com/pygments/pygments>`_. + +The latest changes in the development source code are listed in the `changelog +<https://github.com/pygments/pygments/blob/master/CHANGES>`_. + +.. Documentation + ------------- + +.. XXX todo + + You can download the <a href="/docs/">documentation</a> either as + a bunch of rst files from the Git repository, see above, or + as a tar.gz containing rendered HTML files:</p> + <p><a href="/docs/download/pygmentsdocs.tar.gz">pygmentsdocs.tar.gz</a></p> diff --git a/doc/examples/example.py b/doc/examples/example.py new file mode 100644 index 0000000..6c9e2f1 --- /dev/null +++ b/doc/examples/example.py @@ -0,0 +1,14 @@ +from typing import Iterator + +# This is an example +class Math: + @staticmethod + def fib(n: int) -> Iterator[int]: + """Fibonacci series up to n.""" + a, b = 0, 1 + while a < n: + yield a + a, b = b, a + b + +result = sum(Math.fib(42)) +print("The answer is {}".format(result)) diff --git a/doc/faq.rst b/doc/faq.rst new file mode 100644 index 0000000..4e078dc --- /dev/null +++ b/doc/faq.rst @@ -0,0 +1,142 @@ +:orphan: + +Pygments FAQ +============= + +What is Pygments? +----------------- + +Pygments is a syntax highlighting engine written in Python. That means, it will +take source code (or other markup) in a supported language and output a +processed version (in different formats) containing syntax highlighting markup. 
+ +Its features include: + +* a wide range of common :doc:`languages and markup formats <languages>` is supported +* new languages and formats are added easily +* a number of output formats is available, including: + + - HTML + - ANSI sequences (console output) + - LaTeX + - RTF + +* it is usable as a command-line tool and as a library +* parsing and formatting is fast + +Pygments is licensed under the BSD license. + +Where does the name Pygments come from? +--------------------------------------- + +*Py* of course stands for Python, while *pigments* are used for coloring paint, +and in this case, source code! + +What are the system requirements? +--------------------------------- + +Pygments only needs a standard Python install, version 3.6 or higher. No +additional libraries are needed. + +How can I use Pygments? +----------------------- + +Pygments is usable as a command-line tool as well as a library. + +From the command-line, usage looks like this (assuming the pygmentize script is +properly installed):: + + pygmentize -f html /path/to/file.py + +This will print a HTML-highlighted version of /path/to/file.py to standard output. + +For a complete help, please run ``pygmentize -h``. + +Usage as a library is thoroughly demonstrated in the Documentation section. + +How do I make a new style? +-------------------------- + +Please see the :doc:`documentation on styles <docs/styles>`. + +How can I report a bug or suggest a feature? +-------------------------------------------- + +Please report bugs and feature wishes in the tracker at GitHub. + +You can also e-mail the authors, see the contact details. + +I want this support for this language! +-------------------------------------- + +Instead of waiting for others to include language support, why not write it +yourself? All you have to know is :doc:`outlined in the docs +<docs/lexerdevelopment>`. + +Can I use Pygments for programming language processing? 
+------------------------------------------------------- + +The Pygments lexing machinery is quite powerful and can be used to build lexers for +basically all languages. However, parsing them is not possible, though some +lexers go some steps in this direction in order to e.g. highlight function names +differently. + +Also, error reporting is not the scope of Pygments. It focuses on correctly +highlighting syntactically valid documents, not finding and compensating errors. + +Who uses Pygments? +------------------ + +This is an (incomplete) list of projects and sites known to use the Pygments highlighter. + +* `Wikipedia <https://en.wikipedia.org/>`_ +* `BitBucket <https://bitbucket.org/>`_, a Mercurial and Git hosting site +* `The Sphinx documentation builder <https://sphinx-doc.org/>`_, for embedded source examples +* `rst2pdf <https://github.com/ralsina/rst2pdf>`_, a reStructuredText to PDF converter +* `Codecov <https://codecov.io/>`_, a code coverage CI service +* `Trac <https://trac.edgewall.org/>`_, the universal project management tool +* `AsciiDoc <https://www.methods.co.nz/asciidoc/>`_, a text-based documentation generator +* `ActiveState Code <https://code.activestate.com/>`_, the Python Cookbook successor +* `ViewVC <http://viewvc.org/>`_, a web-based version control repository browser +* `BzrFruit <https://repo.or.cz/w/bzrfruit.git>`_, a Bazaar branch viewer +* `QBzr <http://bazaar-vcs.org/QBzr>`_, a cross-platform Qt-based GUI front end for Bazaar +* `Review Board <https://www.reviewboard.org/>`_, a collaborative code reviewing tool +* `Diamanda <https://code.google.com/archive/p/diamanda/>`_, a Django powered wiki system with support for Pygments +* `Progopedia <http://progopedia.ru/>`_ (`English <http://progopedia.com/>`_), + an encyclopedia of programming languages +* `Bruce <https://sites.google.com/site/r1chardj0n3s/bruce>`_, a reStructuredText presentation tool +* `PIDA <http://pida.co.uk/>`_, a universal IDE written in Python +* `BPython 
<https://bpython-interpreter.org/>`_, a curses-based intelligent Python shell +* `PuDB <https://pypi.org/project/pudb/>`_, a console Python debugger +* `XWiki <https://www.xwiki.org/>`_, a wiki-based development framework in Java, using Jython +* `roux <http://ananelson.com/software/roux/>`_, a script for running R scripts + and creating beautiful output including graphs +* `hurl <http://hurl.it/>`_, a web service for making HTTP requests +* `wxHTMLPygmentizer <http://colinbarnette.net/projects/wxHTMLPygmentizer>`_ is + a GUI utility, used to make code-colorization easier +* `Postmarkup <https://code.google.com/archive/p/postmarkup/>`_, a BBCode to XHTML generator +* `WpPygments <http://blog.mirotin.net/?page_id=49>`_, and `WPygments + <https://github.com/capynet/WPygments>`_, highlighter plugins for WordPress +* `Siafoo <http://siafoo.net>`_, a tool for sharing and storing useful code and programming experience +* `D source <http://www.dsource.org/>`_, a community for the D programming language +* `dpaste.com <http://dpaste.com/>`_, another Django pastebin +* `Django snippets <https://djangosnippets.org/>`_, a pastebin for Django code +* `Fayaa <http://www.fayaa.com/code/>`_, a Chinese pastebin +* `Incollo.com <http://incollo.com>`_, a free collaborative debugging tool +* `PasteBox <https://p.boxnet.eu/>`_, a pastebin focused on privacy +* `hilite.me <http://www.hilite.me/>`_, a site to highlight code snippets +* `patx.me <http://patx.me/paste>`_, a pastebin +* `Fluidic <https://github.com/richsmith/fluidic>`_, an experiment in + integrating shells with a GUI +* `pygments.rb <https://github.com/pygments/pygments.rb>`_, a pygments wrapper for Ruby +* `Clygments <https://github.com/bfontaine/clygments>`_, a pygments wrapper for + Clojure +* `PHPygments <https://github.com/capynet/PHPygments>`_, a pygments wrapper for PHP +* `Spyder <https://www.spyder-ide.org/>`_, the Scientific Python Development + Environment, uses pygments for the multi-language syntax 
highlighting in its + `editor <https://docs.spyder-ide.org/editor.html>`_. +* `snippet.host <https://snippet.host>`_, minimal text and code snippet hosting +* `sourcehut <https://sourcehut.org>`_, the hacker's forge + +If you have a project or web site using Pygments, `open an issue or PR +<https://github.com/pygments/pygments>`_ and we'll add a line here. diff --git a/doc/index.rst b/doc/index.rst new file mode 100644 index 0000000..dbd1596 --- /dev/null +++ b/doc/index.rst @@ -0,0 +1,47 @@ +Welcome! +======== + +This is the home of Pygments. It is a generic syntax highlighter suitable for +use in code hosting, forums, wikis or other applications that need to prettify +source code. Highlights are: + +* a wide range of |language_count| languages and other text formats is supported +* special attention is paid to details that increase highlighting quality +* support for new languages and formats is added easily; most languages use a + simple regex-based lexing mechanism +* a number of output formats is available, among them HTML, RTF, LaTeX and ANSI + sequences +* it is usable as a command-line tool and as a library + +Read more in the :doc:`FAQ list <faq>` or the :doc:`documentation <docs/index>`, +or `download the latest release <https://pypi.python.org/pypi/Pygments>`_. + +.. _contribute: + +Contribute +---------- + +Like every open-source project, we are always looking for volunteers to help us +with programming. Python knowledge is required, but don't fear: Python is a very +clear and easy to learn language. + +Development takes place on `GitHub <https://github.com/pygments/pygments>`_. + +If you found a bug, just open a ticket in the GitHub tracker. Be sure to log +in to be notified when the issue is fixed -- development is not fast-paced as +the library is quite stable. You can also send an e-mail to the developers, see +below. 
+ +The authors +----------- + +Pygments is maintained by **Georg Brandl**, e-mail address *georg*\ *@*\ *python.org*, **Matthäus Chajdas** and **Jean Abou-Samra**. + +Many lexers and fixes have been contributed by **Armin Ronacher**, the rest of +the `Pocoo <https://dev.pocoo.org/>`_ team and **Tim Hatch**. + +.. toctree:: + :maxdepth: 1 + :hidden: + + docs/index diff --git a/doc/languages.rst b/doc/languages.rst new file mode 100644 index 0000000..8136442 --- /dev/null +++ b/doc/languages.rst @@ -0,0 +1,18 @@ +:orphan: + +Languages +========= + +.. pygmentsdoc:: lexers_overview + +... that's all? +--------------- + +Well, why not write your own? Contributing to Pygments is easy and fun. Take a +look at the :doc:`docs on lexer development <docs/lexerdevelopment>`. Pull +requests are welcome on `GitHub <https://github.com/pygments/pygments>`_. + +.. note:: + + The languages listed here are supported in the development version. The + latest release may lack a few of them. diff --git a/doc/make.bat b/doc/make.bat new file mode 100644 index 0000000..8803c98 --- /dev/null +++ b/doc/make.bat @@ -0,0 +1,190 @@ +@ECHO OFF + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set BUILDDIR=_build +set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . +set I18NSPHINXOPTS=%SPHINXOPTS% . +if NOT "%PAPER%" == "" ( + set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% + set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% +) + +if "%1" == "" goto help + +if "%1" == "help" ( + :help + echo.Please use `make ^<target^>` where ^<target^> is one of + echo. html to make standalone HTML files + echo. dirhtml to make HTML files named index.html in directories + echo. singlehtml to make a single large HTML file + echo. pickle to make pickle files + echo. json to make JSON files + echo. htmlhelp to make HTML files and a HTML help project + echo. qthelp to make HTML files and a qthelp project + echo. 
devhelp to make HTML files and a Devhelp project + echo. epub to make an epub + echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter + echo. text to make text files + echo. man to make manual pages + echo. texinfo to make Texinfo files + echo. gettext to make PO message catalogs + echo. changes to make an overview over all changed/added/deprecated items + echo. linkcheck to check all external links for integrity + echo. doctest to run all doctests embedded in the documentation if enabled + goto end +) + +if "%1" == "clean" ( + for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i + del /q /s %BUILDDIR%\* + goto end +) + +if "%1" == "html" ( + %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/html. + goto end +) + +if "%1" == "dirhtml" ( + %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. + goto end +) + +if "%1" == "singlehtml" ( + %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. + goto end +) + +if "%1" == "pickle" ( + %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the pickle files. + goto end +) + +if "%1" == "json" ( + %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the JSON files. + goto end +) + +if "%1" == "htmlhelp" ( + %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run HTML Help Workshop with the ^ +.hhp project file in %BUILDDIR%/htmlhelp. 
+ goto end +) + +if "%1" == "qthelp" ( + %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run "qcollectiongenerator" with the ^ +.qhcp project file in %BUILDDIR%/qthelp, like this: + echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Pygments.qhcp + echo.To view the help file: + echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Pygments.ghc + goto end +) + +if "%1" == "devhelp" ( + %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. + goto end +) + +if "%1" == "epub" ( + %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The epub file is in %BUILDDIR%/epub. + goto end +) + +if "%1" == "latex" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "text" ( + %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The text files are in %BUILDDIR%/text. + goto end +) + +if "%1" == "man" ( + %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The manual pages are in %BUILDDIR%/man. + goto end +) + +if "%1" == "texinfo" ( + %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. + goto end +) + +if "%1" == "gettext" ( + %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The message catalogs are in %BUILDDIR%/locale. + goto end +) + +if "%1" == "changes" ( + %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes + if errorlevel 1 exit /b 1 + echo. + echo.The overview file is in %BUILDDIR%/changes. 
+ goto end +) + +if "%1" == "linkcheck" ( + %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck + if errorlevel 1 exit /b 1 + echo. + echo.Link check complete; look for any errors in the above output ^ +or in %BUILDDIR%/linkcheck/output.txt. + goto end +) + +if "%1" == "doctest" ( + %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest + if errorlevel 1 exit /b 1 + echo. + echo.Testing of doctests in the sources finished, look at the ^ +results in %BUILDDIR%/doctest/output.txt. + goto end +) + +:end diff --git a/doc/pygmentize.1 b/doc/pygmentize.1 new file mode 100644 index 0000000..5ac8fe6 --- /dev/null +++ b/doc/pygmentize.1 @@ -0,0 +1,112 @@ +.TH PYGMENTIZE 1 "January 20, 2021" + +.SH NAME +pygmentize \- highlights the input file + +.SH SYNOPSIS +.B \fBpygmentize\fP +.RI [-l\ \fI<lexer>\fP\ |\ -g]\ [-F\ \fI<filter>\fP[:\fI<options>\fP]]\ [-f\ \fI<formatter>\fP] +.RI [-O\ \fI<options>\fP]\ [-P\ \fI<option=value>\fP]\ [-o\ \fI<outfile>\fP]\ [\fI<infile>\fP] +.br +.B \fBpygmentize\fP +.RI -S\ \fI<style>\fP\ -f\ \fI<formatter>\fP\ [-a\ \fI<arg>\fP]\ [-O\ \fI<options>\fP]\ [-P\ \fI<option=value>\fP] +.br +.B \fBpygmentize\fP +.RI -L\ [\fI<which>\fP\ ...] +.br +.B \fBpygmentize\fP +.RI -N\ \fI<filename>\fP +.br +.B \fBpygmentize\fP +.RI -C +.br +.B \fBpygmentize\fP +.RI -H\ \fI<type>\fP\ \fI<name>\fP +.br +.B \fBpygmentize\fP +.RI -h\ |\ -V + +.SH DESCRIPTION +Pygments is a generic syntax highlighter for general use in all kinds +of software such as forum systems, wikis or other applications that need to +prettify source code. +.PP +Its highlights are: + * a wide range of common languages and markup formats is supported + * special attention is paid to details, increasing quality by a fair amount + * support for new languages and formats is added easily + * a number of output formats, presently HTML, LaTeX and ANSI sequences + * it is usable as a command-line tool and as a library + * ... and it highlights even Brainfuck! 
+.PP +\fBpygmentize\fP is a command that uses Pygments to highlight the input file and +write the result to \fI<outfile>\fP. If no \fI<infile>\fP is given, stdin is used. +.SH OPTIONS +A summary of options is included below. +.TP +.B \-l \fI<lexer>\fP +Set the lexer name. If not given, the lexer is guessed from the extension of the +input file name (this obviously doesn't work if the input is stdin). +.TP +.B \-g +Attempt to guess the lexer from the file contents, or pass through as plain text +if this fails (this option works for highlighting standard input). +.TP +.B \-F \fI<filter>\fP[:\fI<options>\fP] +Add a filter to the token stream. You can give options in the same way as for +-O after a colon (note: there must not be spaces around the colon). +This option can be given multiple times. +.TP +.B \-f \fI<formatter>\fP +Set the formatter name. If not given, it will be guessed from the extension of +the output file name. If no output file is given, the terminal formatter will be +used by default. +.TP +.B \-o \fI<outfile>\fP +Set output file. If not given, stdout is used. +.TP +.B \-O \fI<options>\fP +With this option, you can give the lexer and formatter a comma-separated list of +options, e.g. "-O bg=light,python=cool". Which options are valid for which +lexers and formatters can be found in the documentation. +This option can be given multiple times. +.TP +.B \-P \fI<option=value>\fP +This option adds lexer and formatter options like the -O option, but +you can only give one option per -P. That way, the option value may contain +commas and equals signs, which it can't with -O. +.TP +.B \-S \fI<style>\fP +Print out style definitions for style \fI<style>\fP and for formatter \fI<formatter>\fP. +The meaning of the argument given by +.B \-a \fI<arg>\fP +is formatter dependent and can be found in the documentation. +.TP +.B \-L [\fI<which>\fP ...] +List lexers, formatters, styles or filters. Set \fI<which>\fP to the thing you want +to list (e.g. 
"styles"), or omit it to list everything. +.TP +.B \-N \fI<filename>\fP +Guess and print out a lexer name based solely on the given filename. Does not +take input or highlight anything. If no specific lexer can be found, "text" +is printed. +.TP +.B \-C +Like \fI-N\fP, but guess a lexer based on content read from standard input. +.TP +.B \-H \fI<type>\fP \fI<name>\fP +Print detailed help for the object \fI<name>\fP of type \fI<type>\fP, where \fI<type>\fP is one +of "lexer", "formatter" or "filter". +.TP +.B \-h +Show help screen. +.TP +.B \-V +Show version of the Pygments package. +.SH SEE ALSO +/usr/share/doc/python-pygments/index.html +.SH AUTHOR +pygmentize was written by Georg Brandl <g.brandl@gmx.net>. +.PP +This manual page was written by Piotr Ozarowski <ozarow@gmail.com>, +for the Debian project (but may be used by others). diff --git a/doc/pyodide/Dockerfile b/doc/pyodide/Dockerfile new file mode 100644 index 0000000..969651c --- /dev/null +++ b/doc/pyodide/Dockerfile @@ -0,0 +1,20 @@ +# Dockerfile for building Pyodide with a Pygments version from the current checkout. +# For an example of how to use this image, see the `pyodide` target in the documentation's Makefile. +FROM ghcr.io/pyodide/pyodide:0.20.0 AS build-stage + +WORKDIR pyodide + +# Copy new meta with path to local Pygments instead of pypi url. +COPY doc/pyodide/meta.yaml packages/Pygments/ + +COPY . /pygments + +# Add Pygments to the Pyodide build. +ENV PYODIDE_PACKAGES=Pygments + +# Build Pyodide. 
+RUN make + +FROM scratch AS export-stage + +COPY --from=build-stage /src/pyodide/build / diff --git a/doc/pyodide/meta.yaml b/doc/pyodide/meta.yaml new file mode 100644 index 0000000..d58e1d5 --- /dev/null +++ b/doc/pyodide/meta.yaml @@ -0,0 +1,8 @@ +package: + name: Pygments + version: '2.99' +source: + path: /pygments +test: + imports: + - pygments diff --git a/doc/styles.rst b/doc/styles.rst new file mode 100644 index 0000000..a1bb019 --- /dev/null +++ b/doc/styles.rst @@ -0,0 +1,5 @@ +:orphan: + +This file is overridden by _templates/styles.html and just exists to allow the +Styles gallery to be reliably linked from the documentation +(since its location varies between `make html` and `make dirhtml`). |