diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 11:33:32 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 11:33:32 +0000 |
commit | 1f403ad2197fc7442409f434ee574f3e6b46fb73 (patch) | |
tree | 0299c6dd11d5edfa918a29b6456bc1875f1d288c /tests/test_words.py | |
parent | Initial commit. (diff) | |
download | pygments-1f403ad2197fc7442409f434ee574f3e6b46fb73.tar.xz pygments-1f403ad2197fc7442409f434ee574f3e6b46fb73.zip |
Adding upstream version 2.14.0+dfsg. (refs: upstream/2.14.0+dfsg, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'tests/test_words.py')
-rw-r--r-- | tests/test_words.py | 366 |
1 files changed, 366 insertions, 0 deletions
"""
    Pygments tests for words()
    ~~~~~~~~~~~~~~~~~~~~~~~~~~

    :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import warnings

from pygments.lexer import RegexLexer, words
from pygments.token import Token


class MyLexer(RegexLexer):
    """Lexer whose rules are built with ``words()``.

    The rules cover plain words, words made of regex metacharacters, and
    words combined with literal and regex ``prefix``/``suffix`` arguments.
    Every ``words()`` rule yields ``Token.Name``; a newline yields
    ``Token.Text``.
    """

    tokens = {
        "root": [
            (
                words(
                    [
                        "a-word",
                        "another-word",
                        # Test proper escaping of a few things that can occur
                        # in regular expressions. They are all matched literally.
                        "[",
                        "]",
                        "^",
                        "\\",
                        "(",
                        ")",
                        "(?:",
                        "-",
                        "|",
                        r"\w",
                    ]
                ),
                Token.Name,
            ),
            (words(["space-allowed-before-this"], prefix=" ?"), Token.Name),
            (words(["space-allowed-after-this"], suffix=" ?"), Token.Name),
            (
                words(["space-required-before-and-after-this"], prefix=" ", suffix=" "),
                Token.Name,
            ),
            # prefix and suffix can be regexes.
            (words(["one-whitespace-allowed-before-this"], prefix=r"\s?"), Token.Name),
            (words(["all-whitespace-allowed-after-this"], suffix=r"\s*"), Token.Name),
            (
                words(
                    ["all-whitespace-allowed-one-required-after-this"], suffix=r"\s+"
                ),
                Token.Name,
            ),
            (r"\n", Token.Text),
        ],
    }


def test_basic():
    """Listed words lex as Name; unlisted text falls back to Error tokens.

    "-" is itself in the word list, so the dashes inside the unlisted word
    come out as Name while every other character is an Error.
    """
    s = "a-word this-is-not-in-the-list another-word"
    assert list(MyLexer().get_tokens(s)) == [
        (Token.Name, "a-word"),
        (Token.Error, " "),
        (Token.Error, "t"),
        (Token.Error, "h"),
        (Token.Error, "i"),
        (Token.Error, "s"),
        (Token.Name, "-"),
        (Token.Error, "i"),
        (Token.Error, "s"),
        (Token.Name, "-"),
        (Token.Error, "n"),
        (Token.Error, "o"),
        (Token.Error, "t"),
        (Token.Name, "-"),
        (Token.Error, "i"),
        (Token.Error, "n"),
        (Token.Name, "-"),
        (Token.Error, "t"),
        (Token.Error, "h"),
        (Token.Error, "e"),
        (Token.Name, "-"),
        (Token.Error, "l"),
        (Token.Error, "i"),
        (Token.Error, "s"),
        (Token.Error, "t"),
        (Token.Error, " "),
        (Token.Name, "another-word"),
        (Token.Text, "\n"),
    ]


def test_special_characters():
    """Words consisting of regex metacharacters are matched literally."""
    s = """
[
]
^
\\
(
)
(?:
-
|
\\w
"""
    assert list(MyLexer().get_tokens(s)) == [
        (Token.Name, "["),
        (Token.Text, "\n"),
        (Token.Name, "]"),
        (Token.Text, "\n"),
        (Token.Name, "^"),
        (Token.Text, "\n"),
        (Token.Name, "\\"),
        (Token.Text, "\n"),
        (Token.Name, "("),
        (Token.Text, "\n"),
        (Token.Name, ")"),
        (Token.Text, "\n"),
        (Token.Name, "(?:"),
        (Token.Text, "\n"),
        (Token.Name, "-"),
        (Token.Text, "\n"),
        (Token.Name, "|"),
        (Token.Text, "\n"),
        (Token.Name, "\\w"),
        (Token.Text, "\n"),
    ]


def test_affixes():
    """Literal ``prefix``/``suffix`` text is part of the matched token.

    Optional spaces are included when present; a word whose required
    leading or trailing space is missing does not match and degrades to
    per-character tokens.
    """
    # Leading spaces on some of the lines below are significant.
    s = """
space-allowed-after-this |
space-allowed-before-this
space-allowed-after-this
 space-required-before-and-after-this |
space-required-before-and-after-this |
 space-required-before-and-after-this<= no space after
"""
    assert list(MyLexer().get_tokens(s)) == [
        (Token.Name, "space-allowed-after-this "),
        (Token.Name, "|"),
        (Token.Text, "\n"),
        (Token.Name, "space-allowed-before-this"),
        (Token.Text, "\n"),
        (Token.Name, "space-allowed-after-this"),
        (Token.Text, "\n"),
        (Token.Name, " space-required-before-and-after-this "),
        (Token.Name, "|"),
        (Token.Text, "\n"),
        (Token.Error, "s"),
        (Token.Error, "p"),
        (Token.Error, "a"),
        (Token.Error, "c"),
        (Token.Error, "e"),
        (Token.Name, "-"),
        (Token.Error, "r"),
        (Token.Error, "e"),
        (Token.Error, "q"),
        (Token.Error, "u"),
        (Token.Error, "i"),
        (Token.Error, "r"),
        (Token.Error, "e"),
        (Token.Error, "d"),
        (Token.Name, "-"),
        (Token.Error, "b"),
        (Token.Error, "e"),
        (Token.Error, "f"),
        (Token.Error, "o"),
        (Token.Error, "r"),
        (Token.Error, "e"),
        (Token.Name, "-"),
        (Token.Error, "a"),
        (Token.Error, "n"),
        (Token.Error, "d"),
        (Token.Name, "-"),
        (Token.Error, "a"),
        (Token.Error, "f"),
        (Token.Error, "t"),
        (Token.Error, "e"),
        (Token.Error, "r"),
        (Token.Name, "-"),
        (Token.Error, "t"),
        (Token.Error, "h"),
        (Token.Error, "i"),
        (Token.Error, "s"),
        (Token.Error, " "),
        (Token.Name, "|"),
        (Token.Text, "\n"),
        (Token.Error, " "),
        (Token.Error, "s"),
        (Token.Error, "p"),
        (Token.Error, "a"),
        (Token.Error, "c"),
        (Token.Error, "e"),
        (Token.Name, "-"),
        (Token.Error, "r"),
        (Token.Error, "e"),
        (Token.Error, "q"),
        (Token.Error, "u"),
        (Token.Error, "i"),
        (Token.Error, "r"),
        (Token.Error, "e"),
        (Token.Error, "d"),
        (Token.Name, "-"),
        (Token.Error, "b"),
        (Token.Error, "e"),
        (Token.Error, "f"),
        (Token.Error, "o"),
        (Token.Error, "r"),
        (Token.Error, "e"),
        (Token.Name, "-"),
        (Token.Error, "a"),
        (Token.Error, "n"),
        (Token.Error, "d"),
        (Token.Name, "-"),
        (Token.Error, "a"),
        (Token.Error, "f"),
        (Token.Error, "t"),
        (Token.Error, "e"),
        (Token.Error, "r"),
        (Token.Name, "-"),
        (Token.Error, "t"),
        (Token.Error, "h"),
        (Token.Error, "i"),
        (Token.Error, "s"),
        (Token.Error, "<"),
        (Token.Error, "="),
        (Token.Error, " "),
        (Token.Error, "n"),
        (Token.Error, "o"),
        (Token.Error, " "),
        (Token.Error, "s"),
        (Token.Error, "p"),
        (Token.Error, "a"),
        (Token.Error, "c"),
        (Token.Error, "e"),
        (Token.Error, " "),
        (Token.Error, "a"),
        (Token.Error, "f"),
        (Token.Error, "t"),
        (Token.Error, "e"),
        (Token.Error, "r"),
        (Token.Text, "\n"),
    ]


def test_affixes_regexes():
    """``prefix``/``suffix`` may be regular expressions, not only literals.

    Whitespace matched by the affix regex becomes part of the Name token;
    when a required affix (``\\s+``) cannot match, the word is not
    recognised at all.
    """
    # The leading space on the first data line is significant. The "\n" and
    # "\t" on the third data line are escape sequences, so that line really
    # contains an embedded newline and tab.
    s = """
 one-whitespace-allowed-before-this
NOT-WHITESPACEone-whitespace-allowed-before-this
all-whitespace-allowed-after-this \n \t
all-whitespace-allowed-after-thisNOT-WHITESPACE
all-whitespace-allowed-one-required-after-thisNOT-WHITESPACE"""
    assert list(MyLexer().get_tokens(s)) == [
        (Token.Name, " one-whitespace-allowed-before-this"),
        (Token.Text, "\n"),
        (Token.Error, "N"),
        (Token.Error, "O"),
        (Token.Error, "T"),
        (Token.Name, "-"),
        (Token.Error, "W"),
        (Token.Error, "H"),
        (Token.Error, "I"),
        (Token.Error, "T"),
        (Token.Error, "E"),
        (Token.Error, "S"),
        (Token.Error, "P"),
        (Token.Error, "A"),
        (Token.Error, "C"),
        (Token.Error, "E"),
        (Token.Name, "one-whitespace-allowed-before-this"),
        (Token.Text, "\n"),
        (Token.Name, "all-whitespace-allowed-after-this \n \t\n"),
        (Token.Name, "all-whitespace-allowed-after-this"),
        (Token.Error, "N"),
        (Token.Error, "O"),
        (Token.Error, "T"),
        (Token.Name, "-"),
        (Token.Error, "W"),
        (Token.Error, "H"),
        (Token.Error, "I"),
        (Token.Error, "T"),
        (Token.Error, "E"),
        (Token.Error, "S"),
        (Token.Error, "P"),
        (Token.Error, "A"),
        (Token.Error, "C"),
        (Token.Error, "E"),
        (Token.Text, "\n"),
        (Token.Error, "a"),
        (Token.Error, "l"),
        (Token.Error, "l"),
        (Token.Name, "-"),
        (Token.Error, "w"),
        (Token.Error, "h"),
        (Token.Error, "i"),
        (Token.Error, "t"),
        (Token.Error, "e"),
        (Token.Error, "s"),
        (Token.Error, "p"),
        (Token.Error, "a"),
        (Token.Error, "c"),
        (Token.Error, "e"),
        (Token.Name, "-"),
        (Token.Error, "a"),
        (Token.Error, "l"),
        (Token.Error, "l"),
        (Token.Error, "o"),
        (Token.Error, "w"),
        (Token.Error, "e"),
        (Token.Error, "d"),
        (Token.Name, "-"),
        (Token.Error, "o"),
        (Token.Error, "n"),
        (Token.Error, "e"),
        (Token.Name, "-"),
        (Token.Error, "r"),
        (Token.Error, "e"),
        (Token.Error, "q"),
        (Token.Error, "u"),
        (Token.Error, "i"),
        (Token.Error, "r"),
        (Token.Error, "e"),
        (Token.Error, "d"),
        (Token.Name, "-"),
        (Token.Error, "a"),
        (Token.Error, "f"),
        (Token.Error, "t"),
        (Token.Error, "e"),
        (Token.Error, "r"),
        (Token.Name, "-"),
        (Token.Error, "t"),
        (Token.Error, "h"),
        (Token.Error, "i"),
        (Token.Error, "s"),
        (Token.Error, "N"),
        (Token.Error, "O"),
        (Token.Error, "T"),
        (Token.Name, "-"),
        (Token.Error, "W"),
        (Token.Error, "H"),
        (Token.Error, "I"),
        (Token.Error, "T"),
        (Token.Error, "E"),
        (Token.Error, "S"),
        (Token.Error, "P"),
        (Token.Error, "A"),
        (Token.Error, "C"),
        (Token.Error, "E"),
        (Token.Text, "\n"),
    ]


class MySecondLexer(RegexLexer):
    """Minimal lexer whose only rule is a ``words()`` list containing "["."""

    tokens = {
        "root": [
            (words(["[", "x"]), Token.Name),
        ],
    }


def test_bracket_escape():
    """A word list containing "[" must lex without raising a FutureWarning."""
    # This used to emit a FutureWarning. Escalate that category to an error
    # around both lexer construction and lexing, so that a regression fails
    # the test instead of passing silently.
    with warnings.catch_warnings():
        warnings.simplefilter("error", FutureWarning)
        tokens = list(MySecondLexer().get_tokens("x"))
    assert tokens == [
        (Token.Name, "x"),
        (Token.Text.Whitespace, "\n"),
    ]