author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-04 11:33:32 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-04 11:33:32 +0000
commit     1f403ad2197fc7442409f434ee574f3e6b46fb73 (patch)
tree       0299c6dd11d5edfa918a29b6456bc1875f1d288c /tests/test_html_lexer.py
parent     Initial commit. (diff)
Adding upstream version 2.14.0+dfsg.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'tests/test_html_lexer.py')
-rw-r--r--  tests/test_html_lexer.py | 131
1 file changed, 131 insertions, 0 deletions
diff --git a/tests/test_html_lexer.py b/tests/test_html_lexer.py
new file mode 100644
index 0000000..fe99149
--- /dev/null
+++ b/tests/test_html_lexer.py
@@ -0,0 +1,131 @@
+"""
+    HTML Lexer Tests
+    ~~~~~~~~~~~~~~~~
+
+    :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+
+import time
+
+import pytest
+
+from pygments.lexers.html import HtmlLexer
+from pygments.token import Token
+
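+# Upper bound, in seconds, on how long a single highlighting run may take
+# before the timing assertions below fail.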
+MAX_HL_TIME = 10
+
+
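+# One HtmlLexer instance shared by every test in this module (scope='module').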
+@pytest.fixture(scope='module')
+def lexer_html():
+    yield HtmlLexer()
+
+
+def test_happy_javascript_fragment(lexer_html):
+ """valid, even long Javascript fragments should still get parsed ok"""
+
+ fragment = "<script type=\"text/javascript\">"+"alert(\"hi\");"*2000+"</script>"
+ start_time = time.time()
+ tokens = list(lexer_html.get_tokens(fragment))
+ assert all(x[1] != Token.Error for x in tokens)
+ assert time.time() - start_time < MAX_HL_TIME, \
+ 'The HTML lexer might have an expensive happy-path script case'
+
+
+def test_happy_css_fragment(lexer_html):
+ """valid, even long CSS fragments should still get parsed ok"""
+
+ fragment = "<style>"+".ui-helper-hidden{display:none}"*2000+"</style>"
+ start_time = time.time()
+ tokens = list(lexer_html.get_tokens(fragment))
+ assert all(x[1] != Token.Error for x in tokens)
+ assert time.time() - start_time < MAX_HL_TIME, \
+ 'The HTML lexer might have an expensive happy-path style case'
+
+
+def test_long_unclosed_javascript_fragment(lexer_html):
+ """unclosed, long Javascript fragments should parse quickly"""
+
+ reps = 2000
+ fragment = "<script type=\"text/javascript\">"+"alert(\"hi\");"*reps
+ start_time = time.time()
+ tokens = list(lexer_html.get_tokens(fragment))
+ assert time.time() - start_time < MAX_HL_TIME, \
+ 'The HTML lexer might have an expensive error script case'
+ tokens_intro = [
+ (Token.Punctuation, '<'),
+ (Token.Name.Tag, 'script'),
+ (Token.Text, ' '),
+ (Token.Name.Attribute, 'type'),
+ (Token.Operator, '='),
+ (Token.Literal.String, '"text/javascript"'),
+ (Token.Punctuation, '>'),
+ ]
+ tokens_body = [
+ (Token.Name.Other, 'alert'),
+ (Token.Punctuation, '('),
+ (Token.Literal.String.Double, '"hi"'),
+ (Token.Punctuation, ')'),
+ (Token.Punctuation, ';'),
+ ]
+
+ # make sure we get the right opening tokens
+ assert tokens[:len(tokens_intro)] == tokens_intro
+ # and make sure we get the right body tokens even though the script is
+ # unclosed
+ assert tokens[len(tokens_intro):-1] == tokens_body * reps
+ # and of course, the newline we get for free from get_tokens
+ assert tokens[-1] == (Token.Text.Whitespace, "\n")
+
+
+def test_long_unclosed_css_fragment(lexer_html):
+ """unclosed, long CSS fragments should parse quickly"""
+
+ reps = 2000
+ fragment = "<style>"+".ui-helper-hidden{display:none}"*reps
+ start_time = time.time()
+ tokens = list(lexer_html.get_tokens(fragment))
+ assert time.time() - start_time < MAX_HL_TIME, \
+ 'The HTML lexer might have an expensive error style case'
+
+ tokens_intro = [
+ (Token.Punctuation, '<'),
+ (Token.Name.Tag, 'style'),
+ (Token.Punctuation, '>'),
+ ]
+ tokens_body = [
+ (Token.Punctuation, '.'),
+ (Token.Name.Class, 'ui-helper-hidden'),
+ (Token.Punctuation, '{'),
+ (Token.Keyword, 'display'),
+ (Token.Punctuation, ':'),
+ (Token.Keyword.Constant, 'none'),
+ (Token.Punctuation, '}'),
+ ]
+
+ # make sure we get the right opening tokens
+ assert tokens[:len(tokens_intro)] == tokens_intro
+ # and make sure we get the right body tokens even though the style block is
+ # unclosed
+ assert tokens[len(tokens_intro):-1] == tokens_body * reps
+ # and of course, the newline we get for free from get_tokens
+ assert tokens[-1] == (Token.Text.Whitespace, "\n")
+
+
+def test_unclosed_fragment_with_newline_recovery(lexer_html):
+ """unclosed Javascript fragments should recover on the next line"""
+
+ fragment = "<script type=\"text/javascript\">"+"alert(\"hi\");"*20+"\n<div>hi</div>"
+ tokens = list(lexer_html.get_tokens(fragment))
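+    # once the newline is reached, the lexer should recover and lex the
+    # trailing <div>hi</div> markup as ordinary HTML again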
+    recovery_tokens = [
+        (Token.Punctuation, '<'),
+        (Token.Name.Tag, 'div'),
+        (Token.Punctuation, '>'),
+        (Token.Text, 'hi'),
+        (Token.Punctuation, '<'),
+        (Token.Punctuation, '/'),
+        (Token.Name.Tag, 'div'),
+        (Token.Punctuation, '>'),
+        (Token.Text, '\n'),
+    ]
+    assert tokens[-1*len(recovery_tokens):] == recovery_tokens