author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 11:33:32 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 11:33:32 +0000 |
commit | 1f403ad2197fc7442409f434ee574f3e6b46fb73 (patch) | |
tree | 0299c6dd11d5edfa918a29b6456bc1875f1d288c /tests/test_html_lexer.py | |
parent | Initial commit. (diff) | |
Adding upstream version 2.14.0+dfsg. (upstream/2.14.0+dfsg, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'tests/test_html_lexer.py')
-rw-r--r-- | tests/test_html_lexer.py | 131 |
1 file changed, 131 insertions, 0 deletions
diff --git a/tests/test_html_lexer.py b/tests/test_html_lexer.py
new file mode 100644
index 0000000..fe99149
--- /dev/null
+++ b/tests/test_html_lexer.py
@@ -0,0 +1,131 @@
+"""
+    HTML Lexer Tests
+    ~~~~~~~~~~~~~~~~
+
+    :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+
+import time
+
+import pytest
+
+from pygments.lexers.html import HtmlLexer
+from pygments.token import Token
+
+MAX_HL_TIME = 10
+
+
+@pytest.fixture(scope='module')
+def lexer_html():
+    yield HtmlLexer()
+
+
+def test_happy_javascript_fragment(lexer_html):
+    """valid, even long Javascript fragments should still get parsed ok"""
+
+    fragment = "<script type=\"text/javascript\">"+"alert(\"hi\");"*2000+"</script>"
+    start_time = time.time()
+    tokens = list(lexer_html.get_tokens(fragment))
+    assert all(x[1] != Token.Error for x in tokens)
+    assert time.time() - start_time < MAX_HL_TIME, \
+        'The HTML lexer might have an expensive happy-path script case'
+
+
+def test_happy_css_fragment(lexer_html):
+    """valid, even long CSS fragments should still get parsed ok"""
+
+    fragment = "<style>"+".ui-helper-hidden{display:none}"*2000+"</style>"
+    start_time = time.time()
+    tokens = list(lexer_html.get_tokens(fragment))
+    assert all(x[1] != Token.Error for x in tokens)
+    assert time.time() - start_time < MAX_HL_TIME, \
+        'The HTML lexer might have an expensive happy-path style case'
+
+
+def test_long_unclosed_javascript_fragment(lexer_html):
+    """unclosed, long Javascript fragments should parse quickly"""
+
+    reps = 2000
+    fragment = "<script type=\"text/javascript\">"+"alert(\"hi\");"*reps
+    start_time = time.time()
+    tokens = list(lexer_html.get_tokens(fragment))
+    assert time.time() - start_time < MAX_HL_TIME, \
+        'The HTML lexer might have an expensive error script case'
+    tokens_intro = [
+        (Token.Punctuation, '<'),
+        (Token.Name.Tag, 'script'),
+        (Token.Text, ' '),
+        (Token.Name.Attribute, 'type'),
+        (Token.Operator, '='),
+        (Token.Literal.String, '"text/javascript"'),
+        (Token.Punctuation, '>'),
+    ]
+    tokens_body = [
+        (Token.Name.Other, 'alert'),
+        (Token.Punctuation, '('),
+        (Token.Literal.String.Double, '"hi"'),
+        (Token.Punctuation, ')'),
+        (Token.Punctuation, ';'),
+    ]
+
+    # make sure we get the right opening tokens
+    assert tokens[:len(tokens_intro)] == tokens_intro
+    # and make sure we get the right body tokens even though the script is
+    # unclosed
+    assert tokens[len(tokens_intro):-1] == tokens_body * reps
+    # and of course, the newline we get for free from get_tokens
+    assert tokens[-1] == (Token.Text.Whitespace, "\n")
+
+
+def test_long_unclosed_css_fragment(lexer_html):
+    """unclosed, long CSS fragments should parse quickly"""
+
+    reps = 2000
+    fragment = "<style>"+".ui-helper-hidden{display:none}"*reps
+    start_time = time.time()
+    tokens = list(lexer_html.get_tokens(fragment))
+    assert time.time() - start_time < MAX_HL_TIME, \
+        'The HTML lexer might have an expensive error style case'
+
+    tokens_intro = [
+        (Token.Punctuation, '<'),
+        (Token.Name.Tag, 'style'),
+        (Token.Punctuation, '>'),
+    ]
+    tokens_body = [
+        (Token.Punctuation, '.'),
+        (Token.Name.Class, 'ui-helper-hidden'),
+        (Token.Punctuation, '{'),
+        (Token.Keyword, 'display'),
+        (Token.Punctuation, ':'),
+        (Token.Keyword.Constant, 'none'),
+        (Token.Punctuation, '}'),
+    ]
+
+    # make sure we get the right opening tokens
+    assert tokens[:len(tokens_intro)] == tokens_intro
+    # and make sure we get the right body tokens even though the style block is
+    # unclosed
+    assert tokens[len(tokens_intro):-1] == tokens_body * reps
+    # and of course, the newline we get for free from get_tokens
+    assert tokens[-1] == (Token.Text.Whitespace, "\n")
+
+
+def test_unclosed_fragment_with_newline_recovery(lexer_html):
+    """unclosed Javascript fragments should recover on the next line"""
+
+    fragment = "<script type=\"text/javascript\">"+"alert(\"hi\");"*20+"\n<div>hi</div>"
+    tokens = list(lexer_html.get_tokens(fragment))
+    recovery_tokens = [
+        (Token.Punctuation, '<'),
+        (Token.Name.Tag, 'div'),
+        (Token.Punctuation, '>'),
+        (Token.Text, 'hi'),
+        (Token.Punctuation, '<'),
+        (Token.Punctuation, '/'),
+        (Token.Name.Tag, 'div'),
+        (Token.Punctuation, '>'),
+        (Token.Text, '\n'),
+    ]
+    assert tokens[-1*len(recovery_tokens):] == recovery_tokens
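For readers unfamiliar with the API these tests exercise, the sketch below runs the same `get_tokens()` flow on a much shorter, well-formed fragment. It is not part of the patch above; it only assumes the packaged Pygments (2.14.0) is importable, and the fragment length is arbitrary.

```python
# Minimal sketch (not part of the patch): exercise HtmlLexer the same way the
# tests above do, on a short, well-formed fragment.
from pygments.lexers.html import HtmlLexer
from pygments.token import Token

lexer = HtmlLexer()
fragment = '<script type="text/javascript">' + 'alert("hi");' * 5 + '</script>'

# get_tokens() yields (token_type, value) pairs and adds a trailing newline token.
tokens = list(lexer.get_tokens(fragment))

# Mirrors the happy-path assertions: no Error tokens anywhere in the stream.
assert all(ttype != Token.Error for ttype, _ in tokens)

# The first seven tokens are the opening <script ...> tag, matching the
# tokens_intro sequence pinned down in test_long_unclosed_javascript_fragment.
print(tokens[:7])
```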