author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 11:33:32 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 11:33:32 +0000 |
commit | 1f403ad2197fc7442409f434ee574f3e6b46fb73 (patch) | |
tree | 0299c6dd11d5edfa918a29b6456bc1875f1d288c /tests/test_html_lexer.py | |
parent | Initial commit. (diff) | |
Adding upstream version 2.14.0+dfsg. (upstream/2.14.0+dfsg, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'tests/test_html_lexer.py')
-rw-r--r-- | tests/test_html_lexer.py | 131 |
1 file changed, 131 insertions, 0 deletions
diff --git a/tests/test_html_lexer.py b/tests/test_html_lexer.py
new file mode 100644
index 0000000..fe99149
--- /dev/null
+++ b/tests/test_html_lexer.py
@@ -0,0 +1,131 @@
+"""
+    HTML Lexer Tests
+    ~~~~~~~~~~~~~~~~
+
+    :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+
+import time
+
+import pytest
+
+from pygments.lexers.html import HtmlLexer
+from pygments.token import Token
+
+MAX_HL_TIME = 10
+
+
+@pytest.fixture(scope='module')
+def lexer_html():
+    yield HtmlLexer()
+
+
+def test_happy_javascript_fragment(lexer_html):
+    """valid, even long Javascript fragments should still get parsed ok"""
+
+    fragment = "<script type=\"text/javascript\">"+"alert(\"hi\");"*2000+"</script>"
+    start_time = time.time()
+    tokens = list(lexer_html.get_tokens(fragment))
+    assert all(x[1] != Token.Error for x in tokens)
+    assert time.time() - start_time < MAX_HL_TIME, \
+        'The HTML lexer might have an expensive happy-path script case'
+
+
+def test_happy_css_fragment(lexer_html):
+    """valid, even long CSS fragments should still get parsed ok"""
+
+    fragment = "<style>"+".ui-helper-hidden{display:none}"*2000+"</style>"
+    start_time = time.time()
+    tokens = list(lexer_html.get_tokens(fragment))
+    assert all(x[1] != Token.Error for x in tokens)
+    assert time.time() - start_time < MAX_HL_TIME, \
+        'The HTML lexer might have an expensive happy-path style case'
+
+
+def test_long_unclosed_javascript_fragment(lexer_html):
+    """unclosed, long Javascript fragments should parse quickly"""
+
+    reps = 2000
+    fragment = "<script type=\"text/javascript\">"+"alert(\"hi\");"*reps
+    start_time = time.time()
+    tokens = list(lexer_html.get_tokens(fragment))
+    assert time.time() - start_time < MAX_HL_TIME, \
+        'The HTML lexer might have an expensive error script case'
+    tokens_intro = [
+        (Token.Punctuation, '<'),
+        (Token.Name.Tag, 'script'),
+        (Token.Text, ' '),
+        (Token.Name.Attribute, 'type'),
+        (Token.Operator, '='),
+        (Token.Literal.String, '"text/javascript"'),
+        (Token.Punctuation, '>'),
+    ]
+    tokens_body = [
+        (Token.Name.Other, 'alert'),
+        (Token.Punctuation, '('),
+        (Token.Literal.String.Double, '"hi"'),
+        (Token.Punctuation, ')'),
+        (Token.Punctuation, ';'),
+    ]
+
+    # make sure we get the right opening tokens
+    assert tokens[:len(tokens_intro)] == tokens_intro
+    # and make sure we get the right body tokens even though the script is
+    # unclosed
+    assert tokens[len(tokens_intro):-1] == tokens_body * reps
+    # and of course, the newline we get for free from get_tokens
+    assert tokens[-1] == (Token.Text.Whitespace, "\n")
+
+
+def test_long_unclosed_css_fragment(lexer_html):
+    """unclosed, long CSS fragments should parse quickly"""
+
+    reps = 2000
+    fragment = "<style>"+".ui-helper-hidden{display:none}"*reps
+    start_time = time.time()
+    tokens = list(lexer_html.get_tokens(fragment))
+    assert time.time() - start_time < MAX_HL_TIME, \
+        'The HTML lexer might have an expensive error style case'
+
+    tokens_intro = [
+        (Token.Punctuation, '<'),
+        (Token.Name.Tag, 'style'),
+        (Token.Punctuation, '>'),
+    ]
+    tokens_body = [
+        (Token.Punctuation, '.'),
+        (Token.Name.Class, 'ui-helper-hidden'),
+        (Token.Punctuation, '{'),
+        (Token.Keyword, 'display'),
+        (Token.Punctuation, ':'),
+        (Token.Keyword.Constant, 'none'),
+        (Token.Punctuation, '}'),
+    ]
+
+    # make sure we get the right opening tokens
+    assert tokens[:len(tokens_intro)] == tokens_intro
+    # and make sure we get the right body tokens even though the style block is
+    # unclosed
+    assert tokens[len(tokens_intro):-1] == tokens_body * reps
+    # and of course, the newline we get for free from get_tokens
+    assert tokens[-1] == (Token.Text.Whitespace, "\n")
+
+
+def test_unclosed_fragment_with_newline_recovery(lexer_html):
+    """unclosed Javascript fragments should recover on the next line"""
+
+    fragment = "<script type=\"text/javascript\">"+"alert(\"hi\");"*20+"\n<div>hi</div>"
+    tokens = list(lexer_html.get_tokens(fragment))
+    recovery_tokens = [
+        (Token.Punctuation, '<'),
+        (Token.Name.Tag, 'div'),
+        (Token.Punctuation, '>'),
+        (Token.Text, 'hi'),
+        (Token.Punctuation, '<'),
+        (Token.Punctuation, '/'),
+        (Token.Name.Tag, 'div'),
+        (Token.Punctuation, '>'),
+        (Token.Text, '\n'),
+    ]
+    assert tokens[-1*len(recovery_tokens):] == recovery_tokens
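For readers unfamiliar with the API these tests exercise, the sketch below runs the same `get_tokens()` flow on a much shorter, well-formed fragment. It is not part of the patch above; it only assumes the packaged Pygments (2.14.0) is importable, and the fragment length is arbitrary.

```python
# Minimal sketch (not part of the patch): exercise HtmlLexer the same way the
# tests above do, on a short, well-formed fragment.
from pygments.lexers.html import HtmlLexer
from pygments.token import Token

lexer = HtmlLexer()
fragment = '<script type="text/javascript">' + 'alert("hi");' * 5 + '</script>'

# get_tokens() yields (token_type, value) pairs and adds a trailing newline token.
tokens = list(lexer.get_tokens(fragment))

# Mirrors the happy-path assertions: no Error tokens anywhere in the stream.
assert all(ttype != Token.Error for ttype, _ in tokens)

# The first seven tokens are the opening <script ...> tag, matching the
# tokens_intro sequence pinned down in test_long_unclosed_javascript_fragment.
print(tokens[:7])
```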