diff options
Diffstat (limited to 'tests/test_encode.py')
-rw-r--r-- | tests/test_encode.py | 50 |
1 files changed, 50 insertions, 0 deletions
"""Tests for ``mdurl.encode`` (contents of ``tests/test_encode.py``)."""

import pytest

from mdurl import encode


@pytest.mark.parametrize(
    "input_,expected",
    [
        pytest.param("%%%", "%25%25%25", id="should encode percent"),
        pytest.param("\r\n", "%0D%0A", id="should encode control chars"),
        pytest.param("?#", "?#", id="should not encode parts of an url"),
        # The expected value IS percent-encoded, so the id says "should encode".
        pytest.param("[]^", "%5B%5D%5E", id="should encode []^ - commonmark tests"),
        pytest.param("my url", "my%20url", id="should encode spaces"),
        pytest.param("φου", "%CF%86%CE%BF%CF%85", id="should encode unicode"),
        pytest.param(
            "%FG", "%25FG", id="should encode % if it doesn't start a valid escape seq"
        ),
        pytest.param(
            "%00%FF", "%00%FF", id="should preserve non-utf8 encoded characters"
        ),
        pytest.param(
            "\x00\x7F\x80",
            "%00%7F%C2%80",
            id="should encode characters on the cache borders",
        ),  # protects against off-by-one in cache implementation
    ],
)
def test_encode(input_, expected):
    """Check ``encode`` called with default arguments against each case."""
    assert encode(input_) == expected


def test_encode_arguments():
    """Check the ``exclude`` and ``keep_escaped`` keyword arguments."""
    # Characters listed in ``exclude`` pass through unencoded.
    assert encode("!@#$", exclude="@$") == "%21@%23$"
    # keep_escaped=True: the valid escape "%20" is preserved, while the "%"
    # of the invalid sequence "%2G" is itself encoded.
    assert encode("%20%2G", keep_escaped=True) == "%20%252G"
    # keep_escaped=False: every "%" is encoded, valid escape or not.
    assert encode("%20%2G", keep_escaped=False) == "%2520%252G"
    # Both arguments combined.
    assert encode("!@%25", exclude="@", keep_escaped=False) == "%21@%2525"


def test_encode_surrogates():
    """Check handling of lone and paired UTF-16 surrogates.

    A lone surrogate is expected to come out as ``%EF%BF%BD``, which is the
    UTF-8 percent-encoding of U+FFFD (the replacement character).
    """
    # bad surrogates (high)
    assert encode("\uD800foo") == "%EF%BF%BDfoo"
    assert encode("foo\uD800") == "foo%EF%BF%BD"

    # bad surrogates (low)
    assert encode("\uDD00foo") == "%EF%BF%BDfoo"
    assert encode("foo\uDD00") == "foo%EF%BF%BD"

    # valid one
    # (the codepoint is "D800 DD00" in UTF-16BE)
    assert encode("𐄀") == "%F0%90%84%80"