diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-29 04:25:33 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-29 04:25:33 +0000 |
commit | 3c33e01482cb0481e2472ee49fa55b0d7f818c26 (patch) | |
tree | e1bc734976912ad573bb83e8c338bc3285afe50e /tests | |
parent | Initial commit. (diff) | |
download | mdurl-upstream.tar.xz mdurl-upstream.zip |
Adding upstream version 0.1.2.upstream/0.1.2upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'tests')
-rw-r--r-- | tests/__init__.py | 0 | ||||
-rw-r--r-- | tests/decode.js | 123 | ||||
-rw-r--r-- | tests/fixtures/__init__.py | 0 | ||||
-rw-r--r-- | tests/fixtures/url.py | 610 | ||||
-rw-r--r-- | tests/requirements.txt | 3 | ||||
-rw-r--r-- | tests/test_decode.py | 5 | ||||
-rw-r--r-- | tests/test_encode.py | 50 | ||||
-rw-r--r-- | tests/test_format.py | 10 | ||||
-rw-r--r-- | tests/test_parse.py | 26 |
9 files changed, 827 insertions, 0 deletions
diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/tests/__init__.py diff --git a/tests/decode.js b/tests/decode.js new file mode 100644 index 0000000..c9457ba --- /dev/null +++ b/tests/decode.js @@ -0,0 +1,123 @@ +// TODO: port to Python +'use strict'; + + +var assert = require('assert'); +var decode = require('../decode'); + +function encodeBinary(str) { + var result = ''; + + str = str.replace(/\s+/g, ''); + while (str.length) { + result = '%' + ('0' + parseInt(str.slice(-8), 2).toString(16)).slice(-2) + result; + str = str.slice(0, -8); + } + + return result; +} + +var samples = { + '00000000': true, + '01010101': true, + '01111111': true, + + // invalid as 1st byte + '10000000': true, + '10111111': true, + + // invalid sequences, 2nd byte should be >= 0x80 + '11000111 01010101': false, + '11100011 01010101': false, + '11110001 01010101': false, + + // invalid sequences, 2nd byte should be < 0xc0 + '11000111 11000000': false, + '11100011 11000000': false, + '11110001 11000000': false, + + // invalid 3rd byte + '11100011 10010101 01010101': false, + '11110001 10010101 01010101': false, + + // invalid 4th byte + '11110001 10010101 10010101 01010101': false, + + // valid sequences + '11000111 10101010': true, + '11100011 10101010 10101010': true, + '11110001 10101010 10101010 10101010': true, + + // minimal chars with given length + '11000010 10000000': true, + '11100000 10100000 10000000': true, + + // impossible sequences + '11000001 10111111': false, + '11100000 10011111 10111111': false, + '11000001 10000000': false, + '11100000 10010000 10000000': false, + + // maximum chars with given length + '11011111 10111111': true, + '11101111 10111111 10111111': true, + + '11110000 10010000 10000000 10000000': true, + '11110000 10010000 10001111 10001111': true, + '11110100 10001111 10110000 10000000': true, + '11110100 10001111 10111111 10111111': true, + + // too low + '11110000 10001111 10111111 10111111': false, + + // too high + '11110100 10010000 10000000 10000000': false, + '11110100 10011111 10111111 10111111': false, + + // surrogate range + '11101101 10011111 10111111': true, + '11101101 10100000 10000000': false, + '11101101 10111111 10111111': false, + '11101110 10000000 10000000': true +}; + +describe('decode', function() { + it('should decode %xx', function() { + assert.equal(decode('x%20xx%20%2520'), 'x xx %20'); + }); + + it('should not decode invalid sequences', function() { + assert.equal(decode('%2g%z1%%'), '%2g%z1%%'); + }); + + it('should not decode reservedSet', function() { + assert.equal(decode('%20%25%20', '%'), ' %25 '); + assert.equal(decode('%20%25%20', ' '), '%20%%20'); + assert.equal(decode('%20%25%20', ' %'), '%20%25%20'); + }); + + describe('utf8', function() { + Object.keys(samples).forEach(function(k) { + it(k, function() { + var res1, res2, + er = null, + str = encodeBinary(k); + + try { + res1 = decodeURIComponent(str); + } catch(e) { + er = e; + } + + res2 = decode(str); + + if (er) { + assert.notEqual(res2.indexOf('\ufffd'), -1); + } else { + assert.equal(res1, res2); + assert.equal(res2.indexOf('\ufffd'), -1); + } + }); + }); + }); +}); diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/tests/fixtures/__init__.py diff --git a/tests/fixtures/url.py b/tests/fixtures/url.py new file mode 100644 index 0000000..29431ec --- /dev/null +++ b/tests/fixtures/url.py @@ -0,0 +1,610 @@ +# Copyright Joyent, Inc. and other Node contributors. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the +# following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN +# NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +# USE OR OTHER DEALINGS IN THE SOFTWARE. + + +# URLs to parse, and expected data +# { url : parsed } +PARSED = { + "//some_path": {"pathname": "//some_path"}, + "HTTP://www.example.com/": { + "protocol": "HTTP:", + "slashes": True, + "hostname": "www.example.com", + "pathname": "/", + }, + "HTTP://www.example.com": { + "protocol": "HTTP:", + "slashes": True, + "hostname": "www.example.com", + "pathname": "", + }, + "http://www.ExAmPlE.com/": { + "protocol": "http:", + "slashes": True, + "hostname": "www.ExAmPlE.com", + "pathname": "/", + }, + "http://user:pw@www.ExAmPlE.com/": { + "protocol": "http:", + "slashes": True, + "auth": "user:pw", + "hostname": "www.ExAmPlE.com", + "pathname": "/", + }, + "http://USER:PW@www.ExAmPlE.com/": { + "protocol": "http:", + "slashes": True, + "auth": "USER:PW", + "hostname": "www.ExAmPlE.com", + "pathname": "/", + }, + "http://user@www.example.com/": { + "protocol": "http:", + "slashes": True, + "auth": "user", + "hostname": "www.example.com", + "pathname": "/", + }, + "http://user%3Apw@www.example.com/": { + "protocol": "http:", + "slashes": True, + "auth": "user%3Apw", + "hostname": "www.example.com", + "pathname": "/", + }, + "http://x.com/path?that's#all, folks": { + "protocol": "http:", + "hostname": "x.com", + "slashes": True, + "search": "?that's", + "pathname": "/path", + "hash": "#all, folks", + }, + "HTTP://X.COM/Y": { + "protocol": "HTTP:", + "slashes": True, + "hostname": "X.COM", + "pathname": "/Y", + }, + # + not an invalid host character + # per https://url.spec.whatwg.org/#host-parsing + "http://x.y.com+a/b/c": { + "protocol": "http:", + "slashes": True, + "hostname": "x.y.com+a", + "pathname": "/b/c", + }, + # an unexpected invalid char in the hostname. + "HtTp://x.y.cOm;a/b/c?d=e#f g<h>i": { + "protocol": "HtTp:", + "slashes": True, + "hostname": "x.y.cOm", + "pathname": ";a/b/c", + "search": "?d=e", + "hash": "#f g<h>i", + }, + # make sure that we don't accidentally lcast the path parts. + "HtTp://x.y.cOm;A/b/c?d=e#f g<h>i": { + "protocol": "HtTp:", + "slashes": True, + "hostname": "x.y.cOm", + "pathname": ";A/b/c", + "search": "?d=e", + "hash": "#f g<h>i", + }, + "http://x...y...#p": { + "protocol": "http:", + "slashes": True, + "hostname": "x...y...", + "hash": "#p", + "pathname": "", + }, + 'http://x/p/"quoted"': { + "protocol": "http:", + "slashes": True, + "hostname": "x", + "pathname": '/p/"quoted"', + }, + "<http://goo.corn/bread> Is a URL!": { + "pathname": "<http://goo.corn/bread> Is a URL!" + }, + "http://www.narwhaljs.org/blog/categories?id=news": { + "protocol": "http:", + "slashes": True, + "hostname": "www.narwhaljs.org", + "search": "?id=news", + "pathname": "/blog/categories", + }, + "http://mt0.google.com/vt/lyrs=m@114&hl=en&src=api&x=2&y=2&z=3&s=": { + "protocol": "http:", + "slashes": True, + "hostname": "mt0.google.com", + "pathname": "/vt/lyrs=m@114&hl=en&src=api&x=2&y=2&z=3&s=", + }, + "http://mt0.google.com/vt/lyrs=m@114???&hl=en&src=api&x=2&y=2&z=3&s=": { + "protocol": "http:", + "slashes": True, + "hostname": "mt0.google.com", + "search": "???&hl=en&src=api&x=2&y=2&z=3&s=", + "pathname": "/vt/lyrs=m@114", + }, + "http://user:pass@mt0.google.com/vt/lyrs=m@114???&hl=en&src=api&x=2&y=2&z=3&s=": { + "protocol": "http:", + "slashes": True, + "auth": "user:pass", + "hostname": "mt0.google.com", + "search": "???&hl=en&src=api&x=2&y=2&z=3&s=", + "pathname": "/vt/lyrs=m@114", + }, + "file:///etc/passwd": { + "slashes": True, + "protocol": "file:", + "pathname": "/etc/passwd", + "hostname": "", + }, + "file://localhost/etc/passwd": { + "protocol": "file:", + "slashes": True, + "pathname": "/etc/passwd", + "hostname": "localhost", + }, + "file://foo/etc/passwd": { + "protocol": "file:", + "slashes": True, + "pathname": "/etc/passwd", + "hostname": "foo", + }, + "file:///etc/node/": { + "slashes": True, + "protocol": "file:", + "pathname": "/etc/node/", + "hostname": "", + }, + "file://localhost/etc/node/": { + "protocol": "file:", + "slashes": True, + "pathname": "/etc/node/", + "hostname": "localhost", + }, + "file://foo/etc/node/": { + "protocol": "file:", + "slashes": True, + "pathname": "/etc/node/", + "hostname": "foo", + }, + "http:/baz/../foo/bar": {"protocol": "http:", "pathname": "/baz/../foo/bar"}, + "http://user:pass@example.com:8000/foo/bar?baz=quux#frag": { + "protocol": "http:", + "slashes": True, + "auth": "user:pass", + "port": "8000", + "hostname": "example.com", + "hash": "#frag", + "search": "?baz=quux", + "pathname": "/foo/bar", + }, + "//user:pass@example.com:8000/foo/bar?baz=quux#frag": { + "slashes": True, + "auth": "user:pass", + "port": "8000", + "hostname": "example.com", + "hash": "#frag", + "search": "?baz=quux", + "pathname": "/foo/bar", + }, + "/foo/bar?baz=quux#frag": { + "hash": "#frag", + "search": "?baz=quux", + "pathname": "/foo/bar", + }, + "http:/foo/bar?baz=quux#frag": { + "protocol": "http:", + "hash": "#frag", + "search": "?baz=quux", + "pathname": "/foo/bar", + }, + "mailto:foo@bar.com?subject=hello": { + "protocol": "mailto:", + "auth": "foo", + "hostname": "bar.com", + "search": "?subject=hello", + }, + "javascript:alert('hello');": { + "protocol": "javascript:", + "pathname": "alert('hello');", + }, + "xmpp:isaacschlueter@jabber.org": { + "protocol": "xmpp:", + "auth": "isaacschlueter", + "hostname": "jabber.org", + }, + "http://atpass:foo%40bar@127.0.0.1:8080/path?search=foo#bar": { + "protocol": "http:", + "slashes": True, + "auth": "atpass:foo%40bar", + "hostname": "127.0.0.1", + "port": "8080", + "pathname": "/path", + "search": "?search=foo", + "hash": "#bar", + }, + "svn+ssh://foo/bar": { + "hostname": "foo", + "protocol": "svn+ssh:", + "pathname": "/bar", + "slashes": True, + }, + "dash-test://foo/bar": { + "hostname": "foo", + "protocol": "dash-test:", + "pathname": "/bar", + "slashes": True, + }, + "dash-test:foo/bar": { + "hostname": "foo", + "protocol": "dash-test:", + "pathname": "/bar", + }, + "dot.test://foo/bar": { + "hostname": "foo", + "protocol": "dot.test:", + "pathname": "/bar", + "slashes": True, + }, + "dot.test:foo/bar": { + "hostname": "foo", + "protocol": "dot.test:", + "pathname": "/bar", + }, + # IDNA tests + "http://www.日本語.com/": { + "protocol": "http:", + "slashes": True, + "hostname": "www.日本語.com", + "pathname": "/", + }, + "http://example.Bücher.com/": { + "protocol": "http:", + "slashes": True, + "hostname": "example.Bücher.com", + "pathname": "/", + }, + "http://www.Äffchen.com/": { + "protocol": "http:", + "slashes": True, + "hostname": "www.Äffchen.com", + "pathname": "/", + }, + "http://www.Äffchen.cOm;A/b/c?d=e#f g<h>i": { + "protocol": "http:", + "slashes": True, + "hostname": "www.Äffchen.cOm", + "pathname": ";A/b/c", + "search": "?d=e", + "hash": "#f g<h>i", + }, + "http://SÉLIER.COM/": { + "protocol": "http:", + "slashes": True, + "hostname": "SÉLIER.COM", + "pathname": "/", + }, + "http://ليهمابتكلموشعربي؟.ي؟/": { + "protocol": "http:", + "slashes": True, + "hostname": "ليهمابتكلموشعربي؟.ي؟", + "pathname": "/", + }, + "http://➡.ws/➡": { + "protocol": "http:", + "slashes": True, + "hostname": "➡.ws", + "pathname": "/➡", + }, + "http://bucket_name.s3.amazonaws.com/image.jpg": { + "protocol": "http:", + "slashes": True, + "hostname": "bucket_name.s3.amazonaws.com", + "pathname": "/image.jpg", + }, + "git+http://github.com/joyent/node.git": { + "protocol": "git+http:", + "slashes": True, + "hostname": "github.com", + "pathname": "/joyent/node.git", + }, + # if local1@domain1 is uses as a relative URL it may + # be parse into auth@hostname, but here there is no + # way to make it work in url.parse, I add the test to be explicit + "local1@domain1": {"pathname": "local1@domain1"}, + # While this may seem counter-intuitive, a browser will parse + # <a href='www.google.com'> as a path. + "www.example.com": {"pathname": "www.example.com"}, + # ipv6 support + "[fe80::1]": {"pathname": "[fe80::1]"}, + "coap://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]": { + "protocol": "coap:", + "slashes": True, + "hostname": "FEDC:BA98:7654:3210:FEDC:BA98:7654:3210", + }, + "coap://[1080:0:0:0:8:800:200C:417A]:61616/": { + "protocol": "coap:", + "slashes": True, + "port": "61616", + "hostname": "1080:0:0:0:8:800:200C:417A", + "pathname": "/", + }, + "http://user:password@[3ffe:2a00:100:7031::1]:8080": { + "protocol": "http:", + "slashes": True, + "auth": "user:password", + "port": "8080", + "hostname": "3ffe:2a00:100:7031::1", + "pathname": "", + }, + "coap://u:p@[::192.9.5.5]:61616/.well-known/r?n=Temperature": { + "protocol": "coap:", + "slashes": True, + "auth": "u:p", + "port": "61616", + "hostname": "::192.9.5.5", + "search": "?n=Temperature", + "pathname": "/.well-known/r", + }, + # empty port + "http://example.com:": { + "protocol": "http:", + "slashes": True, + "hostname": "example.com", + "pathname": ":", + }, + "http://example.com:/a/b.html": { + "protocol": "http:", + "slashes": True, + "hostname": "example.com", + "pathname": ":/a/b.html", + }, + "http://example.com:?a=b": { + "protocol": "http:", + "slashes": True, + "hostname": "example.com", + "search": "?a=b", + "pathname": ":", + }, + "http://example.com:#abc": { + "protocol": "http:", + "slashes": True, + "hostname": "example.com", + "hash": "#abc", + "pathname": ":", + }, + "http://[fe80::1]:/a/b?a=b#abc": { + "protocol": "http:", + "slashes": True, + "hostname": "fe80::1", + "search": "?a=b", + "hash": "#abc", + "pathname": ":/a/b", + }, + "http://-lovemonsterz.tumblr.com/rss": { + "protocol": "http:", + "slashes": True, + "hostname": "-lovemonsterz.tumblr.com", + "pathname": "/rss", + }, + "http://-lovemonsterz.tumblr.com:80/rss": { + "protocol": "http:", + "slashes": True, + "port": "80", + "hostname": "-lovemonsterz.tumblr.com", + "pathname": "/rss", + }, + "http://user:pass@-lovemonsterz.tumblr.com/rss": { + "protocol": "http:", + "slashes": True, + "auth": "user:pass", + "hostname": "-lovemonsterz.tumblr.com", + "pathname": "/rss", + }, + "http://user:pass@-lovemonsterz.tumblr.com:80/rss": { + "protocol": "http:", + "slashes": True, + "auth": "user:pass", + "port": "80", + "hostname": "-lovemonsterz.tumblr.com", + "pathname": "/rss", + }, + "http://_jabber._tcp.google.com/test": { + "protocol": "http:", + "slashes": True, + "hostname": "_jabber._tcp.google.com", + "pathname": "/test", + }, + "http://user:pass@_jabber._tcp.google.com/test": { + "protocol": "http:", + "slashes": True, + "auth": "user:pass", + "hostname": "_jabber._tcp.google.com", + "pathname": "/test", + }, + "http://_jabber._tcp.google.com:80/test": { + "protocol": "http:", + "slashes": True, + "port": "80", + "hostname": "_jabber._tcp.google.com", + "pathname": "/test", + }, + "http://user:pass@_jabber._tcp.google.com:80/test": { + "protocol": "http:", + "slashes": True, + "auth": "user:pass", + "port": "80", + "hostname": "_jabber._tcp.google.com", + "pathname": "/test", + }, + "http://x:1/' <>\"`/{}|\\^~`/": { + "protocol": "http:", + "slashes": True, + "port": "1", + "hostname": "x", + "pathname": "/' <>\"`/{}|\\^~`/", + }, + "http://a@b@c/": { + "protocol": "http:", + "slashes": True, + "auth": "a@b", + "hostname": "c", + "pathname": "/", + }, + "http://a@b?@c": { + "protocol": "http:", + "slashes": True, + "auth": "a", + "hostname": "b", + "pathname": "", + "search": "?@c", + }, + "http://a\r\" \t\n<'b:b@c\r\nd/e?f": { + "protocol": "http:", + "slashes": True, + "auth": "a\r\" \t\n<'b:b", + "hostname": "c", + "search": "?f", + "pathname": "\r\nd/e", + }, + # git urls used by npm + "git+ssh://git@github.com:npm/npm": { + "protocol": "git+ssh:", + "slashes": True, + "auth": "git", + "hostname": "github.com", + "pathname": ":npm/npm", + }, + "http://example.com?foo=bar#frag": { + "protocol": "http:", + "slashes": True, + "hostname": "example.com", + "hash": "#frag", + "search": "?foo=bar", + "pathname": "", + }, + "http://example.com?foo=@bar#frag": { + "protocol": "http:", + "slashes": True, + "hostname": "example.com", + "hash": "#frag", + "search": "?foo=@bar", + "pathname": "", + }, + "http://example.com?foo=/bar/#frag": { + "protocol": "http:", + "slashes": True, + "hostname": "example.com", + "hash": "#frag", + "search": "?foo=/bar/", + "pathname": "", + }, + "http://example.com?foo=?bar/#frag": { + "protocol": "http:", + "slashes": True, + "hostname": "example.com", + "hash": "#frag", + "search": "?foo=?bar/", + "pathname": "", + }, + "http://example.com#frag=?bar/#frag": { + "protocol": "http:", + "slashes": True, + "hostname": "example.com", + "hash": "#frag=?bar/#frag", + "pathname": "", + }, + 'http://google.com" onload="alert(42)/': { + "hostname": "google.com", + "protocol": "http:", + "slashes": True, + "pathname": '" onload="alert(42)/', + }, + "http://a.com/a/b/c?s#h": { + "protocol": "http:", + "slashes": True, + "pathname": "/a/b/c", + "hostname": "a.com", + "hash": "#h", + "search": "?s", + }, + "http://atpass:foo%40bar@127.0.0.1/": { + "auth": "atpass:foo%40bar", + "slashes": True, + "hostname": "127.0.0.1", + "protocol": "http:", + "pathname": "/", + }, + "http://atslash%2F%40:%2F%40@foo/": { + "auth": "atslash%2F%40:%2F%40", + "hostname": "foo", + "protocol": "http:", + "pathname": "/", + "slashes": True, + }, + # ipv6 support + "coap:u:p@[::1]:61616/.well-known/r?n=Temperature": { + "protocol": "coap:", + "auth": "u:p", + "hostname": "::1", + "port": "61616", + "pathname": "/.well-known/r", + "search": "?n=Temperature", + }, + "coap:[fedc:ba98:7654:3210:fedc:ba98:7654:3210]:61616/s/stopButton": { + "hostname": "fedc:ba98:7654:3210:fedc:ba98:7654:3210", + "port": "61616", + "protocol": "coap:", + "pathname": "/s/stopButton", + }, + # encode context-specific delimiters in path and query, but do not touch + # other non-delimiter chars like `%`. + # <https://github.com/joyent/node/issues/4082> + # `?` and `#` in path and search + "http://ex.com/foo%3F100%m%23r?abc=the%231?&foo=bar#frag": { + "protocol": "http:", + "hostname": "ex.com", + "hash": "#frag", + "search": "?abc=the%231?&foo=bar", + "pathname": "/foo%3F100%m%23r", + "slashes": True, + }, + # `?` and `#` in search only + "http://ex.com/fooA100%mBr?abc=the%231?&foo=bar#frag": { + "protocol": "http:", + "hostname": "ex.com", + "hash": "#frag", + "search": "?abc=the%231?&foo=bar", + "pathname": "/fooA100%mBr", + "slashes": True, + }, + # + "http://": { + "protocol": "http:", + "hostname": "", + "slashes": True, + }, +} diff --git a/tests/requirements.txt b/tests/requirements.txt new file mode 100644 index 0000000..6f05550 --- /dev/null +++ b/tests/requirements.txt @@ -0,0 +1,3 @@ +pytest +pytest-randomly +pytest-cov diff --git a/tests/test_decode.py b/tests/test_decode.py new file mode 100644 index 0000000..bc58ce0 --- /dev/null +++ b/tests/test_decode.py @@ -0,0 +1,5 @@ +from mdurl import decode + + +def test_decode_multi_byte(): + assert decode("https://host.invalid/%F0%9F%91%A9") == "https://host.invalid/👩" diff --git a/tests/test_encode.py b/tests/test_encode.py new file mode 100644 index 0000000..7414bac --- /dev/null +++ b/tests/test_encode.py @@ -0,0 +1,50 @@ +import pytest + +from mdurl import encode + + +@pytest.mark.parametrize( + "input_,expected", + [ + pytest.param("%%%", "%25%25%25", id="should encode percent"), + pytest.param("\r\n", "%0D%0A", id="should encode control chars"), + pytest.param("?#", "?#", id="should not encode parts of an url"), + pytest.param("[]^", "%5B%5D%5E", id="should not encode []^ - commonmark tests"), + pytest.param("my url", "my%20url", id="should encode spaces"), + pytest.param("φου", "%CF%86%CE%BF%CF%85", id="should encode unicode"), + pytest.param( + "%FG", "%25FG", id="should encode % if it doesn't start a valid escape seq" + ), + pytest.param( + "%00%FF", "%00%FF", id="should preserve non-utf8 encoded characters" + ), + pytest.param( + "\x00\x7F\x80", + "%00%7F%C2%80", + id="should encode characters on the cache borders", + ), # protects against off-by-one in cache implementation + ], +) +def test_encode(input_, expected): + assert encode(input_) == expected + + +def test_encode_arguments(): + assert encode("!@#$", exclude="@$") == "%21@%23$" + assert encode("%20%2G", keep_escaped=True) == "%20%252G" + assert encode("%20%2G", keep_escaped=False) == "%2520%252G" + assert encode("!@%25", exclude="@", keep_escaped=False) == "%21@%2525" + + +def test_encode_surrogates(): + # bad surrogates (high) + assert encode("\uD800foo") == "%EF%BF%BDfoo" + assert encode("foo\uD800") == "foo%EF%BF%BD" + + # bad surrogates (low) + assert encode("\uDD00foo") == "%EF%BF%BDfoo" + assert encode("foo\uDD00") == "foo%EF%BF%BD" + + # valid one + # (the codepoint is "D800 DD00" in UTF-16BE) + assert encode("𐄀") == "%F0%90%84%80" diff --git a/tests/test_format.py b/tests/test_format.py new file mode 100644 index 0000000..0cf1219 --- /dev/null +++ b/tests/test_format.py @@ -0,0 +1,10 @@ +import pytest + +from mdurl import format, parse +from tests.fixtures.url import PARSED as FIXTURES + + +@pytest.mark.parametrize("url", FIXTURES.keys()) +def test_format(url): + parsed = parse(url) + assert format(parsed) == url diff --git a/tests/test_parse.py b/tests/test_parse.py new file mode 100644 index 0000000..aa4ae44 --- /dev/null +++ b/tests/test_parse.py @@ -0,0 +1,26 @@ +import pytest + +from mdurl import parse +from tests.fixtures.url import PARSED as FIXTURES + + +def is_url_and_dict_equal(url, url_dict): + return ( + url.protocol == url_dict.get("protocol") + and url.slashes == url_dict.get("slashes", False) + and url.auth == url_dict.get("auth") + and url.port == url_dict.get("port") + and url.hostname == url_dict.get("hostname") + and url.hash == url_dict.get("hash") + and url.search == url_dict.get("search") + and url.pathname == url_dict.get("pathname") + ) + + +@pytest.mark.parametrize( + "url,expected_dict", + FIXTURES.items(), +) +def test_parse(url, expected_dict): + parsed = parse(url) + assert is_url_and_dict_equal(parsed, expected_dict) |