summary | refs | log | tree | commit | diff | stats
path: root/tests
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-29 04:25:33 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-29 04:25:33 +0000
commit: 3c33e01482cb0481e2472ee49fa55b0d7f818c26 (patch)
tree: e1bc734976912ad573bb83e8c338bc3285afe50e /tests
parent: Initial commit. (diff)
download: mdurl-upstream.tar.xz
mdurl-upstream.zip
Adding upstream version 0.1.2. [upstream/0.1.2] [upstream]
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'tests')
-rw-r--r-- tests/__init__.py 0
-rw-r--r-- tests/decode.js 123
-rw-r--r-- tests/fixtures/__init__.py 0
-rw-r--r-- tests/fixtures/url.py 610
-rw-r--r-- tests/requirements.txt 3
-rw-r--r-- tests/test_decode.py 5
-rw-r--r-- tests/test_encode.py 50
-rw-r--r-- tests/test_format.py 10
-rw-r--r-- tests/test_parse.py 26
9 files changed, 827 insertions, 0 deletions
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/__init__.py
diff --git a/tests/decode.js b/tests/decode.js
new file mode 100644
index 0000000..c9457ba
--- /dev/null
+++ b/tests/decode.js
@@ -0,0 +1,123 @@
+// TODO: port to Python
+'use strict';
+
+
+var assert = require('assert');
+var decode = require('../decode');
+
+function encodeBinary(str) {
+ var result = '';
+
+ str = str.replace(/\s+/g, '');
+ while (str.length) {
+ result = '%' + ('0' + parseInt(str.slice(-8), 2).toString(16)).slice(-2) + result;
+ str = str.slice(0, -8);
+ }
+
+ return result;
+}
+
+var samples = {
+ '00000000': true,
+ '01010101': true,
+ '01111111': true,
+
+ // invalid as 1st byte
+ '10000000': true,
+ '10111111': true,
+
+ // invalid sequences, 2nd byte should be >= 0x80
+ '11000111 01010101': false,
+ '11100011 01010101': false,
+ '11110001 01010101': false,
+
+ // invalid sequences, 2nd byte should be < 0xc0
+ '11000111 11000000': false,
+ '11100011 11000000': false,
+ '11110001 11000000': false,
+
+ // invalid 3rd byte
+ '11100011 10010101 01010101': false,
+ '11110001 10010101 01010101': false,
+
+ // invalid 4th byte
+ '11110001 10010101 10010101 01010101': false,
+
+ // valid sequences
+ '11000111 10101010': true,
+ '11100011 10101010 10101010': true,
+ '11110001 10101010 10101010 10101010': true,
+
+ // minimal chars with given length
+ '11000010 10000000': true,
+ '11100000 10100000 10000000': true,
+
+ // impossible sequences
+ '11000001 10111111': false,
+ '11100000 10011111 10111111': false,
+ '11000001 10000000': false,
+ '11100000 10010000 10000000': false,
+
+ // maximum chars with given length
+ '11011111 10111111': true,
+ '11101111 10111111 10111111': true,
+
+ '11110000 10010000 10000000 10000000': true,
+ '11110000 10010000 10001111 10001111': true,
+ '11110100 10001111 10110000 10000000': true,
+ '11110100 10001111 10111111 10111111': true,
+
+ // too low
+ '11110000 10001111 10111111 10111111': false,
+
+ // too high
+ '11110100 10010000 10000000 10000000': false,
+ '11110100 10011111 10111111 10111111': false,
+
+ // surrogate range
+ '11101101 10011111 10111111': true,
+ '11101101 10100000 10000000': false,
+ '11101101 10111111 10111111': false,
+ '11101110 10000000 10000000': true
+};
+
+describe('decode', function() {
+ it('should decode %xx', function() {
+ assert.equal(decode('x%20xx%20%2520'), 'x xx %20');
+ });
+
+ it('should not decode invalid sequences', function() {
+ assert.equal(decode('%2g%z1%%'), '%2g%z1%%');
+ });
+
+ it('should not decode reservedSet', function() {
+ assert.equal(decode('%20%25%20', '%'), ' %25 ');
+ assert.equal(decode('%20%25%20', ' '), '%20%%20');
+ assert.equal(decode('%20%25%20', ' %'), '%20%25%20');
+ });
+
+ describe('utf8', function() {
+ Object.keys(samples).forEach(function(k) {
+ it(k, function() {
+ var res1, res2,
+ er = null,
+ str = encodeBinary(k);
+
+ try {
+ res1 = decodeURIComponent(str);
+ } catch(e) {
+ er = e;
+ }
+
+ res2 = decode(str);
+
+ if (er) {
+ assert.notEqual(res2.indexOf('\ufffd'), -1);
+ } else {
+ assert.equal(res1, res2);
+ assert.equal(res2.indexOf('\ufffd'), -1);
+ }
+ });
+ });
+ });
+});
diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/fixtures/__init__.py
diff --git a/tests/fixtures/url.py b/tests/fixtures/url.py
new file mode 100644
index 0000000..29431ec
--- /dev/null
+++ b/tests/fixtures/url.py
@@ -0,0 +1,610 @@
+# Copyright Joyent, Inc. and other Node contributors.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to permit
+# persons to whom the Software is furnished to do so, subject to the
+# following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
+# NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+# USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+
+# URLs to parse, and expected data
+# { url : parsed }
+PARSED = {
+ "//some_path": {"pathname": "//some_path"},
+ "HTTP://www.example.com/": {
+ "protocol": "HTTP:",
+ "slashes": True,
+ "hostname": "www.example.com",
+ "pathname": "/",
+ },
+ "HTTP://www.example.com": {
+ "protocol": "HTTP:",
+ "slashes": True,
+ "hostname": "www.example.com",
+ "pathname": "",
+ },
+ "http://www.ExAmPlE.com/": {
+ "protocol": "http:",
+ "slashes": True,
+ "hostname": "www.ExAmPlE.com",
+ "pathname": "/",
+ },
+ "http://user:pw@www.ExAmPlE.com/": {
+ "protocol": "http:",
+ "slashes": True,
+ "auth": "user:pw",
+ "hostname": "www.ExAmPlE.com",
+ "pathname": "/",
+ },
+ "http://USER:PW@www.ExAmPlE.com/": {
+ "protocol": "http:",
+ "slashes": True,
+ "auth": "USER:PW",
+ "hostname": "www.ExAmPlE.com",
+ "pathname": "/",
+ },
+ "http://user@www.example.com/": {
+ "protocol": "http:",
+ "slashes": True,
+ "auth": "user",
+ "hostname": "www.example.com",
+ "pathname": "/",
+ },
+ "http://user%3Apw@www.example.com/": {
+ "protocol": "http:",
+ "slashes": True,
+ "auth": "user%3Apw",
+ "hostname": "www.example.com",
+ "pathname": "/",
+ },
+ "http://x.com/path?that's#all, folks": {
+ "protocol": "http:",
+ "hostname": "x.com",
+ "slashes": True,
+ "search": "?that's",
+ "pathname": "/path",
+ "hash": "#all, folks",
+ },
+ "HTTP://X.COM/Y": {
+ "protocol": "HTTP:",
+ "slashes": True,
+ "hostname": "X.COM",
+ "pathname": "/Y",
+ },
+ # + not an invalid host character
+ # per https://url.spec.whatwg.org/#host-parsing
+ "http://x.y.com+a/b/c": {
+ "protocol": "http:",
+ "slashes": True,
+ "hostname": "x.y.com+a",
+ "pathname": "/b/c",
+ },
+ # an unexpected invalid char in the hostname.
+ "HtTp://x.y.cOm;a/b/c?d=e#f g<h>i": {
+ "protocol": "HtTp:",
+ "slashes": True,
+ "hostname": "x.y.cOm",
+ "pathname": ";a/b/c",
+ "search": "?d=e",
+ "hash": "#f g<h>i",
+ },
+ # make sure that we don't accidentally lowercase the path parts.
+ "HtTp://x.y.cOm;A/b/c?d=e#f g<h>i": {
+ "protocol": "HtTp:",
+ "slashes": True,
+ "hostname": "x.y.cOm",
+ "pathname": ";A/b/c",
+ "search": "?d=e",
+ "hash": "#f g<h>i",
+ },
+ "http://x...y...#p": {
+ "protocol": "http:",
+ "slashes": True,
+ "hostname": "x...y...",
+ "hash": "#p",
+ "pathname": "",
+ },
+ 'http://x/p/"quoted"': {
+ "protocol": "http:",
+ "slashes": True,
+ "hostname": "x",
+ "pathname": '/p/"quoted"',
+ },
+ "<http://goo.corn/bread> Is a URL!": {
+ "pathname": "<http://goo.corn/bread> Is a URL!"
+ },
+ "http://www.narwhaljs.org/blog/categories?id=news": {
+ "protocol": "http:",
+ "slashes": True,
+ "hostname": "www.narwhaljs.org",
+ "search": "?id=news",
+ "pathname": "/blog/categories",
+ },
+ "http://mt0.google.com/vt/lyrs=m@114&hl=en&src=api&x=2&y=2&z=3&s=": {
+ "protocol": "http:",
+ "slashes": True,
+ "hostname": "mt0.google.com",
+ "pathname": "/vt/lyrs=m@114&hl=en&src=api&x=2&y=2&z=3&s=",
+ },
+ "http://mt0.google.com/vt/lyrs=m@114???&hl=en&src=api&x=2&y=2&z=3&s=": {
+ "protocol": "http:",
+ "slashes": True,
+ "hostname": "mt0.google.com",
+ "search": "???&hl=en&src=api&x=2&y=2&z=3&s=",
+ "pathname": "/vt/lyrs=m@114",
+ },
+ "http://user:pass@mt0.google.com/vt/lyrs=m@114???&hl=en&src=api&x=2&y=2&z=3&s=": {
+ "protocol": "http:",
+ "slashes": True,
+ "auth": "user:pass",
+ "hostname": "mt0.google.com",
+ "search": "???&hl=en&src=api&x=2&y=2&z=3&s=",
+ "pathname": "/vt/lyrs=m@114",
+ },
+ "file:///etc/passwd": {
+ "slashes": True,
+ "protocol": "file:",
+ "pathname": "/etc/passwd",
+ "hostname": "",
+ },
+ "file://localhost/etc/passwd": {
+ "protocol": "file:",
+ "slashes": True,
+ "pathname": "/etc/passwd",
+ "hostname": "localhost",
+ },
+ "file://foo/etc/passwd": {
+ "protocol": "file:",
+ "slashes": True,
+ "pathname": "/etc/passwd",
+ "hostname": "foo",
+ },
+ "file:///etc/node/": {
+ "slashes": True,
+ "protocol": "file:",
+ "pathname": "/etc/node/",
+ "hostname": "",
+ },
+ "file://localhost/etc/node/": {
+ "protocol": "file:",
+ "slashes": True,
+ "pathname": "/etc/node/",
+ "hostname": "localhost",
+ },
+ "file://foo/etc/node/": {
+ "protocol": "file:",
+ "slashes": True,
+ "pathname": "/etc/node/",
+ "hostname": "foo",
+ },
+ "http:/baz/../foo/bar": {"protocol": "http:", "pathname": "/baz/../foo/bar"},
+ "http://user:pass@example.com:8000/foo/bar?baz=quux#frag": {
+ "protocol": "http:",
+ "slashes": True,
+ "auth": "user:pass",
+ "port": "8000",
+ "hostname": "example.com",
+ "hash": "#frag",
+ "search": "?baz=quux",
+ "pathname": "/foo/bar",
+ },
+ "//user:pass@example.com:8000/foo/bar?baz=quux#frag": {
+ "slashes": True,
+ "auth": "user:pass",
+ "port": "8000",
+ "hostname": "example.com",
+ "hash": "#frag",
+ "search": "?baz=quux",
+ "pathname": "/foo/bar",
+ },
+ "/foo/bar?baz=quux#frag": {
+ "hash": "#frag",
+ "search": "?baz=quux",
+ "pathname": "/foo/bar",
+ },
+ "http:/foo/bar?baz=quux#frag": {
+ "protocol": "http:",
+ "hash": "#frag",
+ "search": "?baz=quux",
+ "pathname": "/foo/bar",
+ },
+ "mailto:foo@bar.com?subject=hello": {
+ "protocol": "mailto:",
+ "auth": "foo",
+ "hostname": "bar.com",
+ "search": "?subject=hello",
+ },
+ "javascript:alert('hello');": {
+ "protocol": "javascript:",
+ "pathname": "alert('hello');",
+ },
+ "xmpp:isaacschlueter@jabber.org": {
+ "protocol": "xmpp:",
+ "auth": "isaacschlueter",
+ "hostname": "jabber.org",
+ },
+ "http://atpass:foo%40bar@127.0.0.1:8080/path?search=foo#bar": {
+ "protocol": "http:",
+ "slashes": True,
+ "auth": "atpass:foo%40bar",
+ "hostname": "127.0.0.1",
+ "port": "8080",
+ "pathname": "/path",
+ "search": "?search=foo",
+ "hash": "#bar",
+ },
+ "svn+ssh://foo/bar": {
+ "hostname": "foo",
+ "protocol": "svn+ssh:",
+ "pathname": "/bar",
+ "slashes": True,
+ },
+ "dash-test://foo/bar": {
+ "hostname": "foo",
+ "protocol": "dash-test:",
+ "pathname": "/bar",
+ "slashes": True,
+ },
+ "dash-test:foo/bar": {
+ "hostname": "foo",
+ "protocol": "dash-test:",
+ "pathname": "/bar",
+ },
+ "dot.test://foo/bar": {
+ "hostname": "foo",
+ "protocol": "dot.test:",
+ "pathname": "/bar",
+ "slashes": True,
+ },
+ "dot.test:foo/bar": {
+ "hostname": "foo",
+ "protocol": "dot.test:",
+ "pathname": "/bar",
+ },
+ # IDNA tests
+ "http://www.日本語.com/": {
+ "protocol": "http:",
+ "slashes": True,
+ "hostname": "www.日本語.com",
+ "pathname": "/",
+ },
+ "http://example.Bücher.com/": {
+ "protocol": "http:",
+ "slashes": True,
+ "hostname": "example.Bücher.com",
+ "pathname": "/",
+ },
+ "http://www.Äffchen.com/": {
+ "protocol": "http:",
+ "slashes": True,
+ "hostname": "www.Äffchen.com",
+ "pathname": "/",
+ },
+ "http://www.Äffchen.cOm;A/b/c?d=e#f g<h>i": {
+ "protocol": "http:",
+ "slashes": True,
+ "hostname": "www.Äffchen.cOm",
+ "pathname": ";A/b/c",
+ "search": "?d=e",
+ "hash": "#f g<h>i",
+ },
+ "http://SÉLIER.COM/": {
+ "protocol": "http:",
+ "slashes": True,
+ "hostname": "SÉLIER.COM",
+ "pathname": "/",
+ },
+ "http://ليهمابتكلموشعربي؟.ي؟/": {
+ "protocol": "http:",
+ "slashes": True,
+ "hostname": "ليهمابتكلموشعربي؟.ي؟",
+ "pathname": "/",
+ },
+ "http://➡.ws/➡": {
+ "protocol": "http:",
+ "slashes": True,
+ "hostname": "➡.ws",
+ "pathname": "/➡",
+ },
+ "http://bucket_name.s3.amazonaws.com/image.jpg": {
+ "protocol": "http:",
+ "slashes": True,
+ "hostname": "bucket_name.s3.amazonaws.com",
+ "pathname": "/image.jpg",
+ },
+ "git+http://github.com/joyent/node.git": {
+ "protocol": "git+http:",
+ "slashes": True,
+ "hostname": "github.com",
+ "pathname": "/joyent/node.git",
+ },
+ # if local1@domain1 is used as a relative URL it may
+ # be parsed into auth@hostname, but there is no way to make
+ # that work in url.parse; this test makes the behavior explicit
+ "local1@domain1": {"pathname": "local1@domain1"},
+ # While this may seem counter-intuitive, a browser will parse
+ # <a href='www.google.com'> as a path.
+ "www.example.com": {"pathname": "www.example.com"},
+ # ipv6 support
+ "[fe80::1]": {"pathname": "[fe80::1]"},
+ "coap://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]": {
+ "protocol": "coap:",
+ "slashes": True,
+ "hostname": "FEDC:BA98:7654:3210:FEDC:BA98:7654:3210",
+ },
+ "coap://[1080:0:0:0:8:800:200C:417A]:61616/": {
+ "protocol": "coap:",
+ "slashes": True,
+ "port": "61616",
+ "hostname": "1080:0:0:0:8:800:200C:417A",
+ "pathname": "/",
+ },
+ "http://user:password@[3ffe:2a00:100:7031::1]:8080": {
+ "protocol": "http:",
+ "slashes": True,
+ "auth": "user:password",
+ "port": "8080",
+ "hostname": "3ffe:2a00:100:7031::1",
+ "pathname": "",
+ },
+ "coap://u:p@[::192.9.5.5]:61616/.well-known/r?n=Temperature": {
+ "protocol": "coap:",
+ "slashes": True,
+ "auth": "u:p",
+ "port": "61616",
+ "hostname": "::192.9.5.5",
+ "search": "?n=Temperature",
+ "pathname": "/.well-known/r",
+ },
+ # empty port
+ "http://example.com:": {
+ "protocol": "http:",
+ "slashes": True,
+ "hostname": "example.com",
+ "pathname": ":",
+ },
+ "http://example.com:/a/b.html": {
+ "protocol": "http:",
+ "slashes": True,
+ "hostname": "example.com",
+ "pathname": ":/a/b.html",
+ },
+ "http://example.com:?a=b": {
+ "protocol": "http:",
+ "slashes": True,
+ "hostname": "example.com",
+ "search": "?a=b",
+ "pathname": ":",
+ },
+ "http://example.com:#abc": {
+ "protocol": "http:",
+ "slashes": True,
+ "hostname": "example.com",
+ "hash": "#abc",
+ "pathname": ":",
+ },
+ "http://[fe80::1]:/a/b?a=b#abc": {
+ "protocol": "http:",
+ "slashes": True,
+ "hostname": "fe80::1",
+ "search": "?a=b",
+ "hash": "#abc",
+ "pathname": ":/a/b",
+ },
+ "http://-lovemonsterz.tumblr.com/rss": {
+ "protocol": "http:",
+ "slashes": True,
+ "hostname": "-lovemonsterz.tumblr.com",
+ "pathname": "/rss",
+ },
+ "http://-lovemonsterz.tumblr.com:80/rss": {
+ "protocol": "http:",
+ "slashes": True,
+ "port": "80",
+ "hostname": "-lovemonsterz.tumblr.com",
+ "pathname": "/rss",
+ },
+ "http://user:pass@-lovemonsterz.tumblr.com/rss": {
+ "protocol": "http:",
+ "slashes": True,
+ "auth": "user:pass",
+ "hostname": "-lovemonsterz.tumblr.com",
+ "pathname": "/rss",
+ },
+ "http://user:pass@-lovemonsterz.tumblr.com:80/rss": {
+ "protocol": "http:",
+ "slashes": True,
+ "auth": "user:pass",
+ "port": "80",
+ "hostname": "-lovemonsterz.tumblr.com",
+ "pathname": "/rss",
+ },
+ "http://_jabber._tcp.google.com/test": {
+ "protocol": "http:",
+ "slashes": True,
+ "hostname": "_jabber._tcp.google.com",
+ "pathname": "/test",
+ },
+ "http://user:pass@_jabber._tcp.google.com/test": {
+ "protocol": "http:",
+ "slashes": True,
+ "auth": "user:pass",
+ "hostname": "_jabber._tcp.google.com",
+ "pathname": "/test",
+ },
+ "http://_jabber._tcp.google.com:80/test": {
+ "protocol": "http:",
+ "slashes": True,
+ "port": "80",
+ "hostname": "_jabber._tcp.google.com",
+ "pathname": "/test",
+ },
+ "http://user:pass@_jabber._tcp.google.com:80/test": {
+ "protocol": "http:",
+ "slashes": True,
+ "auth": "user:pass",
+ "port": "80",
+ "hostname": "_jabber._tcp.google.com",
+ "pathname": "/test",
+ },
+ "http://x:1/' <>\"`/{}|\\^~`/": {
+ "protocol": "http:",
+ "slashes": True,
+ "port": "1",
+ "hostname": "x",
+ "pathname": "/' <>\"`/{}|\\^~`/",
+ },
+ "http://a@b@c/": {
+ "protocol": "http:",
+ "slashes": True,
+ "auth": "a@b",
+ "hostname": "c",
+ "pathname": "/",
+ },
+ "http://a@b?@c": {
+ "protocol": "http:",
+ "slashes": True,
+ "auth": "a",
+ "hostname": "b",
+ "pathname": "",
+ "search": "?@c",
+ },
+ "http://a\r\" \t\n<'b:b@c\r\nd/e?f": {
+ "protocol": "http:",
+ "slashes": True,
+ "auth": "a\r\" \t\n<'b:b",
+ "hostname": "c",
+ "search": "?f",
+ "pathname": "\r\nd/e",
+ },
+ # git urls used by npm
+ "git+ssh://git@github.com:npm/npm": {
+ "protocol": "git+ssh:",
+ "slashes": True,
+ "auth": "git",
+ "hostname": "github.com",
+ "pathname": ":npm/npm",
+ },
+ "http://example.com?foo=bar#frag": {
+ "protocol": "http:",
+ "slashes": True,
+ "hostname": "example.com",
+ "hash": "#frag",
+ "search": "?foo=bar",
+ "pathname": "",
+ },
+ "http://example.com?foo=@bar#frag": {
+ "protocol": "http:",
+ "slashes": True,
+ "hostname": "example.com",
+ "hash": "#frag",
+ "search": "?foo=@bar",
+ "pathname": "",
+ },
+ "http://example.com?foo=/bar/#frag": {
+ "protocol": "http:",
+ "slashes": True,
+ "hostname": "example.com",
+ "hash": "#frag",
+ "search": "?foo=/bar/",
+ "pathname": "",
+ },
+ "http://example.com?foo=?bar/#frag": {
+ "protocol": "http:",
+ "slashes": True,
+ "hostname": "example.com",
+ "hash": "#frag",
+ "search": "?foo=?bar/",
+ "pathname": "",
+ },
+ "http://example.com#frag=?bar/#frag": {
+ "protocol": "http:",
+ "slashes": True,
+ "hostname": "example.com",
+ "hash": "#frag=?bar/#frag",
+ "pathname": "",
+ },
+ 'http://google.com" onload="alert(42)/': {
+ "hostname": "google.com",
+ "protocol": "http:",
+ "slashes": True,
+ "pathname": '" onload="alert(42)/',
+ },
+ "http://a.com/a/b/c?s#h": {
+ "protocol": "http:",
+ "slashes": True,
+ "pathname": "/a/b/c",
+ "hostname": "a.com",
+ "hash": "#h",
+ "search": "?s",
+ },
+ "http://atpass:foo%40bar@127.0.0.1/": {
+ "auth": "atpass:foo%40bar",
+ "slashes": True,
+ "hostname": "127.0.0.1",
+ "protocol": "http:",
+ "pathname": "/",
+ },
+ "http://atslash%2F%40:%2F%40@foo/": {
+ "auth": "atslash%2F%40:%2F%40",
+ "hostname": "foo",
+ "protocol": "http:",
+ "pathname": "/",
+ "slashes": True,
+ },
+ # ipv6 support
+ "coap:u:p@[::1]:61616/.well-known/r?n=Temperature": {
+ "protocol": "coap:",
+ "auth": "u:p",
+ "hostname": "::1",
+ "port": "61616",
+ "pathname": "/.well-known/r",
+ "search": "?n=Temperature",
+ },
+ "coap:[fedc:ba98:7654:3210:fedc:ba98:7654:3210]:61616/s/stopButton": {
+ "hostname": "fedc:ba98:7654:3210:fedc:ba98:7654:3210",
+ "port": "61616",
+ "protocol": "coap:",
+ "pathname": "/s/stopButton",
+ },
+ # encode context-specific delimiters in path and query, but do not touch
+ # other non-delimiter chars like `%`.
+ # <https://github.com/joyent/node/issues/4082>
+ # `?` and `#` in path and search
+ "http://ex.com/foo%3F100%m%23r?abc=the%231?&foo=bar#frag": {
+ "protocol": "http:",
+ "hostname": "ex.com",
+ "hash": "#frag",
+ "search": "?abc=the%231?&foo=bar",
+ "pathname": "/foo%3F100%m%23r",
+ "slashes": True,
+ },
+ # `?` and `#` in search only
+ "http://ex.com/fooA100%mBr?abc=the%231?&foo=bar#frag": {
+ "protocol": "http:",
+ "hostname": "ex.com",
+ "hash": "#frag",
+ "search": "?abc=the%231?&foo=bar",
+ "pathname": "/fooA100%mBr",
+ "slashes": True,
+ },
+ # protocol and slashes only, with an empty hostname
+ "http://": {
+ "protocol": "http:",
+ "hostname": "",
+ "slashes": True,
+ },
+}
diff --git a/tests/requirements.txt b/tests/requirements.txt
new file mode 100644
index 0000000..6f05550
--- /dev/null
+++ b/tests/requirements.txt
@@ -0,0 +1,3 @@
+pytest
+pytest-randomly
+pytest-cov
diff --git a/tests/test_decode.py b/tests/test_decode.py
new file mode 100644
index 0000000..bc58ce0
--- /dev/null
+++ b/tests/test_decode.py
@@ -0,0 +1,5 @@
+from mdurl import decode
+
+
+def test_decode_multi_byte():
+ assert decode("https://host.invalid/%F0%9F%91%A9") == "https://host.invalid/👩"
diff --git a/tests/test_encode.py b/tests/test_encode.py
new file mode 100644
index 0000000..7414bac
--- /dev/null
+++ b/tests/test_encode.py
@@ -0,0 +1,50 @@
+import pytest
+
+from mdurl import encode
+
+
+@pytest.mark.parametrize(
+ "input_,expected",
+ [
+ pytest.param("%%%", "%25%25%25", id="should encode percent"),
+ pytest.param("\r\n", "%0D%0A", id="should encode control chars"),
+ pytest.param("?#", "?#", id="should not encode parts of an url"),
+ pytest.param("[]^", "%5B%5D%5E", id="should not encode []^ - commonmark tests"),
+ pytest.param("my url", "my%20url", id="should encode spaces"),
+ pytest.param("φου", "%CF%86%CE%BF%CF%85", id="should encode unicode"),
+ pytest.param(
+ "%FG", "%25FG", id="should encode % if it doesn't start a valid escape seq"
+ ),
+ pytest.param(
+ "%00%FF", "%00%FF", id="should preserve non-utf8 encoded characters"
+ ),
+ pytest.param(
+ "\x00\x7F\x80",
+ "%00%7F%C2%80",
+ id="should encode characters on the cache borders",
+ ), # protects against off-by-one in cache implementation
+ ],
+)
+def test_encode(input_, expected):
+ assert encode(input_) == expected
+
+
+def test_encode_arguments():
+ assert encode("!@#$", exclude="@$") == "%21@%23$"
+ assert encode("%20%2G", keep_escaped=True) == "%20%252G"
+ assert encode("%20%2G", keep_escaped=False) == "%2520%252G"
+ assert encode("!@%25", exclude="@", keep_escaped=False) == "%21@%2525"
+
+
+def test_encode_surrogates():
+ # bad surrogates (high)
+ assert encode("\uD800foo") == "%EF%BF%BDfoo"
+ assert encode("foo\uD800") == "foo%EF%BF%BD"
+
+ # bad surrogates (low)
+ assert encode("\uDD00foo") == "%EF%BF%BDfoo"
+ assert encode("foo\uDD00") == "foo%EF%BF%BD"
+
+ # valid one
+ # (the codepoint is "D800 DD00" in UTF-16BE)
+ assert encode("𐄀") == "%F0%90%84%80"
diff --git a/tests/test_format.py b/tests/test_format.py
new file mode 100644
index 0000000..0cf1219
--- /dev/null
+++ b/tests/test_format.py
@@ -0,0 +1,10 @@
+import pytest
+
+from mdurl import format, parse
+from tests.fixtures.url import PARSED as FIXTURES
+
+
+@pytest.mark.parametrize("url", FIXTURES.keys())
+def test_format(url):
+ parsed = parse(url)
+ assert format(parsed) == url
diff --git a/tests/test_parse.py b/tests/test_parse.py
new file mode 100644
index 0000000..aa4ae44
--- /dev/null
+++ b/tests/test_parse.py
@@ -0,0 +1,26 @@
+import pytest
+
+from mdurl import parse
+from tests.fixtures.url import PARSED as FIXTURES
+
+
+def is_url_and_dict_equal(url, url_dict):
+ return (
+ url.protocol == url_dict.get("protocol")
+ and url.slashes == url_dict.get("slashes", False)
+ and url.auth == url_dict.get("auth")
+ and url.port == url_dict.get("port")
+ and url.hostname == url_dict.get("hostname")
+ and url.hash == url_dict.get("hash")
+ and url.search == url_dict.get("search")
+ and url.pathname == url_dict.get("pathname")
+ )
+
+
+@pytest.mark.parametrize(
+ "url,expected_dict",
+ FIXTURES.items(),
+)
+def test_parse(url, expected_dict):
+ parsed = parse(url)
+ assert is_url_and_dict_equal(parsed, expected_dict)