Adding upstream version 0.1.2. (refs: upstream/0.1.2, upstream)

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-29 04:25:33 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-29 04:25:33 +0000
commit: 3c33e01482cb0481e2472ee49fa55b0d7f818c26 (patch)
tree: e1bc734976912ad573bb83e8c338bc3285afe50e
parent: Initial commit. (diff)
download: mdurl-3c33e01482cb0481e2472ee49fa55b0d7f818c26.tar.xz
mdurl-3c33e01482cb0481e2472ee49fa55b0d7f818c26.zip
24 files changed, 1828 insertions, 0 deletions
diff --git a/.bumpversion.cfg b/.bumpversion.cfg
new file mode 100644
index 0000000..058dbaa
--- /dev/null
+++ b/.bumpversion.cfg
@@ -0,0 +1,13 @@
+[bumpversion]
+commit = True
+tag = True
+tag_name = {new_version}
+current_version = 0.1.2
+
+[bumpversion:file:pyproject.toml]
+search = version = "{current_version}"  # DO NOT EDIT THIS LINE MANUALLY. LET bump2version UTILITY DO IT
+replace = version = "{new_version}"  # DO NOT EDIT THIS LINE MANUALLY. LET bump2version UTILITY DO IT
+
+[bumpversion:file:src/mdurl/__init__.py]
+search = __version__ = "{current_version}"
+replace = __version__ = "{new_version}"
diff --git a/.flake8 b/.flake8
new file mode 100644
index 0000000..e0e9690
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,5 @@
+[flake8]
+max-line-length = 88
+# These checks violate PEP8 so let's ignore them
+extend-ignore = E203
+extend-exclude = */site-packages/*
diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
new file mode 100644
index 0000000..d88b88e
--- /dev/null
+++ b/.github/workflows/tests.yaml
@@ -0,0 +1,90 @@
+name: Tests
+
+on:
+  push:
+    branches: [ master ]
+    tags: [ '[0-9]+.[0-9]+.[0-9]+*' ]
+  pull_request:
+    branches: [ master ]
+
+jobs:
+
+  linters:
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v2
+    - uses: actions/setup-python@v2
+      with:
+        python-version: '3.8'
+
+    - name: Install pre-commit
+      run: |
+        pip install pre-commit
+
+    - name: run linters
+      # pre-commit also runs in pre-commit.ci, but let's have it here too
+      # to block `pypi-publish` job from triggering if pre-commit fails
+      run: |
+        pre-commit run -a
+
+  tests:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        python-version: ['pypy-3.7', '3.7', '3.8', '3.9', '3.10', '3.11-dev']
+        os: [ubuntu-latest, macos-latest, windows-latest]
+    continue-on-error: ${{ matrix.python-version == '3.11-dev' }}
+
+    steps:
+    - uses: actions/checkout@v2
+
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v2
+      with:
+        python-version: ${{ matrix.python-version }}
+
+    - name: Install test deps
+      run: |
+        pip install . -r tests/requirements.txt
+
+    - name: Test with pytest
+      run: |
+        # TODO: bump coverage % up to 100
+        pytest --cov --cov-fail-under=75
+
+    - name: Report coverage
+      if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.10'
+      uses: codecov/codecov-action@v1
+
+  allgood:
+    runs-on: ubuntu-latest
+    needs:
+    - tests
+    - linters
+    steps:
+    - run: echo "Great success!"
+
+  pypi-publish:
+    # Only publish if all other jobs succeed
+    needs: [ allgood ]
+    if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags')
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v2
+    - uses: actions/setup-python@v2
+      with:
+        python-version: '3.7'
+    - name: Install build and publish tools
+      run: |
+        pip install build twine
+    - name: Build and check
+      run: |
+        rm -rf dist/ && python -m build
+        twine check --strict dist/*
+    - name: Publish
+      run: |
+        twine upload dist/*
+      env:
+        TWINE_USERNAME: __token__
+        TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..18bffe1
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,135 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# IntelliJ
+.idea/
+
+# VS Code
+.vscode/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..db92e4f
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,52 @@
+repos:
+- repo: https://github.com/executablebooks/mdformat
+  rev: b8c05ae822d53326e967da45367d0408afc56a81  # frozen: 0.7.14
+  hooks:
+  - id: mdformat
+    additional_dependencies:
+    - mdformat-gfm
+- repo: https://github.com/asottile/yesqa
+  rev: 265e9ff7c83add4949f81bb5fe14f4a743ffb04c  # frozen: v1.4.0
+  hooks:
+  - id: yesqa
+    additional_dependencies:
+    - flake8-bugbear
+    - flake8-builtins
+    - flake8-comprehensions
+- repo: https://github.com/PyCQA/isort
+  rev: c5e8fa75dda5f764d20f66a215d71c21cfa198e1  # frozen: 5.10.1
+  hooks:
+  - id: isort
+- repo: https://github.com/psf/black
+  rev: f6c139c5215ce04fd3e73a900f1372942d58eca0  # frozen: 22.6.0
+  hooks:
+  - id: black
+- repo: https://github.com/myint/docformatter
+  rev: 67919ee01837761f2d954d7fbb08c12cdd38ec5a  # frozen: v1.4
+  hooks:
+  - id: docformatter
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: 3298ddab3c13dd77d6ce1fc0baf97691430d84b0  # frozen: v4.3.0
+  hooks:
+  - id: check-yaml
+- repo: https://github.com/pre-commit/pygrep-hooks
+  rev: 6f51a66bba59954917140ec2eeeaa4d5e630e6ce  # frozen: v1.9.0
+  hooks:
+  - id: python-use-type-annotations
+  - id: python-check-blanket-noqa
+  - id: python-check-blanket-type-ignore
+- repo: https://github.com/PyCQA/flake8
+  rev: f8e1b317742036ff11ff86356fd2b68147e169f7  # frozen: 5.0.4
+  hooks:
+  - id: flake8
+    additional_dependencies:
+    - flake8-bugbear
+    - flake8-builtins
+    - flake8-comprehensions
+- repo: https://github.com/pre-commit/mirrors-mypy
+  rev: fde4bb992b03943ecb94207a52739ba07957bd06  # frozen: v0.971
+  hooks:
+  - id: mypy
+    args: ["--scripts-are-modules"]
+    additional_dependencies:
+    - pytest
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..2a920c5
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,46 @@
+Copyright (c) 2015 Vitaly Puzrin, Alex Kocharin.
+Copyright (c) 2021 Taneli Hukkinen
+
+Permission is hereby granted, free of charge, to any person
+obtaining a copy of this software and associated documentation
+files (the "Software"), to deal in the Software without
+restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+--------------------------------------------------------------------------------
+
+.parse() is based on Joyent's node.js `url` code:
+
+Copyright Joyent, Inc. and other Node contributors. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to
+deal in the Software without restriction, including without limitation the
+rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+sell copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+IN THE SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..55b1d2e
--- /dev/null
+++ b/README.md
@@ -0,0 +1,8 @@
+# mdurl
+
+[![Build Status](https://github.com/executablebooks/mdurl/workflows/Tests/badge.svg?branch=master)](https://github.com/executablebooks/mdurl/actions?query=workflow%3ATests+branch%3Amaster+event%3Apush)
+[![codecov.io](https://codecov.io/gh/executablebooks/mdurl/branch/master/graph/badge.svg)](https://codecov.io/gh/executablebooks/mdurl)
+[![PyPI version](https://img.shields.io/pypi/v/mdurl)](https://pypi.org/project/mdurl)
+
+This is a Python port of the JavaScript [mdurl](https://www.npmjs.com/package/mdurl) package.
+See the [upstream README.md file](https://github.com/markdown-it/mdurl/blob/master/README.md) for API documentation.
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..916a588
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,99 @@
+[build-system]
+requires = ["flit_core>=3.2.0,<4"]
+build-backend = "flit_core.buildapi"
+
+[project]
+name = "mdurl"
+version = "0.1.2"  # DO NOT EDIT THIS LINE MANUALLY. LET bump2version UTILITY DO IT
+description = "Markdown URL utilities"
+authors = [
+    { name = "Taneli Hukkinen", email = "hukkin@users.noreply.github.com" },
+]
+license = { file = "LICENSE" }
+requires-python = ">=3.7"
+readme = "README.md"
+classifiers = [
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: MacOS",
+    "Operating System :: Microsoft :: Windows",
+    "Operating System :: POSIX :: Linux",
+    "Programming Language :: Python :: 3 :: Only",
+    "Programming Language :: Python :: 3.7",
+    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: Implementation :: CPython",
+    "Programming Language :: Python :: Implementation :: PyPy",
+    "Topic :: Software Development :: Libraries :: Python Modules",
+    "Typing :: Typed",
+]
+keywords = ["markdown", "commonmark"]
+
+[project.urls]
+"Homepage" = "https://github.com/executablebooks/mdurl"
+
+
+[tool.isort]
+# Force imports to be sorted by module, independent of import type
+force_sort_within_sections = true
+# Group first party and local folder imports together
+no_lines_before = ["LOCALFOLDER"]
+
+# Configure isort to work without access to site-packages
+known_first_party = ["mdurl", "tests"]
+
+# Settings for Black compatibility
+profile = "black"
+
+
+[tool.pytest.ini_options]
+addopts = "--strict-markers --strict-config"
+xfail_strict = true
+
+
+[tool.tox]
+legacy_tox_ini = '''
+[tox]
+# Only run pytest envs when no args given to tox
+envlist = py{37,38,39,310}
+isolated_build = True
+
+[testenv:py{37,38,39,310}]
+description = run tests
+deps = -r tests/requirements.txt
+commands =
+    pytest {posargs}
+'''
+
+
+[tool.coverage.run]
+source = ["mdurl"]
+
+[tool.coverage.report]
+# Regexes for lines to exclude from consideration
+exclude_lines = [
+    # Have to re-enable the standard pragma
+    "pragma: no cover",
+    # Code for static type checkers
+    "if TYPE_CHECKING:",
+    # Scripts
+    'if __name__ == .__main__.:',
+]
+
+
+[tool.mypy]
+show_error_codes = true
+warn_unreachable = true
+warn_unused_ignores = true
+warn_redundant_casts = true
+warn_unused_configs = true
+# Disabling incremental mode is required for `warn_unused_configs = true` to work
+incremental = false
+disallow_untyped_defs = true
+check_untyped_defs = true
+strict_equality = true
+implicit_reexport = false
+no_implicit_optional = true
+overrides = [
+    { module = "tests.*", disallow_untyped_defs = false },
+]
diff --git a/src/mdurl/__init__.py b/src/mdurl/__init__.py
new file mode 100644
index 0000000..cdbb640
--- /dev/null
+++ b/src/mdurl/__init__.py
@@ -0,0 +1,18 @@
+__all__ = (
+    "decode",
+    "DECODE_DEFAULT_CHARS",
+    "DECODE_COMPONENT_CHARS",
+    "encode",
+    "ENCODE_DEFAULT_CHARS",
+    "ENCODE_COMPONENT_CHARS",
+    "format",
+    "parse",
+    "URL",
+)
+__version__ = "0.1.2"  # DO NOT EDIT THIS LINE MANUALLY. LET bump2version UTILITY DO IT
+
+from mdurl._decode import DECODE_COMPONENT_CHARS, DECODE_DEFAULT_CHARS, decode
+from mdurl._encode import ENCODE_COMPONENT_CHARS, ENCODE_DEFAULT_CHARS, encode
+from mdurl._format import format
+from mdurl._parse import url_parse as parse
+from mdurl._url import URL
diff --git a/src/mdurl/_decode.py b/src/mdurl/_decode.py
new file mode 100644
index 0000000..9b50a2d
--- /dev/null
+++ b/src/mdurl/_decode.py
@@ -0,0 +1,104 @@
+from __future__ import annotations
+
+from collections.abc import Sequence
+import functools
+import re
+
+DECODE_DEFAULT_CHARS = ";/?:@&=+$,#"
+DECODE_COMPONENT_CHARS = ""
+
+# Memoized lookup tables, keyed by the `exclude` string they were built for.
+decode_cache: dict[str, list[str]] = {}
+
+
+def get_decode_cache(exclude: str) -> Sequence[str]:
+    """Return the 128-entry ASCII lookup table for `exclude`, building it once.
+
+    Index ``i`` holds ``chr(i)``, except that every ASCII character listed in
+    `exclude` maps to its upper-case ``%XX`` escape, so that decoding leaves
+    it percent-encoded.
+
+    Non-ASCII characters in `exclude` are ignored: the table is only indexed
+    with byte values below 0x80, and assigning past the end of the table
+    would raise ``IndexError``.
+    """
+    if exclude in decode_cache:
+        return decode_cache[exclude]
+
+    # Start from the identity mapping for every ASCII code point.
+    cache = [chr(i) for i in range(128)]
+    for ch in exclude:
+        ch_code = ord(ch)
+        if ch_code < 128:
+            cache[ch_code] = f"%{ch_code:02X}"
+
+    # Publish the table only once it is fully built.
+    decode_cache[exclude] = cache
+    return cache
+
+
+def decode(string: str, exclude: str = DECODE_DEFAULT_CHARS) -> str:
+    """Decode the percent-encoded sequences in `string`.
+
+    ASCII characters listed in `exclude` are left percent-encoded.
+    """
+    table = get_decode_cache(exclude)
+    # One match covers a whole run of consecutive escapes, so that multi-byte
+    # UTF-8 sequences are handed to the replacement function in one piece.
+    escape_runs = re.compile(r"(%[a-f0-9]{2})+", flags=re.IGNORECASE)
+    replace_run = functools.partial(repl_func_with_cache, cache=table)
+    return escape_runs.sub(replace_run, string)
+
+
+def repl_func_with_cache(match: re.Match, cache: Sequence[str]) -> str:
+    """Decode one run of consecutive ``%XX`` escapes.
+
+    Bytes below 0x80 are translated through `cache`. A lead byte followed by
+    the right number of ``10xxxxxx`` continuation bytes is decoded as UTF-8;
+    if the UTF-8 codec rejects that sequence, one U+FFFD is emitted per byte.
+    Any other byte (stray continuation byte, invalid lead byte, truncated
+    sequence) is replaced by a single U+FFFD.
+    """
+    seq = match.group()
+    total = len(seq)
+    pieces: list[str] = []
+
+    pos = 0
+    while pos < total:
+        # Every escape is 3 chars long: "%" plus two hex digits.
+        lead = int(seq[pos + 1 : pos + 3], 16)
+
+        if lead < 0x80:
+            pieces.append(cache[lead])
+            pos += 3
+            continue
+
+        # Number of continuation bytes announced by the lead byte.
+        if (lead & 0xE0) == 0xC0:  # 110xxxxx 10xxxxxx
+            extra = 1
+        elif (lead & 0xF0) == 0xE0:  # 1110xxxx 10xxxxxx 10xxxxxx
+            extra = 2
+        elif (lead & 0xF8) == 0xF0:  # 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+            extra = 3
+        else:
+            extra = 0
+
+        # The continuation escapes must all lie inside the matched run.
+        if extra and pos + 3 * extra < total:
+            tail = [
+                int(seq[pos + 3 * k + 1 : pos + 3 * k + 3], 16)
+                for k in range(1, extra + 1)
+            ]
+            if all((byte & 0xC0) == 0x80 for byte in tail):
+                try:
+                    pieces.append(bytes((lead, *tail)).decode())
+                except UnicodeDecodeError:
+                    pieces.append("\ufffd" * (extra + 1))
+                pos += 3 * (extra + 1)
+                continue
+
+        pieces.append("\ufffd")
+        pos += 3
+
+    return "".join(pieces)
diff --git a/src/mdurl/_encode.py b/src/mdurl/_encode.py
new file mode 100644
index 0000000..bc2e5b9
--- /dev/null
+++ b/src/mdurl/_encode.py
@@ -0,0 +1,85 @@
+from __future__ import annotations
+
+from collections.abc import Sequence
+from string import ascii_letters, digits, hexdigits
+from urllib.parse import quote as encode_uri_component
+
+ASCII_LETTERS_AND_DIGITS = ascii_letters + digits
+
+ENCODE_DEFAULT_CHARS = ";/?:@&=+$,-_.!~*'()#"
+ENCODE_COMPONENT_CHARS = "-_.!~*'()"
+
+# Memoized lookup tables, keyed by the `exclude` string they were built for.
+encode_cache: dict[str, list[str]] = {}
+
+
+def get_encode_cache(exclude: str) -> Sequence[str]:
+    """Return the 128-entry ASCII lookup table for `exclude`, building it once.
+
+    Index ``i`` holds ``chr(i)`` when that character is an ASCII letter, a
+    digit, or listed in `exclude`; otherwise it holds the character's
+    upper-case ``%XX`` escape.
+
+    Non-ASCII characters in `exclude` are ignored: the table is only indexed
+    with code points below 128, and assigning past the end of the table would
+    raise ``IndexError``.
+    """
+    if exclude in encode_cache:
+        return encode_cache[exclude]
+
+    # Alphanumeric characters are always allowed unencoded.
+    cache = [
+        chr(i) if chr(i) in ASCII_LETTERS_AND_DIGITS else f"%{i:02X}"
+        for i in range(128)
+    ]
+    for ch in exclude:
+        ch_code = ord(ch)
+        if ch_code < 128:
+            cache[ch_code] = ch
+
+    # Publish the table only once it is fully built.
+    encode_cache[exclude] = cache
+    return cache
+
+
+def encode(
+    string: str, exclude: str = ENCODE_DEFAULT_CHARS, *, keep_escaped: bool = True
+) -> str:
+    """Percent-encode unsafe characters, skipping already encoded sequences.
+
+    :param string: string to encode
+    :param exclude: ASCII characters to leave as-is (in addition to a-zA-Z0-9)
+    :param keep_escaped: don't encode the ``%`` of a ``%XX`` sequence whose
+        two following characters are hex digits
+    :return: the encoded string
+    """
+    cache = get_encode_cache(exclude)
+    pieces: list[str] = []
+
+    l = len(string)  # noqa: E741
+    i = 0
+    while i < l:
+        code = ord(string[i])
+
+        # 0x25 is "%": keep an existing escape sequence untouched.
+        if keep_escaped and code == 0x25 and i + 2 < l:
+            if all(c in hexdigits for c in string[i + 1 : i + 3]):
+                pieces.append(string[i : i + 3])
+                i += 3
+                continue
+
+        if code < 128:
+            pieces.append(cache[code])
+            i += 1
+            continue
+
+        if 0xD800 <= code <= 0xDFFF:
+            if code <= 0xDBFF and i + 1 < l:
+                next_code = ord(string[i + 1])
+                if 0xDC00 <= next_code <= 0xDFFF:
+                    # High + low surrogate. The UTF-8 codec refuses surrogate
+                    # code points (quoting them raises UnicodeEncodeError),
+                    # so combine the pair into the character it stands for.
+                    astral = 0x10000 + ((code - 0xD800) << 10) + (next_code - 0xDC00)
+                    pieces.append(encode_uri_component(chr(astral)))
+                    i += 2
+                    continue
+            # Unpaired surrogate: emit the escape of U+FFFD instead.
+            pieces.append("%EF%BF%BD")
+            i += 1
+            continue
+
+        pieces.append(encode_uri_component(string[i]))
+        i += 1
+
+    return "".join(pieces)
diff --git a/src/mdurl/_format.py b/src/mdurl/_format.py
new file mode 100644
index 0000000..12524ca
--- /dev/null
+++ b/src/mdurl/_format.py
@@ -0,0 +1,27 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from mdurl._url import URL
+
+
+def format(url: URL) -> str:  # noqa: A001
+    """Join the components of `url` back into a single URL string.
+
+    Components that are ``None`` or empty contribute nothing.
+    """
+    hostname = url.hostname or ""
+    if ":" in hostname:
+        # A colon inside the hostname means an IPv6 address: wrap in brackets.
+        hostname = "[" + hostname + "]"
+
+    pieces = [
+        url.protocol or "",
+        "//" if url.slashes else "",
+        url.auth + "@" if url.auth else "",
+        hostname,
+        ":" + url.port if url.port else "",
+        url.pathname or "",
+        url.search or "",
+        url.hash or "",
+    ]
+    return "".join(pieces)
diff --git a/src/mdurl/_parse.py b/src/mdurl/_parse.py
new file mode 100644
index 0000000..ffeeac7
--- /dev/null
+++ b/src/mdurl/_parse.py
@@ -0,0 +1,304 @@
+# Copyright Joyent, Inc. and other Node contributors.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to permit
+# persons to whom the Software is furnished to do so, subject to the
+# following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
+# NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+# USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+
+# Changes from joyent/node:
+#
+# 1. No leading slash in paths,
+#    e.g. in `url.parse('http://foo?bar')` pathname is ``, not `/`
+#
+# 2. Backslashes are not replaced with slashes,
+#    so `http:\\example.org\` is treated like a relative path
+#
+# 3. Trailing colon is treated like a part of the path,
+#    i.e. in `http://example.org:foo` pathname is `:foo`
+#
+# 4. Nothing is URL-encoded in the resulting object,
+#    (in joyent/node some chars in auth and paths are encoded)
+#
+# 5. `url.parse()` does not have `parseQueryString` argument
+#
+# 6. Removed extraneous result properties: `host`, `path`, `query`, etc.,
+#    which can be constructed using other parts of the url.
+
+from __future__ import annotations
+
+from collections import defaultdict
+import re
+
+from mdurl._url import URL
+
+# Reference: RFC 3986, RFC 1808, RFC 2396
+
+# define these here so at least they only have to be
+# compiled once on the first module load.
+# Leading protocol, including its trailing colon (matched case-insensitively).
+PROTOCOL_PATTERN = re.compile(r"^([a-z0-9.+-]+:)", flags=re.IGNORECASE)
+# Trailing colon followed by zero or more digits.
+PORT_PATTERN = re.compile(r":[0-9]*$")
+
+# Special case for a simple path URL
+# Group 1 is the path (one or two leading slashes, not followed by a further
+# slash); group 2 is the optional query string, including its "?".
+SIMPLE_PATH_PATTERN = re.compile(r"^(//?(?!/)[^?\s]*)(\?[^\s]*)?$")
+
+# RFC 2396: characters reserved for delimiting URLs.
+# We actually just auto-escape these.
+DELIMS = ("<", ">", '"', "`", " ", "\r", "\n", "\t")
+
+# RFC 2396: characters not allowed for various reasons.
+UNWISE = ("{", "}", "|", "\\", "^", "`") + DELIMS
+
+# Allowed by RFCs, but cause of XSS attacks.  Always escape these.
+AUTO_ESCAPE = ("'",) + UNWISE
+# Characters that are never ever allowed in a hostname.
+# Note that any invalid chars are also handled, but these
+# are the ones that are *expected* to be seen, so we fast-path
+# them.
+NON_HOST_CHARS = ("%", "/", "?", ";", "#") + AUTO_ESCAPE
+# Characters that terminate the host portion of a URL.
+HOST_ENDING_CHARS = ("/", "?", "#")
+# Hostnames longer than this are discarded by the parser.
+HOSTNAME_MAX_LEN = 255
+# A complete dot-separated hostname label: up to 63 allowed characters.
+HOSTNAME_PART_PATTERN = re.compile(r"^[+a-z0-9A-Z_-]{0,63}$")
+# Splits a label into its valid leading part (group 1) and the rest (group 2).
+HOSTNAME_PART_START = re.compile(r"^([+a-z0-9A-Z_-]{0,63})(.*)$")
+# protocols that can allow "unsafe" and "unwise" chars.
+# NOTE(review): no definition follows the comment above -- it looks like a
+# leftover; confirm before removing.
+
+# protocols that never have a hostname.
+# NOTE: this is a defaultdict, so *indexing* it with an unknown key stores that
+# key (mapped to False); `.get()` and `in` do not.
+HOSTLESS_PROTOCOL = defaultdict(
+    bool,
+    {
+        "javascript": True,
+        "javascript:": True,
+    },
+)
+# protocols that always contain a // bit.
+# NOTE: also a defaultdict, with the same insert-on-index behaviour.
+SLASHED_PROTOCOL = defaultdict(
+    bool,
+    {
+        "http": True,
+        "https": True,
+        "ftp": True,
+        "gopher": True,
+        "file": True,
+        "http:": True,
+        "https:": True,
+        "ftp:": True,
+        "gopher:": True,
+        "file:": True,
+    },
+)
+
+
+class MutableURL:
+    """Mutable scratch object that `parse` fills in with the parts of a URL.
+
+    `url_parse` copies the resulting attributes into an immutable `URL`.
+    """
+
+    def __init__(self) -> None:
+        self.protocol: str | None = None
+        self.slashes: bool = False
+        self.auth: str | None = None
+        self.port: str | None = None
+        self.hostname: str | None = None
+        self.hash: str | None = None
+        self.search: str | None = None
+        self.pathname: str | None = None
+
+    def parse(self, url: str, slashes_denote_host: bool) -> "MutableURL":
+        """Parse `url` and store its components on this object.
+
+        :param url: the URL string; surrounding whitespace is ignored
+        :param slashes_denote_host: when true, a leading ``//`` introduces a
+            host even without a protocol, and the simple-path fast path is
+            skipped
+        :return: this object, to allow chaining
+        """
+        lower_proto = ""
+        slashes = False
+        rest = url
+
+        # trim before proceeding.
+        # This is to support parse stuff like "  http://foo.com  \n"
+        rest = rest.strip()
+
+        if not slashes_denote_host and len(url.split("#")) == 1:
+            # Try fast path regexp
+            simple_path = SIMPLE_PATH_PATTERN.match(rest)
+            if simple_path:
+                self.pathname = simple_path.group(1)
+                if simple_path.group(2):
+                    self.search = simple_path.group(2)
+                return self
+
+        proto = ""
+        proto_match = PROTOCOL_PATTERN.match(rest)
+        if proto_match:
+            proto = proto_match.group()
+            lower_proto = proto.lower()
+            self.protocol = proto
+            rest = rest[len(proto) :]
+
+        # NOTE: the protocol tables are defaultdicts. They are queried with
+        # `.get()` throughout, because indexing them would permanently store
+        # every protocol string ever parsed in the module-level table.
+
+        # figure out if it's got a host
+        # user@server is *always* interpreted as a hostname, and url
+        # resolution will treat //foo/bar as host=foo,path=bar because that's
+        # how the browser resolves relative URLs.
+        if slashes_denote_host or proto or re.search(r"^//[^@/]+@[^@/]+", rest):
+            slashes = rest.startswith("//")
+            if slashes and not (proto and HOSTLESS_PROTOCOL.get(proto)):
+                rest = rest[2:]
+                self.slashes = True
+
+        if not HOSTLESS_PROTOCOL.get(proto) and (
+            slashes or (proto and not SLASHED_PROTOCOL.get(proto))
+        ):
+
+            # there's a hostname.
+            # the first instance of /, ?, ;, or # ends the host.
+            #
+            # If there is an @ in the hostname, then non-host chars *are* allowed
+            # to the left of the last @ sign, unless some host-ending character
+            # comes *before* the @-sign.
+            # URLs are obnoxious.
+            #
+            # ex:
+            # http://a@b@c/ => user:a@b host:c
+            # http://a@b?@c => user:a host:c path:/?@c
+
+            # v0.12 TODO(isaacs): This is not quite how Chrome does things.
+            # Review our test case against browsers more comprehensively.
+
+            # find the first instance of any hostEndingChars
+            host_end = -1
+            for ending_char in HOST_ENDING_CHARS:
+                hec = rest.find(ending_char)
+                if hec != -1 and (host_end == -1 or hec < host_end):
+                    host_end = hec
+
+            # at this point, either we have an explicit point where the
+            # auth portion cannot go past, or the last @ char is the decider.
+            if host_end == -1:
+                # atSign can be anywhere.
+                at_sign = rest.rfind("@")
+            else:
+                # atSign must be in auth portion.
+                # http://a@b/c@d => host:b auth:a path:/c@d
+                at_sign = rest.rfind("@", 0, host_end + 1)
+
+            # Now we have a portion which is definitely the auth.
+            # Pull that off.
+            if at_sign != -1:
+                auth = rest[:at_sign]
+                rest = rest[at_sign + 1 :]
+                self.auth = auth
+
+            # the host is the remaining to the left of the first non-host char
+            host_end = -1
+            for non_host_char in NON_HOST_CHARS:
+                hec = rest.find(non_host_char)
+                if hec != -1 and (host_end == -1 or hec < host_end):
+                    host_end = hec
+            # if we still have not hit it, then the entire thing is a host.
+            if host_end == -1:
+                host_end = len(rest)
+
+            # a colon directly before the host end belongs to the path
+            if host_end > 0 and rest[host_end - 1] == ":":
+                host_end -= 1
+            host = rest[:host_end]
+            rest = rest[host_end:]
+
+            # pull out port.
+            self.parse_host(host)
+
+            # we've indicated that there is a hostname,
+            # so even if it's empty, it has to be present.
+            self.hostname = self.hostname or ""
+
+            # if hostname begins with [ and ends with ]
+            # assume that it's an IPv6 address.
+            ipv6_hostname = self.hostname.startswith("[") and self.hostname.endswith(
+                "]"
+            )
+
+            # validate a little.
+            if not ipv6_hostname:
+                hostparts = self.hostname.split(".")
+                l = len(hostparts)  # noqa: E741
+                i = 0
+                while i < l:
+                    part = hostparts[i]
+                    if not part:
+                        i += 1  # emulate statement3 in JS for loop
+                        continue
+                    if not HOSTNAME_PART_PATTERN.search(part):
+                        newpart = ""
+                        k = len(part)
+                        j = 0
+                        while j < k:
+                            if ord(part[j]) > 127:
+                                # we replace non-ASCII char with a temporary placeholder
+                                # we need this to make sure size of hostname is not
+                                # broken by replacing non-ASCII by nothing
+                                newpart += "x"
+                            else:
+                                newpart += part[j]
+                            j += 1  # emulate statement3 in JS for loop
+
+                        # we test again with ASCII char only
+                        if not HOSTNAME_PART_PATTERN.search(newpart):
+                            # keep the valid leading labels as the hostname and
+                            # push everything else back in front of `rest`
+                            valid_parts = hostparts[:i]
+                            not_host = hostparts[i + 1 :]
+                            bit = HOSTNAME_PART_START.search(part)
+                            if bit:
+                                valid_parts.append(bit.group(1))
+                                not_host.insert(0, bit.group(2))
+                            if not_host:
+                                rest = ".".join(not_host) + rest
+                            self.hostname = ".".join(valid_parts)
+                            break
+                    i += 1  # emulate statement3 in JS for loop
+
+            if len(self.hostname) > HOSTNAME_MAX_LEN:
+                self.hostname = ""
+
+            # strip [ and ] from the hostname
+            # the host field still retains them, though
+            if ipv6_hostname:
+                self.hostname = self.hostname[1:-1]
+
+        # chop off from the tail first.
+        hash = rest.find("#")  # noqa: A001
+        if hash != -1:
+            # got a fragment string.
+            self.hash = rest[hash:]
+            rest = rest[:hash]
+        qm = rest.find("?")
+        if qm != -1:
+            self.search = rest[qm:]
+            rest = rest[:qm]
+        if rest:
+            self.pathname = rest
+        if SLASHED_PROTOCOL.get(lower_proto) and self.hostname and not self.pathname:
+            self.pathname = ""
+
+        return self
+
+    def parse_host(self, host: str) -> None:
+        """Split `host` into `self.hostname` and `self.port`.
+
+        A trailing ``:<digits>`` is removed from `host` and the digits are
+        stored as the port; a bare trailing ``:`` is removed without setting
+        a port. The hostname is only stored when it is non-empty.
+        """
+        port_match = PORT_PATTERN.search(host)
+        if port_match:
+            port = port_match.group()
+            if port != ":":
+                self.port = port[1:]
+            host = host[: -len(port)]
+        if host:
+            self.hostname = host
+
+
+def url_parse(url: URL | str, *, slashes_denote_host: bool = False) -> URL:
+    """Parse `url` into a `URL`; a `URL` instance is returned unchanged."""
+    if not isinstance(url, URL):
+        parsed = MutableURL().parse(url, slashes_denote_host)
+        return URL(
+            protocol=parsed.protocol,
+            slashes=parsed.slashes,
+            auth=parsed.auth,
+            port=parsed.port,
+            hostname=parsed.hostname,
+            hash=parsed.hash,
+            search=parsed.search,
+            pathname=parsed.pathname,
+        )
+    return url
diff --git a/src/mdurl/_url.py b/src/mdurl/_url.py
new file mode 100644
index 0000000..f866e7a
--- /dev/null
+++ b/src/mdurl/_url.py
@@ -0,0 +1,14 @@
+from __future__ import annotations
+
+from typing import NamedTuple
+
+
+class URL(NamedTuple):
+    protocol: str | None
+    slashes: bool
+    auth: str | None
+    port: str | None
+    hostname: str | None
+    hash: str | None  # noqa: A003
+    search: str | None
+    pathname: str | None
diff --git a/src/mdurl/py.typed b/src/mdurl/py.typed
new file mode 100644
index 0000000..7632ecf
--- /dev/null
+++ b/src/mdurl/py.typed
@@ -0,0 +1 @@
+# Marker file for PEP 561
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/__init__.py
diff --git a/tests/decode.js b/tests/decode.js
new file mode 100644
index 0000000..c9457ba
--- /dev/null
+++ b/tests/decode.js
@@ -0,0 +1,123 @@
+// TODO: port to Python
+'use strict';
+
+
+var assert = require('assert');
+var decode = require('../decode');
+
+function encodeBinary(str) {
+  var result = '';
+
+  str = str.replace(/\s+/g, '');
+  while (str.length) {
+    result = '%' + ('0' + parseInt(str.slice(-8), 2).toString(16)).slice(-2) + result;
+    str = str.slice(0, -8);
+  }
+
+  return result;
+}
+
+var samples = {
+  '00000000': true,
+  '01010101': true,
+  '01111111': true,
+
+  // invalid as 1st byte
+  '10000000': true,
+  '10111111': true,
+
+  // invalid sequences, 2nd byte should be >= 0x80
+  '11000111 01010101': false,
+  '11100011 01010101': false,
+  '11110001 01010101': false,
+
+  // invalid sequences, 2nd byte should be < 0xc0
+  '11000111 11000000': false,
+  '11100011 11000000': false,
+  '11110001 11000000': false,
+
+  // invalid 3rd byte
+  '11100011 10010101 01010101': false,
+  '11110001 10010101 01010101': false,
+
+  // invalid 4th byte
+  '11110001 10010101 10010101 01010101': false,
+
+  // valid sequences
+  '11000111 10101010': true,
+  '11100011 10101010 10101010': true,
+  '11110001 10101010 10101010 10101010': true,
+
+  // minimal chars with given length
+  '11000010 10000000': true,
+  '11100000 10100000 10000000': true,
+
+  // impossible sequences
+  '11000001 10111111': false,
+  '11100000 10011111 10111111': false,
+  '11000001 10000000': false,
+  '11100000 10010000 10000000': false,
+
+  // maximum chars with given length
+  '11011111 10111111': true,
+  '11101111 10111111 10111111': true,
+
+  '11110000 10010000 10000000 10000000': true,
+  '11110000 10010000 10001111 10001111': true,
+  '11110100 10001111 10110000 10000000': true,
+  '11110100 10001111 10111111 10111111': true,
+
+  // too low
+  '11110000 10001111 10111111 10111111': false,
+
+  // too high
+  '11110100 10010000 10000000 10000000': false,
+  '11110100 10011111 10111111 10111111': false,
+
+  // surrogate range
+  '11101101 10011111 10111111': true,
+  '11101101 10100000 10000000': false,
+  '11101101 10111111 10111111': false,
+  '11101110 10000000 10000000': true
+};
+
+describe('decode', function() {
+  it('should decode %xx', function() {
+    assert.equal(decode('x%20xx%20%2520'), 'x xx %20');
+  });
+
+  it('should not decode invalid sequences', function() {
+    assert.equal(decode('%2g%z1%%'), '%2g%z1%%');
+  });
+
+  it('should not decode reservedSet', function() {
+    assert.equal(decode('%20%25%20', '%'),  ' %25 ');
+    assert.equal(decode('%20%25%20', ' '),  '%20%%20');
+    assert.equal(decode('%20%25%20', ' %'), '%20%25%20');
+  });
+
+  describe('utf8', function() {
+    Object.keys(samples).forEach(function(k) {
+      it(k, function() {
+        var res1, res2,
+            er = null,
+            str = encodeBinary(k);
+
+        try {
+          res1 = decodeURIComponent(str);
+        } catch(e) {
+          er = e;
+        }
+
+        res2 = decode(str);
+
+        if (er) {
+          assert.notEqual(res2.indexOf('\ufffd'), -1);
+        } else {
+          assert.equal(res1, res2);
+          assert.equal(res2.indexOf('\ufffd'), -1);
+        }
+      });
+    });
+  });
+});
diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/fixtures/__init__.py
diff --git a/tests/fixtures/url.py b/tests/fixtures/url.py
new file mode 100644
index 0000000..29431ec
--- /dev/null
+++ b/tests/fixtures/url.py
@@ -0,0 +1,610 @@
+# Copyright Joyent, Inc. and other Node contributors.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to permit
+# persons to whom the Software is furnished to do so, subject to the
+# following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
+# NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+# USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+
+# URLs to parse, and expected data
+# { url : parsed }
+PARSED = {
+    "//some_path": {"pathname": "//some_path"},
+    "HTTP://www.example.com/": {
+        "protocol": "HTTP:",
+        "slashes": True,
+        "hostname": "www.example.com",
+        "pathname": "/",
+    },
+    "HTTP://www.example.com": {
+        "protocol": "HTTP:",
+        "slashes": True,
+        "hostname": "www.example.com",
+        "pathname": "",
+    },
+    "http://www.ExAmPlE.com/": {
+        "protocol": "http:",
+        "slashes": True,
+        "hostname": "www.ExAmPlE.com",
+        "pathname": "/",
+    },
+    "http://user:pw@www.ExAmPlE.com/": {
+        "protocol": "http:",
+        "slashes": True,
+        "auth": "user:pw",
+        "hostname": "www.ExAmPlE.com",
+        "pathname": "/",
+    },
+    "http://USER:PW@www.ExAmPlE.com/": {
+        "protocol": "http:",
+        "slashes": True,
+        "auth": "USER:PW",
+        "hostname": "www.ExAmPlE.com",
+        "pathname": "/",
+    },
+    "http://user@www.example.com/": {
+        "protocol": "http:",
+        "slashes": True,
+        "auth": "user",
+        "hostname": "www.example.com",
+        "pathname": "/",
+    },
+    "http://user%3Apw@www.example.com/": {
+        "protocol": "http:",
+        "slashes": True,
+        "auth": "user%3Apw",
+        "hostname": "www.example.com",
+        "pathname": "/",
+    },
+    "http://x.com/path?that's#all, folks": {
+        "protocol": "http:",
+        "hostname": "x.com",
+        "slashes": True,
+        "search": "?that's",
+        "pathname": "/path",
+        "hash": "#all, folks",
+    },
+    "HTTP://X.COM/Y": {
+        "protocol": "HTTP:",
+        "slashes": True,
+        "hostname": "X.COM",
+        "pathname": "/Y",
+    },
+    # "+" is not an invalid host character
+    # per https://url.spec.whatwg.org/#host-parsing
+    "http://x.y.com+a/b/c": {
+        "protocol": "http:",
+        "slashes": True,
+        "hostname": "x.y.com+a",
+        "pathname": "/b/c",
+    },
+    # an unexpected invalid char in the hostname.
+    "HtTp://x.y.cOm;a/b/c?d=e#f g<h>i": {
+        "protocol": "HtTp:",
+        "slashes": True,
+        "hostname": "x.y.cOm",
+        "pathname": ";a/b/c",
+        "search": "?d=e",
+        "hash": "#f g<h>i",
+    },
+    # make sure that we don't accidentally lowercase the path parts.
+    "HtTp://x.y.cOm;A/b/c?d=e#f g<h>i": {
+        "protocol": "HtTp:",
+        "slashes": True,
+        "hostname": "x.y.cOm",
+        "pathname": ";A/b/c",
+        "search": "?d=e",
+        "hash": "#f g<h>i",
+    },
+    "http://x...y...#p": {
+        "protocol": "http:",
+        "slashes": True,
+        "hostname": "x...y...",
+        "hash": "#p",
+        "pathname": "",
+    },
+    'http://x/p/"quoted"': {
+        "protocol": "http:",
+        "slashes": True,
+        "hostname": "x",
+        "pathname": '/p/"quoted"',
+    },
+    "<http://goo.corn/bread> Is a URL!": {
+        "pathname": "<http://goo.corn/bread> Is a URL!"
+    },
+    "http://www.narwhaljs.org/blog/categories?id=news": {
+        "protocol": "http:",
+        "slashes": True,
+        "hostname": "www.narwhaljs.org",
+        "search": "?id=news",
+        "pathname": "/blog/categories",
+    },
+    "http://mt0.google.com/vt/lyrs=m@114&hl=en&src=api&x=2&y=2&z=3&s=": {
+        "protocol": "http:",
+        "slashes": True,
+        "hostname": "mt0.google.com",
+        "pathname": "/vt/lyrs=m@114&hl=en&src=api&x=2&y=2&z=3&s=",
+    },
+    "http://mt0.google.com/vt/lyrs=m@114???&hl=en&src=api&x=2&y=2&z=3&s=": {
+        "protocol": "http:",
+        "slashes": True,
+        "hostname": "mt0.google.com",
+        "search": "???&hl=en&src=api&x=2&y=2&z=3&s=",
+        "pathname": "/vt/lyrs=m@114",
+    },
+    "http://user:pass@mt0.google.com/vt/lyrs=m@114???&hl=en&src=api&x=2&y=2&z=3&s=": {
+        "protocol": "http:",
+        "slashes": True,
+        "auth": "user:pass",
+        "hostname": "mt0.google.com",
+        "search": "???&hl=en&src=api&x=2&y=2&z=3&s=",
+        "pathname": "/vt/lyrs=m@114",
+    },
+    "file:///etc/passwd": {
+        "slashes": True,
+        "protocol": "file:",
+        "pathname": "/etc/passwd",
+        "hostname": "",
+    },
+    "file://localhost/etc/passwd": {
+        "protocol": "file:",
+        "slashes": True,
+        "pathname": "/etc/passwd",
+        "hostname": "localhost",
+    },
+    "file://foo/etc/passwd": {
+        "protocol": "file:",
+        "slashes": True,
+        "pathname": "/etc/passwd",
+        "hostname": "foo",
+    },
+    "file:///etc/node/": {
+        "slashes": True,
+        "protocol": "file:",
+        "pathname": "/etc/node/",
+        "hostname": "",
+    },
+    "file://localhost/etc/node/": {
+        "protocol": "file:",
+        "slashes": True,
+        "pathname": "/etc/node/",
+        "hostname": "localhost",
+    },
+    "file://foo/etc/node/": {
+        "protocol": "file:",
+        "slashes": True,
+        "pathname": "/etc/node/",
+        "hostname": "foo",
+    },
+    "http:/baz/../foo/bar": {"protocol": "http:", "pathname": "/baz/../foo/bar"},
+    "http://user:pass@example.com:8000/foo/bar?baz=quux#frag": {
+        "protocol": "http:",
+        "slashes": True,
+        "auth": "user:pass",
+        "port": "8000",
+        "hostname": "example.com",
+        "hash": "#frag",
+        "search": "?baz=quux",
+        "pathname": "/foo/bar",
+    },
+    "//user:pass@example.com:8000/foo/bar?baz=quux#frag": {
+        "slashes": True,
+        "auth": "user:pass",
+        "port": "8000",
+        "hostname": "example.com",
+        "hash": "#frag",
+        "search": "?baz=quux",
+        "pathname": "/foo/bar",
+    },
+    "/foo/bar?baz=quux#frag": {
+        "hash": "#frag",
+        "search": "?baz=quux",
+        "pathname": "/foo/bar",
+    },
+    "http:/foo/bar?baz=quux#frag": {
+        "protocol": "http:",
+        "hash": "#frag",
+        "search": "?baz=quux",
+        "pathname": "/foo/bar",
+    },
+    "mailto:foo@bar.com?subject=hello": {
+        "protocol": "mailto:",
+        "auth": "foo",
+        "hostname": "bar.com",
+        "search": "?subject=hello",
+    },
+    "javascript:alert('hello');": {
+        "protocol": "javascript:",
+        "pathname": "alert('hello');",
+    },
+    "xmpp:isaacschlueter@jabber.org": {
+        "protocol": "xmpp:",
+        "auth": "isaacschlueter",
+        "hostname": "jabber.org",
+    },
+    "http://atpass:foo%40bar@127.0.0.1:8080/path?search=foo#bar": {
+        "protocol": "http:",
+        "slashes": True,
+        "auth": "atpass:foo%40bar",
+        "hostname": "127.0.0.1",
+        "port": "8080",
+        "pathname": "/path",
+        "search": "?search=foo",
+        "hash": "#bar",
+    },
+    "svn+ssh://foo/bar": {
+        "hostname": "foo",
+        "protocol": "svn+ssh:",
+        "pathname": "/bar",
+        "slashes": True,
+    },
+    "dash-test://foo/bar": {
+        "hostname": "foo",
+        "protocol": "dash-test:",
+        "pathname": "/bar",
+        "slashes": True,
+    },
+    "dash-test:foo/bar": {
+        "hostname": "foo",
+        "protocol": "dash-test:",
+        "pathname": "/bar",
+    },
+    "dot.test://foo/bar": {
+        "hostname": "foo",
+        "protocol": "dot.test:",
+        "pathname": "/bar",
+        "slashes": True,
+    },
+    "dot.test:foo/bar": {
+        "hostname": "foo",
+        "protocol": "dot.test:",
+        "pathname": "/bar",
+    },
+    # IDNA tests
+    "http://www.日本語.com/": {
+        "protocol": "http:",
+        "slashes": True,
+        "hostname": "www.日本語.com",
+        "pathname": "/",
+    },
+    "http://example.Bücher.com/": {
+        "protocol": "http:",
+        "slashes": True,
+        "hostname": "example.Bücher.com",
+        "pathname": "/",
+    },
+    "http://www.Äffchen.com/": {
+        "protocol": "http:",
+        "slashes": True,
+        "hostname": "www.Äffchen.com",
+        "pathname": "/",
+    },
+    "http://www.Äffchen.cOm;A/b/c?d=e#f g<h>i": {
+        "protocol": "http:",
+        "slashes": True,
+        "hostname": "www.Äffchen.cOm",
+        "pathname": ";A/b/c",
+        "search": "?d=e",
+        "hash": "#f g<h>i",
+    },
+    "http://SÉLIER.COM/": {
+        "protocol": "http:",
+        "slashes": True,
+        "hostname": "SÉLIER.COM",
+        "pathname": "/",
+    },
+    "http://ليهمابتكلموشعربي؟.ي؟/": {
+        "protocol": "http:",
+        "slashes": True,
+        "hostname": "ليهمابتكلموشعربي؟.ي؟",
+        "pathname": "/",
+    },
+    "http://➡.ws/➡": {
+        "protocol": "http:",
+        "slashes": True,
+        "hostname": "➡.ws",
+        "pathname": "/➡",
+    },
+    "http://bucket_name.s3.amazonaws.com/image.jpg": {
+        "protocol": "http:",
+        "slashes": True,
+        "hostname": "bucket_name.s3.amazonaws.com",
+        "pathname": "/image.jpg",
+    },
+    "git+http://github.com/joyent/node.git": {
+        "protocol": "git+http:",
+        "slashes": True,
+        "hostname": "github.com",
+        "pathname": "/joyent/node.git",
+    },
+    # if local1@domain1 is used as a relative URL it may
+    # be parsed into auth@hostname, but there is no way to
+    # make that work in url.parse; this test makes it explicit
+    "local1@domain1": {"pathname": "local1@domain1"},
+    # While this may seem counter-intuitive, a browser will parse
+    # <a href='www.google.com'> as a path.
+    "www.example.com": {"pathname": "www.example.com"},
+    # ipv6 support
+    "[fe80::1]": {"pathname": "[fe80::1]"},
+    "coap://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]": {
+        "protocol": "coap:",
+        "slashes": True,
+        "hostname": "FEDC:BA98:7654:3210:FEDC:BA98:7654:3210",
+    },
+    "coap://[1080:0:0:0:8:800:200C:417A]:61616/": {
+        "protocol": "coap:",
+        "slashes": True,
+        "port": "61616",
+        "hostname": "1080:0:0:0:8:800:200C:417A",
+        "pathname": "/",
+    },
+    "http://user:password@[3ffe:2a00:100:7031::1]:8080": {
+        "protocol": "http:",
+        "slashes": True,
+        "auth": "user:password",
+        "port": "8080",
+        "hostname": "3ffe:2a00:100:7031::1",
+        "pathname": "",
+    },
+    "coap://u:p@[::192.9.5.5]:61616/.well-known/r?n=Temperature": {
+        "protocol": "coap:",
+        "slashes": True,
+        "auth": "u:p",
+        "port": "61616",
+        "hostname": "::192.9.5.5",
+        "search": "?n=Temperature",
+        "pathname": "/.well-known/r",
+    },
+    # empty port
+    "http://example.com:": {
+        "protocol": "http:",
+        "slashes": True,
+        "hostname": "example.com",
+        "pathname": ":",
+    },
+    "http://example.com:/a/b.html": {
+        "protocol": "http:",
+        "slashes": True,
+        "hostname": "example.com",
+        "pathname": ":/a/b.html",
+    },
+    "http://example.com:?a=b": {
+        "protocol": "http:",
+        "slashes": True,
+        "hostname": "example.com",
+        "search": "?a=b",
+        "pathname": ":",
+    },
+    "http://example.com:#abc": {
+        "protocol": "http:",
+        "slashes": True,
+        "hostname": "example.com",
+        "hash": "#abc",
+        "pathname": ":",
+    },
+    "http://[fe80::1]:/a/b?a=b#abc": {
+        "protocol": "http:",
+        "slashes": True,
+        "hostname": "fe80::1",
+        "search": "?a=b",
+        "hash": "#abc",
+        "pathname": ":/a/b",
+    },
+    "http://-lovemonsterz.tumblr.com/rss": {
+        "protocol": "http:",
+        "slashes": True,
+        "hostname": "-lovemonsterz.tumblr.com",
+        "pathname": "/rss",
+    },
+    "http://-lovemonsterz.tumblr.com:80/rss": {
+        "protocol": "http:",
+        "slashes": True,
+        "port": "80",
+        "hostname": "-lovemonsterz.tumblr.com",
+        "pathname": "/rss",
+    },
+    "http://user:pass@-lovemonsterz.tumblr.com/rss": {
+        "protocol": "http:",
+        "slashes": True,
+        "auth": "user:pass",
+        "hostname": "-lovemonsterz.tumblr.com",
+        "pathname": "/rss",
+    },
+    "http://user:pass@-lovemonsterz.tumblr.com:80/rss": {
+        "protocol": "http:",
+        "slashes": True,
+        "auth": "user:pass",
+        "port": "80",
+        "hostname": "-lovemonsterz.tumblr.com",
+        "pathname": "/rss",
+    },
+    "http://_jabber._tcp.google.com/test": {
+        "protocol": "http:",
+        "slashes": True,
+        "hostname": "_jabber._tcp.google.com",
+        "pathname": "/test",
+    },
+    "http://user:pass@_jabber._tcp.google.com/test": {
+        "protocol": "http:",
+        "slashes": True,
+        "auth": "user:pass",
+        "hostname": "_jabber._tcp.google.com",
+        "pathname": "/test",
+    },
+    "http://_jabber._tcp.google.com:80/test": {
+        "protocol": "http:",
+        "slashes": True,
+        "port": "80",
+        "hostname": "_jabber._tcp.google.com",
+        "pathname": "/test",
+    },
+    "http://user:pass@_jabber._tcp.google.com:80/test": {
+        "protocol": "http:",
+        "slashes": True,
+        "auth": "user:pass",
+        "port": "80",
+        "hostname": "_jabber._tcp.google.com",
+        "pathname": "/test",
+    },
+    "http://x:1/' <>\"`/{}|\\^~`/": {
+        "protocol": "http:",
+        "slashes": True,
+        "port": "1",
+        "hostname": "x",
+        "pathname": "/' <>\"`/{}|\\^~`/",
+    },
+    "http://a@b@c/": {
+        "protocol": "http:",
+        "slashes": True,
+        "auth": "a@b",
+        "hostname": "c",
+        "pathname": "/",
+    },
+    "http://a@b?@c": {
+        "protocol": "http:",
+        "slashes": True,
+        "auth": "a",
+        "hostname": "b",
+        "pathname": "",
+        "search": "?@c",
+    },
+    "http://a\r\" \t\n<'b:b@c\r\nd/e?f": {
+        "protocol": "http:",
+        "slashes": True,
+        "auth": "a\r\" \t\n<'b:b",
+        "hostname": "c",
+        "search": "?f",
+        "pathname": "\r\nd/e",
+    },
+    # git urls used by npm
+    "git+ssh://git@github.com:npm/npm": {
+        "protocol": "git+ssh:",
+        "slashes": True,
+        "auth": "git",
+        "hostname": "github.com",
+        "pathname": ":npm/npm",
+    },
+    "http://example.com?foo=bar#frag": {
+        "protocol": "http:",
+        "slashes": True,
+        "hostname": "example.com",
+        "hash": "#frag",
+        "search": "?foo=bar",
+        "pathname": "",
+    },
+    "http://example.com?foo=@bar#frag": {
+        "protocol": "http:",
+        "slashes": True,
+        "hostname": "example.com",
+        "hash": "#frag",
+        "search": "?foo=@bar",
+        "pathname": "",
+    },
+    "http://example.com?foo=/bar/#frag": {
+        "protocol": "http:",
+        "slashes": True,
+        "hostname": "example.com",
+        "hash": "#frag",
+        "search": "?foo=/bar/",
+        "pathname": "",
+    },
+    "http://example.com?foo=?bar/#frag": {
+        "protocol": "http:",
+        "slashes": True,
+        "hostname": "example.com",
+        "hash": "#frag",
+        "search": "?foo=?bar/",
+        "pathname": "",
+    },
+    "http://example.com#frag=?bar/#frag": {
+        "protocol": "http:",
+        "slashes": True,
+        "hostname": "example.com",
+        "hash": "#frag=?bar/#frag",
+        "pathname": "",
+    },
+    'http://google.com" onload="alert(42)/': {
+        "hostname": "google.com",
+        "protocol": "http:",
+        "slashes": True,
+        "pathname": '" onload="alert(42)/',
+    },
+    "http://a.com/a/b/c?s#h": {
+        "protocol": "http:",
+        "slashes": True,
+        "pathname": "/a/b/c",
+        "hostname": "a.com",
+        "hash": "#h",
+        "search": "?s",
+    },
+    "http://atpass:foo%40bar@127.0.0.1/": {
+        "auth": "atpass:foo%40bar",
+        "slashes": True,
+        "hostname": "127.0.0.1",
+        "protocol": "http:",
+        "pathname": "/",
+    },
+    "http://atslash%2F%40:%2F%40@foo/": {
+        "auth": "atslash%2F%40:%2F%40",
+        "hostname": "foo",
+        "protocol": "http:",
+        "pathname": "/",
+        "slashes": True,
+    },
+    # ipv6 support
+    "coap:u:p@[::1]:61616/.well-known/r?n=Temperature": {
+        "protocol": "coap:",
+        "auth": "u:p",
+        "hostname": "::1",
+        "port": "61616",
+        "pathname": "/.well-known/r",
+        "search": "?n=Temperature",
+    },
+    "coap:[fedc:ba98:7654:3210:fedc:ba98:7654:3210]:61616/s/stopButton": {
+        "hostname": "fedc:ba98:7654:3210:fedc:ba98:7654:3210",
+        "port": "61616",
+        "protocol": "coap:",
+        "pathname": "/s/stopButton",
+    },
+    # encode context-specific delimiters in path and query, but do not touch
+    # other non-delimiter chars like `%`.
+    # <https://github.com/joyent/node/issues/4082>
+    # `?` and `#` in path and search
+    "http://ex.com/foo%3F100%m%23r?abc=the%231?&foo=bar#frag": {
+        "protocol": "http:",
+        "hostname": "ex.com",
+        "hash": "#frag",
+        "search": "?abc=the%231?&foo=bar",
+        "pathname": "/foo%3F100%m%23r",
+        "slashes": True,
+    },
+    # `?` and `#` in search only
+    "http://ex.com/fooA100%mBr?abc=the%231?&foo=bar#frag": {
+        "protocol": "http:",
+        "hostname": "ex.com",
+        "hash": "#frag",
+        "search": "?abc=the%231?&foo=bar",
+        "pathname": "/fooA100%mBr",
+        "slashes": True,
+    },
+    # protocol and slashes only: the hostname is empty and there is no pathname
+    "http://": {
+        "protocol": "http:",
+        "hostname": "",
+        "slashes": True,
+    },
+}
diff --git a/tests/requirements.txt b/tests/requirements.txt
new file mode 100644
index 0000000..6f05550
--- /dev/null
+++ b/tests/requirements.txt
@@ -0,0 +1,3 @@
+pytest
+pytest-randomly
+pytest-cov
diff --git a/tests/test_decode.py b/tests/test_decode.py
new file mode 100644
index 0000000..bc58ce0
--- /dev/null
+++ b/tests/test_decode.py
@@ -0,0 +1,5 @@
+from mdurl import decode
+
+
+def test_decode_multi_byte():
+    assert decode("https://host.invalid/%F0%9F%91%A9") == "https://host.invalid/👩"
diff --git a/tests/test_encode.py b/tests/test_encode.py
new file mode 100644
index 0000000..7414bac
--- /dev/null
+++ b/tests/test_encode.py
@@ -0,0 +1,50 @@
+import pytest
+
+from mdurl import encode
+
+
+@pytest.mark.parametrize(
+    "input_,expected",
+    [
+        pytest.param("%%%", "%25%25%25", id="should encode percent"),
+        pytest.param("\r\n", "%0D%0A", id="should encode control chars"),
+        pytest.param("?#", "?#", id="should not encode parts of an url"),
+        pytest.param("[]^", "%5B%5D%5E", id="should not encode []^ - commonmark tests"),
+        pytest.param("my url", "my%20url", id="should encode spaces"),
+        pytest.param("φου", "%CF%86%CE%BF%CF%85", id="should encode unicode"),
+        pytest.param(
+            "%FG", "%25FG", id="should encode % if it doesn't start a valid escape seq"
+        ),
+        pytest.param(
+            "%00%FF", "%00%FF", id="should preserve non-utf8 encoded characters"
+        ),
+        pytest.param(
+            "\x00\x7F\x80",
+            "%00%7F%C2%80",
+            id="should encode characters on the cache borders",
+        ),  # protects against off-by-one in cache implementation
+    ],
+)
+def test_encode(input_, expected):
+    assert encode(input_) == expected
+
+
+def test_encode_arguments():
+    assert encode("!@#$", exclude="@$") == "%21@%23$"
+    assert encode("%20%2G", keep_escaped=True) == "%20%252G"
+    assert encode("%20%2G", keep_escaped=False) == "%2520%252G"
+    assert encode("!@%25", exclude="@", keep_escaped=False) == "%21@%2525"
+
+
+def test_encode_surrogates():
+    # bad surrogates (high)
+    assert encode("\uD800foo") == "%EF%BF%BDfoo"
+    assert encode("foo\uD800") == "foo%EF%BF%BD"
+
+    # bad surrogates (low)
+    assert encode("\uDD00foo") == "%EF%BF%BDfoo"
+    assert encode("foo\uDD00") == "foo%EF%BF%BD"
+
+    # valid one
+    # (the codepoint is "D800 DD00" in UTF-16BE)
+    assert encode("𐄀") == "%F0%90%84%80"
diff --git a/tests/test_format.py b/tests/test_format.py
new file mode 100644
index 0000000..0cf1219
--- /dev/null
+++ b/tests/test_format.py
@@ -0,0 +1,10 @@
+import pytest
+
+from mdurl import format, parse
+from tests.fixtures.url import PARSED as FIXTURES
+
+
+@pytest.mark.parametrize("url", FIXTURES.keys())
+def test_format(url):
+    parsed = parse(url)
+    assert format(parsed) == url
diff --git a/tests/test_parse.py b/tests/test_parse.py
new file mode 100644
index 0000000..aa4ae44
--- /dev/null
+++ b/tests/test_parse.py
@@ -0,0 +1,26 @@
+import pytest
+
+from mdurl import parse
+from tests.fixtures.url import PARSED as FIXTURES
+
+
+def is_url_and_dict_equal(url, url_dict):
+    return (
+        url.protocol == url_dict.get("protocol")
+        and url.slashes == url_dict.get("slashes", False)
+        and url.auth == url_dict.get("auth")
+        and url.port == url_dict.get("port")
+        and url.hostname == url_dict.get("hostname")
+        and url.hash == url_dict.get("hash")
+        and url.search == url_dict.get("search")
+        and url.pathname == url_dict.get("pathname")
+    )
+
+
+@pytest.mark.parametrize(
+    "url,expected_dict",
+    FIXTURES.items(),
+)
+def test_parse(url, expected_dict):
+    parsed = parse(url)
+    assert is_url_and_dict_equal(parsed, expected_dict)
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-29 04:25:33 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-29 04:25:33 +0000
commit	3c33e01482cb0481e2472ee49fa55b0d7f818c26 (patch)
tree	e1bc734976912ad573bb83e8c338bc3285afe50e
parent	Initial commit. (diff)
download	mdurl-3c33e01482cb0481e2472ee49fa55b0d7f818c26.tar.xz mdurl-3c33e01482cb0481e2472ee49fa55b0d7f818c26.zip