summaryrefslogtreecommitdiffstats
path: root/port_for/_download_ranges.py
blob: 5e6a8fb41ca180204db209628646640ec8a11832 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# -*- coding: utf-8 -*-
"""
This module/script is for updating port_for._ranges with recent information
from IANA and Wikipedia.
"""
import sys
import os
import re
import datetime
from urllib.request import Request, urlopen
from xml.etree import ElementTree
from typing import Set, Iterator, Iterable, Tuple

from port_for.utils import to_ranges, ranges_to_set

# Put the parent of this file's directory at the front of sys.path.
# NOTE(review): `from port_for.utils import ...` above executes before this
# insert, so the path tweak cannot influence that import — confirm the
# intended ordering.
name = os.path.abspath(
    os.path.normpath(os.path.join(os.path.dirname(__file__), ".."))
)
sys.path.insert(0, name)

# IANA service-name / port-number registry, downloaded as XML.
IANA_DOWNLOAD_URL = (
    "https://www.iana.org/assignments"
    "/service-names-port-numbers/service-names-port-numbers.xml"
)
# XML namespace used to qualify element names when searching the registry
# document. It is an identifier, not a URL that gets fetched, so it must
# stay exactly as the document declares it ("http://").
IANA_NS = "http://www.iana.org/assignments"
# Wikipedia page listing (unofficial) well-known ports. Fetched over HTTPS,
# consistent with the IANA download above, instead of plain HTTP.
WIKIPEDIA_PAGE = (
    "https://en.wikipedia.org/wiki/List_of_TCP_and_UDP_port_numbers"
)


def _write_unassigned_ranges(out_filename: str) -> None:
    """
    Downloads ports data from IANA & Wikipedia and converts
    it to a python module. This function is used to generate _ranges.py.

    The port data is downloaded and collapsed into ranges *before*
    ``out_filename`` is opened, so a download or parsing failure leaves an
    existing file untouched instead of truncated or half-written.

    ``out_filename`` is the path of the python module to (over)write.
    """
    # Force full evaluation up front: nothing below may touch the network
    # once the output file has been opened for writing.
    port_ranges = list(to_ranges(sorted(_unassigned_ports())))

    with open(out_filename, "wt", encoding="utf-8") as f:
        f.write(
            "# auto-generated by port_for._download_ranges (%s)\n"
            % datetime.date.today()
        )
        f.write("UNASSIGNED_RANGES = [\n")
        # Each item is formatted as a (low, high) pair.
        for port_range in port_ranges:
            f.write("    (%d, %d),\n" % port_range)
        f.write("]\n")


def _unassigned_ports() -> Set[int]:
    """
    Return the ports IANA reports as unassigned, minus every port that
    the Wikipedia page lists as known.
    """
    # IANA data is fetched and expanded first, then the Wikipedia data.
    iana_ranges = _parse_ranges(_iana_unassigned_port_ranges())
    unassigned = ranges_to_set(iana_ranges)
    taken = ranges_to_set(_wikipedia_known_port_ranges())
    return unassigned.difference(taken)


def _wikipedia_known_port_ranges() -> Iterator[Tuple[int, int]]:
    """
    Returns used port ranges according to Wikipedia page.
    This page contains unofficial well-known ports.

    Yields ``(low, high)`` integer pairs; a table cell holding a single
    number ``n`` is reported as ``(n, n)``.
    """
    req = Request(WIKIPEDIA_PAGE, headers={"User-Agent": "Magic Browser"})
    # Close the HTTP response deterministically instead of leaking it.
    with urlopen(req) as response:
        page = response.read().decode("utf8")

    # Just find all numbers in table cells. Each match is a 4-tuple of
    # (whole cell, first number, separator + second number, second number);
    # the last two are empty strings when the cell holds a single number.
    cells = re.findall(r"<td>((\d+)(\W(\d+))?)</td>", page, re.U)
    return ((int(low), int(high or low)) for _, low, _, high in cells)


def _iana_unassigned_port_ranges() -> Iterator[str]:
    """
    Returns unassigned port ranges according to IANA.

    Downloads the IANA registry XML and yields the text of the ``number``
    element of every ``record`` whose ``description`` text is exactly
    "Unassigned". The yielded strings are passed on unparsed.
    """
    # Close the HTTP response deterministically instead of leaking it.
    with urlopen(IANA_DOWNLOAD_URL) as response:
        page = response.read()

    # Namespace-qualified tag names, built once rather than per record.
    record_tag = "{%s}record" % IANA_NS
    description_tag = "{%s}description" % IANA_NS
    number_tag = "{%s}number" % IANA_NS

    xml = ElementTree.fromstring(page)
    for record in xml.findall(record_tag):
        description_el = record.find(description_tag)
        # The asserts narrow Optional values and fail loudly on an
        # unexpected document shape (note: they are skipped under -O).
        assert description_el is not None
        if description_el.text != "Unassigned":
            continue
        number_el = record.find(number_tag)
        assert number_el is not None
        numbers = number_el.text
        assert numbers is not None
        yield numbers


def _parse_ranges(ranges: Iterable[str]) -> Iterator[Tuple[int, int]]:
    """Converts a list of string ranges to a list of [low, high] tuples."""
    for txt in ranges:
        if "-" in txt:
            low, high = txt.split("-")
        else:
            low, high = txt, txt
        yield int(low), int(high)


# Script entry point: regenerate the ranges module.
# The relative filename means the file is written to the current working
# directory, not necessarily next to this module.
if __name__ == "__main__":
    _write_unassigned_ranges("_ranges.py")