diff options
Diffstat (limited to '')
-rw-r--r-- | port_for/_download_ranges.py | 95 |
1 files changed, 95 insertions, 0 deletions
# -*- coding: utf-8 -*-
"""
This module/script is for updating port_for._ranges with recent information
from IANA and Wikipedia.
"""
import sys
import os
import re
import datetime
from urllib.request import Request, urlopen
from xml.etree import ElementTree
from typing import Set, Iterator, Iterable, Tuple

# Put the directory that contains the ``port_for`` package on sys.path
# *before* importing from it; doing this after the import (as before) could
# not help the import when this file is executed directly as a script.
name = os.path.abspath(
    os.path.normpath(os.path.join(os.path.dirname(__file__), ".."))
)
sys.path.insert(0, name)

from port_for.utils import to_ranges, ranges_to_set  # noqa: E402

IANA_DOWNLOAD_URL = (
    "https://www.iana.org/assignments"
    "/service-names-port-numbers/service-names-port-numbers.xml"
)
IANA_NS = "http://www.iana.org/assignments"
WIKIPEDIA_PAGE = "http://en.wikipedia.org/wiki/List_of_TCP_and_UDP_port_numbers"


def _write_unassigned_ranges(out_filename: str) -> None:
    """
    Downloads ports data from IANA & Wikipedia and converts
    it to a python module. This function is used to generate _ranges.py.

    The downloads and the range computation are finished before
    ``out_filename`` is opened, so a failed download does not leave a
    truncated or half-written module behind.

    :param out_filename: path of the python module to (over)write.
    """
    port_ranges = list(to_ranges(sorted(_unassigned_ports())))

    with open(out_filename, "wt") as f:
        f.write(
            "# auto-generated by port_for._download_ranges (%s)\n"
            % datetime.date.today()
        )
        f.write("UNASSIGNED_RANGES = [\n")
        for port_range in port_ranges:
            f.write(" (%d, %d),\n" % port_range)
        f.write("]\n")


def _unassigned_ports() -> Set[int]:
    """Return a set of all unassigned ports (according to IANA and Wikipedia)"""
    free_ports = ranges_to_set(_parse_ranges(_iana_unassigned_port_ranges()))
    known_ports = ranges_to_set(_wikipedia_known_port_ranges())
    # A port counts as unassigned only if IANA lists it as unassigned and
    # the Wikipedia page does not mention it.
    return free_ports.difference(known_ports)


def _wikipedia_known_port_ranges() -> Iterator[Tuple[int, int]]:
    """
    Returns used port ranges according to Wikipedia page.
    This page contains unofficial well-known ports.

    Single ports are returned as ``(port, port)``.
    """
    req = Request(WIKIPEDIA_PAGE, headers={"User-Agent": "Magic Browser"})
    # Close the HTTP response as soon as the body has been read.
    with urlopen(req) as response:
        page = response.read().decode("utf8")

    # just find all numbers in table cells
    # groups: 0 = whole cell, 1 = first number, 2 = separator + second
    # number (optional), 3 = second number (empty when absent)
    ports = re.findall(r"<td>((\d+)(\W(\d+))?)</td>", page, re.U)
    return ((int(p[1]), int(p[3] if p[3] else p[1])) for p in ports)


def _iana_unassigned_port_ranges() -> Iterator[str]:
    """
    Returns unassigned port ranges according to IANA.

    Yields the text of the ``number`` element of every record whose
    ``description`` is exactly "Unassigned".
    """
    # Close the HTTP response as soon as the body has been read.
    with urlopen(IANA_DOWNLOAD_URL) as response:
        page = response.read()

    xml = ElementTree.fromstring(page)
    records = xml.findall("{%s}record" % IANA_NS)
    for record in records:
        description_el = record.find("{%s}description" % IANA_NS)
        assert description_el is not None
        description = description_el.text
        if description == "Unassigned":
            number_el = record.find("{%s}number" % IANA_NS)
            assert number_el is not None
            numbers = number_el.text
            assert numbers is not None
            yield numbers


def _parse_ranges(ranges: Iterable[str]) -> Iterator[Tuple[int, int]]:
    """
    Converts a list of string ranges to a list of [low, high] tuples.

    ``"low-high"`` becomes ``(low, high)``; a string without a dash is a
    single port and becomes ``(port, port)``.

    :raises ValueError: if a bound is not a valid integer.
    """
    for txt in ranges:
        low, dash, high = txt.partition("-")
        if not dash:
            # single port: both bounds are the same number
            high = low
        yield int(low), int(high)


if __name__ == "__main__":
    _write_unassigned_ranges("_ranges.py")