# -*- coding: utf-8 -*-
"""
This module/script is for updating port_for._ranges with recent information
from IANA and Wikipedia.
"""
import sys
import os
import re
import datetime
from urllib.request import Request, urlopen
from xml.etree import ElementTree
from typing import Set, Iterator, Iterable, Tuple

from port_for.utils import to_ranges, ranges_to_set

# Put the parent of this file's directory at the front of sys.path.
# NOTE(review): this runs after the port_for import above, so it cannot
# influence how that import is resolved -- confirm whether that is intended.
name = os.path.abspath(
    os.path.normpath(os.path.join(os.path.dirname(__file__), ".."))
)
sys.path.insert(0, name)

IANA_DOWNLOAD_URL = (
    "https://www.iana.org/assignments"
    "/service-names-port-numbers/service-names-port-numbers.xml"
)
IANA_NS = "http://www.iana.org/assignments"
WIKIPEDIA_PAGE = "http://en.wikipedia.org/wiki/List_of_TCP_and_UDP_port_numbers"


def _write_unassigned_ranges(out_filename: str) -> None:
    """
    Downloads ports data from IANA & Wikipedia and converts
    it to a python module. This function is used to generate _ranges.py.

    The data is downloaded and converted to ranges *before* ``out_filename``
    is opened for writing, so a failed download or parse does not leave a
    truncated, half-written output file behind.

    :param out_filename: path of the python module to (over)write.
    """
    # Materialize everything up front: opening the file in "wt" mode
    # truncates it immediately, so nothing that can fail should run after.
    unassigned_ranges = list(to_ranges(sorted(_unassigned_ports())))

    with open(out_filename, "wt") as f:
        f.write(
            "# auto-generated by port_for._download_ranges (%s)\n"
            % datetime.date.today()
        )
        f.write("UNASSIGNED_RANGES = [\n")
        for port_range in unassigned_ranges:
            f.write(" (%d, %d),\n" % port_range)
        f.write("]\n")


def _unassigned_ports() -> Set[int]:
    """
    Return a set of all unassigned ports (according to IANA and Wikipedia).

    A port is kept when IANA lists it as unassigned and it does not fall
    into any range extracted from the Wikipedia page.
    """
    free_ports = ranges_to_set(_parse_ranges(_iana_unassigned_port_ranges()))
    known_ports = ranges_to_set(_wikipedia_known_port_ranges())
    return free_ports.difference(known_ports)


def _wikipedia_known_port_ranges() -> Iterator[Tuple[int, int]]:
    """
    Returns used port ranges according to Wikipedia page.
    This page contains unofficial well-known ports.

    Each yielded item is a ``(low, high)`` tuple; a number that is not
    followed by a second number is yielded as ``(n, n)``.
    """
    req = Request(WIKIPEDIA_PAGE, headers={"User-Agent": "Magic Browser"})
    # Close the HTTP response deterministically once the body is read.
    with urlopen(req) as response:
        page = response.read().decode("utf8")

    # just find all numbers in table cells
    # Groups: p[1] is the first number, p[3] the optional second number
    # separated from it by a single non-word character.
    ports = re.findall(r"((\d+)(\W(\d+))?)", page, re.U)
    return ((int(p[1]), int(p[3] if p[3] else p[1])) for p in ports)


def _iana_unassigned_port_ranges() -> Iterator[str]:
    """
    Returns unassigned port ranges according to IANA.

    Yields the text of the ``number`` element of every ``record`` whose
    ``description`` is exactly ``"Unassigned"``.
    """
    # Close the HTTP response deterministically once the body is read.
    with urlopen(IANA_DOWNLOAD_URL) as response:
        page = response.read()

    xml = ElementTree.fromstring(page)
    records = xml.findall("{%s}record" % IANA_NS)
    for record in records:
        description_el = record.find("{%s}description" % IANA_NS)
        assert description_el is not None
        description = description_el.text
        if description == "Unassigned":
            number_el = record.find("{%s}number" % IANA_NS)
            assert number_el is not None
            numbers = number_el.text
            assert numbers is not None
            yield numbers


def _parse_ranges(ranges: Iterable[str]) -> Iterator[Tuple[int, int]]:
    """
    Converts a list of string ranges to a list of [low, high] tuples.

    ``"low-high"`` becomes ``(low, high)``; a string without a dash is
    treated as a single port and becomes ``(port, port)``.
    """
    for txt in ranges:
        if "-" in txt:
            low, high = txt.split("-")
        else:
            low, high = txt, txt
        yield int(low), int(high)


if __name__ == "__main__":
    _write_unassigned_ranges("_ranges.py")