summary refs log tree commit diff stats
path: root/port_for/_download_ranges.py
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- port_for/_download_ranges.py 95
1 files changed, 95 insertions, 0 deletions
diff --git a/port_for/_download_ranges.py b/port_for/_download_ranges.py
new file mode 100644
index 0000000..5e6a8fb
--- /dev/null
+++ b/port_for/_download_ranges.py
@@ -0,0 +1,95 @@
+# -*- coding: utf-8 -*-
+"""
+This module/script is for updating port_for._ranges with recent information
+from IANA and Wikipedia.
+"""
+import sys
+import os
+import re
+import datetime
+from urllib.request import Request, urlopen
+from xml.etree import ElementTree
+from typing import Set, Iterator, Iterable, Tuple
+
+# Put the directory that contains the port_for package (the parent of this
+# file's directory) at the front of sys.path *before* importing port_for.
+# Doing it after the import would have no effect on how that import is
+# resolved when this file is executed directly as a script.
+name = os.path.abspath(
+    os.path.normpath(os.path.join(os.path.dirname(__file__), ".."))
+)
+sys.path.insert(0, name)
+
+from port_for.utils import to_ranges, ranges_to_set  # noqa: E402
+
+# Location of the IANA service-name/port-number registry in XML form.
+IANA_DOWNLOAD_URL = (
+    "https://www.iana.org/assignments"
+    "/service-names-port-numbers/service-names-port-numbers.xml"
+)
+# XML namespace used to qualify tag names when searching the IANA document.
+# This is an identifier, not an address that gets fetched, so it must stay
+# exactly as the registry declares it ("http://", not "https://").
+IANA_NS = "http://www.iana.org/assignments"
+# Fetched over HTTPS so the scraped page cannot be altered in transit.
+WIKIPEDIA_PAGE = (
+    "https://en.wikipedia.org/wiki/List_of_TCP_and_UDP_port_numbers"
+)
+
+
+def _write_unassigned_ranges(out_filename: str) -> None:
+    """
+    Downloads ports data from IANA & Wikipedia and converts
+    it to a python module. This function is used to generate _ranges.py.
+
+    The module written to ``out_filename`` consists of a dated header
+    comment followed by an ``UNASSIGNED_RANGES`` list of (low, high) pairs.
+    """
+    # Gather everything before touching the output file: opening it in
+    # write mode truncates it, so a download or parsing failure while the
+    # file is open would leave an existing module empty or half-written.
+    unassigned = list(to_ranges(sorted(_unassigned_ports())))
+
+    with open(out_filename, "wt") as f:
+        f.write(
+            "# auto-generated by port_for._download_ranges (%s)\n"
+            % datetime.date.today()
+        )
+        f.write("UNASSIGNED_RANGES = [\n")
+        for port_range in unassigned:
+            f.write(" (%d, %d),\n" % port_range)
+        f.write("]\n")
+
+
+def _unassigned_ports() -> Set[int]:
+    """
+    Return the ports IANA lists as unassigned, minus every port that the
+    Wikipedia page reports as being in use.
+    """
+    iana_ranges = _parse_ranges(_iana_unassigned_port_ranges())
+    unassigned = ranges_to_set(iana_ranges)
+    unofficially_used = ranges_to_set(_wikipedia_known_port_ranges())
+    return unassigned.difference(unofficially_used)
+
+
+def _wikipedia_known_port_ranges() -> Iterator[Tuple[int, int]]:
+    """
+    Returns used port ranges according to Wikipedia page.
+    This page contains unofficial well-known ports.
+
+    The page is downloaded when the function is called; the returned
+    iterator then lazily yields ``(low, high)`` tuples. A table cell that
+    holds a single number yields ``low == high``.
+    """
+    req = Request(WIKIPEDIA_PAGE, headers={"User-Agent": "Magic Browser"})
+    # Release the HTTP response as soon as the body has been read.
+    with urlopen(req) as response:
+        page = response.read().decode("utf8")
+
+    # just find all numbers in table cells
+    cells = re.findall(r"<td>((\d+)(\W(\d+))?)</td>", page, re.U)
+    # Each match is (whole cell, first number, separator + second number,
+    # second number); the last group is empty for single-number cells.
+    return ((int(low), int(high or low)) for _, low, _, high in cells)
+
+
+def _iana_unassigned_port_ranges() -> Iterator[str]:
+    """
+    Returns unassigned port ranges according to IANA.
+
+    Yields the text of the ``number`` element of every record whose
+    description is exactly "Unassigned". This is a generator, so the
+    registry is only downloaded once iteration starts.
+
+    Raises ValueError if a record has no description element, or if an
+    unassigned record has no number.
+    """
+    # Release the HTTP response as soon as the body has been read.
+    with urlopen(IANA_DOWNLOAD_URL) as response:
+        page = response.read()
+    xml = ElementTree.fromstring(page)
+
+    # Namespace-qualified tag names, built once instead of once per record.
+    record_tag = "{%s}record" % IANA_NS
+    description_tag = "{%s}description" % IANA_NS
+    number_tag = "{%s}number" % IANA_NS
+
+    for record in xml.findall(record_tag):
+        description_el = record.find(description_tag)
+        # Explicit checks rather than assert: assert statements are removed
+        # when Python runs with -O, and this validates downloaded data.
+        if description_el is None:
+            raise ValueError("IANA record without a description element")
+        if description_el.text != "Unassigned":
+            continue
+        number_el = record.find(number_tag)
+        if number_el is None or number_el.text is None:
+            raise ValueError("unassigned IANA record without a number")
+        yield number_el.text
+
+
+def _parse_ranges(ranges: Iterable[str]) -> Iterator[Tuple[int, int]]:
+    """
+    Turns port range strings into (low, high) integer tuples.
+
+    "A-B" becomes (A, B); a string without a dash is a single port N and
+    becomes (N, N). Tuples are yielded lazily, in input order.
+    """
+    for item in ranges:
+        # A lone port is treated as a range whose two ends coincide.
+        bounds = item.split("-") if "-" in item else [item, item]
+        first, last = bounds
+        yield int(first), int(last)
+
+
+if __name__ == "__main__":
+    # An optional first command-line argument selects the output path;
+    # without it the module is written to "_ranges.py", resolved against
+    # the current working directory.
+    out_filename = sys.argv[1] if len(sys.argv) > 1 else "_ranges.py"
+    _write_unassigned_ranges(out_filename)