Adding upstream version 4.2.2. (upstream/4.2.2)

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-10 20:34:10 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-10 20:34:10 +0000
commit: e4ba6dbc3f1e76890b22773807ea37fe8fa2b1bc (patch)
tree: 68cb5ef9081156392f1dd62a00c6ccc1451b93df /tools/make-manuf.py
parent: Initial commit. (diff)
download: wireshark-e4ba6dbc3f1e76890b22773807ea37fe8fa2b1bc.tar.xz
wireshark-e4ba6dbc3f1e76890b22773807ea37fe8fa2b1bc.zip
1 files changed, 401 insertions, 0 deletions
diff --git a/tools/make-manuf.py b/tools/make-manuf.py
new file mode 100755
index 00000000..22f3aa03
--- /dev/null
+++ b/tools/make-manuf.py
@@ -0,0 +1,401 @@
+#!/usr/bin/env python3
+#
+# Wireshark - Network traffic analyzer
+# By Gerald Combs <gerald@wireshark.org>
+# Copyright 1998 Gerald Combs
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+'''Update the "manuf" file.
+
+Make-manuf creates a file containing ethernet OUIs and their company
+IDs from the databases at IEEE.
+'''
+
+import csv
+import html
+import io
+import os
+import re
+import sys
+import urllib.request, urllib.error, urllib.parse
+
+have_icu = False
+try:
+    # Use the grapheme or segments module instead?
+    import icu
+    have_icu = True
+except ImportError:
+    pass
+
+def exit_msg(msg=None, status=1):
+    if msg is not None:
+        sys.stderr.write(msg + '\n\n')
+    sys.stderr.write(__doc__ + '\n')
+    sys.exit(status)
+
+def open_url(url):
+    '''Open a URL.
+    Returns the body as a str so that it can be passed to csv.reader. If a
+    directory is given as the first command-line argument, the file named
+    by url[1] is read from that directory instead of being downloaded.
+    '''
+
+    if len(sys.argv) > 1:
+        url_path = os.path.join(sys.argv[1], url[1])
+        url_fd = open(url_path)
+        body = url_fd.read()
+        url_fd.close()
+    else:
+        url_path = '/'.join(url)
+
+        req_headers = { 'User-Agent': 'Wireshark make-manuf' }
+        try:
+            req = urllib.request.Request(url_path, headers=req_headers)
+            response = urllib.request.urlopen(req)
+            body = response.read().decode('UTF-8', 'replace')
+        except Exception:
+            exit_msg('Error opening ' + url_path)
+
+    return body
+
+# These are applied after punctuation has been removed.
+# More examples at https://en.wikipedia.org/wiki/Incorporation_(business)
+general_terms = '|'.join([
+    ' a +s\\b', # A/S and A.S. but not "As" as in "Connect As".
+    ' ab\\b', # Also follows "Oy", which is covered below.
+    ' ag\\b',
+    ' b ?v\\b',
+    ' closed joint stock company\\b',
+    ' co\\b',
+    ' company\\b',
+    ' corp\\b',
+    ' corporation\\b',
+    ' corporate\\b',
+    ' de c ?v\\b', # Follows "S.A.", which is covered separately below.
+    ' gmbh\\b',
+    ' holding\\b',
+    ' inc\\b',
+    ' incorporated\\b',
+    ' jsc\\b',
+    ' kg\\b',
+    ' k k\\b', # "K.K." as in "kabushiki kaisha", but not "K+K" as in "K+K Messtechnik".
+    ' limited\\b',
+    ' llc\\b',
+    ' ltd\\b',
+    ' n ?v\\b',
+    ' oao\\b',
+    ' of\\b',
+    ' open joint stock company\\b',
+    ' ooo\\b',
+    ' oü\\b',
+    ' oy\\b',
+    ' oyj\\b',
+    ' plc\\b',
+    ' pty\\b',
+    ' pvt\\b',
+    ' s ?a ?r ?l\\b',
+    ' s ?a\\b',
+    ' s ?p ?a\\b',
+    ' sp ?k\\b',
+    ' s ?r ?l\\b',
+    ' systems\\b',
+    '\\bthe\\b',
+    ' zao\\b',
+    ' z ?o ?o\\b'
+    ])
+
+# Chinese company names tend to start with the location; skip it (non-exhaustive list).
+skip_start = [
+    'shengzen',
+    'shenzhen',
+    'beijing',
+    'shanghai',
+    'wuhan',
+    'hangzhou',
+    'guangxi',
+    'guangdong',
+    'chengdu',
+]
+
+# Special cases handled directly
+special_case = {
+    "Advanced Micro Devices": "AMD",
+    "杭州德澜科技有限公司": "DelanTech" # 杭州德澜科技有限公司（HangZhou Delan Technology Co.,Ltd）
+}
+
+def shorten(manuf):
+    '''Convert a long manufacturer name to abbreviated and short names'''
+    # Normalize whitespace.
+    manuf = ' '.join(manuf.split())
+    orig_manuf = manuf
+    # Convert all caps to title case
+    if manuf.isupper():
+        manuf = manuf.title()
+    # Remove the contents of parentheses as ancillary data
+    manuf = re.sub(r"\(.*\)", '', manuf)
+    # Remove the contents of fullwidth parentheses (mostly in Asian names)
+    manuf = re.sub(r"（.*）", '', manuf)
+    # Remove "a" before removing punctuation ("Aruba, a Hewlett [...]" etc.)
+    manuf = manuf.replace(" a ", " ")
+    # Remove any punctuation
+    # XXX Use string.punctuation? Note that it includes '-' and '*'.
+    manuf = re.sub(r"[\"',./:()+-]", ' ', manuf)
+    # XXX For some reason including the double angle brackets in the above
+    # regex makes it bomb
+    manuf = re.sub(r"[«»“”]", ' ', manuf)
+    # A standalone & isn't needed
+    manuf = manuf.replace(" & ", " ")
+    # Remove business types and other general terms ("the", "inc", "plc", etc.)
+    plain_manuf = re.sub(general_terms, '', manuf, flags=re.IGNORECASE)
+    # ...but make sure we don't remove everything.
+    if not all(s == ' ' for s in plain_manuf):
+        manuf = plain_manuf
+
+    manuf = manuf.strip()
+
+    # Check for special case
+    if manuf in special_case.keys():
+        manuf = special_case[manuf]
+
+    # XXX: Some of the entries have Chinese city or other location
+    # names written with spaces between each character, like
+    # Bei jing, Wu Han, Shen Zhen, etc. We should remove that too.
+    split = manuf.split()
+    if len(split) > 1 and split[0].lower() in skip_start:
+        manuf = ' '.join(split[1:])
+
+    # Remove all spaces
+    manuf = re.sub(r'\s+', '', manuf)
+
+    if len(manuf) < 1:
+        sys.stderr.write('Manufacturer "{}" shortened to nothing.\n'.format(orig_manuf))
+        sys.exit(1)
+
+    # Truncate names to a reasonable length, say, 12 characters. If
+    # the string contains UTF-8, this may be substantially more than
+    # 12 bytes. It might also be less than 12 visible characters. Plain
+    # Python slices Unicode strings by code point, which is better
+    # than raw bytes but not as good as grapheme clusters. PyICU
+    # supports grapheme clusters. https://bugs.python.org/issue30717
+    #
+
+    # Truncate by code points
+    trunc_len = 12
+
+    if have_icu:
+        # Truncate by grapheme clusters
+        bi_ci = icu.BreakIterator.createCharacterInstance(icu.Locale('en_US'))
+        bi_ci.setText(manuf)
+        bounds = list(bi_ci)
+        bounds = bounds[0:trunc_len]
+        trunc_len = bounds[-1]
+
+    manuf = manuf[:trunc_len]
+
+    if manuf.lower() == orig_manuf.lower():
+        # Original manufacturer name was short and simple.
+        return [manuf, None]
+
+    mixed_manuf = orig_manuf
+    # At least one entry has whitespace in front of a period.
+    mixed_manuf = re.sub(r'\s+\.', '.', mixed_manuf)
+    # If the company name is all caps, convert it to mixed case (so it doesn't look like we're screaming the company name)
+    if mixed_manuf.upper() == mixed_manuf:
+        mixed_manuf = mixed_manuf.title()
+
+    return [manuf, mixed_manuf]
+
+MA_L = 'MA_L'
+MA_M = 'MA_M'
+MA_S = 'MA_S'
+
+def prefix_to_oui(prefix, prefix_map):
+    pfx_len = int(len(prefix) * 8 / 2)
+    prefix24 = prefix[:6]
+    oui24 = ':'.join(hi + lo for hi, lo in zip(prefix24[0::2], prefix24[1::2]))
+
+    if pfx_len == 24:
+        # 24-bit OUI assignment, no mask
+        return oui24, MA_L
+
+    # Other lengths which require a mask.
+    oui = prefix.ljust(12, '0')
+    oui = ':'.join(hi + lo for hi, lo in zip(oui[0::2], oui[1::2]))
+    if pfx_len == 28:
+        kind = MA_M
+    elif pfx_len == 36:
+        kind = MA_S
+    prefix_map[oui24] = kind
+
+    return '{}/{:d}'.format(oui, int(pfx_len)), kind
+
+def main():
+    this_dir = os.path.dirname(__file__)
+    manuf_path = os.path.join('epan', 'manuf-data.c')
+
+    ieee_d = {
+        'OUI':   { 'url': ["https://standards-oui.ieee.org/oui/", "oui.csv"], 'min_entries': 1000 },
+        'CID':   { 'url': ["https://standards-oui.ieee.org/cid/", "cid.csv"], 'min_entries': 75 },
+        'IAB':   { 'url': ["https://standards-oui.ieee.org/iab/", "iab.csv"], 'min_entries': 1000 },
+        'OUI28': { 'url': ["https://standards-oui.ieee.org/oui28/", "mam.csv"], 'min_entries': 1000 },
+        'OUI36': { 'url': ["https://standards-oui.ieee.org/oui36/", "oui36.csv"], 'min_entries': 1000 },
+    }
+    oui_d = {
+        MA_L: { '00:00:00' : ['00:00:00', 'Officially Xerox, but 0:0:0:0:0:0 is more common'] },
+        MA_M: {},
+        MA_S: {},
+    }
+
+    min_total = 35000; # 35830 as of 2018-09-05
+    total_added = 0
+
+    # Add IEEE entries from each of their databases
+    ieee_db_l = ['OUI', 'OUI28', 'OUI36', 'CID', 'IAB']
+
+    # map a 24-bit prefix to MA-M/MA-S or none (MA-L by default)
+    prefix_map = {}
+
+    for db in ieee_db_l:
+        db_url = ieee_d[db]['url']
+        ieee_d[db]['skipped'] = 0
+        ieee_d[db]['added'] = 0
+        ieee_d[db]['total'] = 0
+        print('Merging {} data from {}'.format(db, db_url))
+        body = open_url(db_url)
+        ieee_csv = csv.reader(body.splitlines())
+
+        # Pop the title row.
+        next(ieee_csv)
+        for ieee_row in ieee_csv:
+            #Registry,Assignment,Organization Name,Organization Address
+            #IAB,0050C2DD6,Transas Marine Limited,Datavagen 37 Askim Vastra Gotaland SE 436 32
+            oui, kind = prefix_to_oui(ieee_row[1].upper(), prefix_map)
+            manuf = ieee_row[2].strip()
+            # The Organization Name field occasionally contains HTML entities. Undo them.
+            manuf = html.unescape(manuf)
+            # "Watts A\S"
+            manuf = manuf.replace('\\', '/')
+            if manuf == 'IEEE Registration Authority':
+                continue
+            if manuf == 'Private':
+                continue
+            if oui in oui_d[kind]:
+                action = 'Skipping'
+                print('{} - {} IEEE "{}" in favor of "{}"'.format(oui, action, manuf, oui_d[kind][oui]))
+                ieee_d[db]['skipped'] += 1
+            else:
+                oui_d[kind][oui] = shorten(manuf)
+                ieee_d[db]['added'] += 1
+            ieee_d[db]['total'] += 1
+
+        if ieee_d[db]['total'] < ieee_d[db]['min_entries']:
+            exit_msg("Too few {} entries. Got {}, wanted {}".format(db, ieee_d[db]['total'], ieee_d[db]['min_entries']))
+        total_added += ieee_d[db]['total']
+
+    if total_added < min_total:
+        exit_msg("Too few total entries ({})".format(total_added))
+
+    try:
+        manuf_fd = io.open(manuf_path, 'w', encoding='UTF-8')
+    except Exception:
+        exit_msg("Couldn't open manuf file for reading ({}) ".format(manuf_path))
+
+    manuf_fd.write('''/*
+ * This file was generated by running ./tools/make-manuf.py.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * The data below has been assembled from the following sources:
+ *
+ * The IEEE public OUI listings available from:
+ * <http://standards-oui.ieee.org/oui/oui.csv>
+ * <http://standards-oui.ieee.org/cid/cid.csv>
+ * <http://standards-oui.ieee.org/iab/iab.csv>
+ * <http://standards-oui.ieee.org/oui28/mam.csv>
+ * <http://standards-oui.ieee.org/oui36/oui36.csv>
+ *
+ */
+
+''')
+
+    # Write the prefix map
+    manuf_fd.write("static const manuf_registry_t ieee_registry_table[] = {\n")
+    keys = list(prefix_map.keys())
+    keys.sort()
+    for oui in keys:
+        manuf_fd.write("    {{ {{ 0x{}, 0x{}, 0x{} }}, {} }},\n".format(oui[0:2], oui[3:5], oui[6:8], prefix_map[oui]))
+    manuf_fd.write("};\n\n")
+
+    # write the MA-L table
+    manuf_fd.write("static const manuf_oui24_t global_manuf_oui24_table[] = {\n")
+    keys = list(oui_d[MA_L].keys())
+    keys.sort()
+    for oui in keys:
+        short = oui_d[MA_L][oui][0]
+        if oui_d[MA_L][oui][1]:
+            long = oui_d[MA_L][oui][1]
+        else:
+            long = short
+        line = "    {{ {{ 0x{}, 0x{}, 0x{} }}, \"{}\", ".format(oui[0:2], oui[3:5], oui[6:8], short)
+        sep = 44 - len(line)
+        if sep <= 0:
+            sep = 0
+        line += sep * ' '
+        line += "\"{}\" }},\n".format(long.replace('"', '\\"'))
+        manuf_fd.write(line)
+    manuf_fd.write("};\n\n")
+
+    # write the MA-M table
+    manuf_fd.write("static const manuf_oui28_t global_manuf_oui28_table[] = {\n")
+    keys = list(oui_d[MA_M].keys())
+    keys.sort()
+    for oui in keys:
+        short = oui_d[MA_M][oui][0]
+        if oui_d[MA_M][oui][1]:
+            long = oui_d[MA_M][oui][1]
+        else:
+            long = short
+        line = "    {{ {{ 0x{}, 0x{}, 0x{}, 0x{} }}, \"{}\", ".format(oui[0:2], oui[3:5], oui[6:8], oui[9:11], short)
+        sep = 50 - len(line)
+        if sep <= 0:
+            sep = 0
+        line += sep * ' '
+        line += "\"{}\" }},\n".format(long.replace('"', '\\"'))
+        manuf_fd.write(line)
+    manuf_fd.write("};\n\n")
+
+    # Write the MA-S table
+    manuf_fd.write("static const manuf_oui36_t global_manuf_oui36_table[] = {\n")
+    keys = list(oui_d[MA_S].keys())
+    keys.sort()
+    for oui in keys:
+        short = oui_d[MA_S][oui][0]
+        if oui_d[MA_S][oui][1]:
+            long = oui_d[MA_S][oui][1]
+        else:
+            long = short
+        line = "    {{ {{ 0x{}, 0x{}, 0x{}, 0x{}, 0x{} }}, \"{}\", ".format(oui[0:2], oui[3:5], oui[6:8], oui[9:11], oui[12:14], short)
+        sep = 56 - len(line)
+        if sep <= 0:
+            sep = 0
+        line += sep * ' '
+        line += "\"{}\" }},\n".format(long.replace('"', '\\"'))
+        manuf_fd.write(line)
+    manuf_fd.write("};\n")
+
+    manuf_fd.close()
+
+    for db in ieee_d:
+        print('{:<20}: {}'.format('IEEE ' + db + ' added', ieee_d[db]['added']))
+    print('{:<20}: {}'.format('Total added', total_added))
+
+    print()
+    for db in ieee_d:
+        print('{:<20}: {}'.format('IEEE ' + db + ' total', ieee_d[db]['total']))
+
+    print()
+    for db in ieee_d:
+        print('{:<20}: {}'.format('IEEE ' + db + ' skipped', ieee_d[db]['skipped']))
+
+if __name__ == '__main__':
+    main()
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-10 20:34:10 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-10 20:34:10 +0000
commit	e4ba6dbc3f1e76890b22773807ea37fe8fa2b1bc (patch)
tree	68cb5ef9081156392f1dd62a00c6ccc1451b93df /tools/make-manuf.py
parent	Initial commit. (diff)
download	wireshark-e4ba6dbc3f1e76890b22773807ea37fe8fa2b1bc.tar.xz wireshark-e4ba6dbc3f1e76890b22773807ea37fe8fa2b1bc.zip