tools/make-enterprises.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196

#!/usr/bin/env python3
# create the enterprises.c file from
# https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers
# or an offline copy
#
# Copyright 2022 by Moshe Kaplan
# Based on make-sminmpec.pl by Gerald Combs
#
# Wireshark - Network traffic analyzer
# By Gerald Combs <gerald@wireshark.org>
# Copyright 2004 Gerald Combs
#
# SPDX-License-Identifier: GPL-2.0-or-later

import os
import argparse
import re
import urllib.request


# Default output path, used when no 'outfile' argument is given.
ENTERPRISES_CFILE = os.path.join('epan', 'enterprises.c')

# Registry location that is downloaded when no --infile is supplied.
ENTERPRISE_NUMBERS_URL = "https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers"

# A line that starts with a run of digits carries the enterprise number.
DECIMAL_PATTERN = r"^(\d+)"
# An organization line is indented by two spaces; a third space is tolerated
# because of formatting errors in the source.
ORGANIZATION_PATTERN = r"^   ?(\S.*)"
# A " (formerly ...)" or " (previously ...)" remark inside an organization name.
FORMERLY_PATTERN = r" \(((formerly|previously) .*)\)"


# C source appended to the generated file: returns the table entry for
# 'value', or NULL when 'value' is beyond the largest index in the table.
LOOKUP_FUNCTION = r"""
const char* global_enterprises_lookup(uint32_t value)
{
    if (value > table.max_idx) {
        return NULL;
    }
    else return table.values[value];
}
"""

# C source appended to the generated file: prints "<index>\t<name>" to 'fp'
# for every non-NULL entry of the table.
DUMP_FUNCTION = r"""
void global_enterprises_dump(FILE *fp)
{
    for (size_t idx = 0; idx <= table.max_idx; idx++) {
        if (table.values[idx] != NULL) {
            fprintf(fp, "%zu\t%s\n", idx, table.values[idx]);
        }
    }
}
"""

# The intermediate "<decimal>\t<organization>" text is no longer written to a
# file; it is returned as a string instead.
def generate_enterprise_entries(file_content):
    """Extract the decimal/organization pairs from the registry text.

    Only the "Decimal" and "Organization" fields are of interest; contact
    and email lines are ignored.

    Args:
        file_content: the complete registry text as a single string.

    Returns:
        A tuple (output, last_updated_line).  'output' holds one
        "<decimal>\\t<organization>" line per entry, each terminated by a
        newline; a "(formerly ...)" / "(previously ...)" remark is rewritten
        as a tab followed by "# ...".  'last_updated_line' is a C comment
        wrapping the last line that contained "last updated", followed by a
        blank line.

    Raises:
        Exception: if no line containing "End of Document" was seen, which
            suggests the input was truncated.
    """
    decimal_re = re.compile(DECIMAL_PATTERN)
    organization_re = re.compile(ORGANIZATION_PATTERN)

    org_lines = []
    last_updated = ""
    end_seen = False
    # No enterprise number has been read yet.  Without this, an indented line
    # ahead of the first number would hit an unbound 'decimal' below.
    decimal = None
    for line in file_content.splitlines():
        decimal_match = decimal_re.match(line)
        if decimal_match:
            decimal = decimal_match.group(0)
        elif organization_re.match(line):
            if decimal is None:
                # Indented text before any number: nothing to attach it to.
                continue
            organization = line.strip()
            if organization.lower() == "unassigned":
                continue
            organization = re.sub(FORMERLY_PATTERN, r"\t# \1", organization)
            org_lines.append(decimal + "\t" + organization)
        elif "last updated" in line.lower():
            last_updated = line
        elif "end of document" in line.lower():
            end_seen = True

    if not end_seen:
        raise Exception('"End of Document" not found. Truncated source file?')

    last_updated_line = "/* " + last_updated + " */\n\n"
    output = "\n".join(org_lines) + "\n"
    return (output,last_updated_line)

class CFile:
    """Writer for the generated C file that holds the enterprise table.

    The file header is written on construction.  Mappings are collected in
    memory with addMapping(); the table and the lookup/dump functions are
    written by close().  close() is also called on leaving a 'with' block
    and, as a fallback for callers that never close explicitly, from
    __del__.
    """

    def __init__(self, filename, last_updated_line):
        """Open 'filename' for writing and emit the header.

        Args:
            filename: path of the C file to create.
            last_updated_line: text written right after the header comment.
        """
        self.filename = filename
        # Set before open() so close()/__del__ can tell that nothing was
        # opened if open() raises.
        self.f = None
        self.mappings = {}
        self.highest_num = 0
        self._finished = False

        # Write UTF-8 explicitly so the output does not depend on the
        # platform's default encoding.
        self.f = open(filename, 'w', encoding='utf-8')

        # Write file header
        self.f.write('/* ' + os.path.basename(self.filename) + '\n')
        self.f.write(' *\n')
        self.f.write(' * Wireshark - Network traffic analyzer\n')
        self.f.write(' * By Gerald Combs <gerald@wireshark.org>\n')
        self.f.write(' * Copyright 1998 Gerald Combs\n')
        self.f.write(' *\n')
        self.f.write(' * Do not edit - this file is automatically generated\n')
        self.f.write(' * SPDX-License-Identifier: GPL-2.0-or-later\n')
        self.f.write(' */\n\n')
        self.f.write(last_updated_line)

        # Include header files
        self.f.write('#include "config.h"\n\n')
        self.f.write('#include <stddef.h>\n')
        self.f.write('#include "enterprises.h"\n')
        self.f.write('\n\n')

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def __del__(self):
        # Fallback for callers that rely on finalization to complete the
        # file.  Skip it when no file was ever opened.
        if getattr(self, 'f', None) is not None:
            self.close()

    def close(self):
        """Write the table and helper functions, then close the file.

        Only the first call writes anything; later calls are no-ops.
        """
        if self._finished or self.f is None:
            return
        self._finished = True
        try:
            self._write_table()
            # Lookup function
            self.f.write(LOOKUP_FUNCTION)
            # Dump function
            self.f.write(DUMP_FUNCTION)
        finally:
            self.f.close()
        print('Re-generated', self.filename)

    def _write_table(self):
        """Emit the table type and the static table built from the mappings."""
        self.f.write('typedef struct\n')
        self.f.write('{\n')
        self.f.write('    uint32_t max_idx;\n')
        self.f.write('    const char* values[' + str(self.highest_num+1) + '];\n')
        self.f.write('} global_enterprises_table_t;\n\n')

        # Write static table
        self.f.write('static global_enterprises_table_t table =\n')
        self.f.write('{\n')
        # Largest index
        self.f.write('    ' + str(self.highest_num) + ',\n')
        self.f.write('    {\n')
        # Entries (read from dict)
        for n in range(0, self.highest_num+1):
            if n not in self.mappings:
                # There are some gaps, write a NULL entry so can lookup by index
                line = '        NULL'
            else:
                line = '        "' + self.mappings[n] + '"'
            # Add comma (not after the last entry).
            if n < self.highest_num:
                line += ','
            # Add number as aligned comment.
            line = line.ljust(90) + '// ' + str(n)

            self.f.write(line+'\n')

        # End of array
        self.f.write('    }\n')
        # End of struct
        self.f.write('};\n')

    # Add an individual mapping to the function
    def addMapping(self, num, name):
        """Record 'name' for enterprise number 'num'.

        The name is escaped for use inside a C string literal: backslashes
        first, then double quotes.  A quote must become \\" - writing ""
        would end the literal and start an adjacent one, dropping the quote
        from the resulting string.
        """
        name = name.replace('\\', '\\\\')
        name = name.replace('"', '\\"')

        # Record.
        self.mappings[num] = name
        self.highest_num = max(self.highest_num, num)


def main():
    """Command-line entry point: regenerate the enterprises C file.

    The registry text is read from --infile when given, otherwise it is
    downloaded from ENTERPRISE_NUMBERS_URL.  The result is written to the
    optional 'outfile' argument, which defaults to ENTERPRISES_CFILE.

    Raises:
        Exception: if the download answers with a status other than 200.
    """
    parser = argparse.ArgumentParser(description="Create the {} file.".format(ENTERPRISES_CFILE))
    parser.add_argument('--infile')
    parser.add_argument('outfile', nargs='?', default=ENTERPRISES_CFILE)
    parsed_args = parser.parse_args()

    # Read data from file or webpage
    if parsed_args.infile:
        with open(parsed_args.infile, encoding='utf-8') as fh:
            data = fh.read()
    else:
        with urllib.request.urlopen(ENTERPRISE_NUMBERS_URL) as f:
            if f.status != 200:
                # f.status is an int; it must be converted before it can be
                # joined to the message.
                raise Exception("request for " + ENTERPRISE_NUMBERS_URL + " failed with result code " + str(f.status))
            data = f.read().decode('utf-8')

    # Find bits we need and generate enterprise entries
    enterprises_content,last_updated_line = generate_enterprise_entries(data)

    # Now write to a C file the contents (which is faster than parsing the global file at runtime).
    # CFile emits the table and helper functions when the object is finalized.
    c_file = CFile(parsed_args.outfile, last_updated_line)

    mapping_re = re.compile(r'^(\d+)\s+(.*)$')
    for line in enterprises_content.splitlines():
        match = mapping_re.match(line)
        if match:
            num, name = match.group(1), match.group(2)
            # Strip any comments and/or trailing whitespace
            name = name.partition('#')[0].rstrip()
            # Add
            c_file.addMapping(int(num), name)


if __name__ == "__main__":
    main()