#!/usr/bin/env python3 """ Extract stencil data/struct from C++ header. """ import argparse import os import re import shutil import subprocess import sys from textwrap import dedent parser = argparse.ArgumentParser(description='Update stencil data/struct') parser.add_argument('PATH_TO_MOZILLA_CENTRAL', help='Path to mozilla-central') parser.add_argument('PATH_TO_JSPARAGUS', help='Path to jsparagus') args = parser.parse_args() def ensure_exists(path): if not os.path.exists(path): print(f'{path} does not exist', file=sys.stderr) sys.exit(1) def ensure_input_files(files): paths = {} for (parent, name) in files: path = os.path.join(parent, name) ensure_exists(path) paths[name] = path return paths js_dir = os.path.join(args.PATH_TO_MOZILLA_CENTRAL, 'js') frontend_dir = os.path.join(js_dir, 'src', 'frontend') vm_dir = os.path.join(js_dir, 'src', 'vm') public_dir = os.path.join(js_dir, 'public') input_paths = ensure_input_files([ (frontend_dir, 'SourceNotes.h'), (public_dir, 'Symbol.h'), (vm_dir, 'AsyncFunctionResolveKind.h'), (vm_dir, 'BytecodeFormatFlags.h'), (vm_dir, 'CheckIsObjectKind.h'), (vm_dir, 'FunctionFlags.h'), (vm_dir, 'FunctionPrefixKind.h'), (vm_dir, 'GeneratorAndAsyncKind.h'), (vm_dir, 'GeneratorResumeKind.h'), (vm_dir, 'Opcodes.h'), (vm_dir, 'ThrowMsgKind.h'), (vm_dir, 'StencilEnums.h'), ]) def get_source_path(crate, name): path = os.path.join(args.PATH_TO_JSPARAGUS, 'crates', crate, 'src', name) ensure_exists(path) return path opcode_dest_path = get_source_path('stencil', 'opcode.rs') emitter_dest_path = get_source_path('emitter', 'emitter.rs') function_dest_path = get_source_path('stencil', 'function.rs') script_dest_path = get_source_path('stencil', 'script.rs') copy_dir = os.path.join(args.PATH_TO_JSPARAGUS, 'crates', 'stencil', 'src', 'copy') if not os.path.exists(copy_dir): os.makedirs(copy_dir) def extract_opcodes(paths): opcodes = [] with open(paths['Opcodes.h'], 'r') as f: for line in f: line = line.strip() if line.startswith('MACRO(') and ',' in line: line = line[5:] if line.endswith(' \\'): line = line[:-2] assert line.endswith(')') opcodes.append((" " * 16) + line + ",") return opcodes def extract_opcode_flags(paths): pat = re.compile(r'(JOF_[A-Z0-9_]+)\s=\s([^,]+),\s*/\*\s+(.*)\s+\*/') flags = [] with open(paths['BytecodeFormatFlags.h'], 'r') as f: for line in f: m = pat.search(line) if not m: continue name = m.group(1) value = m.group(2) comment = m.group(3) if name == 'JOF_MODEMASK': continue flags.append({ 'name': name, 'value': value, 'comment': comment, }) return flags def remove_comment(body): block_comment_pat = re.compile(r'/\*.+?\*/', re.M) line_comment_pat = re.compile(r'//.*') result = '' for line in block_comment_pat.sub('', body).split('\n'): line = line_comment_pat.sub('', line) result += line return result def filter_enum_body(body): space_pat = re.compile(r'\s*') return space_pat.sub('', body) size_types = { 'bool': 'bool', 'int8_t': 'i8', 'uint8_t': 'u8', 'uint16_t': 'u16', 'uint24_t': 'u24', 'int32_t': 'i32', 'uint32_t': 'u32', } def extract_enum(types, paths, ty, filename=None, custom_handler=None): variants_pat = re.compile( r'enum(?:\s+class)?\s*' + ty + r'\s*:\s*([A-Za-z0-9_]+)\s*\{([^}]+)\}', re.M) simple_init_pat = re.compile(r'^([A-Za-z0-9_]+)=((:?0x)?[A-Fa-f0-9+]+)$') bits_init_pat = re.compile(r'^([A-Za-z0-9_]+)=(\d+)<<(\d+)$') if not filename: filename = f'{ty}.h' with open(paths[filename], 'r') as f: content = f.read() content = remove_comment(content) m = variants_pat.search(content) assert m, f'enum {ty} is not found' size_type = m.group(1) body = m.group(2) if size_type not in size_types: print(f'{size_types} is not supported', file=sys.stderr) sys.exit(1) size = size_types[size_type] body = filter_enum_body(body) variants = [] i = 0 for variant in body.split(','): if variant == '': # After trailing comma continue m = simple_init_pat.search(variant) if m: name = m.group(1) value = m.group(2) variants.append((name, value)) if value.startswith('0x'): i = int(value, 16) + 1 else: i = int(value) + 1 continue m = bits_init_pat.search(variant) if m: name = m.group(1) bits = m.group(2) shift = m.group(3) value = f'{bits} << {shift}' variants.append((name, value)) # If a bit pattern is inside the variant, # do not support any variant without initializer. i = None continue if custom_handler: if custom_handler(variants, variant): i = None continue if i is None: raise Exception(f'All variants should have initializer : {variant}') name = variant value = i variants.append((name, value)) i += 1 types[ty] = { 'dead': False, 'size': size, 'variants': variants } def extract_function_flags(paths): kind_init_pat = re.compile(r'^([A-Za-z0-9_]+)=([A-Za-z0-9_]+)<<([A-Za-z0-9_]+)$') combined_init_pat = re.compile(r'^([A-Za-z0-9_]+)=([A-Za-z0-9_]+(\|[A-Za-z0-9_]+)*)$') def custom_handler(variants, variant): m = kind_init_pat.search(variant) if m: name = m.group(1) bits = m.group(2) shift = m.group(3) value = f'(FunctionKind::{bits} as u16) << {shift}' variants.append((name, value)) return True m = combined_init_pat.search(variant) if m: name = m.group(1) value = m.group(2) variants.append((name, value)) return True raise Exception(f'unhandled variant {variant}') types = {} extract_enum(types, paths, 'Flags', 'FunctionFlags.h', custom_handler) assert types['Flags']['size'] == 'u16' return types['Flags']['variants'] def extract_types(paths): types = {} def extract_symbols(): pat = re.compile(r'MACRO\((.+)\)') ty = 'SymbolCode' variants = [] i = 0 found = False state = 'before' with open(paths['Symbol.h'], 'r') as f: for line in f: if 'enum class SymbolCode : uint32_t {' in line: found = True if state == 'before': if 'JS_FOR_EACH_WELL_KNOWN_SYMBOL' in line: state = 'macro' elif state == 'macro': m = pat.search(line) if m: sym = m.group(1) sym = sym[0].upper() + sym[1:] variants.append((sym, i)) i += 1 if not line.strip().endswith('\\'): state = 'after' if not found: print('SymbolCode : uint32_t is not found', file=sys.stderr) sys.exit(1) types[ty] = { 'dead': False, 'size': 'u32', 'variants': variants } def extract_source_notes(): pat = re.compile(r'M\((.+),(.+),(.+)\)') ty = 'SrcNoteType' variants = [] i = 0 found = False state = 'before' with open(paths['SourceNotes.h'], 'r') as f: for line in f: if 'enum class SrcNoteType : uint8_t {' in line: found = True if state == 'before': if 'FOR_EACH_SRC_NOTE_TYPE' in line: state = 'macro' elif state == 'macro': m = pat.search(line) if m: variants.append((m.group(1), i)) i += 1 if not line.strip().endswith('\\'): state = 'after' if not found: print('SrcNoteType : uint8_t is not found', file=sys.stderr) sys.exit(1) types[ty] = { 'dead': False, 'size': 'u8', 'variants': variants } extract_enum(types, paths, 'AsyncFunctionResolveKind') extract_enum(types, paths, 'CheckIsObjectKind') extract_enum(types, paths, 'FunctionPrefixKind') extract_enum(types, paths, 'GeneratorResumeKind') extract_enum(types, paths, 'ThrowMsgKind') extract_enum(types, paths, 'ThrowCondition', 'ThrowMsgKind.h') extract_enum(types, paths, 'TryNoteKind', 'StencilEnums.h') extract_symbols() extract_source_notes() return types def extract_script_types(paths): types = {} extract_enum(types, paths, 'ImmutableScriptFlagsEnum', 'StencilEnums.h') types['ImmutableScriptFlagsEnum']['dead'] = True extract_enum(types, paths, 'MutableScriptFlagsEnum', 'StencilEnums.h') types['MutableScriptFlagsEnum']['dead'] = True # Remove unused mask that doesn't follow the naming convention. types['MutableScriptFlagsEnum']['variants'] = \ filter(lambda item: item[0] != 'WarmupResets_MASK', types['MutableScriptFlagsEnum']['variants']) return types def extract_function_types(paths): types = {} extract_enum(types, paths, 'FunctionKind', filename='FunctionFlags.h') return types def format_opcodes(out, opcodes): for opcode in opcodes: out.write(f'{opcode}\n') def format_opcode_flags(out, flags): for flag in flags: out.write(dedent(f"""\ /// {flag['comment']} const {flag['name']}: u32 = {flag['value']}; """)) def rustfmt(path): subprocess.run(['rustfmt', path], check=True) def update_opcode(path, opcodes, flags): tmppath = f'{path}.tmp' with open(path, 'r') as in_f: with open(tmppath, 'w') as out_f: state = 'normal' for line in in_f: if '@@@@ BEGIN OPCODES @@@@' in line: state = 'opcodes' out_f.write(line) format_opcodes(out_f, opcodes) elif '@@@@ END OPCODES @@@@' in line: assert state == 'opcodes' state = 'normal' out_f.write(line) elif '@@@@ BEGIN FLAGS @@@@' in line: state = 'flags' out_f.write(line) format_opcode_flags(out_f, flags) elif '@@@@ END FLAGS @@@@' in line: assert state == 'flags' state = 'normal' out_f.write(line) elif state == 'normal': out_f.write(line) assert state == 'normal' os.replace(tmppath, path) rustfmt(path) def to_snake_case(s): return re.sub(r'(?