diff options
Diffstat (limited to 'third_party/libwebrtc/build/android/pylib/dex')
-rw-r--r-- | third_party/libwebrtc/build/android/pylib/dex/__init__.py | 3 | ||||
-rwxr-xr-x | third_party/libwebrtc/build/android/pylib/dex/dex_parser.py | 551 |
2 files changed, 554 insertions, 0 deletions
diff --git a/third_party/libwebrtc/build/android/pylib/dex/__init__.py b/third_party/libwebrtc/build/android/pylib/dex/__init__.py new file mode 100644 index 0000000000..4a12e35c92 --- /dev/null +++ b/third_party/libwebrtc/build/android/pylib/dex/__init__.py @@ -0,0 +1,3 @@ +# Copyright 2019 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. diff --git a/third_party/libwebrtc/build/android/pylib/dex/dex_parser.py b/third_party/libwebrtc/build/android/pylib/dex/dex_parser.py new file mode 100755 index 0000000000..1ff8d25276 --- /dev/null +++ b/third_party/libwebrtc/build/android/pylib/dex/dex_parser.py @@ -0,0 +1,551 @@ +#!/usr/bin/env python3 +# Copyright 2019 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""Utilities for optimistically parsing dex files. + +This file is not meant to provide a generic tool for analyzing dex files. +A DexFile class that exposes access to several memory items in the dex format +is provided, but it does not include error handling or validation. +""" + + + +import argparse +import collections +import errno +import os +import re +import struct +import sys +import zipfile + +# https://source.android.com/devices/tech/dalvik/dex-format#header-item +_DEX_HEADER_FMT = ( + ('magic', '8s'), + ('checksum', 'I'), + ('signature', '20s'), + ('file_size', 'I'), + ('header_size', 'I'), + ('endian_tag', 'I'), + ('link_size', 'I'), + ('link_off', 'I'), + ('map_off', 'I'), + ('string_ids_size', 'I'), + ('string_ids_off', 'I'), + ('type_ids_size', 'I'), + ('type_ids_off', 'I'), + ('proto_ids_size', 'I'), + ('proto_ids_off', 'I'), + ('field_ids_size', 'I'), + ('field_ids_off', 'I'), + ('method_ids_size', 'I'), + ('method_ids_off', 'I'), + ('class_defs_size', 'I'), + ('class_defs_off', 'I'), + ('data_size', 'I'), + ('data_off', 'I'), +) + +DexHeader = collections.namedtuple('DexHeader', + ','.join(t[0] for t in _DEX_HEADER_FMT)) + +# Simple memory items. +_TypeIdItem = collections.namedtuple('TypeIdItem', 'descriptor_idx') +_ProtoIdItem = collections.namedtuple( + 'ProtoIdItem', 'shorty_idx,return_type_idx,parameters_off') +_MethodIdItem = collections.namedtuple('MethodIdItem', + 'type_idx,proto_idx,name_idx') +_TypeItem = collections.namedtuple('TypeItem', 'type_idx') +_StringDataItem = collections.namedtuple('StringItem', 'utf16_size,data') +_ClassDefItem = collections.namedtuple( + 'ClassDefItem', + 'class_idx,access_flags,superclass_idx,interfaces_off,source_file_idx,' + 'annotations_off,class_data_off,static_values_off') + + +class _MemoryItemList(object): + """Base class for repeated memory items.""" + + def __init__(self, + reader, + offset, + size, + factory, + alignment=None, + first_item_offset=None): + """Creates the item list using the specific item factory. + + Args: + reader: _DexReader used for decoding the memory item. + offset: Offset from start of the file to the item list, serving as the + key for some item types. + size: Number of memory items in the list. + factory: Function to extract each memory item from a _DexReader. + alignment: Optional integer specifying the alignment for the memory + section represented by this list. + first_item_offset: Optional, specifies a different offset to use for + extracting memory items (default is to use offset). + """ + self.offset = offset + self.size = size + reader.Seek(first_item_offset or offset) + self._items = [factory(reader) for _ in range(size)] + + if alignment: + reader.AlignUpTo(alignment) + + def __iter__(self): + return iter(self._items) + + def __getitem__(self, key): + return self._items[key] + + def __len__(self): + return len(self._items) + + def __repr__(self): + item_type_part = '' + if self.size != 0: + item_type = type(self._items[0]) + item_type_part = ', item type={}'.format(item_type.__name__) + + return '{}(offset={:#x}, size={}{})'.format( + type(self).__name__, self.offset, self.size, item_type_part) + + +class _TypeIdItemList(_MemoryItemList): + + def __init__(self, reader, offset, size): + factory = lambda x: _TypeIdItem(x.ReadUInt()) + super(_TypeIdItemList, self).__init__(reader, offset, size, factory) + + +class _ProtoIdItemList(_MemoryItemList): + + def __init__(self, reader, offset, size): + factory = lambda x: _ProtoIdItem(x.ReadUInt(), x.ReadUInt(), x.ReadUInt()) + super(_ProtoIdItemList, self).__init__(reader, offset, size, factory) + + +class _MethodIdItemList(_MemoryItemList): + + def __init__(self, reader, offset, size): + factory = ( + lambda x: _MethodIdItem(x.ReadUShort(), x.ReadUShort(), x.ReadUInt())) + super(_MethodIdItemList, self).__init__(reader, offset, size, factory) + + +class _StringItemList(_MemoryItemList): + + def __init__(self, reader, offset, size): + reader.Seek(offset) + string_item_offsets = iter([reader.ReadUInt() for _ in range(size)]) + + def factory(x): + data_offset = next(string_item_offsets) + string = x.ReadString(data_offset) + return _StringDataItem(len(string), string) + + super(_StringItemList, self).__init__(reader, offset, size, factory) + + +class _TypeListItem(_MemoryItemList): + + def __init__(self, reader): + offset = reader.Tell() + size = reader.ReadUInt() + factory = lambda x: _TypeItem(x.ReadUShort()) + # This is necessary because we need to extract the size of the type list + # (in other cases the list size is provided in the header). + first_item_offset = reader.Tell() + super(_TypeListItem, self).__init__( + reader, + offset, + size, + factory, + alignment=4, + first_item_offset=first_item_offset) + + +class _TypeListItemList(_MemoryItemList): + + def __init__(self, reader, offset, size): + super(_TypeListItemList, self).__init__(reader, offset, size, _TypeListItem) + + +class _ClassDefItemList(_MemoryItemList): + + def __init__(self, reader, offset, size): + reader.Seek(offset) + + def factory(x): + return _ClassDefItem(*(x.ReadUInt() + for _ in range(len(_ClassDefItem._fields)))) + + super(_ClassDefItemList, self).__init__(reader, offset, size, factory) + + +class _DexMapItem(object): + + def __init__(self, reader): + self.type = reader.ReadUShort() + reader.ReadUShort() + self.size = reader.ReadUInt() + self.offset = reader.ReadUInt() + + def __repr__(self): + return '_DexMapItem(type={}, size={}, offset={:#x})'.format( + self.type, self.size, self.offset) + + +class _DexMapList(object): + # Full list of type codes: + # https://source.android.com/devices/tech/dalvik/dex-format#type-codes + TYPE_TYPE_LIST = 0x1001 + + def __init__(self, reader, offset): + self._map = {} + reader.Seek(offset) + self._size = reader.ReadUInt() + for _ in range(self._size): + item = _DexMapItem(reader) + self._map[item.type] = item + + def __getitem__(self, key): + return self._map[key] + + def __contains__(self, key): + return key in self._map + + def __repr__(self): + return '_DexMapList(size={}, items={})'.format(self._size, self._map) + + +class _DexReader(object): + + def __init__(self, data): + self._data = data + self._pos = 0 + + def Seek(self, offset): + self._pos = offset + + def Tell(self): + return self._pos + + def ReadUByte(self): + return self._ReadData('<B') + + def ReadUShort(self): + return self._ReadData('<H') + + def ReadUInt(self): + return self._ReadData('<I') + + def ReadString(self, data_offset): + string_length, string_offset = self._ReadULeb128(data_offset) + string_data_offset = string_offset + data_offset + return self._DecodeMUtf8(string_length, string_data_offset) + + def AlignUpTo(self, align_unit): + off_by = self._pos % align_unit + if off_by: + self.Seek(self._pos + align_unit - off_by) + + def ReadHeader(self): + header_fmt = '<' + ''.join(t[1] for t in _DEX_HEADER_FMT) + return DexHeader._make(struct.unpack_from(header_fmt, self._data)) + + def _ReadData(self, fmt): + ret = struct.unpack_from(fmt, self._data, self._pos)[0] + self._pos += struct.calcsize(fmt) + return ret + + def _ReadULeb128(self, data_offset): + """Returns a tuple of (uleb128 value, number of bytes occupied). + + From DWARF3 spec: http://dwarfstd.org/doc/Dwarf3.pdf + + Args: + data_offset: Location of the unsigned LEB128. + """ + value = 0 + shift = 0 + cur_offset = data_offset + while True: + byte = self._data[cur_offset] + cur_offset += 1 + value |= (byte & 0b01111111) << shift + if (byte & 0b10000000) == 0: + break + shift += 7 + + return value, cur_offset - data_offset + + def _DecodeMUtf8(self, string_length, offset): + """Returns the string located at the specified offset. + + See https://source.android.com/devices/tech/dalvik/dex-format#mutf-8 + + Ported from the Android Java implementation: + https://android.googlesource.com/platform/dalvik/+/fe107fb6e3f308ac5174ebdc5a794ee880c741d9/dx/src/com/android/dex/Mutf8.java#34 + + Args: + string_length: The length of the decoded string. + offset: Offset to the beginning of the string. + """ + self.Seek(offset) + ret = '' + + for _ in range(string_length): + a = self.ReadUByte() + if a == 0: + raise _MUTf8DecodeError('Early string termination encountered', + string_length, offset) + if (a & 0x80) == 0x00: + code = a + elif (a & 0xe0) == 0xc0: + b = self.ReadUByte() + if (b & 0xc0) != 0x80: + raise _MUTf8DecodeError('Error in byte 2', string_length, offset) + code = ((a & 0x1f) << 6) | (b & 0x3f) + elif (a & 0xf0) == 0xe0: + b = self.ReadUByte() + c = self.ReadUByte() + if (b & 0xc0) != 0x80 or (c & 0xc0) != 0x80: + raise _MUTf8DecodeError('Error in byte 3 or 4', string_length, offset) + code = ((a & 0x0f) << 12) | ((b & 0x3f) << 6) | (c & 0x3f) + else: + raise _MUTf8DecodeError('Bad byte', string_length, offset) + + try: + ret += unichr(code) + except NameError: + ret += chr(code) + + if self.ReadUByte() != 0x00: + raise _MUTf8DecodeError('Expected string termination', string_length, + offset) + + return ret + + +class _MUTf8DecodeError(Exception): + + def __init__(self, message, length, offset): + message += ' (decoded string length: {}, string data offset: {:#x})'.format( + length, offset) + super(_MUTf8DecodeError, self).__init__(message) + + +class DexFile(object): + """Represents a single dex file. + + Parses and exposes access to dex file structure and contents, as described + at https://source.android.com/devices/tech/dalvik/dex-format + + Fields: + reader: _DexReader object used to decode dex file contents. + header: DexHeader for this dex file. + map_list: _DexMapList object containing list of dex file contents. + type_item_list: _TypeIdItemList containing type_id_items. + proto_item_list: _ProtoIdItemList containing proto_id_items. + method_item_list: _MethodIdItemList containing method_id_items. + string_item_list: _StringItemList containing string_data_items that are + referenced by index in other sections. + type_list_item_list: _TypeListItemList containing _TypeListItems. + _TypeListItems are referenced by their offsets from other dex items. + class_def_item_list: _ClassDefItemList containing _ClassDefItems. + """ + _CLASS_ACCESS_FLAGS = { + 0x1: 'public', + 0x2: 'private', + 0x4: 'protected', + 0x8: 'static', + 0x10: 'final', + 0x200: 'interface', + 0x400: 'abstract', + 0x1000: 'synthetic', + 0x2000: 'annotation', + 0x4000: 'enum', + } + + def __init__(self, data): + """Decodes dex file memory sections. + + Args: + data: bytearray containing the contents of a dex file. + """ + self.reader = _DexReader(data) + self.header = self.reader.ReadHeader() + self.map_list = _DexMapList(self.reader, self.header.map_off) + self.type_item_list = _TypeIdItemList(self.reader, self.header.type_ids_off, + self.header.type_ids_size) + self.proto_item_list = _ProtoIdItemList( + self.reader, self.header.proto_ids_off, self.header.proto_ids_size) + self.method_item_list = _MethodIdItemList( + self.reader, self.header.method_ids_off, self.header.method_ids_size) + self.string_item_list = _StringItemList( + self.reader, self.header.string_ids_off, self.header.string_ids_size) + self.class_def_item_list = _ClassDefItemList( + self.reader, self.header.class_defs_off, self.header.class_defs_size) + + type_list_key = _DexMapList.TYPE_TYPE_LIST + if type_list_key in self.map_list: + map_list_item = self.map_list[type_list_key] + self.type_list_item_list = _TypeListItemList( + self.reader, map_list_item.offset, map_list_item.size) + else: + self.type_list_item_list = _TypeListItemList(self.reader, 0, 0) + self._type_lists_by_offset = { + type_list.offset: type_list + for type_list in self.type_list_item_list + } + + def GetString(self, string_item_idx): + string_item = self.string_item_list[string_item_idx] + return string_item.data + + def GetTypeString(self, type_item_idx): + type_item = self.type_item_list[type_item_idx] + return self.GetString(type_item.descriptor_idx) + + def GetTypeListStringsByOffset(self, offset): + if not offset: + return () + type_list = self._type_lists_by_offset[offset] + return tuple(self.GetTypeString(item.type_idx) for item in type_list) + + @staticmethod + def ResolveClassAccessFlags(access_flags): + return tuple(flag_string + for flag, flag_string in DexFile._CLASS_ACCESS_FLAGS.items() + if flag & access_flags) + + def IterMethodSignatureParts(self): + """Yields the string components of dex methods in a dex file. + + Yields: + Tuples that look like: + (class name, return type, method name, (parameter type, ...)). + """ + for method_item in self.method_item_list: + class_name_string = self.GetTypeString(method_item.type_idx) + method_name_string = self.GetString(method_item.name_idx) + proto_item = self.proto_item_list[method_item.proto_idx] + return_type_string = self.GetTypeString(proto_item.return_type_idx) + parameter_types = self.GetTypeListStringsByOffset( + proto_item.parameters_off) + yield (class_name_string, return_type_string, method_name_string, + parameter_types) + + def __repr__(self): + items = [ + self.header, + self.map_list, + self.type_item_list, + self.proto_item_list, + self.method_item_list, + self.string_item_list, + self.type_list_item_list, + self.class_def_item_list, + ] + return '\n'.join(str(item) for item in items) + + +class _DumpCommand(object): + + def __init__(self, dexfile): + self._dexfile = dexfile + + def Run(self): + raise NotImplementedError() + + +class _DumpMethods(_DumpCommand): + + def Run(self): + for parts in self._dexfile.IterMethodSignatureParts(): + class_type, return_type, method_name, parameter_types = parts + print('{} {} (return type={}, parameters={})'.format( + class_type, method_name, return_type, parameter_types)) + + +class _DumpStrings(_DumpCommand): + + def Run(self): + for string_item in self._dexfile.string_item_list: + # Some strings are likely to be non-ascii (vs. methods/classes). + print(string_item.data.encode('utf-8')) + + +class _DumpClasses(_DumpCommand): + + def Run(self): + for class_item in self._dexfile.class_def_item_list: + class_string = self._dexfile.GetTypeString(class_item.class_idx) + superclass_string = self._dexfile.GetTypeString(class_item.superclass_idx) + interfaces = self._dexfile.GetTypeListStringsByOffset( + class_item.interfaces_off) + access_flags = DexFile.ResolveClassAccessFlags(class_item.access_flags) + print('{} (superclass={}, interfaces={}, access_flags={})'.format( + class_string, superclass_string, interfaces, access_flags)) + + +class _DumpSummary(_DumpCommand): + + def Run(self): + print(self._dexfile) + + +def _DumpDexItems(dexfile_data, name, item): + dexfile = DexFile(bytearray(dexfile_data)) + print('dex_parser: Dumping {} for {}'.format(item, name)) + cmds = { + 'summary': _DumpSummary, + 'methods': _DumpMethods, + 'strings': _DumpStrings, + 'classes': _DumpClasses, + } + try: + cmds[item](dexfile).Run() + except IOError as e: + if e.errno == errno.EPIPE: + # Assume we're piping to "less", do nothing. + pass + + +def main(): + parser = argparse.ArgumentParser(description='Dump dex contents to stdout.') + parser.add_argument( + 'input', help='Input (.dex, .jar, .zip, .aab, .apk) file path.') + parser.add_argument( + 'item', + choices=('methods', 'strings', 'classes', 'summary'), + help='Item to dump', + nargs='?', + default='summary') + args = parser.parse_args() + + if os.path.splitext(args.input)[1] in ('.apk', '.jar', '.zip', '.aab'): + with zipfile.ZipFile(args.input) as z: + dex_file_paths = [ + f for f in z.namelist() if re.match(r'.*classes[0-9]*\.dex$', f) + ] + if not dex_file_paths: + print('Error: {} does not contain any classes.dex files'.format( + args.input)) + sys.exit(1) + + for path in dex_file_paths: + _DumpDexItems(z.read(path), path, args.item) + + else: + with open(args.input) as f: + _DumpDexItems(f.read(), args.input, args.item) + + +if __name__ == '__main__': + main() |