summaryrefslogtreecommitdiffstats
path: root/third_party/libwebrtc/build/android/pylib/utils/dexdump.py
blob: f81ac603d432cd55edc9b07115ffdf214a19a399 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import os
import re
import shutil
import sys
import tempfile
from xml.etree import ElementTree

from devil.utils import cmd_helper
from pylib import constants

# Extend sys.path with <this file's directory>/../../gyp before importing
# 'util' below (presumably where the 'util' package lives -- TODO confirm).
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'gyp'))
from util import build_utils

# Path of the dexdump command run by Dump(); built by joining
# constants.ANDROID_SDK_TOOLS with 'dexdump'.
DEXDUMP_PATH = os.path.join(constants.ANDROID_SDK_TOOLS, 'dexdump')


def Dump(apk_path):
  """Dumps class and method information from an APK via dexdump.

  Every file in the APK matching '*classes*.dex' is extracted to a temporary
  directory, run through 'dexdump -l xml', and parsed. The temporary directory
  is removed before returning, whether or not parsing succeeded.

  Args:
    apk_path: An absolute path to an APK file to dump.
  Returns:
    A list with one dict per extracted dex file, each in the following format:
      {
        <package_name>: {
          'classes': {
            <class_name>: {
              'methods': [<method_1>, <method_2>],
              'superclass': <superclass_name>
            }
          }
        }
      }
  """
  # Loop-invariant, so compile it once rather than once per dex file.
  bad_xml_chars = re.compile(
      u'[\x00-\x08\x0b-\x0c\x0e-\x1f\x7f-\x84\x86-\x9f' +
      u'\ud800-\udfff\ufdd0-\ufddf\ufffe-\uffff]')
  # Create the directory *before* entering the try block: if mkdtemp() itself
  # fails there is nothing to clean up, and the finally clause must not hide
  # the real error behind a NameError on an unbound dexfile_dir.
  dexfile_dir = tempfile.mkdtemp()
  try:
    parsed_dex_files = []
    for dex_file in build_utils.ExtractAll(apk_path,
                                           dexfile_dir,
                                           pattern='*classes*.dex'):
      output_xml = cmd_helper.GetCmdOutput(
          [DEXDUMP_PATH, '-l', 'xml', dex_file])
      # Dexdump doesn't escape its XML output very well; decode it as utf-8 with
      # invalid sequences replaced, then remove forbidden characters and
      # re-encode it (as etree expects a byte string as input so it can figure
      # out the encoding itself from the XML declaration)
      if sys.version_info[0] < 3:
        decoded_xml = output_xml.decode('utf-8', 'replace')
        clean_xml = bad_xml_chars.sub(u'\ufffd', decoded_xml)
      else:
        # Line duplicated to avoid pylint redefined-variable-type error.
        clean_xml = bad_xml_chars.sub(u'\ufffd', output_xml)
      parsed_dex_files.append(
          _ParseRootNode(ElementTree.fromstring(clean_xml.encode('utf-8'))))
    return parsed_dex_files
  finally:
    shutil.rmtree(dexfile_dir)


def _ParseRootNode(root):
  """Converts the root element of dexdump's XML output into a dict.

  Only part of what dexdump emits is consumed. The expected layout is:

  <api>
    <package name="foo.bar">
      <class name="Class" extends="foo.bar.SuperClass">
        <field name="Field">
        </field>
        <constructor name="Method">
          <parameter name="Param" type="int">
          </parameter>
        </constructor>
        <method name="Method">
          <parameter name="Param" type="int">
          </parameter>
        </method>
      </class>
    </package>
  </api>

  Returns:
    A dict mapping each package name to the result of _ParsePackageNode for
    that package. When a package name occurs more than once, the 'classes'
    dicts of the occurrences are merged, later entries overriding earlier
    ones with the same class name.
  """
  packages = {}
  for node in root:
    if node.tag != 'package':
      continue
    name = node.attrib['name']
    parsed = _ParsePackageNode(node)
    existing = packages.get(name)
    if existing is None:
      # First time this package is seen.
      packages[name] = parsed
    else:
      # Package already present: fold the new classes into it.
      existing['classes'].update(parsed['classes'])
  return packages


def _ParsePackageNode(package_node):
  """Collects the classes declared under a dexdump <package> element.

  Child elements whose tag is not 'class' are ignored.

  Returns:
    A dict in the format:
      {
        'classes': {
          <class_1>: {
            'methods': [<method_1>, <method_2>]
          },
          <class_2>: {
            'methods': [<method_1>, <method_2>]
          },
        }
      }
  """
  class_nodes = [node for node in package_node if node.tag == 'class']
  parsed_classes = {}
  for class_node in class_nodes:
    class_info = _ParseClassNode(class_node)
    parsed_classes[class_node.attrib['name']] = class_info
  return {'classes': parsed_classes}


def _ParseClassNode(class_node):
  """Extracts method names and the superclass from a dexdump <class> element.

  Only direct <method> children contribute; other children (for example
  <field> or <constructor>) are skipped.

  Returns:
    A dict in the format:
      {
        'methods': [<method_1>, <method_2>],
        'superclass': <value of the element's 'extends' attribute>
      }
  """
  method_names = [
      node.attrib['name'] for node in class_node if node.tag == 'method'
  ]
  superclass = class_node.attrib['extends']
  return {'methods': method_names, 'superclass': superclass}