Coverage for src/debputy/elf_util.py: 76%
100 statements
« prev ^ index » next coverage.py v7.2.7, created at 2024-04-07 12:14 +0200
« prev ^ index » next coverage.py v7.2.7, created at 2024-04-07 12:14 +0200
1import io
2import os
3import struct
4from typing import List, Optional, Callable, Tuple, Iterable
6from debputy.filesystem_scan import FSPath
7from debputy.plugin.api import VirtualPath
# Lower bounds (in bytes) on how much of a file must be readable before it
# is considered a candidate 32-bit / 64-bit ELF file by this module.
ELF_HEADER_SIZE32 = 136
ELF_HEADER_SIZE64 = 232

# The first four bytes of every ELF file (start of e_ident).
ELF_MAGIC = b"\x7fELF"
# The only value of the e_version header field accepted by this module.
ELF_VERSION = 1

# Byte-order markers; compared against e_ident[5].
ELF_ENDIAN_LE = 1
ELF_ENDIAN_BE = 2

# Values of the e_type header field that this module accepts.
ELF_TYPE_EXECUTABLE = 2
ELF_TYPE_SHARED_OBJECT = 3

# Tri-state used when filtering on linking type: `None` means "do not care",
# `True` means dynamically linked and `False` means statically linked.
ELF_LINKING_TYPE_ANY = None
ELF_LINKING_TYPE_DYNAMIC = True
ELF_LINKING_TYPE_STATIC = False

# Word-size class markers; compared against e_ident[4].
ELF_EI_ELFCLASS32 = 1
ELF_EI_ELFCLASS64 = 2

# Program header type (p_type) whose presence is treated as "dynamically
# linked".
ELF_PT_DYNAMIC = 2

# Length of the e_ident array at the start of the ELF header (16 bytes).
ELF_EI_NIDENT = 16
29# ELF header format:
30# typedef struct {
31# unsigned char e_ident[EI_NIDENT]; # <-- 16 / 0x10 bytes
32# uint16_t e_type;
33# uint16_t e_machine;
34# uint32_t e_version;
35# ElfN_Addr e_entry;
36# ElfN_Off e_phoff;
37# ElfN_Off e_shoff;
38# uint32_t e_flags;
39# uint16_t e_ehsize;
40# uint16_t e_phentsize;
41# uint16_t e_phnum;
42# uint16_t e_shentsize;
43# uint16_t e_shnum;
44# uint16_t e_shstrndx;
45# } ElfN_Ehdr;
class IncompleteFileError(RuntimeError):
    """Raised when a file ends before all the expected bytes could be read."""
def is_so_or_exec_elf_file(
    path: VirtualPath,
    *,
    assert_linking_type: Optional[bool] = ELF_LINKING_TYPE_ANY,
) -> bool:
    """Check whether `path` is an ELF executable or shared object.

    :param path: The file to inspect.
    :param assert_linking_type: When not `ELF_LINKING_TYPE_ANY` (`None`), the
      file must additionally have exactly this linking type
      (`ELF_LINKING_TYPE_DYNAMIC` or `ELF_LINKING_TYPE_STATIC`) to match.
    :return: `True` if the file is an accepted ELF file and satisfies the
      linking type requirement (if any); `False` otherwise.
    """
    # The linking type is only worked out when the caller actually cares
    # about it, since that requires reading beyond the initial header.
    wants_linking_type = assert_linking_type is not None
    is_elf, linking_type = _read_elf_file(
        path,
        determine_linking_type=wants_linking_type,
    )
    if not is_elf:
        return False
    if assert_linking_type is ELF_LINKING_TYPE_ANY:
        return True
    return assert_linking_type == linking_type
def _read_elf_file(
    path: VirtualPath,
    *,
    determine_linking_type: bool = False,
) -> Tuple[bool, Optional[bool]]:
    """Parse the start of `path` and decide whether it is an accepted ELF file.

    :param path: The file to inspect.
    :param determine_linking_type: If `True`, also scan the program headers
      to work out whether the file is dynamically or statically linked.
    :return: A tuple `(is_elf, linking_type)`. `is_elf` is `True` only for a
      well-formed ELF executable or shared object. `linking_type` is `None`
      unless `determine_linking_type` was requested and could be resolved;
      if it was requested but could not be resolved, `(False, None)` is
      returned.
    """
    not_elf = (False, None)
    # A fixed-size scratch buffer; only the first `bytes_read` bytes of it
    # come from the file.
    header = bytearray(4096)
    fd: io.BufferedReader
    with path.open(byte_io=True, buffering=io.DEFAULT_BUFFER_SIZE) as fd:
        bytes_read = fd.readinto(header)
        if bytes_read < ELF_HEADER_SIZE32 or not header.startswith(ELF_MAGIC):
            return not_elf

        # e_ident[5] holds the byte order; translate it into the prefix used
        # by the `struct` module.
        endian = {ELF_ENDIAN_LE: "<", ELF_ENDIAN_BE: ">"}.get(header[5])
        if endian is None:
            return not_elf

        # e_ident[4] holds the word-size class, which decides the width of
        # the address/offset fields in the rest of the header.
        elf_class = header[4]
        if elf_class == ELF_EI_ELFCLASS64:
            # A 64-bit ELF file needs a larger minimum amount of data.
            if bytes_read < ELF_HEADER_SIZE64:
                return not_elf
            offset_size = "Q"
        elif elf_class == ELF_EI_ELFCLASS32:
            offset_size = "L"
        else:
            return not_elf

        # e_type, e_machine and e_version follow directly after e_ident.
        elf_type, _machine, elf_version = struct.unpack_from(
            f"{endian}HHL", header, offset=ELF_EI_NIDENT
        )
        if elf_version != ELF_VERSION:
            return not_elf
        if elf_type not in (ELF_TYPE_EXECUTABLE, ELF_TYPE_SHARED_OBJECT):
            return not_elf

        if not determine_linking_type:
            return True, None

        linking_type = _determine_elf_linking_type(fd, header, endian, offset_size)
        if linking_type is None:
            # The linking type could not be determined, so the file is
            # treated as not being a usable ELF file at all.
            return not_elf
        return True, linking_type
def _determine_elf_linking_type(fd, fd_buffer, endian, offset_size) -> Optional[bool]:
    """Determine whether an ELF file is dynamically or statically linked.

    The file counts as dynamically linked if any of its program headers has
    the type `ELF_PT_DYNAMIC`; otherwise static linking is assumed.

    :param fd: Seekable binary file handle for the ELF file; it is
      repositioned to the program header table.
    :param fd_buffer: Buffer holding the start of the file, including the
      full ELF header.
    :param endian: `struct` byte-order prefix (`"<"` or `">"`).
    :param offset_size: `struct` format character for the address/offset
      fields (`"L"` for 32-bit, `"Q"` for 64-bit).
    :return: `ELF_LINKING_TYPE_DYNAMIC`, `ELF_LINKING_TYPE_STATIC`, or `None`
      if the program header table is implausible or truncated.
    """
    # Fields unpacked from offset 0x18 (right after e_version), in order:
    #   e_entry, e_phoff, e_shoff, e_flags, e_ehsize, e_phentsize, e_phnum
    # Only e_phoff, e_phentsize and e_phnum are of interest here.
    header_fields = struct.unpack_from(
        f"{endian}{offset_size * 3}LHHH",
        fd_buffer,
        offset=ELF_EI_NIDENT + 8,
    )
    e_phoff = header_fields[1]
    e_phentsize = header_fields[5]
    e_phnum = header_fields[6]

    # man 5 elf suggests that program headers can be absent, in which case
    # e_phnum is zero. Without program headers there is no dynamic one.
    if e_phnum == 0:
        return ELF_LINKING_TYPE_STATIC

    # The p_type field read below is 4 bytes, so smaller entries cannot be
    # parsed. In practice a valid entry is larger still; at best this is a
    # corrupted ELF file.
    if e_phentsize < 4:
        return None

    fd.seek(e_phoff, os.SEEK_SET)
    # p_type is the first field of every program header entry.
    read_p_type = struct.Struct(f"{endian}L").unpack_from
    try:
        has_dynamic_header = any(
            read_p_type(raw_entry)[0] == ELF_PT_DYNAMIC
            for raw_entry in _read_bytes_iteratively(fd, e_phentsize, e_phnum)
        )
    except IncompleteFileError:
        # The file ended before the whole program header table was read.
        return None

    if has_dynamic_header:
        return ELF_LINKING_TYPE_DYNAMIC
    return ELF_LINKING_TYPE_STATIC
def _read_bytes_iteratively(
    fd: io.BufferedReader,
    object_size: int,
    object_count: int,
) -> Iterable[bytes]:
    """Yield `object_count` consecutive records of `object_size` bytes from `fd`.

    Reading starts at the current position of `fd`. Every yielded item is an
    independent, immutable `bytes` object, so callers may safely keep items
    around after advancing the iterator.

    :param fd: Binary file handle supporting `readinto`.
    :param object_size: Size in bytes of each record.
    :param object_count: Number of records to read.
    :return: An iterator over the raw records, in file order.
    :raises IncompleteFileError: If the file ends (or a short read occurs)
      before all requested records have been read. The error is raised
      lazily, when the iterator reaches the incomplete record.
    """
    bytes_remaining = object_size * object_count
    # FIXME: improve this to read larger chunks and yield them one-by-one
    # Scratch buffer reused for every read; never handed out directly.
    scratch = bytearray(object_size)

    while bytes_remaining > 0:
        n = fd.readinto(scratch)
        if n != object_size:
            # Short read: the record is incomplete, reported below.
            break
        bytes_remaining -= n
        # Hand out a snapshot rather than the scratch buffer itself, since
        # the next read overwrites the buffer in place.
        yield bytes(scratch)

    if bytes_remaining:
        raise IncompleteFileError()
def find_all_elf_files(
    fs_root: VirtualPath,
    *,
    walk_filter: Optional[Callable[[VirtualPath, List[VirtualPath]], bool]] = None,
    with_linking_type: Optional[bool] = ELF_LINKING_TYPE_ANY,
) -> List[VirtualPath]:
    """Collect every ELF executable or shared object found beneath `fs_root`.

    :param fs_root: Root of the tree to scan; must be an `FSPath`.
    :param walk_filter: Optional predicate called with each visited path and
      its children; paths for which it returns a false value are skipped.
    :param with_linking_type: When not `ELF_LINKING_TYPE_ANY` (`None`), only
      files with exactly this linking type are returned.
    :return: The matching paths, in the order `fs_root.walk()` produced them.
    """
    # FIXME: Implementation detail that fs_root is always `FSPath` and has `.walk()`
    assert isinstance(fs_root, FSPath)

    def _is_wanted(candidate: VirtualPath, children: List[VirtualPath]) -> bool:
        if walk_filter is not None and not walk_filter(candidate, children):
            return False
        # Cheap rejections first: anything that is not a regular file or is
        # too small to be an ELF file is never opened.
        if not candidate.is_file or candidate.size < ELF_HEADER_SIZE32:
            return False
        return is_so_or_exec_elf_file(
            candidate, assert_linking_type=with_linking_type
        )

    return [
        candidate
        for candidate, children in fs_root.walk()
        if _is_wanted(candidate, children)
    ]