Coverage for src/debputy/elf_util.py: 76%

100 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2024-04-07 12:14 +0200

1import io 

2import os 

3import struct 

4from typing import List, Optional, Callable, Tuple, Iterable 

5 

6from debputy.filesystem_scan import FSPath 

7from debputy.plugin.api import VirtualPath 

8 

# Minimum number of bytes that must be readable from a file before it is
# considered a 32-bit / 64-bit ELF candidate at all.
# NOTE(review): both values are larger than sizeof(Elf32_Ehdr) (52) and
# sizeof(Elf64_Ehdr) (64); presumably a deliberate lower bound - confirm.
ELF_HEADER_SIZE32 = 136
ELF_HEADER_SIZE64 = 232

# Leading bytes of e_ident and the only accepted value of e_version.
ELF_MAGIC = b"\x7fELF"
ELF_VERSION = 0x00000001

# Values of the byte at offset 5 of e_ident (byte order of the file).
ELF_ENDIAN_LE = 0x01
ELF_ENDIAN_BE = 0x02

# Accepted values of e_type.
ELF_TYPE_EXECUTABLE = 0x0002
ELF_TYPE_SHARED_OBJECT = 0x0003

# Tri-state used when filtering on linking type: None means "do not care".
ELF_LINKING_TYPE_ANY = None
ELF_LINKING_TYPE_DYNAMIC = True
ELF_LINKING_TYPE_STATIC = False

# Values of the byte at offset 4 of e_ident (32-bit vs. 64-bit layout).
ELF_EI_ELFCLASS32 = 1
ELF_EI_ELFCLASS64 = 2

# Program header p_type value whose presence marks a dynamically linked file.
ELF_PT_DYNAMIC = 2

# Size of e_ident, i.e. the offset of e_type within the ELF header.
ELF_EI_NIDENT = 0x10

28 

29# ELF header format: 

30# typedef struct { 

31# unsigned char e_ident[EI_NIDENT]; # <-- 16 / 0x10 bytes 

32# uint16_t e_type; 

33# uint16_t e_machine; 

34# uint32_t e_version; 

35# ElfN_Addr e_entry; 

36# ElfN_Off e_phoff; 

37# ElfN_Off e_shoff; 

38# uint32_t e_flags; 

39# uint16_t e_ehsize; 

40# uint16_t e_phentsize; 

41# uint16_t e_phnum; 

42# uint16_t e_shentsize; 

43# uint16_t e_shnum; 

44# uint16_t e_shstrndx; 

45# } ElfN_Ehdr; 

46 

47 

class IncompleteFileError(RuntimeError):
    """Signal that a file ended before all requested records could be read.

    Raised by `_read_bytes_iteratively` when a read returns fewer bytes
    than one full record.
    """

50 

51 

def is_so_or_exec_elf_file(
    path: VirtualPath,
    *,
    assert_linking_type: Optional[bool] = ELF_LINKING_TYPE_ANY,
) -> bool:
    """Tell whether *path* is an ELF executable or shared object.

    :param path: The file to inspect.
    :param assert_linking_type: When not `ELF_LINKING_TYPE_ANY` (None), the
      file must additionally have exactly this linking type
      (`ELF_LINKING_TYPE_DYNAMIC` or `ELF_LINKING_TYPE_STATIC`).
    :return: True if the file is accepted by `_read_elf_file` and satisfies
      the requested linking type, otherwise False.
    """
    # The linking type is only worth computing when the caller filters on it.
    must_check_linking = assert_linking_type is not None
    is_elf, linking_type = _read_elf_file(
        path,
        determine_linking_type=must_check_linking,
    )
    if not is_elf:
        return False
    if assert_linking_type is ELF_LINKING_TYPE_ANY:
        return True
    return assert_linking_type == linking_type

65 

66 

def _read_elf_file(
    path: VirtualPath,
    *,
    determine_linking_type: bool = False,
) -> Tuple[bool, Optional[bool]]:
    """Parse the start of *path* and classify it as ELF or not.

    :param path: The file to inspect; it is opened in binary mode.
    :param determine_linking_type: If True, also scan the program headers
      to find out whether the file is dynamically or statically linked.
    :return: A tuple `(is_elf, linking_type)`. `is_elf` is True only for
      files with the ELF magic, a known byte order and class, the expected
      version and a type of executable or shared object. `linking_type` is
      None unless *determine_linking_type* was requested. If it was requested
      but could not be determined, the result is `(False, None)`.
    """
    not_elf = (False, None)
    byte_order_by_ident = {ELF_ENDIAN_LE: "<", ELF_ENDIAN_BE: ">"}
    word_format_by_class = {ELF_EI_ELFCLASS64: "Q", ELF_EI_ELFCLASS32: "L"}

    header = bytearray(4096)
    fd: io.BufferedReader
    with path.open(byte_io=True, buffering=io.DEFAULT_BUFFER_SIZE) as fd:
        header_len = fd.readinto(header)
        if header_len < ELF_HEADER_SIZE32 or not header.startswith(ELF_MAGIC):
            return not_elf

        elf_class = header[4]
        byte_order = byte_order_by_ident.get(header[5])
        word_format = word_format_by_class.get(elf_class)
        if byte_order is None or word_format is None:
            return not_elf

        # A 64-bit ELF needs the larger minimum size to have been read.
        if elf_class == ELF_EI_ELFCLASS64 and header_len < ELF_HEADER_SIZE64:
            return not_elf

        # e_type, e_machine and e_version directly follow e_ident.
        elf_type, _machine, elf_version = struct.unpack_from(
            f"{byte_order}HHL", header, ELF_EI_NIDENT
        )
        if elf_version != ELF_VERSION:
            return not_elf
        if elf_type not in (ELF_TYPE_EXECUTABLE, ELF_TYPE_SHARED_OBJECT):
            return not_elf

        if not determine_linking_type:
            return True, None

        linking_type = _determine_elf_linking_type(
            fd, header, byte_order, word_format
        )
        # None here means the program headers could not be interpreted.
        if linking_type is None:
            return not_elf
        return True, linking_type

121 

122 

def _determine_elf_linking_type(fd, fd_buffer, endian, offset_size) -> Optional[bool]:
    """Decide whether an ELF file is dynamically or statically linked.

    The file counts as dynamically linked when any of its program headers
    has type `ELF_PT_DYNAMIC`; otherwise static linking is assumed.

    :param fd: Seekable binary file object for the ELF file.
    :param fd_buffer: Buffer holding the start of the file (the ELF header).
    :param endian: struct byte-order prefix ("<" or ">") for this file.
    :param offset_size: struct format character for ElfN_Addr / ElfN_Off
      fields ("L" or "Q").
    :return: `ELF_LINKING_TYPE_DYNAMIC`, `ELF_LINKING_TYPE_STATIC`, or None
      when the program header table is implausible or truncated.
    """
    # Fields read, in order, starting at offset 0x18:
    #   e_entry (ignored), e_phoff, e_shoff (ignored), e_flags (ignored),
    #   e_ehsize (ignored), e_phentsize, e_phnum
    header_fields = struct.unpack_from(
        f"{endian}{offset_size}{offset_size}{offset_size}LHHH",
        fd_buffer,
        ELF_EI_NIDENT + 8,
    )
    e_phoff = header_fields[1]
    e_phentsize, e_phnum = header_fields[5], header_fields[6]

    # man 5 elf suggests that program headers can be absent. If so,
    # e_phnum will be zero - but we assume the same for e_phentsize.
    if e_phnum == 0:
        return ELF_LINKING_TYPE_STATIC

    # Only the leading 4-byte p_type of each entry is inspected, so an
    # entry smaller than that can only come from a corrupted ELF file.
    if e_phentsize < 4:
        return None

    fd.seek(e_phoff, os.SEEK_SET)
    p_type_format = f"{endian}L"
    try:
        # any() stops at the first PT_DYNAMIC entry, so no further program
        # headers are read once the answer is known.
        has_dynamic_segment = any(
            struct.unpack_from(p_type_format, raw_header)[0] == ELF_PT_DYNAMIC
            for raw_header in _read_bytes_iteratively(fd, e_phentsize, e_phnum)
        )
    except IncompleteFileError:
        return None

    if has_dynamic_segment:
        return ELF_LINKING_TYPE_DYNAMIC
    return ELF_LINKING_TYPE_STATIC

168 

169 

170def _read_bytes_iteratively( 

171 fd: io.BufferedReader, 

172 object_size: int, 

173 object_count: int, 

174) -> Iterable[bytes]: 

175 total_size = object_size * object_count 

176 bytes_remaining = total_size 

177 # FIXME: improve this to read larger chunks and yield them one-by-one 

178 byte_buffer = bytearray(object_size) 

179 

180 while bytes_remaining > 0: 180 ↛ 187line 180 didn't jump to line 187, because the condition on line 180 was never false

181 n = fd.readinto(byte_buffer) 

182 if n != object_size: 182 ↛ 183line 182 didn't jump to line 183, because the condition on line 182 was never true

183 break 

184 bytes_remaining -= n 

185 yield byte_buffer 

186 

187 if bytes_remaining: 

188 raise IncompleteFileError() 

189 

190 

def find_all_elf_files(
    fs_root: VirtualPath,
    *,
    walk_filter: Optional[Callable[[VirtualPath, List[VirtualPath]], bool]] = None,
    with_linking_type: Optional[bool] = ELF_LINKING_TYPE_ANY,
) -> List[VirtualPath]:
    """Collect the ELF executables and shared objects found by walking *fs_root*.

    :param fs_root: Root of the tree to scan; must be an `FSPath`.
    :param walk_filter: Optional callable invoked with each visited path and
      its children; paths for which it returns a false value are skipped.
    :param with_linking_type: Passed to `is_so_or_exec_elf_file` as
      `assert_linking_type` to restrict matches to one linking type.
    :return: The matching paths, in the order `fs_root.walk()` produced them.
    """
    # FIXME: Implementation detail that fs_root is always `FSPath` and has `.walk()`
    assert isinstance(fs_root, FSPath)

    def _is_wanted(candidate: VirtualPath, children: List[VirtualPath]) -> bool:
        # The filter is consulted first, before the path itself is examined.
        if walk_filter is not None and not walk_filter(candidate, children):
            return False
        # Cheap rejection of non-files and files too small to be accepted.
        if not candidate.is_file or candidate.size < ELF_HEADER_SIZE32:
            return False
        return is_so_or_exec_elf_file(
            candidate, assert_linking_type=with_linking_type
        )

    return [
        candidate
        for candidate, children in fs_root.walk()
        if _is_wanted(candidate, children)
    ]

208 return matches