Coverage for src/debputy/interpreter.py: 96%

82 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2024-04-07 12:14 +0200

1import dataclasses 

2import os.path 

3import re 

4import shutil 

5from typing import Optional, IO, TYPE_CHECKING 

6 

7if TYPE_CHECKING: 

8 from debputy.plugin.api import VirtualPath 

9 

# Matches a "#!" line whose command is an absolute path with at least one
# directory component.  Group 1 is the full command path as written, group 2
# is its basename (which must start with an ASCII letter).
_SHEBANG_RE = re.compile(
    rb"""
    ^[#][!]\s*
    (/\S+/([a-zA-Z][^/\s]*))
""",
    re.VERBOSE | re.ASCII,
)
# Matches the next whitespace-separated word; used to pick up the argument
# following `env` on the #!-line.
_WORD = re.compile(rb"\s+(\S+)")
# Matches a trailing version suffix on a command basename, e.g. the "3" in
# "python3", the "3.9" in "python3.9" or the "3.12-dbg" in "python3.12-dbg".
# The tail after the first minor digit is optional (`.*`, not `.+`) so that
# single-digit minor versions such as "3.9" or "2.7" are captured as a whole
# instead of being split into a stem of "python3." and a version of "9".
_STRIP_VERSION = re.compile(r"(-?\d+(?:[.]\d.*)?)$")

# Maps the unversioned basename of each known interpreter to its canonical
# absolute path.
_KNOWN_INTERPRETERS = {
    os.path.basename(c): c
    for c in ["/bin/sh", "/bin/bash", "/bin/dash", "/usr/bin/perl", "/usr/bin/python"]
}

24 

25 

class Interpreter:
    """Description of the interpreter named on a file's #!-line.

    This base class only defines the interface: the descriptive properties
    raise `NotImplementedError` and `fixup_needed` defaults to `False`.
    """

    @property
    def original_command(self) -> str:
        """The command exactly as written on the #!-line, minus flags/arguments

        The one exception is `env`: when the command is `env`, its first argument
        is kept as part of the result, since that argument is assumed to be the
        real command.


        >>> # Note: Normally, you would use `VirtualPath.interpreter()` instead for extracting the interpreter
        >>> python3 = extract_shebang_interpreter(b"#! /usr/bin/python3 -b")
        >>> python3.original_command
        '/usr/bin/python3'
        >>> env_sh = extract_shebang_interpreter(b"#! /usr/bin/env sh")
        >>> env_sh.original_command
        '/usr/bin/env sh'

        :return: The original command in the #!-line
        """
        raise NotImplementedError

    @property
    def command_full_basename(self) -> str:
        """The basename of the command, including any version suffix

        For #!-lines that go through `env`, this is the argument given to `env`
        and not `env` itself.

        >>> # Note: Normally, you would use `VirtualPath.interpreter()` instead for extracting the interpreter
        >>> python3 = extract_shebang_interpreter(b"#! /usr/bin/python3 -b")
        >>> python3.command_full_basename
        'python3'
        >>> env_sh = extract_shebang_interpreter(b"#! /usr/bin/env sh")
        >>> env_sh.command_full_basename
        'sh'

        :return: The full basename of the command.
        """
        raise NotImplementedError

    @property
    def command_stem(self) -> str:
        """The basename of the command with the version suffix removed

        For #!-lines that go through `env`, this is derived from the argument
        given to `env` and not from `env` itself.

        >>> # Note: Normally, you would use `VirtualPath.interpreter()` instead for extracting the interpreter
        >>> python3 = extract_shebang_interpreter(b"#! /usr/bin/python3 -b")
        >>> python3.command_stem
        'python'
        >>> env_sh = extract_shebang_interpreter(b"#! /usr/bin/env sh")
        >>> env_sh.command_stem
        'sh'
        >>> python3 = extract_shebang_interpreter(b"#! /usr/bin/python3.12-dbg -b")
        >>> python3.command_stem
        'python'

        :return: The basename of the command **without** version.
        """
        raise NotImplementedError

    @property
    def interpreter_version(self) -> str:
        """The version suffix of the command basename

        For #!-lines that go through `env`, this is derived from the argument
        given to `env` and not from `env` itself.

        >>> # Note: Normally, you would use `VirtualPath.interpreter()` instead for extracting the interpreter
        >>> python3 = extract_shebang_interpreter(b"#! /usr/bin/python3 -b")
        >>> python3.interpreter_version
        '3'
        >>> env_sh = extract_shebang_interpreter(b"#! /usr/bin/env sh")
        >>> env_sh.interpreter_version
        ''
        >>> python3 = extract_shebang_interpreter(b"#! /usr/bin/python3.12-dbg -b")
        >>> python3.interpreter_version
        '3.12-dbg'

        :return: The version part of the command or the empty string if the command is versionless.
        """
        raise NotImplementedError

    @property
    def fixup_needed(self) -> bool:
        """Whether the #!-line refers to the interpreter by a non-canonical location

        >>> # Note: Normally, you would use `VirtualPath.interpreter()` instead for extracting the interpreter
        >>> python3 = extract_shebang_interpreter(b"#! /usr/bin/python3 -b")
        >>> python3.fixup_needed
        False
        >>> env_sh = extract_shebang_interpreter(b"#! /usr/bin/env sh")
        >>> env_sh.fixup_needed
        True
        >>> ub_sh = extract_shebang_interpreter(b"#! /usr/bin/sh")
        >>> ub_sh.fixup_needed
        True
        >>> sh = extract_shebang_interpreter(b"#! /bin/sh")
        >>> sh.fixup_needed
        False

        :return: True if this interpreter uses a non-canonical location.
        """
        # The base implementation never requests a fixup; subclasses override.
        return False

133 

134 

@dataclasses.dataclass(slots=True, frozen=True)
class DetectedInterpreter(Interpreter):
    """Immutable record of the interpreter parsed from a #!-line."""

    # The command as written on the #!-line (for `env`, includes its first argument).
    original_command: str
    # Basename of the command including any version suffix.
    command_full_basename: str
    # Basename of the command with the version suffix removed.
    command_stem: str
    # The version suffix of the basename, or "" when there is none.
    interpreter_version: str
    # Canonical command for a known interpreter; None when the interpreter is unknown.
    correct_command: Optional[str] = None
    # Replacement #!-line; None when no correction is needed.
    corrected_shebang_line: Optional[str] = None

    @property
    def fixup_needed(self) -> bool:
        """Whether a corrected #!-line is available for this interpreter."""
        return self.corrected_shebang_line is not None

    def replace_shebang_line(self, path: "VirtualPath") -> None:
        """Rewrite the first line of `path` to the corrected #!-line

        The remainder of the file is copied unchanged and the mtime of `path`
        is restored afterwards.

        :param path: The path whose content should have its #!-line replaced.
        :raises ValueError: If there is no corrected #!-line (that is,
          `fixup_needed` is False) or if the content of `path` does not start
          with a #!-line.
        """
        new_shebang_line = self.corrected_shebang_line
        if new_shebang_line is None:
            # Fail with a clear message rather than an AttributeError on None.
            raise ValueError(
                f'No corrected shebang line is available for "{path.path}"'
                " (fixup_needed is False)"
            )
        assert new_shebang_line.startswith("#!")
        if not new_shebang_line.endswith("\n"):
            new_shebang_line += "\n"
        parent_dir = path.parent_dir
        assert parent_dir is not None
        with path.open(byte_io=True) as rfd:
            # Consume the old #!-line so that only the rest of the file is copied below.
            original_first_line = rfd.readline()
            if not original_first_line.startswith(b"#!"):
                raise ValueError(
                    f'The provided path "{path.path}" does not start with a shebang line!?'
                )
            mtime = path.mtime
            with path.replace_fs_path_content() as new_fs_path, open(
                new_fs_path, "wb"
            ) as wfd:
                wfd.write(new_shebang_line.encode("utf-8"))
                shutil.copyfileobj(rfd, wfd)
            # Ensure the mtime is not updated (we do not count interpreter correction as a "change")
            path.mtime = mtime

169 

170 

def extract_shebang_interpreter_from_file(
    fd: IO[bytes],
) -> Optional[DetectedInterpreter]:
    """Detect the interpreter from the first line of an open binary file

    At most 4096 bytes are read from `fd`.

    :param fd: A file object opened for reading bytes, positioned at the start
      of the content to inspect.
    :return: The detected interpreter, or None if no newline was found in the
      bytes read or the line is not a recognized #!-line.
    """
    head = fd.readline(4096)
    if b"\n" in head:
        return extract_shebang_interpreter(head)
    # Without a newline, it is probably not a shebang line
    return None

179 

180 

def extract_shebang_interpreter(first_line: bytes) -> Optional[DetectedInterpreter]:
    """Parse a #!-line and describe the interpreter it names

    When the command is `env`, the first word after it is treated as the real
    command. If the (unversioned) command is a known interpreter and the line
    does not already use its canonical path, the result carries a corrected
    #!-line that keeps any trailing arguments.

    :param first_line: The first line of the file as bytes.
    :return: The detected interpreter, or None if the line does not match the
      #!-line pattern.
    """
    shebang_match = _SHEBANG_RE.search(first_line)
    if shebang_match is None:
        return None

    # NOTE(review): the decode calls below raise UnicodeDecodeError if the
    # matched bytes are not valid UTF-8 - confirm callers are fine with that.
    original_command = shebang_match.group(1).strip().decode("utf-8")
    full_basename = shebang_match.group(2).strip().decode("utf-8")
    command_end = shebang_match.end()

    if full_basename == "env":
        env_argument = _WORD.search(first_line, pos=command_end)
        if env_argument is not None:
            # The word following `env` is the command that actually gets run.
            full_basename = env_argument.group(1).decode("utf-8")
            original_command = f"{original_command} {full_basename}"
            command_end = env_argument.end()

    version_match = _STRIP_VERSION.search(full_basename)
    if version_match is None:
        version = ""
        stem = full_basename
    else:
        version = version_match.group(1)
        stem = full_basename[: -len(version)]

    canonical_command = _KNOWN_INTERPRETERS.get(stem)
    if canonical_command is not None:
        # Appending an empty version leaves the canonical path unchanged.
        canonical_command += version

    corrected_line = None
    if canonical_command is not None and canonical_command != original_command:
        # Skip the single separator byte after the command, then trim the rest.
        arguments = first_line[command_end + 1 :].strip().decode("utf-8")
        corrected_line = "#! " + canonical_command
        if arguments:
            corrected_line = corrected_line + " " + arguments

    return DetectedInterpreter(
        original_command=original_command,
        command_full_basename=full_basename,
        command_stem=stem,
        interpreter_version=version,
        correct_command=canonical_command,
        corrected_shebang_line=corrected_line,
    )

220 )