Coverage for src/debputy/interpreter.py: 96%
82 statements
« prev ^ index » next coverage.py v7.2.7, created at 2024-04-07 12:14 +0200
« prev ^ index » next coverage.py v7.2.7, created at 2024-04-07 12:14 +0200
1import dataclasses
2import os.path
3import re
4import shutil
5from typing import Optional, IO, TYPE_CHECKING
7if TYPE_CHECKING:
8 from debputy.plugin.api import VirtualPath
# Matches a "#!" line whose command is an absolute path with at least one
# directory component.  Group 1 is the full command path, group 2 is its
# basename (which must start with an ASCII letter).
_SHEBANG_RE = re.compile(
    rb"""
    ^[#][!]\s*
    (/\S+/([a-zA-Z][^/\s]*))
""",
    re.VERBOSE | re.ASCII,
)
# Matches the next whitespace-separated word; used to pick up the argument of `env`.
_WORD = re.compile(rb"\s+(\S+)")
# Captures the trailing version part of a command basename (e.g. "3", "3.9",
# "3.12-dbg").  The part after the dot is a digit followed by *zero* or more
# characters: requiring one or more would make single-digit minor versions
# such as "python3.9" match only the final "9" and leave a stem of "python3.".
_STRIP_VERSION = re.compile(r"(-?\d+(?:[.]\d.*)?)$")

# Maps an interpreter stem (basename without version) to its canonical absolute path.
_KNOWN_INTERPRETERS = {
    os.path.basename(c): c
    for c in ["/bin/sh", "/bin/bash", "/bin/dash", "/usr/bin/perl", "/usr/bin/python"]
}
class Interpreter:
    """Read-only description of the interpreter named on a ``#!`` line.

    This base class only spells out the attribute contract.  Every property
    except `fixup_needed` raises `NotImplementedError` here; `fixup_needed`
    defaults to `False`.
    """

    @property
    def original_command(self) -> str:
        """The command exactly as written on the #! line, minus its arguments

        Flags and arguments following the command are not included.  The one exception
        is `env`: its first argument is assumed to be the real command and is therefore
        kept as part of the result.

            >>> # Note: Normally, you would use `VirtualPath.interpreter()` instead for extracting the interpreter
            >>> python3 = extract_shebang_interpreter(b"#! /usr/bin/python3 -b")
            >>> python3.original_command
            '/usr/bin/python3'
            >>> env_sh = extract_shebang_interpreter(b"#! /usr/bin/env sh")
            >>> env_sh.original_command
            '/usr/bin/env sh'

        :return: The original command in the #!-line
        """
        raise NotImplementedError

    @property
    def command_full_basename(self) -> str:
        """The basename of the command, version suffix included

        When the #!-line goes through `env`, the value describes the argument given
        to `env` and not `env` itself.

            >>> # Note: Normally, you would use `VirtualPath.interpreter()` instead for extracting the interpreter
            >>> python3 = extract_shebang_interpreter(b"#! /usr/bin/python3 -b")
            >>> python3.command_full_basename
            'python3'
            >>> env_sh = extract_shebang_interpreter(b"#! /usr/bin/env sh")
            >>> env_sh.command_full_basename
            'sh'

        :return: The full basename of the command.
        """
        raise NotImplementedError

    @property
    def command_stem(self) -> str:
        """The basename of the command with the version suffix removed

        When the #!-line goes through `env`, the value describes the argument given
        to `env` and not `env` itself.

            >>> # Note: Normally, you would use `VirtualPath.interpreter()` instead for extracting the interpreter
            >>> python3 = extract_shebang_interpreter(b"#! /usr/bin/python3 -b")
            >>> python3.command_stem
            'python'
            >>> env_sh = extract_shebang_interpreter(b"#! /usr/bin/env sh")
            >>> env_sh.command_stem
            'sh'
            >>> python3 = extract_shebang_interpreter(b"#! /usr/bin/python3.12-dbg -b")
            >>> python3.command_stem
            'python'

        :return: The basename of the command **without** version.
        """
        raise NotImplementedError

    @property
    def interpreter_version(self) -> str:
        """The version suffix of the command basename

        When the #!-line goes through `env`, the value describes the argument given
        to `env` and not `env` itself.

            >>> # Note: Normally, you would use `VirtualPath.interpreter()` instead for extracting the interpreter
            >>> python3 = extract_shebang_interpreter(b"#! /usr/bin/python3 -b")
            >>> python3.interpreter_version
            '3'
            >>> env_sh = extract_shebang_interpreter(b"#! /usr/bin/env sh")
            >>> env_sh.interpreter_version
            ''
            >>> python3 = extract_shebang_interpreter(b"#! /usr/bin/python3.12-dbg -b")
            >>> python3.interpreter_version
            '3.12-dbg'

        :return: The version part of the command or the empty string if the command is versionless.
        """
        raise NotImplementedError

    @property
    def fixup_needed(self) -> bool:
        """Whether the #!-line names the interpreter via a non-canonical location

            >>> # Note: Normally, you would use `VirtualPath.interpreter()` instead for extracting the interpreter
            >>> python3 = extract_shebang_interpreter(b"#! /usr/bin/python3 -b")
            >>> python3.fixup_needed
            False
            >>> env_sh = extract_shebang_interpreter(b"#! /usr/bin/env sh")
            >>> env_sh.fixup_needed
            True
            >>> ub_sh = extract_shebang_interpreter(b"#! /usr/bin/sh")
            >>> ub_sh.fixup_needed
            True
            >>> sh = extract_shebang_interpreter(b"#! /bin/sh")
            >>> sh.fixup_needed
            False

        :return: True if this interpreter uses a non-canonical location.
        """
        return False
@dataclasses.dataclass(slots=True, frozen=True)
class DetectedInterpreter(Interpreter):
    """Immutable record of an interpreter parsed from a ``#!`` line.

    The first four fields store the values behind the properties of the same
    name declared on `Interpreter`.
    """

    original_command: str
    command_full_basename: str
    command_stem: str
    interpreter_version: str
    # Canonical command for the interpreter, or None when the stem is not a known interpreter.
    correct_command: Optional[str] = None
    # Replacement #!-line, or None when the existing line needs no correction.
    corrected_shebang_line: Optional[str] = None

    @property
    def fixup_needed(self) -> bool:
        """Whether a corrected #!-line is available for this interpreter."""
        return self.corrected_shebang_line is not None

    def replace_shebang_line(self, path: "VirtualPath") -> None:
        """Rewrite the first line of `path` to the corrected #!-line

        The remainder of the file is copied unchanged and the mtime of `path` is
        restored afterwards.

        :param path: The file to rewrite. Its current first line must start with `#!`.
        :raises ValueError: If the first line of `path` does not start with `#!`.
        """
        replacement = self.corrected_shebang_line
        assert replacement.startswith("#!")
        if not replacement.endswith("\n"):
            replacement = replacement + "\n"
        assert path.parent_dir is not None
        with path.open(byte_io=True) as source:
            current_first_line = source.readline()
            if not current_first_line.startswith(b"#!"):
                raise ValueError(
                    f'The provided path "{path.path}" does not start with a shebang line!?'
                )
            saved_mtime = path.mtime
            # NOTE(review): `replace_fs_path_content` presumably yields a filesystem path
            # that becomes the new content of `path` on exit - confirm against VirtualPath.
            with path.replace_fs_path_content() as new_fs_path:
                with open(new_fs_path, "wb") as sink:
                    sink.write(replacement.encode("utf-8"))
                    # `source` is positioned just past the old first line, so only
                    # the rest of the file is copied.
                    shutil.copyfileobj(source, sink)
            # An interpreter correction is not counted as a "change", so keep the old mtime.
            path.mtime = saved_mtime
def extract_shebang_interpreter_from_file(
    fd: IO[bytes],
) -> Optional[DetectedInterpreter]:
    """Detect the interpreter from the first line of an open binary file

    At most 4096 bytes are read from `fd`.

    :param fd: A binary file object positioned at the start of the content.
    :return: The detected interpreter, or None if the data read contains no newline
      or is not a recognized #!-line.
    """
    head = fd.readline(4096)
    if b"\n" in head:
        return extract_shebang_interpreter(head)
    # Without a newline, it is probably not a shebang line.
    return None
def extract_shebang_interpreter(first_line: bytes) -> Optional[DetectedInterpreter]:
    """Parse a #!-line into a `DetectedInterpreter`

    The command is split into its basename, stem and version. If the command is `env`,
    the next word on the line is treated as the real command. When the stem is a known
    interpreter and the command differs from its canonical form, a corrected #!-line
    (keeping any trailing arguments) is computed as well.

    :param first_line: The raw first line of the file.
    :return: The detected interpreter, or None if `first_line` does not match the
      #!-line pattern.
    """
    shebang = _SHEBANG_RE.search(first_line)
    if shebang is None:
        return None

    original = shebang.group(1).strip().decode("utf-8")
    basename = shebang.group(2).strip().decode("utf-8")
    command_end = shebang.end()

    if basename == "env":
        env_argument = _WORD.search(first_line, pos=command_end)
        if env_argument is not None:
            # The word after `env` is the command that actually gets run.
            basename = env_argument.group(1).decode("utf-8")
            original = original + " " + basename
            command_end = env_argument.end()

    version_match = _STRIP_VERSION.search(basename)
    version = version_match.group(1) if version_match else ""
    stem = basename[: -len(version)] if version_match else basename

    canonical = _KNOWN_INTERPRETERS.get(stem)
    if canonical is not None and version:
        # The canonical path is versionless; carry the version over from the original.
        canonical += version

    fixed_line = None
    if canonical is not None and canonical != original:
        arguments = first_line[command_end + 1 :].strip().decode("utf-8")
        pieces = ["#! " + canonical]
        if arguments:
            pieces.append(arguments)
        fixed_line = " ".join(pieces)

    return DetectedInterpreter(
        original_command=original,
        command_full_basename=basename,
        command_stem=stem,
        interpreter_version=version,
        correct_command=canonical,
        corrected_shebang_line=fixed_line,
    )