1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
|
import collections
import collections.abc
import logging
import sys
import textwrap
from abc import ABC
try:
from typing import (
Optional,
Union,
Iterable,
Callable,
TYPE_CHECKING,
Iterator,
Type,
cast,
List,
Generic,
)
from debian._util import T
from .types import TE, R, TokenOrElement
_combine_parts_ret_type = Callable[
[Iterable[Union[TokenOrElement, TE]]], Iterable[Union[TokenOrElement, R]]
]
except ImportError:
# pylint: disable=unnecessary-lambda-assignment
TYPE_CHECKING = False
cast = lambda t, v: v
if TYPE_CHECKING:
from .parsing import Deb822Element
from .tokens import Deb822Token
def print_ast(
    ast_tree,  # type: Union[Iterable[TokenOrElement], 'Deb822Element']
    *,
    end_marker_after=5,  # type: Optional[int]
    output_function=None  # type: Optional[Callable[[str], None]]
):
    # type: (...) -> None
    """Debugging aid that dumps a Deb822Element or a sequence of tokens/elements

    Every token is emitted via ``str(token)``; every element is emitted as its
    class name, followed by its parts (from ``iter_parts()``) one nesting
    level deeper.

    :param ast_tree: Either a single Deb822Element or an iterable of
      Deb822Token/Deb822Element entries. Both kinds may be mixed in the same
      iterable, which makes it possible to dump the tree at the different
      stages of parsing.
    :param end_marker_after: Emit an "END OF" marker after an element when at
      least this many tokens/elements were visited since the element was
      opened. Pass None to disable the markers, or 0 to mark every element
      unconditionally. Tokens never get an end marker, as they are not
      elements.
    :param output_function: Callable receiving one str argument; it is invoked
      once per line of output and is responsible for "displaying" that line.
      Defaults to logging.info when omitted.
    """
    # Avoid circular dependency
    # pylint: disable=import-outside-toplevel
    from debian._deb822_repro.parsing import Deb822Element

    roots = [ast_tree] if isinstance(ast_tree, Deb822Element) else ast_tree
    # Each frame is (counter value when opened, element name, parts iterator);
    # the bottom frame has an empty name and therefore never gets an end marker.
    frames = [(0, "", iter(roots))]
    visited = 0
    emit = logging.info if output_function is None else output_function

    while frames:
        opened_at, label, parts = frames[-1]
        indent = " " * len(frames)
        descended = False
        for node in parts:
            visited += 1
            if isinstance(node, Deb822Element):
                # Suspend the current iterator and continue with the parts of
                # the nested element; we resume here once they are depleted.
                kind = node.__class__.__name__
                frames.append((visited, kind, iter(node.iter_parts())))
                emit(indent + kind)
                descended = True
                break
            emit(indent + str(node))
        if descended:
            continue

        # The iterator of the top-most frame is depleted
        frames.pop()
        if end_marker_after is None or not label:
            continue
        if opened_at + end_marker_after <= visited:
            emit(" " * len(frames) + "# <-- END OF " + label)
def combine_into_replacement(
    source_class,  # type: Type[TE]
    replacement_class,  # type: Type[R]
    *,
    constructor=None  # type: Optional[Callable[[List[TE]], R]]
):
    # type: (...) -> _combine_parts_ret_type[TE, R]
    """Build a stream filter that merges runs of one type into another type

    The returned callable takes an iterable and yields its items in order,
    except that every maximal run of consecutive ``source_class`` instances is
    replaced by a single object created from the list of those instances.

    This is primarily useful for transforming tokens (e.g, Comment tokens) into
    the relevant element (such as the Comment element).

    :param source_class: Type whose consecutive instances are collected.
    :param replacement_class: Called with the collected list to create the
      replacement, unless ``constructor`` is given.
    :param constructor: Optional callable used instead of ``replacement_class``
      to create the replacement from the collected list.
    """
    # Binding a dedicated name lets mypy see that the factory is never None.
    if constructor is not None:
        _build = constructor
    else:
        _build = cast("Callable[[List[TE]], R]", replacement_class)

    def _impl(token_stream):
        # type: (Iterable[Union[TokenOrElement, TE]]) -> Iterable[Union[TokenOrElement, R]]
        run = []
        for item in token_stream:
            if isinstance(item, source_class):
                run.append(item)
            else:
                if run:
                    # The run ended; hand it over and start a new one
                    finished, run = run, []
                    yield _build(finished)
                yield item
        # A run may extend all the way to the end of the stream
        if run:
            yield _build(run)

    return _impl
# Select the base type that BufferingIterator subscripts with [T].
# On Python 3.9+ (and whenever TYPE_CHECKING is true) the abc.Iterator can be
# parameterized directly.
if sys.version_info >= (3, 9) or TYPE_CHECKING:
    _bufferingIterator_Base = collections.abc.Iterator[T]
else:
    # Python 3.5 - 3.8 compat - we are not allowed to subscript the abc.Iterator
    #  - use this little hack to work around it
    #  (the Generic[T] base is what makes the stand-in class subscriptable)
    class _bufferingIterator_Base(collections.abc.Iterator, Generic[T], ABC):
        pass
class BufferingIterator(_bufferingIterator_Base[T], Generic[T]):
    """Iterator wrapper that supports looking ahead in the wrapped stream

    Items read ahead of time are kept in an internal buffer and are handed out
    (in order) before any further item is pulled from the wrapped stream.
    """

    def __init__(self, stream):
        # type: (Iterable[T]) -> None
        self._stream = iter(stream)  # type: Iterator[T]
        # Items pulled from the stream that have not been consumed yet
        self._buffer = collections.deque()  # type: collections.deque[T]
        # Set once a look-ahead has seen the stream raise StopIteration
        self._expired = False  # type: bool

    def __next__(self):
        # type: () -> T
        pending = self._buffer
        if pending:
            return pending.popleft()
        if not self._expired:
            return next(self._stream)
        raise StopIteration

    def takewhile(self, predicate):
        # type: (Callable[[T], bool]) -> Iterable[T]
        """Variant of itertools.takewhile except it does not discard the first non-matching token

        The first item rejected by the predicate stays at the front of the
        buffer, so it is still available afterwards.
        """
        pending = self._buffer
        while pending or self._fill_buffer(5):
            candidate = pending[0]
            if not predicate(candidate):
                return
            pending.popleft()
            yield candidate

    def consume_many(self, count):
        # type: (int) -> List[T]
        """Remove and return up to ``count`` items (fewer if the stream runs out)"""
        self._fill_buffer(count)
        pending = self._buffer
        if len(pending) == count:
            # Exactly the requested amount is buffered; take everything at once
            taken = list(pending)
            pending.clear()
            return taken
        taken = []
        remaining = count
        while pending and remaining:
            taken.append(pending.popleft())
            remaining -= 1
        return taken

    def peek_buffer(self):
        # type: () -> List[T]
        """Return a copy of the currently buffered items without reading more"""
        return [*self._buffer]

    def peek_find(
        self,
        predicate,  # type: Callable[[T], bool]
        limit=None,  # type: Optional[int]
    ):
        # type: (...) -> Optional[int]
        """Look ahead for the first item accepted by ``predicate``

        Nothing is consumed. At most ``limit`` items are examined (no bound
        when ``limit`` is None).

        :returns: The 1-based position of the match (the same convention that
          peek_at uses), or None if there was no match.
        """
        pending = self._buffer
        offset = 0
        while limit is None or offset < limit:
            if offset >= len(pending):
                # Read ahead a few items at a time
                self._fill_buffer(offset + 5)
            if offset >= len(pending):
                # The stream ended before a match was found
                return None
            if predicate(pending[offset]):
                return offset + 1
            offset += 1
        return None

    def _fill_buffer(self, number):
        # type: (int) -> bool
        """Try to have at least ``number`` items buffered

        :returns: True if the buffer is non-empty afterwards.
        """
        pending = self._buffer
        while not self._expired and len(pending) < number:
            try:
                upcoming = next(self._stream)
            except StopIteration:
                self._expired = True
            else:
                pending.append(upcoming)
        return bool(pending)

    def peek(self):
        # type: () -> Optional[T]
        """Return the next item without consuming it (None if there is none)"""
        return self.peek_at(1)

    def peek_at(self, tokens_ahead):
        # type: (int) -> Optional[T]
        """Return the item ``tokens_ahead`` positions ahead without consuming anything

        Position 1 is the item that the next ``__next__`` call would return.
        None is returned if fewer than ``tokens_ahead`` items are available.
        """
        self._fill_buffer(tokens_ahead)
        if len(self._buffer) >= tokens_ahead:
            return self._buffer[tokens_ahead - 1]
        return None

    def peek_many(self, number):
        # type: (int) -> List[T]
        """Return up to ``number`` upcoming items without consuming them"""
        self._fill_buffer(number)
        pending = self._buffer
        if len(pending) == number:
            return list(pending)
        peeked = []
        if not number:
            return peeked
        remaining = number
        for upcoming in pending:
            peeked.append(upcoming)
            remaining -= 1
            if not remaining:
                break
        return peeked
def len_check_iterator(
    content,  # type: str
    stream,  # type: Iterable[TE]
    content_len=None,  # type: Optional[int]
):
    # type: (...) -> Iterable[TE]
    """Pass a parser's output through while verifying it covers the entire line/text

    Every item of ``stream`` is yielded unchanged. While doing so, the length
    of the text of all tokens seen (directly, or via ``iter_tokens()`` for
    elements) is summed up. Once the stream is depleted, the sum is compared
    with the expected length.

    :param content: The text that was parsed. Used for the default expected
      length and in the error messages.
    :param stream: The tokens and/or elements produced by the parser.
    :param content_len: The expected total length. Defaults to
      ``len(content)`` when omitted.
    :raises ValueError: After the last item has been yielded, if the tokens
      cover less or more text than expected.
    """
    if content_len is None:
        content_len = len(content)
    # Fail-safe to ensure none of the value parsers incorrectly parse a value.
    covered = 0
    for token_or_element in stream:
        # We use the AttributeError to discriminate between elements and tokens
        # The cast()s are here to assist / workaround mypy not realizing that.
        try:
            tokens = cast("Deb822Element", token_or_element).iter_tokens()
        except AttributeError:
            token = cast("Deb822Token", token_or_element)
            covered += len(token.text)
        else:
            for token in tokens:
                covered += len(token.text)
        yield token_or_element
    if covered != content_len:
        if covered < content_len:
            # The keyword must be named "content" to match the {content}
            # placeholder; otherwise format() itself fails with a KeyError.
            msg = textwrap.dedent(
                """\
                Value parser did not fully cover the entire line with tokens (
                missing range {covered}..{content_len}). Occurred when parsing "{content}"
                """
            ).format(covered=covered, content_len=content_len, content=content)
            raise ValueError(msg)
        msg = textwrap.dedent(
            """\
            Value parser emitted tokens for more text than was present? Should have
            emitted {content_len} characters, got {covered}. Occurred when parsing
            "{content}"
            """
        ).format(covered=covered, content_len=content_len, content=content)
        raise ValueError(msg)
|