Coverage for src/debputy/lsp/vendoring/_deb822_repro/parsing.py: 59%
1464 statements
« prev ^ index » next coverage.py v7.2.7, created at 2024-04-07 12:14 +0200
« prev ^ index » next coverage.py v7.2.7, created at 2024-04-07 12:14 +0200
1# -*- coding: utf-8 -*- vim: fileencoding=utf-8 :
3import collections.abc
4import contextlib
5import sys
6import textwrap
7import weakref
8from abc import ABC
9from types import TracebackType
10from weakref import ReferenceType
12from ._util import (
13 combine_into_replacement,
14 BufferingIterator,
15 len_check_iterator,
16)
17from .formatter import (
18 FormatterContentToken,
19 one_value_per_line_trailing_separator,
20 format_field,
21)
22from .locatable import Locatable, START_POSITION, Position, Range
23from .tokens import (
24 Deb822Token,
25 Deb822ValueToken,
26 Deb822SemanticallySignificantWhiteSpace,
27 Deb822SpaceSeparatorToken,
28 Deb822CommentToken,
29 Deb822WhitespaceToken,
30 Deb822ValueContinuationToken,
31 Deb822NewlineAfterValueToken,
32 Deb822CommaToken,
33 Deb822FieldNameToken,
34 Deb822FieldSeparatorToken,
35 Deb822ErrorToken,
36 tokenize_deb822_file,
37 comma_split_tokenizer,
38 whitespace_split_tokenizer,
39)
40from .types import AmbiguousDeb822FieldKeyError, SyntaxOrParseError
41from debian._util import (
42 resolve_ref,
43 LinkedList,
44 LinkedListNode,
45 OrderedSet,
46 _strI,
47 default_field_sort_key,
48)
50try:
51 from typing import (
52 Iterable,
53 Iterator,
54 List,
55 Union,
56 Dict,
57 Optional,
58 Callable,
59 Any,
60 Generic,
61 Type,
62 Tuple,
63 IO,
64 cast,
65 overload,
66 Mapping,
67 TYPE_CHECKING,
68 Sequence,
69 )
70 from debian._util import T
72 # for some reason, pylint does not see that Commentish is used in typing
73 from .types import ( # pylint: disable=unused-import
74 ST,
75 VE,
76 TE,
77 ParagraphKey,
78 TokenOrElement,
79 Commentish,
80 ParagraphKeyBase,
81 FormatterCallback,
82 )
84 if TYPE_CHECKING:
85 StreamingValueParser = Callable[
86 [Deb822Token, BufferingIterator[Deb822Token]], VE
87 ]
88 StrToValueParser = Callable[[str], Iterable[Union["Deb822Token", VE]]]
89 KVPNode = LinkedListNode["Deb822KeyValuePairElement"]
90 else:
91 StreamingValueParser = None
92 StrToValueParser = None
93 KVPNode = None
94except ImportError:
95 if not TYPE_CHECKING:
96 # pylint: disable=unnecessary-lambda-assignment
97 cast = lambda t, v: v
98 overload = lambda f: None
class ValueReference(Generic[TE]):
    """Handle to a single value stored inside a Deb822 paragraph

    Use it when you want to modify a value "in-place" or decide whether to
    remove a value only after having looked at it.

    A ValueReference can be invalidated by changes or actions performed on
    the provider it came from.  For example, sorting a list of values will
    generally invalidate every ValueReference related to that list.

    The reference reports validity problems when it notices them, but most of
    the time it will not notice.  To help with detection, only a *weak*
    reference to the underlying node is kept, which makes it possible to
    tell when the node has been garbage collected.  Keep in mind that the
    timeliness of garbage collection is implementation defined (e.g., pypy
    does not use ref-counting).
    """

    __slots__ = (
        "_node",
        "_render",
        "_value_factory",
        "_removal_handler",
        "_mutation_notifier",
    )

    def __init__(
        self,
        node,  # type: LinkedListNode[TE]
        render,  # type: Callable[[TE], str]
        value_factory,  # type: Callable[[str], TE]
        removal_handler,  # type: Callable[[LinkedListNode[TokenOrElement]], None]
        mutation_notifier,  # type: Optional[Callable[[], None]]
    ):
        # Weak reference on purpose; it is replaced by None once remove()
        # has been called on this reference.
        self._node = weakref.ref(
            node
        )  # type: Optional[ReferenceType[LinkedListNode[TE]]]
        self._render = render
        self._value_factory = value_factory
        self._removal_handler = removal_handler
        self._mutation_notifier = mutation_notifier

    def _resolve_node(self):
        # type: () -> LinkedListNode[TE]
        """Return the referenced node or raise RuntimeError if it is gone"""
        node_ref = self._node
        # The weak reference *itself* being None means remove() was called,
        # whereas the weak reference *resolving* to None means the node was
        # garbage collected.  The two cases get distinct error messages.
        if node_ref is None:
            raise RuntimeError("Cannot use ValueReference after remove()")
        resolved = node_ref()
        if resolved is None:
            raise RuntimeError("ValueReference is invalid (garbage collected)")
        return resolved

    @property
    def value(self):
        # type: () -> str
        """The referenced value rendered as a str"""
        node = self._resolve_node()
        return self._render(node.value)

    @value.setter
    def value(self, new_value):
        # type: (str) -> None
        """Replace the referenced value

        Assigning a value this way will *not* invalidate this reference (nor
        other references into the same container).

        The value factory may raise an exception when the new value does not
        meet the requirements of the container.  As an example, values in
        whitespace separated lists cannot contain spaces and would trigger an
        exception.
        """
        # Build the replacement first so a rejected value leaves the node
        # untouched (and is reported before any validity check of the node).
        replacement = self._value_factory(new_value)
        self._resolve_node().value = replacement
        notifier = self._mutation_notifier
        if notifier is not None:
            notifier()

    @property
    def locatable(self):
        # type: () -> Locatable
        """The locatable that can be used to determine where this value is"""
        node = self._resolve_node()
        return node.value

    def remove(self):
        # type: () -> None
        """Remove the underlying value

        Afterwards this ValueReference (and any other ValueReference pointing
        to that exact value) is invalid.  The validity of other
        ValueReferences to that container remains unaffected.
        """
        node = self._resolve_node()
        self._removal_handler(cast("LinkedListNode[TokenOrElement]", node))
        # Mark as "known removal" (see _resolve_node)
        self._node = None
if TYPE_CHECKING or sys.version_info >= (3, 9):
    # contextlib.AbstractContextManager can be subscripted directly here.
    _Deb822ParsedTokenList_ContextManager = contextlib.AbstractContextManager[T]
else:

    class _Deb822ParsedTokenList_ContextManager(Generic[T]):
        """Minimal subscriptable stand-in for AbstractContextManager

        Used on Python 3.5 - 3.8, where contextlib.AbstractContextManager is
        not subscriptable.  Python 3.5 is so old that it does not provide
        AbstractContextManager at all, so the (trivial) protocol is simply
        re-implemented here.
        """

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc_val, exc_tb):
            # Never suppress the exception (if any)
            return None
class Deb822ParsedTokenList(
    Generic[VE, ST],
    _Deb822ParsedTokenList_ContextManager["Deb822ParsedTokenList[VE, ST]"],
):
    """Mutable, list-like view of the values of a single field

    The interpreted tokens and elements of the field (values, separators,
    whitespace and comments) are kept in a linked list that the methods of
    this class manipulate.

    The instance is also a context manager: when the ``with`` block is left
    without an exception and something has been marked as changed, the value
    of the underlying key-value pair element is regenerated.
    """

    def __init__(
        self,
        kvpair_element,  # type: 'Deb822KeyValuePairElement'
        interpreted_value_element,  # type: Deb822InterpretationProxyElement
        vtype,  # type: Type[VE]
        stype,  # type: Type[ST]
        str2value_parser,  # type: StrToValueParser[VE]
        default_separator_factory,  # type: Callable[[], ST]
        render,  # type: Callable[[VE], str]
    ):
        # type: (...) -> None
        self._kvpair_element = kvpair_element
        self._proxy_element = interpreted_value_element
        self._token_list = LinkedList(interpreted_value_element.parts)
        self._vtype = vtype
        self._stype = stype
        self._str2value_parser = str2value_parser
        self._default_separator_factory = default_separator_factory
        self._value_factory = _parser_to_value_factory(str2value_parser, vtype)
        self._render = render
        self._format_preserve_original_formatting = True
        self._formatter = (
            one_value_per_line_trailing_separator
        )  # type: FormatterCallback
        self._changed = False
        # Lazily determined by the _continuation_line_char property
        self.__continuation_line_char = None  # type: Optional[str]
        assert self._token_list

        # The final newline (if present) is always dropped, because then
        # adding values will happen after the last value rather than on a
        # new line by default.
        #
        # On write, the value is always made to end on a newline (even if it
        # did not before).  This is simpler and should be a non-issue in
        # practise.
        if isinstance(self._token_list.tail, Deb822NewlineAfterValueToken):
            self._token_list.pop()

    def __iter__(self):
        # type: () -> Iterator[str]
        """Iterate over the values rendered as str"""
        for part in self.value_parts:
            yield self._render(part)

    def __bool__(self):
        # type: () -> bool
        """Whether iterating this list yields at least one value"""
        first_value = next(iter(self), None)
        return first_value is not None

    def __exit__(
        self,
        exc_type,  # type: Optional[Type[BaseException]]
        exc_val,  # type: Optional[BaseException]
        exc_tb,  # type: Optional[TracebackType]
    ):
        # type: (...) -> Optional[bool]
        """Write pending changes back to the field unless an exception is in flight"""
        if exc_type is None and self._changed:
            self._update_field()
        return super().__exit__(exc_type, exc_val, exc_tb)

    @property
    def value_parts(self):
        # type: () -> Iterator[VE]
        """Iterate over the value elements (skipping everything else)"""
        for part in self._token_list:
            if isinstance(part, self._vtype):
                yield part

    def _mark_changed(self):
        # type: () -> None
        self._changed = True

    def iter_value_references(self):
        # type: () -> Iterator[ValueReference[VE]]
        """Iterate over all values in the list (as ValueReferences)

        This is useful for doing inplace modification of the values or even
        streaming removal of field values.  It is in general also more
        efficient when more than one value is updated or removed.
        """
        for node in self._token_list.iter_nodes():
            if not isinstance(node.value, self._vtype):
                continue
            yield ValueReference(
                cast("LinkedListNode[VE]", node),
                self._render,
                self._value_factory,
                self._remove_node,
                self._mark_changed,
            )

    def append_separator(self, space_after_separator=True):
        # type: (bool) -> None
        """Append a default separator, optionally followed by a space

        The trailing space is never added when the separator itself is
        whitespace.
        """
        separator = self._default_separator_factory()
        add_space = space_after_separator and not separator.is_whitespace

        self._changed = True
        self._append_continuation_line_token_if_necessary()
        self._token_list.append(separator)

        if add_space:
            self._token_list.append(Deb822WhitespaceToken(" "))

    def replace(self, orig_value, new_value):
        # type: (str, str) -> None
        """Replace the first instance of a value with another

        This method will *not* affect the validity of ValueReferences.

        A ValueError is raised when orig_value is not in the list.
        """
        vtype = self._vtype
        for node in self._token_list.iter_nodes():
            candidate = node.value
            if isinstance(candidate, vtype) and self._render(candidate) == orig_value:
                node.value = self._value_factory(new_value)
                self._changed = True
                return
        raise ValueError("list.replace(x, y): x not in list")

    def remove(self, value):
        # type: (str) -> None
        """Remove the first instance of a value

        Removal will invalidate ValueReferences to the value being removed.
        ValueReferences to other values will be unaffected.

        A ValueError is raised when the value is not in the list.
        """
        vtype = self._vtype
        for node in self._token_list.iter_nodes():
            candidate = node.value
            if isinstance(candidate, vtype) and self._render(candidate) == value:
                return self._remove_node(node)
        raise ValueError("list.remove(x): x not in list")

    def _remove_node(self, node_to_remove):
        # type: (LinkedListNode[TokenOrElement]) -> None
        """Unlink a value node together with the tokens on one side of it"""
        vtype = self._vtype
        self._changed = True

        # The basic idea is to remove the node and everything to the left of
        # it up to (but excluding) the previous value.
        #
        # Example:
        #
        # """
        # Multiline-Keywords: bar[
        # # Comment about foo
        #                     foo]
        #                     baz
        # Keywords: bar[ foo] baz
        # Comma-List: bar[, foo], baz,
        # Multiline-Comma-List: bar[,
        # # Comment about foo
        #                       foo],
        #                       baz,
        # """
        #
        # Assuming "foo" is the value to remove, the []-markers show what is
        # removed.  A nice side-effect is that this preserves whether or not
        # the value has a trailing separator.  Note that missing separators
        # are *not* repaired, though duplicated separators may get fixed by
        # "accident".
        #
        # There are three special cases where this approach falls short:
        #
        #  1) foo is the only value (in which case, "delete everything" is
        #     the only option).
        #  2) foo is the first value.
        #  3) foo is not the only value on the line and there is a comment
        #     inside the deletion range.
        #
        # For 2) + 3), the range is flipped to cover the node and everything
        # after it (up to but excluding "baz") instead.  This definitely
        # fixes 3), but 2) has yet another corner case, namely:
        #
        # """
        # Multiline-Comma-List: foo,
        # # Remark about bar
        #                       bar,
        # Another-Case: foo
        # # Remark, also we use leading separator
        #             , bar
        # """
        #
        # The options include:
        #
        #  A) Discard the comment - brain-dead simple
        #  B) Hoist the comment up to a field comment, but then what if the
        #     field already has a comment?
        #  C) Clear the first value line leaving just the newline and
        #     replace the separator before "bar" (if present) with a space.
        #     (leaving you with the value of the form "\n# ...\n      bar")
        #

        lhs_value_node = None  # type: Optional[LinkedListNode[TokenOrElement]]
        rhs_value_node = None  # type: Optional[LinkedListNode[TokenOrElement]]
        comment_on_lhs = False
        comment_on_rhs = False

        # Look left: note comments on the way to the nearest value
        for lhs_node in node_to_remove.iter_previous(skip_current=True):
            lhs_token = lhs_node.value
            if isinstance(lhs_token, Deb822Token) and lhs_token.is_comment:
                comment_on_lhs = True
                continue
            if isinstance(lhs_token, vtype):
                lhs_value_node = lhs_node
                break

        # Look right: same procedure in the other direction
        for rhs_node in node_to_remove.iter_next(skip_current=True):
            rhs_token = rhs_node.value
            if isinstance(rhs_token, Deb822Token) and rhs_token.is_comment:
                comment_on_rhs = True
                continue
            if isinstance(rhs_token, vtype):
                rhs_value_node = rhs_node
                break

        if lhs_value_node is None and rhs_value_node is None:
            # This was the last value, just remove everything.
            self._token_list.clear()
            return

        if lhs_value_node is not None and not comment_on_lhs:
            # Delete left
            delete_to_the_left = True
        elif rhs_value_node is not None and not comment_on_rhs:
            # Delete right
            delete_to_the_left = False
        else:
            # There is a comment on either side (or no value on one side and
            # a comment on the other).  Keep it simple and just delete to one
            # side (preferring the left side if possible).
            delete_to_the_left = lhs_value_node is not None

        if delete_to_the_left:
            remaining_lhs = lhs_value_node
            remaining_rhs = node_to_remove.next_node
        else:
            remaining_lhs = node_to_remove.previous_node
            remaining_rhs = rhs_value_node

        # Actual deletion - with some manual labour to update HEAD/TAIL of
        # the list in case of a "delete everything left/right of this node".
        if remaining_lhs is None:
            self._token_list.head_node = remaining_rhs
        if remaining_rhs is None:
            self._token_list.tail_node = remaining_lhs
        LinkedListNode.link_nodes(remaining_lhs, remaining_rhs)

    def append(self, value):
        # type: (str) -> None
        """Append a value given as str (converted via the value factory)"""
        self.append_value(self._value_factory(value))

    def append_value(self, vt):
        # type: (VE) -> None
        """Append a value element, adding a separator first when needed"""
        tokens = self._token_list
        if tokens:
            # A separator is needed when, walking backwards, a value is
            # found before any separator.
            value_without_separator = False
            stype = self._stype
            vtype = self._vtype
            for existing in reversed(tokens):
                if isinstance(existing, vtype):
                    value_without_separator = True
                    break
                if isinstance(existing, stype):
                    break

            if value_without_separator:
                self.append_separator()
        else:
            # Looks nicer if there is a space before the very first value
            tokens.append(Deb822WhitespaceToken(" "))
        self._append_continuation_line_token_if_necessary()
        self._changed = True
        tokens.append(vt)

    def _previous_is_newline(self):
        # type: () -> bool
        """Whether the list is non-empty and its last entry ends on a newline"""
        last = self._token_list.tail
        if last is None:
            return False
        return last.convert_to_text().endswith("\n")

    def append_newline(self):
        # type: () -> None
        """Append a newline token; ValueError if the list already ends on a newline"""
        if self._previous_is_newline():
            raise ValueError(
                "Cannot add a newline after a token that ends on a newline"
            )
        self._token_list.append(Deb822NewlineAfterValueToken())

    def append_comment(self, comment_text):
        # type: (str) -> None
        """Append a comment, starting a new line first if necessary"""
        if not self._previous_is_newline():
            self.append_newline()
        self._token_list.append(Deb822CommentToken(_format_comment(comment_text)))

    @property
    def _continuation_line_char(self):
        # type: () -> str
        """The text used for continuation line tokens (computed once, then cached)"""
        cached = self.__continuation_line_char
        if cached is not None:
            return cached
        # Use ' ' by default but match the existing field if possible.
        char = " "
        for token in self._token_list:
            if isinstance(token, Deb822ValueContinuationToken):
                char = token.text
                break
        self.__continuation_line_char = char
        return char

    def _append_continuation_line_token_if_necessary(self):
        # type: () -> None
        """Append a continuation line token if the list currently ends on a newline"""
        if self._previous_is_newline():
            self._token_list.append(
                Deb822ValueContinuationToken(self._continuation_line_char)
            )

    def reformat_when_finished(self):
        # type: () -> None
        """Enable reformatting and mark the list as changed"""
        self._enable_reformatting()
        self._changed = True

    def _enable_reformatting(self):
        # type: () -> None
        self._format_preserve_original_formatting = False

    def no_reformatting_when_finished(self):
        # type: () -> None
        """Preserve the original formatting when the field is written"""
        self._format_preserve_original_formatting = True

    def value_formatter(
        self,
        formatter,  # type: FormatterCallback
        force_reformat=False,  # type: bool
    ):
        # type: (...) -> None
        """Use a custom formatter when formatting the value

        :param formatter: A formatter (see debian._deb822_repro.formatter.format_field
          for details)
        :param force_reformat: If True, always reformat the field even if there are
          no (other) changes performed.  By default, fields are only reformatted if
          they are changed.
        """
        self._formatter = formatter
        self._format_preserve_original_formatting = False
        if force_reformat:
            self._changed = True

    def clear(self):
        # type: () -> None
        """Like list.clear() - removes all content (including comments and spaces)"""
        if self._token_list:
            self._changed = True
        self._token_list.clear()

    def _iter_content_as_tokens(self):
        # type: () -> Iterable[Deb822Token]
        """Iterate over the content with all elements flattened into their tokens"""
        for part in self._token_list:
            if isinstance(part, Deb822Element):
                yield from part.iter_tokens()
            else:
                yield part

    def _generate_reformatted_field_content(self):
        # type: () -> str
        """Render the field (name included) by running the formatter over the content"""
        default_separator = self._default_separator_factory()
        vtype = self._vtype
        stype = self._stype
        token_list = self._token_list

        def _token_iter():
            # type: () -> Iterator[FormatterContentToken]
            for part in token_list:
                if not isinstance(part, Deb822Token):
                    # Everything that is not a plain token must be a value
                    assert isinstance(part, vtype)
                    yield FormatterContentToken.value_token(part.convert_to_text())
                elif part.is_comment:
                    yield FormatterContentToken.comment_token(part.text)
                elif isinstance(part, stype):
                    yield FormatterContentToken.separator_token(part.text)
                # Any other token is not passed on to the formatter.

        return format_field(
            self._formatter,
            self._kvpair_element.field_name,
            FormatterContentToken.separator_token(default_separator.text),
            _token_iter(),
        )

    def _generate_field_content(self):
        # type: () -> str
        """Render the content verbatim by joining the text of all tokens"""
        return "".join(token.text for token in self._iter_content_as_tokens())

    def _update_field(self):
        # type: () -> None
        """Regenerate the value element of the key-value pair from this list

        A ValueError is raised when the content consists solely of
        whitespace/comments, when it ends on a comment, or when the generated
        text does not parse without errors.
        """
        kvpair_element = self._kvpair_element
        field_name = kvpair_element.field_name
        last = self._token_list.tail

        saw_token = False
        has_real_content = False
        for token in self._iter_content_as_tokens():
            saw_token = True
            if not (token.is_comment or token.is_whitespace):
                has_real_content = True
                break
        if saw_token and not has_real_content:
            raise ValueError(
                "Field must be completely empty or have content "
                "(i.e. non-whitespace and non-comments)"
            )

        if last is None:
            # Special-case for the empty list which will be mapped to
            # an empty field.  Always end on a newline (avoids errors
            # if there is a field after this)
            new_content = [field_name + ":\n"]
        else:
            if isinstance(last, Deb822Token) and last.is_comment:
                raise ValueError("Fields must not end on a comment")
            if not last.convert_to_text().endswith("\n"):
                # Always end on a newline
                self.append_newline()

            if self._format_preserve_original_formatting:
                value_text = self._generate_field_content()
                text = ":".join((field_name, value_text))
            else:
                text = self._generate_reformatted_field_content()

            new_content = text.splitlines(keepends=True)

        # As absurd as it might seem, it is easier to just use the parser to
        # construct the AST correctly
        deb822_file = parse_deb822_file(iter(new_content))
        if deb822_file.find_first_error_element():
            # _print_ast(deb822_file)
            raise ValueError("Syntax error in new field value for " + field_name)
        paragraph = next(iter(deb822_file))
        assert isinstance(paragraph, Deb822NoDuplicateFieldsParagraphElement)
        replacement_kvpair = paragraph.get_kvpair_element(field_name)
        assert replacement_kvpair is not None
        kvpair_element.value_element = replacement_kvpair.value_element
        self._changed = False

    def sort_elements(
        self,
        *,
        key=None,  # type: Optional[Callable[[VE], Any]]
        reverse=False,  # type: bool
    ):
        # type: (...) -> None
        """Sort the elements (abstract values) in this list.

        This method will sort the logical values of the list.  It will
        attempt to preserve comments associated with a given value where
        possible.  Whether space and separators are preserved depends on
        the contents of the field as well as the formatting settings.

        Sorting (without reformatting) is likely to leave you with "awkward"
        whitespace.  Therefore, you almost always want to apply reformatting
        such as the reformat_when_finished() method.

        Sorting will invalidate all ValueReferences.
        """
        vtype = self._vtype
        stype = self._stype

        def key_func(entry):
            # type: (Tuple[VE, List[TokenOrElement]]) -> Any
            element = entry[0]
            if key:
                return key(element)
            return element.convert_to_text()

        # Pair every value with whatever sits between the first comment
        # preceding it and the value itself.
        parts = []
        first_comment_node = None
        for node in self._token_list.iter_nodes():
            value = node.value
            if isinstance(value, Deb822Token) and value.is_comment:
                if first_comment_node is None:
                    first_comment_node = node
                continue

            if isinstance(value, vtype):
                comments = []
                if first_comment_node is not None:
                    for kept_node in first_comment_node.iter_next(skip_current=False):
                        if kept_node is node:
                            break
                        comments.append(kept_node.value)
                parts.append((value, comments))
                first_comment_node = None

        parts.sort(key=key_func, reverse=reverse)

        # Rebuild the list from scratch in the sorted order
        self._changed = True
        self._token_list.clear()

        separator_is_space = self._default_separator_factory().is_whitespace

        is_first = True
        for value, comments in parts:
            if is_first:
                is_first = False
                if comments:
                    # While unlikely, there could be a separator between the comments.
                    # It would be in the way and we remove it.
                    comments = [c for c in comments if not isinstance(c, stype)]
                    # Comments cannot start the field, so inject a newline to
                    # work around that
                    self.append_newline()
            else:
                if not (
                    separator_is_space or any(isinstance(c, stype) for c in comments)
                ):
                    # While unlikely, you can hide a comma between two comments and expect
                    # us to preserve it.  However, the more common case is that the separator
                    # appeared before the comments and was thus omitted (leaving us to re-add
                    # it here).
                    self.append_separator(space_after_separator=False)
                if comments:
                    self.append_newline()
                else:
                    self._token_list.append(Deb822WhitespaceToken(" "))

            self._token_list.extend(comments)
            self.append_value(value)

    def sort(
        self,
        *,
        key=None,  # type: Optional[Callable[[str], Any]]
        **kwargs,  # type: Any
    ):
        # type: (...) -> None
        """Sort the values (rendered as str) in this list.

        This method will sort the logical values of the list.  It will
        attempt to preserve comments associated with a given value where
        possible.  Whether space and separators are preserved depends on
        the contents of the field as well as the formatting settings.

        Sorting (without reformatting) is likely to leave you with "awkward"
        whitespace.  Therefore, you almost always want to apply reformatting
        such as the reformat_when_finished() method.

        Sorting will invalidate all ValueReferences.
        """
        if key is not None:
            # Adapt the str-based key to the element-based key that
            # sort_elements expects.
            render = self._render
            kwargs["key"] = lambda vt: key(render(vt))
        self.sort_elements(**kwargs)
class Interpretation(Generic[T]):
    """Base class for interpretations of the value of a key-value pair"""

    def interpret(
        self,
        kvpair_element,  # type: Deb822KeyValuePairElement
        discard_comments_on_read=True,  # type: bool
    ):
        # type: (...) -> T
        """Interpret the key-value pair; must be provided by subclasses"""
        raise NotImplementedError  # pragma: no cover
class GenericContentBasedInterpretation(Interpretation[T], Generic[T, VE]):
    """Interpretation that re-tokenizes the text of the field value

    The text of the value is split by the tokenizer, value tokens are handed
    to the value parser and the resulting parts are wrapped in a proxy
    element, which subclasses then turn into their high level interpretation.
    """

    def __init__(
        self,
        tokenizer,  # type: Callable[[str], Iterable['Deb822Token']]
        value_parser,  # type: StreamingValueParser[VE]
    ):
        # type: (...) -> None
        super().__init__()
        self._tokenizer = tokenizer
        self._value_parser = value_parser

    def _high_level_interpretation(
        self,
        kvpair_element,  # type: Deb822KeyValuePairElement
        proxy_element,  # type: Deb822InterpretationProxyElement
        discard_comments_on_read=True,  # type: bool
    ):
        # type: (...) -> T
        """Build the final interpretation; must be provided by subclasses"""
        raise NotImplementedError  # pragma: no cover

    def _parse_stream(
        self, buffered_iterator  # type: BufferingIterator[Deb822Token]
    ):
        # type: (...) -> Iterable[Union[Deb822Token, VE]]
        """Pass tokens through, replacing value tokens by parsed values

        The value parser is handed the iterator as well, so it can look at
        (and consume) the tokens following the value token.
        """
        parse_value = self._value_parser
        for token in buffered_iterator:
            if not isinstance(token, Deb822ValueToken):
                yield token
                continue
            yield parse_value(token, buffered_iterator)

    def _parse_kvpair(
        self, kvpair  # type: Deb822KeyValuePairElement
    ):
        # type: (...) -> Deb822InterpretationProxyElement
        """Parse the value of the key-value pair into a proxy element"""
        value_element = kvpair.value_element
        text = value_element.convert_to_text()
        parts = list(self._parse_str(text))  # type: List['TokenOrElement']
        return Deb822InterpretationProxyElement(value_element, parts)

    def _parse_str(self, content):
        # type: (str) -> Iterable[Union[Deb822Token, VE]]
        """Tokenize and parse a str into tokens and values

        Both the tokenizer output and the parsed output are run through
        len_check_iterator together with the content and its length.
        """
        expected_len = len(content)
        raw_tokens = len_check_iterator(
            content,
            self._tokenizer(content),
            content_len=expected_len,
        )
        parsed = self._parse_stream(BufferingIterator(raw_tokens))
        yield from len_check_iterator(
            content,
            parsed,
            content_len=expected_len,
        )

    def interpret(
        self,
        kvpair_element,  # type: Deb822KeyValuePairElement
        discard_comments_on_read=True,  # type: bool
    ):
        # type: (...) -> T
        """Parse the value of the key-value pair and interpret the result"""
        return self._high_level_interpretation(
            kvpair_element,
            self._parse_kvpair(kvpair_element),
            discard_comments_on_read=discard_comments_on_read,
        )
866def _parser_to_value_factory(
867 parser, # type: StrToValueParser[VE]
868 vtype, # type: Type[VE]
869):
870 # type: (...) -> Callable[[str], VE]
871 def _value_factory(v):
872 # type: (str) -> VE
873 if v == "": 873 ↛ 874line 873 didn't jump to line 874, because the condition on line 873 was never true
874 raise ValueError("The empty string is not a value")
875 token_iter = iter(parser(v))
876 t1 = next(token_iter, None) # type: Optional[Union[TokenOrElement]]
877 t2 = next(token_iter, None)
878 assert t1 is not None, (
879 'Bad parser - it returned None (or no TE) for "' + v + '"'
880 )
881 if t2 is not None: 881 ↛ 882line 881 didn't jump to line 882, because the condition on line 881 was never true
882 msg = textwrap.dedent(
883 """\
884 The input "{v}" should have been exactly one element, but the parser provided at
885 least two. This can happen with unnecessary leading/trailing whitespace
886 or including commas the value for a comma list.
887 """
888 ).format(v=v)
889 raise ValueError(msg)
890 if not isinstance(t1, vtype): 890 ↛ 891line 890 didn't jump to line 891, because the condition on line 890 was never true
891 if isinstance(t1, Deb822Token) and (t1.is_comment or t1.is_whitespace):
892 raise ValueError(
893 'The input "{v}" is whitespace or a comment: Expected a value'
894 )
895 msg = (
896 'The input "{v}" should have produced a element of type {vtype_name}, but'
897 " instead it produced {t1}"
898 )
899 raise ValueError(msg.format(v=v, vtype_name=vtype.__name__, t1=t1))
901 assert len(t1.convert_to_text()) == len(v), (
902 "Bad tokenizer - the token did not cover the input text"
903 " exactly ({t1_len} != {v_len}".format(
904 t1_len=len(t1.convert_to_text()), v_len=len(v)
905 )
906 )
907 return t1
909 return _value_factory
class ListInterpretation(
    GenericContentBasedInterpretation[Deb822ParsedTokenList[VE, ST], VE]
):
    """Interpretation that exposes the field value as a Deb822ParsedTokenList"""

    def __init__(
        self,
        tokenizer,  # type: Callable[[str], Iterable['Deb822Token']]
        value_parser,  # type: StreamingValueParser[VE]
        vtype,  # type: Type[VE]
        stype,  # type: Type[ST]
        default_separator_factory,  # type: Callable[[], ST]
        render_factory,  # type: Callable[[bool], Callable[[VE], str]]
    ):
        # type: (...) -> None
        super().__init__(tokenizer, value_parser)
        self._vtype = vtype
        self._stype = stype
        self._default_separator_factory = default_separator_factory
        self._render_factory = render_factory

    def _high_level_interpretation(
        self,
        kvpair_element,  # type: Deb822KeyValuePairElement
        proxy_element,  # type: Deb822InterpretationProxyElement
        discard_comments_on_read=True,  # type: bool
    ):
        # type: (...) -> Deb822ParsedTokenList[VE, ST]
        """Wrap the parsed proxy element in a Deb822ParsedTokenList

        The render function of the list is obtained by calling the render
        factory with discard_comments_on_read.
        """
        render = self._render_factory(discard_comments_on_read)
        return Deb822ParsedTokenList(
            kvpair_element,
            proxy_element,
            self._vtype,
            self._stype,
            self._parse_str,
            self._default_separator_factory,
            render,
        )
def _parse_whitespace_list_value(token, _):
    # type: (Deb822Token, BufferingIterator[Deb822Token]) -> Deb822ParsedValueElement
    """Wrap a single token in a parsed value element.

    The buffering iterator argument is accepted but not used.
    """
    single_token_value = [token]
    return Deb822ParsedValueElement(single_token_value)
def _is_comma_token(v):
    # type: (TokenOrElement) -> bool
    """Tell whether ``v`` is a Deb822CommaToken.

    Used as the predicate when peeking ahead for the next comma.
    """
    is_comma = isinstance(v, Deb822CommaToken)
    return is_comma
def _parse_comma_list_value(token, buffered_iterator):
    # type: (Deb822Token, BufferingIterator[Deb822Token]) -> Deb822ParsedValueElement
    """Collect the tokens of one comma-list value starting at ``token``.

    The tokens are peeked first and only consumed from the iterator once the
    extent of the value is known.
    """
    parts = [token]
    next_comma = buffered_iterator.peek_find(_is_comma_token)
    if next_comma is None:
        # No comma ahead: this is the last value. Take everything that is
        # left and drop trailing tokens that are not value tokens.
        parts.extend(buffered_iterator.peek_buffer())
        while parts and not isinstance(parts[-1], Deb822ValueToken):
            del parts[-1]
    else:
        # A comma follows, so the value ends right before it.
        parts.extend(buffered_iterator.peek_many(next_comma - 1))

    # "token" itself was already taken by the caller; consume only the rest.
    buffered_iterator.consume_many(len(parts) - 1)
    return Deb822ParsedValueElement(parts)
def _parse_uploaders_list_value(token, buffered_iterator):
    # type: (Deb822Token, BufferingIterator[Deb822Token]) -> Deb822ParsedValueElement
    """Collect the tokens of one Uploaders-style value starting at ``token``.

    This mirrors _parse_comma_list_value with one extra rule: a comma only
    terminates the value when the closest preceding value token ends with ">".
    Any other comma is treated as part of the value (comma may appear in the
    name or email).
    """
    parts = [token]
    while True:
        next_comma = buffered_iterator.peek_find(_is_comma_token)
        if next_comma is None:
            # The value is the last value there is. Take all remaining tokens
            # and then trim from the right, consuming only what is kept.
            tail = buffered_iterator.peek_buffer()
            to_consume = len(tail)
            parts.extend(tail)
            while parts and not isinstance(parts[-1], Deb822ValueToken):
                parts.pop()
                to_consume -= 1
            buffered_iterator.consume_many(to_consume)
            break

        # parts[-1] is included to easily cope with the common case of
        # "foo <a@b.com>," where there are 0 peeked elements to examine.
        candidates = [parts[-1]]
        candidates.extend(buffered_iterator.peek_many(next_comma - 1))

        # Locate the value token closest to the comma (scanning backwards).
        last_value_index = None
        for index in range(len(candidates) - 1, -1, -1):
            if isinstance(candidates[index], Deb822ValueToken):
                last_value_index = index
                break

        if last_value_index is not None and candidates[
            last_value_index
        ].text.endswith(">"):
            # The comma terminates the value. Index 0 is the token we already
            # hold, so the index equals the number of tokens to consume.
            parts.extend(buffered_iterator.consume_many(last_value_index))
            assert isinstance(parts[-1], Deb822ValueToken) and parts[
                -1
            ].text.endswith(">"), "Got: " + str(parts)
            break

        # The comma belongs to the value: swallow everything up to and
        # including it, then look for the next comma.
        parts.extend(buffered_iterator.consume_many(next_comma))
        assert isinstance(parts[-1], Deb822CommaToken)

    return Deb822ParsedValueElement(parts)
class Deb822Element(Locatable):
    """Composite elements (consists of 1 or more tokens)"""

    __slots__ = ("_parent_element", "_full_size_cache", "__weakref__")

    def __init__(self):
        # type: () -> None
        # The parent is held through a weak reference (see the setter below).
        self._parent_element = None  # type: Optional[ReferenceType['Deb822Element']]
        # Lazily computed by size(); subclasses reset it to None on mutation.
        self._full_size_cache = None  # type: Optional[Range]

    def iter_parts(self):
        # type: () -> Iterable[TokenOrElement]
        """Iterate over the direct children; must be provided by subclasses."""
        raise NotImplementedError  # pragma: no cover

    def iter_parts_of_type(self, only_element_or_token_type):
        # type: (Type[TE]) -> Iterable[TE]
        """Yield the direct children that are instances of the given type."""
        for candidate in self.iter_parts():
            if not isinstance(candidate, only_element_or_token_type):
                continue
            yield candidate

    def iter_tokens(self):
        # type: () -> Iterable[Deb822Token]
        """Yield every token below this element, descending into sub-elements."""
        for child in self.iter_parts():
            # Control check to catch bugs early
            assert child._parent_element is not None
            if not isinstance(child, Deb822Element):
                yield child
                continue
            yield from child.iter_tokens()

    def iter_recurse(
        self, *, only_element_or_token_type=None  # type: Optional[Type[TE]]
    ):
        # type: (...) -> Iterable[TE]
        """Yield children recursively, optionally filtered by type.

        Each matching child is yielded before its own descendants.
        """
        wanted_type = only_element_or_token_type
        for child in self.iter_parts():
            if wanted_type is None or isinstance(child, wanted_type):
                yield cast("TE", child)
            if isinstance(child, Deb822Element):
                yield from child.iter_recurse(only_element_or_token_type=wanted_type)

    @property
    def is_error(self):
        # type: () -> bool
        return False

    @property
    def is_comment(self):
        # type: () -> bool
        return False

    @property
    def parent_element(self):
        # type: () -> Optional[Deb822Element]
        """The parent element, resolved from the stored weak reference."""
        return resolve_ref(self._parent_element)

    @parent_element.setter
    def parent_element(self, new_parent):
        # type: (Optional[Deb822Element]) -> None
        if new_parent is None:
            self._parent_element = None
        else:
            self._parent_element = weakref.ref(new_parent)

    def _init_parent_of_parts(self):
        # type: () -> None
        """Register this element as the parent of each direct child."""
        for child in self.iter_parts():
            child.parent_element = self

    # Deliberately not a "text" property, to signal that it is not necessary cheap.
    def convert_to_text(self):
        # type: () -> str
        """Concatenate the text of all tokens below this element."""
        return "".join(token.text for token in self.iter_tokens())

    def clear_parent_if_parent(self, parent):
        # type: (Deb822Element) -> None
        """Drop the parent reference, but only if it currently is ``parent``."""
        if self.parent_element is parent:
            self._parent_element = None

    def size(self, *, skip_leading_comments: bool = True) -> Range:
        """Return the (cached) range covered by all parts of this element.

        ``skip_leading_comments`` is accepted but not consulted here; the
        range always spans every part.
        """
        cached = self._full_size_cache
        if cached is not None:
            return cached
        cached = Range.from_position_and_sizes(
            START_POSITION,
            (part.size(skip_leading_comments=False) for part in self.iter_parts()),
        )
        self._full_size_cache = cached
        return cached
class Deb822InterpretationProxyElement(Deb822Element):
    """Element that holds a list of parts and positions itself via another element.

    Its parent is the "real" element passed to the constructor; both position
    queries are answered by that element.
    """

    __slots__ = ("parts",)

    def __init__(
        self, real_element: Deb822Element, parts: List[TokenOrElement]
    ) -> None:
        super().__init__()
        self.parent_element = real_element
        self.parts = parts
        # Every part is re-parented to this proxy.
        for part in self.parts:
            part.parent_element = self

    def iter_parts(self):
        # type: () -> Iterable[TokenOrElement]
        return iter(self.parts)

    def position_in_parent(self, *, skip_leading_comments: bool = True) -> Position:
        """Return the real element's position in its parent.

        Raises RuntimeError when the real element is no longer available.
        """
        real_element = self.parent_element
        if real_element is not None:
            return real_element.position_in_parent()
        raise RuntimeError("parent was garbage collected")

    def position_in_file(self, *, skip_leading_comments: bool = True) -> Position:
        """Return the real element's position in the file.

        Raises RuntimeError when the real element is no longer available.
        """
        real_element = self.parent_element
        if real_element is not None:
            return real_element.position_in_file()
        raise RuntimeError("parent was garbage collected")

    def size(self, *, skip_leading_comments: bool = True) -> Range:
        # Same as parent except we never use a cache.
        part_sizes = (
            part.size(skip_leading_comments=False) for part in self.iter_parts()
        )
        return Range.from_position_and_sizes(START_POSITION, part_sizes)
class Deb822ErrorElement(Deb822Element):
    """Element representing elements or tokens that are out of place

    Commonly, it will just be instances of Deb822ErrorToken, but it can be other
    things.  As an example if a parser discovers out of order elements/tokens,
    it can bundle them in a Deb822ErrorElement to signal that the sequence of
    elements/tokens are invalid (even if the tokens themselves are valid).
    """

    __slots__ = ("_parts",)

    def __init__(self, parts):
        # type: (Sequence[TokenOrElement]) -> None
        super().__init__()
        # Snapshot the parts as a tuple, then adopt each of them.
        frozen_parts = tuple(parts)
        self._parts = frozen_parts
        self._init_parent_of_parts()

    def iter_parts(self):
        # type: () -> Iterable[TokenOrElement]
        for part in self._parts:
            yield part

    @property
    def is_error(self):
        # type: () -> bool
        return True
class Deb822ValueLineElement(Deb822Element):
    """Consists of one "line" of a value"""

    __slots__ = (
        "_comment_element",
        "_continuation_line_token",
        "_leading_whitespace_token",
        "_value_tokens",
        "_trailing_whitespace_token",
        "_newline_token",
    )

    def __init__(
        self,
        comment_element,  # type: Optional[Deb822CommentElement]
        continuation_line_token,  # type: Optional[Deb822ValueContinuationToken]
        leading_whitespace_token,  # type: Optional[Deb822WhitespaceToken]
        value_parts,  # type: List[TokenOrElement]
        trailing_whitespace_token,  # type: Optional[Deb822WhitespaceToken]
        # only optional if it is the last line of the file and the file does not
        # end with a newline.
        newline_token,  # type: Optional[Deb822WhitespaceToken]
    ):
        # type: (...) -> None
        """Create a value line from its (mostly optional) parts.

        Raises ValueError if a comment element is given without a
        continuation line token.
        """
        super().__init__()
        if comment_element is not None and continuation_line_token is None:
            raise ValueError("Only continuation lines can have comments")
        self._comment_element = comment_element  # type: Optional[Deb822CommentElement]
        self._continuation_line_token = continuation_line_token
        self._leading_whitespace_token = (
            leading_whitespace_token
        )  # type: Optional[Deb822WhitespaceToken]
        self._value_tokens = value_parts  # type: List[TokenOrElement]
        self._trailing_whitespace_token = trailing_whitespace_token
        self._newline_token = newline_token  # type: Optional[Deb822WhitespaceToken]
        self._init_parent_of_parts()

    @property
    def comment_element(self):
        # type: () -> Optional[Deb822CommentElement]
        return self._comment_element

    @property
    def continuation_line_token(self):
        # type: () -> Optional[Deb822ValueContinuationToken]
        return self._continuation_line_token

    @property
    def newline_token(self):
        # type: () -> Optional[Deb822WhitespaceToken]
        return self._newline_token

    def add_newline_if_missing(self):
        # type: () -> bool
        """Append a newline token if the line has none.

        Returns True when a token was added (the cached size is reset in that
        case) and False when the line already had a newline token.
        """
        if self._newline_token is None:
            self._newline_token = Deb822NewlineAfterValueToken()
            self._newline_token.parent_element = self
            self._full_size_cache = None
            return True
        return False

    def _iter_content_parts(self):
        # type: () -> Iterable[TokenOrElement]
        """Yield leading whitespace, the value parts and trailing whitespace."""
        if self._leading_whitespace_token:
            yield self._leading_whitespace_token
        yield from self._value_tokens
        if self._trailing_whitespace_token:
            yield self._trailing_whitespace_token

    def _iter_content_tokens(self):
        # type: () -> Iterable[Deb822Token]
        """Yield the tokens of the content parts, descending into elements."""
        for part in self._iter_content_parts():
            if isinstance(part, Deb822Element):
                yield from part.iter_tokens()
            else:
                yield part

    def convert_content_to_text(self):
        # type: () -> str
        """Return the text of the content parts.

        The comment, continuation line token and newline token are excluded.
        """
        if (
            len(self._value_tokens) == 1
            and not self._leading_whitespace_token
            and not self._trailing_whitespace_token
            and isinstance(self._value_tokens[0], Deb822Token)
        ):
            # By default, we get a single value spanning the entire line
            # (minus continuation line and newline, but we are supposed to
            # exclude those)
            return self._value_tokens[0].text

        return "".join(t.text for t in self._iter_content_tokens())

    def iter_parts(self):
        # type: () -> Iterable[TokenOrElement]
        """Yield all parts in order: comment, continuation, content, newline."""
        if self._comment_element:
            yield self._comment_element
        if self._continuation_line_token:
            yield self._continuation_line_token
        yield from self._iter_content_parts()
        if self._newline_token:
            yield self._newline_token

    def size(self, *, skip_leading_comments: bool = True) -> Range:
        """Return the range of this line, optionally without comment parts."""
        if skip_leading_comments:
            return Range.from_position_and_sizes(
                START_POSITION,
                (
                    p.size(skip_leading_comments=False)
                    for p in self.iter_parts()
                    if not p.is_comment
                ),
            )
        return super().size(skip_leading_comments=skip_leading_comments)

    def position_in_parent(self, *, skip_leading_comments: bool = True) -> Position:
        """Return the start position of this line inside its parent.

        With ``skip_leading_comments`` the position is moved to the first part
        that is not a comment.
        """
        base_pos = super().position_in_parent(skip_leading_comments=False)
        if skip_leading_comments:
            for p in self.iter_parts():
                if p.is_comment:
                    continue
                non_comment_pos = p.position_in_parent(skip_leading_comments=False)
                base_pos = non_comment_pos.relative_to(base_pos)
                # Only the first non-comment part marks the start; re-basing on
                # the later parts as well would add up unrelated offsets.
                break
        return base_pos
class Deb822ValueElement(Deb822Element):
    """Element holding the value lines of a field."""

    __slots__ = ("_value_entry_elements",)

    def __init__(self, value_entry_elements):
        # type: (Sequence[Deb822ValueLineElement]) -> None
        super().__init__()
        # The lines are snapshotted into a tuple before being adopted.
        self._value_entry_elements = tuple(
            value_entry_elements
        )  # type: Sequence[Deb822ValueLineElement]
        self._init_parent_of_parts()

    @property
    def value_lines(self):
        # type: () -> Sequence[Deb822ValueLineElement]
        """Read-only list of value entries"""
        return self._value_entry_elements

    def iter_parts(self):
        # type: () -> Iterable[TokenOrElement]
        for value_line in self._value_entry_elements:
            yield value_line

    def add_final_newline_if_missing(self):
        # type: () -> bool
        """Ensure the last value line ends with a newline token.

        Returns True if a newline was added, otherwise False (including when
        there are no value lines at all).
        """
        if not self._value_entry_elements:
            return False
        last_line = self._value_entry_elements[-1]
        was_added = last_line.add_newline_if_missing()
        if was_added:
            # Our size changed along with the last line's.
            self._full_size_cache = None
        return was_added
class Deb822ParsedValueElement(Deb822Element):
    """A single parsed value made up of one or more tokens.

    The token list must start and end on a Deb822ValueToken. The text with
    and without comments is cached separately.
    """

    __slots__ = ("_text_cached", "_text_no_comments_cached", "_token_list")

    def __init__(self, tokens):
        # type: (List[Deb822Token]) -> None
        """Create the element from ``tokens``.

        Raises ValueError if the first or the last token is not a
        Deb822ValueToken.
        """
        super().__init__()
        self._token_list = tokens
        self._init_parent_of_parts()
        if not isinstance(tokens[0], Deb822ValueToken) or not isinstance(
            tokens[-1], Deb822ValueToken
        ):
            raise ValueError(
                self.__class__.__name__ + " MUST start and end on a Deb822ValueToken"
            )
        if len(tokens) == 1:
            # A lone value token: both renderings are just its text.
            token = tokens[0]
            self._text_cached = token.text  # type: Optional[str]
            self._text_no_comments_cached = token.text  # type: Optional[str]
        else:
            self._text_cached = None
            self._text_no_comments_cached = None

    def convert_to_text(self):
        # type: () -> str
        """Return the full text of the value, including any comment tokens."""
        # Use the dedicated cache slot; sharing the "no comments" slot would
        # let the two renderings overwrite each other.
        if self._text_cached is None:
            self._text_cached = super().convert_to_text()
        return self._text_cached

    def convert_to_text_without_comments(self):
        # type: () -> str
        """Return the text of the value with comment tokens left out."""
        if self._text_no_comments_cached is None:
            self._text_no_comments_cached = "".join(
                t.text for t in self.iter_tokens() if not t.is_comment
            )
        return self._text_no_comments_cached

    def iter_parts(self):
        # type: () -> Iterable[TokenOrElement]
        yield from self._token_list
class Deb822CommentElement(Deb822Element):
    """Element consisting of one or more comment tokens.

    Supports ``len()`` and integer indexing over its comment tokens.
    """

    __slots__ = ("_comment_tokens",)

    def __init__(self, comment_tokens):
        # type: (Sequence[Deb822CommentToken]) -> None
        """Create the element; raises ValueError when given no tokens."""
        super().__init__()
        self._comment_tokens = tuple(
            comment_tokens
        )  # type: Sequence[Deb822CommentToken]
        # Validate the materialized tuple: the raw argument may be an iterator,
        # which is truthy even when it yields nothing.
        if not self._comment_tokens:  # pragma: no cover
            raise ValueError("Comment elements must have at least one comment token")
        self._init_parent_of_parts()

    @property
    def is_comment(self):
        # type: () -> bool
        return True

    def __len__(self):
        # type: () -> int
        return len(self._comment_tokens)

    def __getitem__(self, item):
        # type: (int) -> Deb822CommentToken
        return self._comment_tokens[item]

    def iter_parts(self):
        # type: () -> Iterable[TokenOrElement]
        yield from self._comment_tokens
class Deb822KeyValuePairElement(Deb822Element):
    """A field: optional comment, field name token, separator token and value."""

    __slots__ = (
        "_comment_element",
        "_field_token",
        "_separator_token",
        "_value_element",
    )

    def __init__(
        self,
        comment_element,  # type: Optional[Deb822CommentElement]
        field_token,  # type: Deb822FieldNameToken
        separator_token,  # type: Deb822FieldSeparatorToken
        value_element,  # type: Deb822ValueElement
    ):
        # type: (...) -> None
        super().__init__()
        self._value_element = value_element  # type: Deb822ValueElement
        self._separator_token = separator_token  # type: Deb822FieldSeparatorToken
        self._field_token = field_token  # type: Deb822FieldNameToken
        self._comment_element = comment_element  # type: Optional[Deb822CommentElement]
        self._init_parent_of_parts()

    @property
    def field_name(self):
        # type: () -> _strI
        """The text of the field name token."""
        return self.field_token.text

    @property
    def field_token(self):
        # type: () -> Deb822FieldNameToken
        return self._field_token

    @property
    def value_element(self):
        # type: () -> Deb822ValueElement
        return self._value_element

    @value_element.setter
    def value_element(self, new_value):
        # type: (Deb822ValueElement) -> None
        # The size depends on the value, so the cached size is dropped first.
        self._full_size_cache = None
        previous_value = self._value_element
        previous_value.clear_parent_if_parent(self)
        self._value_element = new_value
        new_value.parent_element = self

    def interpret_as(
        self,
        interpreter,  # type: Interpretation[T]
        discard_comments_on_read=True,  # type: bool
    ):
        # type: (...) -> T
        """Let ``interpreter`` interpret this key/value pair and return the result."""
        return interpreter.interpret(
            self,
            discard_comments_on_read=discard_comments_on_read,
        )

    @property
    def comment_element(self):
        # type: () -> Optional[Deb822CommentElement]
        return self._comment_element

    @comment_element.setter
    def comment_element(self, value):
        # type: (Optional[Deb822CommentElement]) -> None
        self._full_size_cache = None
        has_new_comment = value is not None
        if has_new_comment and not value[-1].text.endswith("\n"):
            raise ValueError("Field comments must end with a newline")
        if self._comment_element:
            # Detach the previous comment before replacing it.
            self._comment_element.clear_parent_if_parent(self)
        if has_new_comment:
            value.parent_element = self
        self._comment_element = value

    def iter_parts(self):
        # type: () -> Iterable[TokenOrElement]
        """Yield the comment (if any), field token, separator and value."""
        if self._comment_element:
            yield self._comment_element
        yield from (self._field_token, self._separator_token, self._value_element)

    def position_in_parent(
        self,
        *,
        skip_leading_comments: bool = True,
    ) -> Position:
        """Return the start position of this field inside its parent.

        With ``skip_leading_comments`` and a comment present, the position of
        the field name token is used instead of the start of the comment.
        """
        element_start = super().position_in_parent(skip_leading_comments=False)
        if not (skip_leading_comments and self._comment_element):
            return element_start
        field_start = self._field_token.position_in_parent()
        return field_start.relative_to(element_start)

    def size(self, *, skip_leading_comments: bool = True) -> Range:
        """Return the range of this field, optionally without comment parts."""
        if not skip_leading_comments:
            return super().size(skip_leading_comments=False)
        non_comment_sizes = (
            part.size(skip_leading_comments=False)
            for part in self.iter_parts()
            if not part.is_comment
        )
        return Range.from_position_and_sizes(START_POSITION, non_comment_sizes)
1520def _format_comment(c):
1521 # type: (str) -> str
1522 if c == "": 1522 ↛ 1524line 1522 didn't jump to line 1524, because the condition on line 1522 was never true
1523 # Special-case: Empty strings are mapped to an empty comment line
1524 return "#\n"
1525 if "\n" in c[:-1]: 1525 ↛ 1526line 1525 didn't jump to line 1526, because the condition on line 1525 was never true
1526 raise ValueError("Comment lines must not have embedded newlines")
1527 if not c.endswith("\n"): 1527 ↛ 1529line 1527 didn't jump to line 1529, because the condition on line 1527 was never false
1528 c = c.rstrip() + "\n"
1529 if not c.startswith("#"): 1529 ↛ 1531line 1529 didn't jump to line 1531, because the condition on line 1529 was never false
1530 c = "# " + c.lstrip()
1531 return c
def _unpack_key(
    item,  # type: ParagraphKey
    raise_if_indexed=False,  # type: bool
):
    # type: (...) -> Tuple[_strI, Optional[int], Optional[Deb822FieldNameToken]]
    """Split a paragraph key into ``(key, index, name_token)``.

    ``item`` may be a ``(key, index)`` tuple, a Deb822FieldNameToken or
    anything else accepted by ``_strI``. With ``raise_if_indexed`` a tuple
    key is only accepted when its index is 0, and the returned index is then
    None; any other index raises KeyError.
    """
    if isinstance(item, tuple):
        raw_key, index = item
        if raise_if_indexed:
            # Fudge "(key, 0)" into a "key" callers to defensively support
            # both paragraph styles with the same key.
            if index != 0:
                msg = 'Cannot resolve key "{key}" with index {index}. The key is not indexed'
                raise KeyError(msg.format(key=raw_key, index=index))
            index = None
        return _strI(raw_key), index, None

    if isinstance(item, Deb822FieldNameToken):
        # The token's own text is used as the key and the token is returned too.
        return item.text, None, item

    return _strI(item), None, None
1562def _convert_value_lines_to_lines(
1563 value_lines, # type: Iterable[Deb822ValueLineElement]
1564 strip_comments, # type: bool
1565):
1566 # type: (...) -> Iterable[str]
1567 if not strip_comments: 1567 ↛ 1568line 1567 didn't jump to line 1568, because the condition on line 1567 was never true
1568 yield from (v.convert_to_text() for v in value_lines)
1569 else:
1570 for element in value_lines:
1571 yield "".join(x.text for x in element.iter_tokens() if not x.is_comment)
if sys.version_info >= (3, 9) or TYPE_CHECKING:
    # The subscripted ABC is used directly on Python 3.9+ and whenever a type
    # checker evaluates this module.
    _ParagraphMapping_Base = collections.abc.Mapping[ParagraphKey, T]
else:
    # Python 3.5 - 3.8 compat - we are not allowed to subscript the abc.Mapping
    # - use this little hack to work around it
    class _ParagraphMapping_Base(collections.abc.Mapping, Generic[T], ABC):
        pass
1583# Deb822ParagraphElement uses this Mixin (by having `_paragraph` return self).
1584# Therefore, the Mixin needs to call the "proper" methods on the paragraph to
1585# avoid doing infinite recursion.
class AutoResolvingMixin(Generic[T], _ParagraphMapping_Base[T]):
    """Mapping operations implemented on top of a paragraph element.

    Subclasses provide ``_paragraph`` and ``_interpret_value``; every mapping
    operation is delegated to explicit methods on the paragraph.
    """

    @property
    def _auto_resolve_ambiguous_fields(self):
        # type: () -> bool
        return True

    @property
    def _paragraph(self):
        # type: () -> Deb822ParagraphElement
        raise NotImplementedError  # pragma: no cover

    def __len__(self):
        # type: () -> int
        paragraph = self._paragraph
        return paragraph.kvpair_count

    def __contains__(self, item):
        # type: (object) -> bool
        paragraph = self._paragraph
        return paragraph.contains_kvpair_element(item)

    def __iter__(self):
        # type: () -> Iterator[ParagraphKey]
        keys = self._paragraph.iter_keys()
        return iter(keys)

    def __getitem__(self, item):
        # type: (ParagraphKey) -> T
        """Look up ``item`` and return its interpreted value.

        When auto-resolving is enabled, a plain string key is looked up as
        ``(item, 0)``.
        """
        lookup_key = item
        if self._auto_resolve_ambiguous_fields and isinstance(item, str):
            lookup_key = (item, 0)
        kvpair = self._paragraph.get_kvpair_element(lookup_key)
        assert kvpair is not None
        # The interpretation is given the key exactly as the caller passed it.
        return self._interpret_value(item, kvpair)

    def __delitem__(self, item):
        # type: (ParagraphKey) -> None
        paragraph = self._paragraph
        paragraph.remove_kvpair_element(item)

    def _interpret_value(self, key, value):
        # type: (ParagraphKey, Deb822KeyValuePairElement) -> T
        raise NotImplementedError  # pragma: no cover
1628# Deb822ParagraphElement uses this Mixin (by having `_paragraph` return self).
1629# Therefore, the Mixin needs to call the "proper" methods on the paragraph to
1630# avoid doing infinite recursion.
class Deb822ParagraphToStrWrapperMixin(AutoResolvingMixin[str], ABC):
    """Mixin exposing the fields of a paragraph as plain strings.

    The four ``_auto_*`` / ``_discard_*`` / ``_preserve_*`` properties control
    the conversion; all of them default to True here and can be overridden by
    subclasses.
    """

    @property
    def _auto_map_initial_line_whitespace(self):
        # type: () -> bool
        return True

    @property
    def _discard_comments_on_read(self):
        # type: () -> bool
        return True

    @property
    def _auto_map_final_newline_in_multiline_values(self):
        # type: () -> bool
        return True

    @property
    def _preserve_field_comments_on_field_updates(self):
        # type: () -> bool
        return True

    def _convert_value_to_str(self, kvpair_element):
        # type: (Deb822KeyValuePairElement) -> str
        """Render the value of ``kvpair_element`` as a string.

        Depending on the enabled options, the first line is stripped of
        surrounding whitespace, comments are left out and one trailing newline
        is removed.
        """
        value_element = kvpair_element.value_element
        value_entries = value_element.value_lines
        if len(value_entries) == 1:
            # Special case single line entry (e.g. "Package: foo") as they never
            # have comments and we can do some parts more efficient.
            value_entry = value_entries[0]
            t = value_entry.convert_to_text()
            if self._auto_map_initial_line_whitespace:
                t = t.strip()
            return t

        if self._auto_map_initial_line_whitespace or self._discard_comments_on_read:
            converter = _convert_value_lines_to_lines(
                value_entries,
                self._discard_comments_on_read,
            )

            auto_map_space = self._auto_map_initial_line_whitespace

            # Because we know there are more than one line, we can unconditionally inject
            # the newline after the first line
            as_text = "".join(
                line.strip() + "\n" if auto_map_space and i == 1 else line
                for i, line in enumerate(converter, start=1)
            )
        else:
            # No rewrite necessary.
            as_text = value_element.convert_to_text()

        # endswith() instead of as_text[-1]: the text can be empty (e.g. a
        # value element without any lines), which must not raise IndexError.
        if self._auto_map_final_newline_in_multiline_values and as_text.endswith("\n"):
            as_text = as_text[:-1]
        return as_text

    def __setitem__(self, item, value):
        # type: (ParagraphKey, str) -> None
        """Set the field ``item`` to the string ``value``.

        Raises ValueError if a value that goes through the raw-string path
        does not end with a newline and the final newline mapping is disabled.
        """
        keep_comments = (
            self._preserve_field_comments_on_field_updates
        )  # type: Optional[bool]
        comment = None
        if keep_comments and self._auto_resolve_ambiguous_fields:
            # For ambiguous fields, we have to resolve the original field as
            # the set_field_* methods do not cope with ambiguous fields.  This
            # means we might as well clear the keep_comments flag as we have
            # resolved the comment.
            keep_comments = None
            key_lookup = item
            if isinstance(item, str):
                key_lookup = (item, 0)
            orig_kvpair = self._paragraph.get_kvpair_element(key_lookup, use_get=True)
            if orig_kvpair is not None:
                comment = orig_kvpair.comment_element

        if self._auto_map_initial_line_whitespace:
            # Only values without any newline take the simple-value path. (The
            # previous extra test "index == len(value)" could never be true,
            # so dropping it does not change which path a value takes.)
            if "\n" not in value:
                self._paragraph.set_field_to_simple_value(
                    item,
                    value.strip(),
                    preserve_original_field_comment=keep_comments,
                    field_comment=comment,
                )
                return
            # Regenerate the first line with normalized whitespace if necessary
            first_line, rest = value.split("\n", 1)
            if first_line and first_line[:1] not in ("\t", " "):
                value = "".join((" ", first_line.strip(), "\n", rest))
            else:
                value = "".join((first_line, "\n", rest))
        if not value.endswith("\n"):
            if not self._auto_map_final_newline_in_multiline_values:
                raise ValueError(
                    "Values must end with a newline (or be single line"
                    " values and use the auto whitespace mapping feature)"
                )
            value += "\n"
        self._paragraph.set_field_from_raw_string(
            item,
            value,
            preserve_original_field_comment=keep_comments,
            field_comment=comment,
        )

    def _interpret_value(self, key, value):
        # type: (ParagraphKey, Deb822KeyValuePairElement) -> str
        # mypy is a bit dense and cannot see that T == str
        return self._convert_value_to_str(value)
class AbstractDeb822ParagraphWrapper(AutoResolvingMixin[T], ABC):
    """Base for wrappers that keep a paragraph together with two read options."""

    def __init__(
        self,
        paragraph,  # type: Deb822ParagraphElement
        *,
        auto_resolve_ambiguous_fields=False,  # type: bool
        discard_comments_on_read=True,  # type: bool
    ):
        # type: (...) -> None
        # Stored privately and exposed read-only through the properties below.
        self.__discard_comments_on_read = discard_comments_on_read
        self.__auto_resolve_ambiguous_fields = auto_resolve_ambiguous_fields
        self.__paragraph = paragraph

    @property
    def _paragraph(self):
        # type: () -> Deb822ParagraphElement
        """The wrapped paragraph."""
        return self.__paragraph

    @property
    def _discard_comments_on_read(self):
        # type: () -> bool
        """The ``discard_comments_on_read`` value given to the constructor."""
        return self.__discard_comments_on_read

    @property
    def _auto_resolve_ambiguous_fields(self):
        # type: () -> bool
        """The ``auto_resolve_ambiguous_fields`` value given to the constructor."""
        return self.__auto_resolve_ambiguous_fields
class Deb822InterpretingParagraphWrapper(AbstractDeb822ParagraphWrapper[T]):
    """Paragraph wrapper whose values are produced by an interpretation."""

    def __init__(
        self,
        paragraph,  # type: Deb822ParagraphElement
        interpretation,  # type: Interpretation[T]
        *,
        auto_resolve_ambiguous_fields=False,  # type: bool
        discard_comments_on_read=True,  # type: bool
    ):
        # type: (...) -> None
        super().__init__(
            paragraph,
            auto_resolve_ambiguous_fields=auto_resolve_ambiguous_fields,
            discard_comments_on_read=discard_comments_on_read,
        )
        self._interpretation = interpretation

    def _interpret_value(self, key, value):
        # type: (ParagraphKey, Deb822KeyValuePairElement) -> T
        """Interpret ``value`` with the configured interpretation."""
        # Forward the wrapper's comment policy; previously the option was
        # accepted by the constructor but never reached the interpretation.
        return self._interpretation.interpret(
            value,
            discard_comments_on_read=self._discard_comments_on_read,
        )
class Deb822DictishParagraphWrapper(
    AbstractDeb822ParagraphWrapper[str], Deb822ParagraphToStrWrapperMixin
):
    """String-valued paragraph wrapper with per-instance conversion options."""

    def __init__(
        self,
        paragraph,  # type: Deb822ParagraphElement
        *,
        discard_comments_on_read=True,  # type: bool
        auto_map_initial_line_whitespace=True,  # type: bool
        auto_resolve_ambiguous_fields=False,  # type: bool
        preserve_field_comments_on_field_updates=True,  # type: bool
        auto_map_final_newline_in_multiline_values=True,  # type: bool
    ):
        # type: (...) -> None
        # The paragraph and the two read options are kept by the base class.
        super().__init__(
            paragraph,
            auto_resolve_ambiguous_fields=auto_resolve_ambiguous_fields,
            discard_comments_on_read=discard_comments_on_read,
        )
        # The remaining options back the property overrides below.
        self.__auto_map_final_newline_in_multiline_values = (
            auto_map_final_newline_in_multiline_values
        )
        self.__preserve_field_comments_on_field_updates = (
            preserve_field_comments_on_field_updates
        )
        self.__auto_map_initial_line_whitespace = auto_map_initial_line_whitespace

    @property
    def _auto_map_initial_line_whitespace(self):
        # type: () -> bool
        """The ``auto_map_initial_line_whitespace`` constructor value."""
        return self.__auto_map_initial_line_whitespace

    @property
    def _preserve_field_comments_on_field_updates(self):
        # type: () -> bool
        """The ``preserve_field_comments_on_field_updates`` constructor value."""
        return self.__preserve_field_comments_on_field_updates

    @property
    def _auto_map_final_newline_in_multiline_values(self):
        # type: () -> bool
        """The ``auto_map_final_newline_in_multiline_values`` constructor value."""
        return self.__auto_map_final_newline_in_multiline_values
1843class Deb822ParagraphElement(Deb822Element, Deb822ParagraphToStrWrapperMixin, ABC):
@classmethod
def new_empty_paragraph(cls):
    # type: () -> Deb822ParagraphElement
    """Create a paragraph without any fields

    The result is always a Deb822NoDuplicateFieldsParagraphElement built from
    an empty field list and an empty OrderedSet (`cls` is not consulted).
    """
    return Deb822NoDuplicateFieldsParagraphElement([], OrderedSet())
@classmethod
def from_dict(cls, mapping):
    # type: (Mapping[str, str]) -> Deb822ParagraphElement
    """Build a new paragraph holding the fields of a mapping

    :param mapping: Field name to value pairs. Each pair is assigned to a
      fresh empty paragraph in the iteration order of the mapping.
    :return: The newly created paragraph.
    """
    new_paragraph = cls.new_empty_paragraph()
    for field_name, field_value in mapping.items():
        new_paragraph[field_name] = field_value
    return new_paragraph
@classmethod
def from_kvpairs(cls, kvpair_elements):
    # type: (List[Deb822KeyValuePairElement]) -> Deb822ParagraphElement
    """Create a paragraph from a non-empty list of key/value pair elements

    :param kvpair_elements: The fields of the paragraph, in order.
    :return: A Deb822NoDuplicateFieldsParagraphElement when every field name
      is distinct, otherwise a Deb822DuplicateFieldsParagraphElement.
    :raises ValueError: If `kvpair_elements` is empty.
    """
    if not kvpair_elements:
        raise ValueError(
            "A paragraph must consist of at least one field/value pair"
        )
    field_order = OrderedSet(kv.field_name for kv in kvpair_elements)
    if len(field_order) != len(kvpair_elements):
        # At least one field name is repeated; only the fallback
        # implementation can represent that (at the cost of complexity).
        return Deb822DuplicateFieldsParagraphElement(kvpair_elements)
    # Every field occurs exactly once, so the paragraph is valid and the
    # optimized implementation can be used.
    return Deb822NoDuplicateFieldsParagraphElement(kvpair_elements, field_order)
@property
def has_duplicate_fields(self):
    # type: () -> bool
    """Tell whether this paragraph has duplicate fields

    This base implementation always answers False.
    """
    return False
def as_interpreted_dict_view(
    self,
    interpretation,  # type: Interpretation[T]
    *,
    auto_resolve_ambiguous_fields=True,  # type: bool
):
    # type: (...) -> Deb822InterpretingParagraphWrapper[T]
    r"""Provide a Dict-like view of the paragraph

    This method returns a dict-like object representing this paragraph and
    is useful for accessing fields in a given interpretation. It is possible
    to use multiple versions of this dict-like view with different interpretations
    on the same paragraph at the same time (for different fields).

        >>> example_deb822_paragraph = '''
        ... Package: foo
        ... # Field comment (because it becomes just before a field)
        ... Architecture: amd64
        ... # Inline comment (associated with the next line)
        ...               i386
        ... # We also support arm
        ...               arm64
        ...               armel
        ... '''
        >>> dfile = parse_deb822_file(example_deb822_paragraph.splitlines())
        >>> paragraph = next(iter(dfile))
        >>> list_view = paragraph.as_interpreted_dict_view(LIST_SPACE_SEPARATED_INTERPRETATION)
        >>> # With the defaults, you only deal with the semantic values
        >>> # - no leading or trailing whitespace on the first part of the value
        >>> list(list_view["Package"])
        ['foo']
        >>> with list_view["Architecture"] as arch_list:
        ...     orig_arch_list = list(arch_list)
        ...     arch_list.replace('i386', 'kfreebsd-amd64')
        >>> orig_arch_list
        ['amd64', 'i386', 'arm64', 'armel']
        >>> list(list_view["Architecture"])
        ['amd64', 'kfreebsd-amd64', 'arm64', 'armel']
        >>> print(paragraph.dump(), end='')
        Package: foo
        # Field comment (because it becomes just before a field)
        Architecture: amd64
        # Inline comment (associated with the next line)
                      kfreebsd-amd64
        # We also support arm
                      arm64
                      armel
        >>> # Format preserved and architecture replaced
        >>> with list_view["Architecture"] as arch_list:
        ...     # Prettify the result as sorting will cause awkward whitespace
        ...     arch_list.reformat_when_finished()
        ...     arch_list.sort()
        >>> print(paragraph.dump(), end='')
        Package: foo
        # Field comment (because it becomes just before a field)
        Architecture: amd64
        # We also support arm
                      arm64
                      armel
        # Inline comment (associated with the next line)
                      kfreebsd-amd64
        >>> list(list_view["Architecture"])
        ['amd64', 'arm64', 'armel', 'kfreebsd-amd64']
        >>> # Format preserved and architecture values sorted

    :param interpretation: Decides how the field values are interpreted. As an example,
      use LIST_SPACE_SEPARATED_INTERPRETATION for fields such as Architecture in the
      debian/control file.
    :param auto_resolve_ambiguous_fields: This parameter is only relevant for paragraphs
      that contain the same field multiple times (these are generally invalid). If the
      caller requests an ambiguous field from an invalid paragraph via a plain field name,
      the returned dict-like object will refuse to resolve the field (not knowing which
      version to pick). This parameter (if set to True, which is the default here)
      instead changes the error into assuming the caller wants the *first* variant.
    :return: A Deb822InterpretingParagraphWrapper over this paragraph.
    """
    # NOTE(review): the indentation of the continuation lines in the doctest
    # above was reconstructed from a whitespace-collapsed rendering - confirm
    # against the pristine file before relying on the doctest.
    return Deb822InterpretingParagraphWrapper(
        self,
        interpretation,
        auto_resolve_ambiguous_fields=auto_resolve_ambiguous_fields,
    )
def configured_view(
    self,
    *,
    discard_comments_on_read=True,  # type: bool
    auto_map_initial_line_whitespace=True,  # type: bool
    auto_resolve_ambiguous_fields=True,  # type: bool
    preserve_field_comments_on_field_updates=True,  # type: bool
    auto_map_final_newline_in_multiline_values=True,  # type: bool
):
    # type: (...) -> Deb822DictishParagraphWrapper
    r"""Provide a Dict[str, str]-like view of this paragraph with non-standard parameters

    This method returns a dict-like object representing this paragraph that is
    optionally configured differently from the default view.

        >>> example_deb822_paragraph = '''
        ... Package: foo
        ... # Field comment (because it becomes just before a field)
        ... Depends: libfoo,
        ... # Inline comment (associated with the next line)
        ...          libbar,
        ... '''
        >>> dfile = parse_deb822_file(example_deb822_paragraph.splitlines())
        >>> paragraph = next(iter(dfile))
        >>> # With the defaults, you only deal with the semantic values
        >>> # - no leading or trailing whitespace on the first part of the value
        >>> paragraph["Package"]
        'foo'
        >>> # - no inline comments in multiline values (but whitespace will be present
        >>> #   on subsequent lines.)
        >>> print(paragraph["Depends"])
        libfoo,
                 libbar,
        >>> paragraph['Foo'] = 'bar'
        >>> paragraph.get('Foo')
        'bar'
        >>> paragraph.get('Unknown-Field') is None
        True
        >>> # But you get asymmetric behaviour with set vs. get
        >>> paragraph['Foo'] = ' bar\n'
        >>> paragraph['Foo']
        'bar'
        >>> paragraph['Bar'] = ' bar\n#Comment\n another value\n'
        >>> # Note that the whitespace on the first line has been normalized.
        >>> print("Bar: " + paragraph['Bar'])
        Bar: bar
         another value
        >>> # The comment is present (in case you were wondering)
        >>> print(paragraph.get_kvpair_element('Bar').convert_to_text(), end='')
        Bar: bar
        #Comment
         another value
        >>> # On the other hand, you can choose to see the values as they are
        >>> # - We will just reset the paragraph as a "nothing up my sleeve"
        >>> dfile = parse_deb822_file(example_deb822_paragraph.splitlines())
        >>> paragraph = next(iter(dfile))
        >>> nonstd_dictview = paragraph.configured_view(
        ...     discard_comments_on_read=False,
        ...     auto_map_initial_line_whitespace=False,
        ...     # For paragraphs with duplicate fields, you can choose to get an error
        ...     # rather than the dict picking the first value available.
        ...     auto_resolve_ambiguous_fields=False,
        ...     auto_map_final_newline_in_multiline_values=False,
        ... )
        >>> # Because we have reset the state, Foo and Bar are no longer there.
        >>> 'Bar' not in paragraph and 'Foo' not in paragraph
        True
        >>> # We can now see the comments (discard_comments_on_read=False)
        >>> # (The leading whitespace in front of "libfoo" is due to
        >>> #  auto_map_initial_line_whitespace=False)
        >>> print(nonstd_dictview["Depends"], end='')
         libfoo,
        # Inline comment (associated with the next line)
                 libbar,
        >>> # And all the optional whitespace on the first value line
        >>> # (auto_map_initial_line_whitespace=False)
        >>> nonstd_dictview["Package"] == ' foo\n'
        True
        >>> # ... which will give you symmetric behaviour with set vs. get
        >>> nonstd_dictview['Foo'] = ' bar \n'
        >>> nonstd_dictview['Foo']
        ' bar \n'
        >>> nonstd_dictview['Bar'] = ' bar \n#Comment\n another value\n'
        >>> nonstd_dictview['Bar']
        ' bar \n#Comment\n another value\n'
        >>> # But then you get no help either.
        >>> try:
        ...     nonstd_dictview["Baz"] = "foo"
        ... except ValueError:
        ...     print("Rejected")
        Rejected
        >>> # With auto_map_initial_line_whitespace=False, you have to include at minimum a newline
        >>> nonstd_dictview["Baz"] = "foo\n"
        >>> # The absence of leading whitespace gives you the terse variant at the expense
        >>> # of readability
        >>> paragraph.get_kvpair_element('Baz').convert_to_text()
        'Baz:foo\n'
        >>> # But because they are views, changes performed via one view are visible in the other
        >>> paragraph['Foo']
        'bar'
        >>> # The views show the values according to their own rules. Therefore, there is an
        >>> # asymmetry between paragraph['Foo'] and nonstd_dictview['Foo']
        >>> # Nevertheless, you can read or write the fields via either - enabling you to use
        >>> # the view that best suits your use-case for the given field.
        >>> 'Baz' in paragraph and nonstd_dictview.get('Baz') is not None
        True
        >>> # Deletion via the view also works
        >>> del nonstd_dictview['Baz']
        >>> 'Baz' not in paragraph and nonstd_dictview.get('Baz') is None
        True


    :param discard_comments_on_read: When getting a field value from the dict,
      this parameter decides how in-line comments are handled. When setting
      the value, inline comments are still allowed and will be retained.
      However, keep in mind that this option makes getter and setter asymmetric
      as a "get" following a "set" with inline comments will omit the comments
      even if they are there (see the code example).
    :param auto_map_initial_line_whitespace: Special-case the first value line
      by trimming unnecessary whitespace leaving only the value. For single-line
      values, all space including newline is pruned. For multi-line values, the
      newline is preserved / needed to distinguish the first line from the
      following lines. When setting a value, this option normalizes the
      whitespace of the initial line of the value field.
      Setting this option to True makes the dictionary behave more like the
      original Deb822 module.
    :param preserve_field_comments_on_field_updates: Whether to preserve the field
      comments when mutating the field.
    :param auto_resolve_ambiguous_fields: This parameter is only relevant for paragraphs
      that contain the same field multiple times (these are generally invalid). If the
      caller requests an ambiguous field from an invalid paragraph via a plain field name,
      the returned dict-like object will refuse to resolve the field (not knowing which
      version to pick). This parameter (if set to True) instead changes the error into
      assuming the caller wants the *first* variant.
    :param auto_map_final_newline_in_multiline_values: This parameter controls whether
      a multiline field will have / need a trailing newline. If True, the trailing
      newline is hidden on get and automatically added in set (if missing).
      Setting this option to True makes the dictionary behave more like the
      original Deb822 module.
    :return: A Deb822DictishParagraphWrapper over this paragraph configured with
      the given options.
    """
    # NOTE(review): whitespace runs inside the doctest above (continuation
    # line indentation and the string literals) were reconstructed from a
    # whitespace-collapsed rendering - confirm against the pristine file.
    return Deb822DictishParagraphWrapper(
        self,
        discard_comments_on_read=discard_comments_on_read,
        auto_map_initial_line_whitespace=auto_map_initial_line_whitespace,
        auto_resolve_ambiguous_fields=auto_resolve_ambiguous_fields,
        preserve_field_comments_on_field_updates=preserve_field_comments_on_field_updates,
        auto_map_final_newline_in_multiline_values=auto_map_final_newline_in_multiline_values,
    )
@property
def _paragraph(self):
    # type: () -> Deb822ParagraphElement
    """The paragraph to operate on, which for a paragraph element is itself

    NOTE(review): presumably the hook Deb822ParagraphToStrWrapperMixin uses
    to reach the underlying paragraph - confirm against the mixin.
    """
    return self
def order_last(self, field):
    # type: (ParagraphKey) -> None
    """Re-order the given field so it is "last" in the paragraph

    This base implementation only raises NotImplementedError; the concrete
    paragraph classes supply the behaviour.

    :param field: Key of the field to move.
    """
    raise NotImplementedError  # pragma: no cover
def order_first(self, field):
    # type: (ParagraphKey) -> None
    """Re-order the given field so it is "first" in the paragraph

    This base implementation only raises NotImplementedError; the concrete
    paragraph classes supply the behaviour.

    :param field: Key of the field to move.
    """
    raise NotImplementedError  # pragma: no cover
def order_before(self, field, reference_field):
    # type: (ParagraphKey, ParagraphKey) -> None
    """Re-order the given field so it appears directly before the reference field in the paragraph

    The reference field must be present.

    This base implementation only raises NotImplementedError; the concrete
    paragraph classes supply the behaviour.

    :param field: Key of the field to move.
    :param reference_field: Key of the field that `field` is placed in front of.
    """
    raise NotImplementedError  # pragma: no cover
def order_after(self, field, reference_field):
    # type: (ParagraphKey, ParagraphKey) -> None
    """Re-order the given field so it appears directly after the reference field in the paragraph

    The reference field must be present.

    This base implementation only raises NotImplementedError; the concrete
    paragraph classes supply the behaviour.

    :param field: Key of the field to move.
    :param reference_field: Key of the field that `field` is placed behind.
    """
    raise NotImplementedError  # pragma: no cover
@property
def kvpair_count(self):
    # type: () -> int
    """Number of key/value pairs in the paragraph

    Must be provided by the concrete paragraph classes; this base
    implementation only raises NotImplementedError.
    """
    raise NotImplementedError  # pragma: no cover
def iter_keys(self):
    # type: () -> Iterable[ParagraphKey]
    """Iterate over the keys of the paragraph

    Must be provided by the concrete paragraph classes; this base
    implementation only raises NotImplementedError.
    """
    raise NotImplementedError  # pragma: no cover
def contains_kvpair_element(self, item):
    # type: (object) -> bool
    """Tell whether the paragraph has a key/value pair for the given key

    Must be provided by the concrete paragraph classes; this base
    implementation only raises NotImplementedError.

    :param item: The candidate key (accepted as a plain `object`).
    """
    raise NotImplementedError  # pragma: no cover
def get_kvpair_element(
    self,
    item,  # type: ParagraphKey
    use_get=False,  # type: bool
):
    # type: (...) -> Optional[Deb822KeyValuePairElement]
    """Look up the key/value pair element for a key

    Must be provided by the concrete paragraph classes; this base
    implementation only raises NotImplementedError.

    :param item: Key of the field to look up.
    :param use_get: Selects `dict.get`-like behaviour. set_field_from_raw_string
      relies on `use_get=True` yielding a falsy result (rather than an
      exception) when the field is absent.
    """
    raise NotImplementedError  # pragma: no cover
def set_kvpair_element(self, key, value):
    # type: (ParagraphKey, Deb822KeyValuePairElement) -> None
    """Store a key/value pair element under the given key

    Must be provided by the concrete paragraph classes; this base
    implementation only raises NotImplementedError.

    :param key: Key of the field to add or replace.
    :param value: The key/value pair element to store.
    """
    raise NotImplementedError  # pragma: no cover
def remove_kvpair_element(self, key):
    # type: (ParagraphKey) -> None
    """Remove the key/value pair element stored under the given key

    Must be provided by the concrete paragraph classes; this base
    implementation only raises NotImplementedError.

    :param key: Key of the field to remove.
    """
    raise NotImplementedError  # pragma: no cover
def sort_fields(
    self, key=None  # type: Optional[Callable[[str], Any]]
):
    # type: (...) -> None
    """Re-order all fields

    Must be provided by the concrete paragraph classes; this base
    implementation only raises NotImplementedError.

    :param key: Provide a key function (same semantics as for sorted). Keep in mind that
      the module preserves the case of field names - in general, callers are recommended
      to use "lower()" to normalize the case.
    """
    raise NotImplementedError  # pragma: no cover
def set_field_to_simple_value(
    self,
    item,  # type: ParagraphKey
    simple_value,  # type: str
    *,
    preserve_original_field_comment=None,  # type: Optional[bool]
    field_comment=None,  # type: Optional[Commentish]
):
    # type: (...) -> None
    r"""Sets a field in this paragraph to a simple "word" or "phrase"

    Usually it is more convenient to treat the paragraph as a dictionary.
    The reason to call this method instead is that it lets you choose the
    field comment (if any).

    This is suitable for "simple" fields like "Package". Example:

        >>> example_deb822_paragraph = '''
        ... Package: foo
        ... '''
        >>> dfile = parse_deb822_file(example_deb822_paragraph.splitlines())
        >>> p = next(iter(dfile))
        >>> p.set_field_to_simple_value("Package", "mscgen")
        >>> p.set_field_to_simple_value("Architecture", "linux-any kfreebsd-any",
        ...                             field_comment=['Only ported to linux and kfreebsd'])
        >>> p.set_field_to_simple_value("Priority", "optional")
        >>> print(p.dump(), end='')
        Package: mscgen
        # Only ported to linux and kfreebsd
        Architecture: linux-any kfreebsd-any
        Priority: optional
        >>> # Values are formatted nicely by default, but it does not work with
        >>> # multi-line values
        >>> p.set_field_to_simple_value("Foo", "bar\nbin\n")
        Traceback (most recent call last):
            ...
        ValueError: Cannot use set_field_to_simple_value for values with newlines

    :param item: Name of the field to set. If the paragraph already
      contains the field, then it will be replaced. If the field exists,
      then it will preserve its order in the paragraph. Otherwise, it is
      added to the end of the paragraph.
      Note this can be a "paragraph key", which enables you to control
      *which* instance of a field is being replaced (in case of duplicate
      fields).
    :param simple_value: The text to use as the value. The value must not
      contain newlines. Leading and trailing whitespace will be stripped but
      space within the value is preserved. The value cannot contain comments
      (i.e. if the "#" token appears in the value, then it is considered
      a value rather than the start of a comment)
    :param preserve_original_field_comment: See the description for the
      parameter with the same name in the set_field_from_raw_string method.
    :param field_comment: See the description for the parameter with the same
      name in the set_field_from_raw_string method.
    :raises ValueError: If `simple_value` contains a newline.
    """
    if "\n" in simple_value:
        raise ValueError(
            "Cannot use set_field_to_simple_value for values with newlines"
        )

    # The trailing newline is mandatory whenever another field follows; the
    # single leading space is cosmetic (it reads nicer after the separator).
    # An empty value is special-cased to just the newline.
    trimmed = simple_value.strip()
    raw_value = " " + trimmed + "\n" if trimmed else "\n"
    self.set_field_from_raw_string(
        item,
        raw_value,
        preserve_original_field_comment=preserve_original_field_comment,
        field_comment=field_comment,
    )
def set_field_from_raw_string(
    self,
    item,  # type: ParagraphKey
    raw_string_value,  # type: str
    *,
    preserve_original_field_comment=None,  # type: Optional[bool]
    field_comment=None,  # type: Optional[Commentish]
):
    # type: (...) -> None
    """Sets a field in this paragraph to a given text value

    In many cases, it is better for callers to just use the paragraph as
    if it was a dictionary. However, this method does enable you to choose
    the field comment (if any) and lets you have a higher degree of control
    over whitespace (on the first line), which can be a reason for using it.

    Example usage:

        >>> example_deb822_paragraph = '''
        ... Package: foo
        ... '''
        >>> dfile = parse_deb822_file(example_deb822_paragraph.splitlines())
        >>> p = next(iter(dfile))
        >>> raw_value = '''
        ... Build-Depends: debhelper-compat (= 12),
        ...                some-other-bd,
        ... # Comment
        ...                another-bd,
        ... '''.lstrip()  # Remove leading newline, but *not* the trailing newline
        >>> fname, new_value = raw_value.split(':', 1)
        >>> p.set_field_from_raw_string(fname, new_value)
        >>> print(p.dump(), end='')
        Package: foo
        Build-Depends: debhelper-compat (= 12),
                       some-other-bd,
        # Comment
                       another-bd,
        >>> # Format preserved

    :param item: Name of the field to set. If the paragraph already
      contains the field, then it will be replaced. Otherwise, it is
      added to the end of the paragraph.
      Note this can be a "paragraph key", which enables you to control
      *which* instance of a field is being replaced (in case of duplicate
      fields).
    :param raw_string_value: The text to use as the value. The text must
      be valid deb822 syntax and is used *exactly* as it is given.
      Accordingly, multi-line values must include mandatory leading space
      on continuation lines, newlines after the value, etc. On the
      flip-side, any optional space or comments will be included.

      Note that the first line will *never* be read as a comment (if the
      first line of the value starts with a "#" then it will result
      in "Field-Name:#..." which is parsed as a value starting with "#"
      rather than a comment).
    :param preserve_original_field_comment: If True, then if there is an
      existing field and that has a comment, then the comment will remain
      after this operation. This is the default if the `field_comment`
      parameter is omitted.
      Note that if the parameter is True and the item is ambiguous, this
      will raise an AmbiguousDeb822FieldKeyError. When the parameter is
      omitted, the ambiguity is resolved automatically and if the resolved
      field has a comment then that will be preserved (assuming
      field_comment is None).
    :param field_comment: If not None, add or replace the comment for
      the field. Each string in the list will become one comment
      line (inserted directly before the field name). Will appear in the
      same order as they do in the list.

      If you want complete control over the formatting of the comments,
      then ensure that each line start with "#" and end with "\\n" before
      the call. Otherwise, leading/trailing whitespace is normalized
      and the missing "#"/"\\n" character is inserted.
    :raises ValueError: If both `preserve_original_field_comment` and
      `field_comment` are given (any non-None value for each), if a line of
      the value lacks its trailing newline, if a line after the first starts
      with something other than space, tab or "#", if the last line of a
      multi-line value is a comment, or if the assembled text does not parse
      cleanly.
    """
    # NOTE(review): the continuation-line indentation in the doctest above
    # was reconstructed from a whitespace-collapsed rendering - confirm
    # against the pristine file.

    # The lines that will be fed to the parser: optional comment lines first,
    # then "Field-Name:<raw value>".
    new_content = []  # type: List[str]
    if preserve_original_field_comment is not None:
        if field_comment is not None:
            raise ValueError(
                'The "preserve_original_field_comment" conflicts with'
                ' "field_comment" parameter'
            )
    elif field_comment is not None:
        if not isinstance(field_comment, Deb822CommentElement):
            # Plain comment lines become part of the text to parse, so the
            # parser attaches them to the field; nothing is left to attach
            # manually afterwards.
            new_content.extend(_format_comment(x) for x in field_comment)
            field_comment = None
        # An explicit comment replaces whatever comment the field had.
        preserve_original_field_comment = False

    field_name, _, _ = _unpack_key(item)

    cased_field_name = field_name
    try:
        original = self.get_kvpair_element(item, use_get=True)
    except AmbiguousDeb822FieldKeyError:
        if preserve_original_field_comment:
            # If we were asked to preserve the original comment, then we
            # require a strict lookup
            raise
        # Otherwise fall back to the first instance of the field.
        original = self.get_kvpair_element((field_name, 0), use_get=True)

    if preserve_original_field_comment is None:
        # We simplify preserve_original_field_comment after the lookup of the field.
        # Otherwise, we can get ambiguous key errors when updating an ambiguous field
        # when the caller did not explicitly ask for that behaviour.
        preserve_original_field_comment = True

    if original:
        # If we already have the field, then preserve the original case
        cased_field_name = original.field_name
    raw = ":".join((cased_field_name, raw_string_value))
    raw_lines = raw.splitlines(keepends=True)
    # Validate up front so the caller gets a precise message instead of a
    # generic syntax error from the parser.
    for i, line in enumerate(raw_lines, start=1):
        if not line.endswith("\n"):
            raise ValueError(
                "Line {i} in new value was missing trailing newline".format(i=i)
            )
        if i != 1 and line[0] not in (" ", "\t", "#"):
            msg = (
                "Line {i} in new value was invalid.  It must either start"
                ' with " " space (continuation line) or "#" (comment line).'
                ' The line started with "{line}"'
            )
            raise ValueError(msg.format(i=i, line=line[0]))
    if len(raw_lines) > 1 and raw_lines[-1].startswith("#"):
        raise ValueError("The last line in a value field cannot be a comment")
    new_content.extend(raw_lines)
    # As absurd as it might seem, it is easier to just use the parser to
    # construct the AST correctly
    deb822_file = parse_deb822_file(iter(new_content))
    error_token = deb822_file.find_first_error_element()
    if error_token:
        raise ValueError("Syntax error in new field value for " + field_name)
    paragraph = next(iter(deb822_file))
    assert isinstance(paragraph, Deb822NoDuplicateFieldsParagraphElement)
    value = paragraph.get_kvpair_element(field_name)
    assert value is not None
    if preserve_original_field_comment:
        if original:
            # Move (not copy) the comment element over to the new field.
            value.comment_element = original.comment_element
            original.comment_element = None
    elif field_comment is not None:
        # Only reached when the caller supplied a Deb822CommentElement.
        value.comment_element = field_comment
    self.set_kvpair_element(item, value)
@overload
def dump(
    self, fd  # type: IO[bytes]
):
    # type: (...) -> None
    pass


@overload
def dump(self):
    # type: () -> str
    pass


def dump(
    self, fd=None  # type: Optional[IO[bytes]]
):
    # type: (...) -> Optional[str]
    """Serialize the paragraph, either as a string or into a binary stream

    :param fd: Optional binary file-like object. When given, the text of every
      token is written to it encoded as UTF-8 and nothing is returned.
    :return: The concatenated token text when `fd` is omitted, otherwise None.
    """
    tokens = self.iter_tokens()
    if fd is not None:
        for tok in tokens:
            fd.write(tok.text.encode("utf-8"))
        return None
    return "".join(tok.text for tok in tokens)
class Deb822NoDuplicateFieldsParagraphElement(Deb822ParagraphElement):
    """Paragraph implementation optimized for valid deb822 files

    Because no field name is repeated, the fields can live in a plain dict
    keyed by field name with a separate ordered set recording their order,
    which keeps the common operations simple and fast.
    """

    def __init__(
        self,
        kvpair_elements,  # type: List[Deb822KeyValuePairElement]
        kvpair_order,  # type: OrderedSet
    ):
        # type: (...) -> None
        """Create the paragraph from its fields and their order

        :param kvpair_elements: The key/value pair elements of the paragraph.
        :param kvpair_order: Ordered set of the field names; it is stored
          as-is (not copied).
        """
        super().__init__()
        self._kvpair_elements = {kv.field_name: kv for kv in kvpair_elements}
        self._kvpair_order = kvpair_order
        self._init_parent_of_parts()

    @property
    def kvpair_count(self):
        # type: () -> int
        """Number of key/value pairs stored in the paragraph"""
        return len(self._kvpair_elements)

    def order_last(self, field):
        # type: (ParagraphKey) -> None
        """Re-order the given field so it is "last" in the paragraph"""
        field_name, _, _ = _unpack_key(field, raise_if_indexed=True)
        self._kvpair_order.order_last(field_name)

    def order_first(self, field):
        # type: (ParagraphKey) -> None
        """Re-order the given field so it is "first" in the paragraph"""
        field_name, _, _ = _unpack_key(field, raise_if_indexed=True)
        self._kvpair_order.order_first(field_name)

    def order_before(self, field, reference_field):
        # type: (ParagraphKey, ParagraphKey) -> None
        """Re-order the given field so it appears directly before the reference field in the paragraph

        The reference field must be present."""
        field_name, _, _ = _unpack_key(field, raise_if_indexed=True)
        reference_name, _, _ = _unpack_key(reference_field, raise_if_indexed=True)
        self._kvpair_order.order_before(field_name, reference_name)

    def order_after(self, field, reference_field):
        # type: (ParagraphKey, ParagraphKey) -> None
        """Re-order the given field so it appears directly after the reference field in the paragraph

        The reference field must be present.
        """
        field_name, _, _ = _unpack_key(field, raise_if_indexed=True)
        reference_name, _, _ = _unpack_key(reference_field, raise_if_indexed=True)
        self._kvpair_order.order_after(field_name, reference_name)

    # Overload to narrow the type to just str.
    def __iter__(self):
        # type: () -> Iterator[str]
        """Iterate over the field names (as plain `str`) in paragraph order"""
        return (str(name) for name in self._kvpair_order)

    def iter_keys(self):
        # type: () -> Iterable[str]
        """Yield the field names (as plain `str`) in paragraph order"""
        for name in self._kvpair_order:
            yield str(name)

    def remove_kvpair_element(self, key):
        # type: (ParagraphKey) -> None
        """Remove the field stored under `key`

        The size cache is reset first, then the field is dropped from both the
        dict and the ordering.
        """
        self._full_size_cache = None
        field_name, _, _ = _unpack_key(key, raise_if_indexed=True)
        del self._kvpair_elements[field_name]
        self._kvpair_order.remove(field_name)

    def contains_kvpair_element(self, item):
        # type: (object) -> bool
        """Tell whether `item` names a field of this paragraph

        Anything that is not a str, tuple or Deb822FieldNameToken is answered
        with False without further inspection.
        """
        if isinstance(item, (str, tuple, Deb822FieldNameToken)):
            field_name, _, _ = _unpack_key(
                cast("ParagraphKey", item), raise_if_indexed=True
            )
            return field_name in self._kvpair_elements
        return False

    def get_kvpair_element(
        self,
        item,  # type: ParagraphKey
        use_get=False,  # type: bool
    ):
        # type: (...) -> Optional[Deb822KeyValuePairElement]
        """Return the key/value pair element for `item`

        :param item: Key of the field to look up.
        :param use_get: If True, use `dict.get` (None for a missing field);
          otherwise index the dict directly (KeyError for a missing field).
        """
        field_name, _, _ = _unpack_key(item, raise_if_indexed=True)
        elements = self._kvpair_elements
        return elements.get(field_name) if use_get else elements[field_name]

    def set_kvpair_element(self, key, value):
        # type: (ParagraphKey, Deb822KeyValuePairElement) -> None
        """Add or replace the field `value`, which must match `key`

        :param key: Key the element is stored under.
        :param value: The element to store; it becomes a child of this paragraph
          and any element it replaces is detached.
        :raises ValueError: If `key` does not agree with the field name (or
          field name token) of `value`.
        """
        unpacked, _, _ = _unpack_key(key, raise_if_indexed=True)
        # NOTE(review): `unpacked` looks like it is always a plain field name
        # at this point (coverage never saw this branch taken), which would
        # make the token identity check unreachable - confirm against
        # _unpack_key before relying on it.
        if isinstance(unpacked, Deb822FieldNameToken):
            if unpacked is not value.field_token:
                raise ValueError(
                    "Key is a Deb822FieldNameToken, but not *the* Deb822FieldNameToken"
                    " for the value"
                )
        elif unpacked != value.field_name:
            raise ValueError(
                "Cannot insert value under a different field value than field name"
                " from its Deb822FieldNameToken implies"
            )
        # Use the string from the Deb822FieldNameToken as we need to keep that
        # in memory either way
        field_name = value.field_name
        previous = self._kvpair_elements.get(field_name)
        self._full_size_cache = None
        self._kvpair_elements[field_name] = value
        self._kvpair_order.append(field_name)
        if previous is not None:
            previous.parent_element = None
        value.parent_element = self

    def sort_fields(self, key=None):
        # type: (Optional[Callable[[str], Any]]) -> None
        """Re-order all fields

        :param key: Provide a key function (same semantics as for sorted). Keep in mind that
          the module preserves the case of field names - in general, callers are recommended
          to use "lower()" to normalize the case. Defaults to default_field_sort_key.
        """
        # Only the current last field is inspected (hence the unconditional
        # break): it gets a final newline if it lacks one, and the size cache
        # is dropped when that changed something.
        for tail_name in reversed(self._kvpair_order):
            tail_kvpair = self._kvpair_elements[cast("_strI", tail_name)]
            if tail_kvpair.value_element.add_final_newline_if_missing():
                self._full_size_cache = None
            break

        sort_key = default_field_sort_key if key is None else key
        self._kvpair_order = OrderedSet(sorted(self._kvpair_order, key=sort_key))

    def iter_parts(self):
        # type: () -> Iterable[TokenOrElement]
        """Yield the key/value pair elements in paragraph order"""
        for name in cast("Iterable[_strI]", self._kvpair_order):
            yield self._kvpair_elements[name]
2570class Deb822DuplicateFieldsParagraphElement(Deb822ParagraphElement):
def __init__(self, kvpair_elements):
    # type: (List[Deb822KeyValuePairElement]) -> None
    """Create the paragraph from key/value pair elements that may repeat field names

    :param kvpair_elements: The key/value pair elements, in paragraph order.
    """
    super().__init__()
    # Linked list holding every key/value pair in paragraph order.
    self._kvpair_order = LinkedList()  # type: LinkedList[Deb822KeyValuePairElement]
    # Field name -> the linked-list nodes of all fields with that name.
    self._kvpair_elements = {}  # type: Dict[_strI, List[KVPNode]]
    # Both containers must exist (and be empty) before they are populated.
    self._init_kvpair_fields(kvpair_elements)
    self._init_parent_of_parts()
@property
def has_duplicate_fields(self):
    # type: () -> bool
    """Tell whether some field name currently occurs more than once

    Usually this is True for this class, but once the caller has "fixed" the
    paragraph (so every name occurs once) the answer becomes False.
    """
    total_fields = len(self._kvpair_order)
    distinct_names = len(self._kvpair_elements)
    return total_fields > distinct_names
2587 def _init_kvpair_fields(self, kvpairs):
2588 # type: (Iterable[Deb822KeyValuePairElement]) -> None
2589 assert not self._kvpair_order
2590 assert not self._kvpair_elements
2591 for kv in kvpairs:
2592 field_name = kv.field_name
2593 node = self._kvpair_order.append(kv)
2594 if field_name not in self._kvpair_elements:
2595 self._kvpair_elements[field_name] = [node]
2596 else:
2597 self._kvpair_elements[field_name].append(node)
def _nodes_being_relocated(self, field):
    # type: (ParagraphKey) -> Tuple[List[KVPNode], List[KVPNode]]
    """Resolve a key into (all nodes of that field, nodes that should move)

    A plain field name selects every occurrence (the second item is then the
    very same list object as the first). A key carrying an index or a name
    token selects exactly one node.

    :raises KeyError: If the field is not present in the paragraph.
    """
    key, index, name_token = _unpack_key(field)
    nodes = self._kvpair_elements[key]
    if name_token is None and index is None:
        # Bulk relocation of every occurrence of the field.
        return nodes, nodes
    single_node = self._resolve_to_single_node(nodes, key, index, name_token)
    assert single_node is not None
    return nodes, [single_node]
def order_last(self, field):
    # type: (ParagraphKey) -> None
    """Re-order the given field so it is "last" in the paragraph

    :param field: The field to move. A plain name moves every occurrence of
      the field; an indexed/token key moves only that occurrence.
    """
    nodes, moving = self._nodes_being_relocated(field)
    assert len(moving) == 1 or len(nodes) == len(moving)

    order = self._kvpair_order
    for node in moving:
        # Special case for relocating a single node that happens to be the last.
        if order.tail_node is not node:
            order.remove_node(node)
            # assertion for mypy
            assert order.tail_node is not None
            order.insert_node_after(node, order.tail_node)

    if len(moving) != 1:
        return
    moved = moving[0]
    if moved is not nodes[-1]:
        # Keep the per-field node list in paragraph order.
        nodes.remove(moved)
        nodes.append(moved)
def order_first(self, field):
    # type: (ParagraphKey) -> None
    """Re-order the given field so it is "first" in the paragraph

    :param field: The field to move. A plain name moves every occurrence of
      the field (their relative order is preserved); an indexed/token key
      moves only that occurrence.
    """
    nodes, nodes_being_relocated = self._nodes_being_relocated(field)
    assert len(nodes_being_relocated) == 1 or len(nodes) == len(
        nodes_being_relocated
    )

    kvpair_order = self._kvpair_order
    # Use "reversed" to preserve the relative order of the nodes assuming a
    # bulk reorder: each node is placed in front of the current head, so the
    # last occurrence must be moved first. Otherwise the occurrences would end
    # up reversed in the paragraph while the per-field node list still claimed
    # the old order.
    for node in reversed(nodes_being_relocated):
        if kvpair_order.head_node is node:
            # Special case for relocating a single node that happens to be the first.
            continue
        kvpair_order.remove_node(node)
        # assertion for mypy
        assert kvpair_order.head_node is not None
        kvpair_order.insert_node_before(node, kvpair_order.head_node)

    if len(nodes_being_relocated) == 1 and nodes_being_relocated[0] is not nodes[0]:
        # Keep the per-field node list in paragraph order.
        single_node = nodes_being_relocated[0]
        nodes.remove(single_node)
        nodes.insert(0, single_node)
def order_before(self, field, reference_field):
    # type: (ParagraphKey, ParagraphKey) -> None
    """Re-order the given field so it appears directly before the reference field in the paragraph

    The reference field must be present. If the reference field occurs more
    than once and the key does not single one out, its first occurrence is
    used.

    :raises ValueError: If the field would be re-ordered relative to itself.
    """
    nodes, nodes_being_relocated = self._nodes_being_relocated(field)
    assert len(nodes_being_relocated) == 1 or len(nodes) == len(
        nodes_being_relocated
    )
    # For "before" we always use the "first" variant as reference in case of doubt
    _, reference_nodes = self._nodes_being_relocated(reference_field)
    reference_node = reference_nodes[0]
    if reference_node in nodes_being_relocated:
        raise ValueError("Cannot re-order a field relative to itself")

    kvpair_order = self._kvpair_order
    # Inserting each node right before the reference keeps the relative order
    # of the nodes in a bulk reorder.
    for node in nodes_being_relocated:
        kvpair_order.remove_node(node)
        kvpair_order.insert_node_before(node, reference_node)

    if len(nodes_being_relocated) == 1 and len(nodes) > 1:
        # Regenerate the (new) relative field order.
        field_name = nodes_being_relocated[0].value.field_name
        self._regenerate_relative_kvapir_order(field_name)
def order_after(self, field, reference_field):
    # type: (ParagraphKey, ParagraphKey) -> None
    """Re-order the given field so it appears directly after the reference field in the paragraph

    The reference field must be present. If the reference field occurs more
    than once and the key does not single one out, its last occurrence is
    used.

    :raises ValueError: If the field would be re-ordered relative to itself.
    """
    nodes, nodes_being_relocated = self._nodes_being_relocated(field)
    assert len(nodes_being_relocated) == 1 or len(nodes) == len(
        nodes_being_relocated
    )
    _, reference_nodes = self._nodes_being_relocated(reference_field)
    # For "after" we always use the "last" variant as reference in case of doubt
    reference_node = reference_nodes[-1]
    if reference_node in nodes_being_relocated:
        raise ValueError("Cannot re-order a field relative to itself")

    kvpair_order = self._kvpair_order
    # Use "reversed" to preserve the relative order of the nodes assuming a bulk reorder
    for node in reversed(nodes_being_relocated):
        kvpair_order.remove_node(node)
        kvpair_order.insert_node_after(node, reference_node)

    if len(nodes_being_relocated) == 1 and len(nodes) > 1:
        # Regenerate the (new) relative field order.
        field_name = nodes_being_relocated[0].value.field_name
        self._regenerate_relative_kvapir_order(field_name)
2714 def _regenerate_relative_kvapir_order(self, field_name):
2715 # type: (_strI) -> None
2716 nodes = []
2717 for node in self._kvpair_order.iter_nodes():
2718 if node.value.field_name == field_name:
2719 nodes.append(node)
2720 self._kvpair_elements[field_name] = nodes
def iter_parts(self):
    # type: () -> Iterable[TokenOrElement]
    """Yield every key-value pair element in paragraph order"""
    for kvpair in self._kvpair_order:
        yield kvpair
@property
def kvpair_count(self):
    # type: () -> int
    """Number of key-value pairs in the paragraph (duplicates included)"""
    ordered_pairs = self._kvpair_order
    return len(ordered_pairs)
def iter_keys(self):
    # type: () -> Iterable[ParagraphKey]
    """Yield the field name of every key-value pair in paragraph order

    A repeated field is yielded once per occurrence.
    """
    for kvpair in self._kvpair_order:
        yield kvpair.field_name
2735 def _resolve_to_single_node(
2736 self,
2737 nodes, # type: List[KVPNode]
2738 key, # type: str
2739 index, # type: Optional[int]
2740 name_token, # type: Optional[Deb822FieldNameToken]
2741 use_get=False, # type: bool
2742 ):
2743 # type: (...) -> Optional[KVPNode]
2744 if index is None:
2745 if len(nodes) != 1:
2746 if name_token is not None:
2747 node = self._find_node_via_name_token(name_token, nodes)
2748 if node is not None:
2749 return node
2750 msg = (
2751 "Ambiguous key {key} - the field appears {res_len} times. Use"
2752 " ({key}, index) to denote which instance of the field you want. (Index"
2753 " can be 0..{res_len_1} or e.g. -1 to denote the last field)"
2754 )
2755 raise AmbiguousDeb822FieldKeyError(
2756 msg.format(key=key, res_len=len(nodes), res_len_1=len(nodes) - 1)
2757 )
2758 index = 0
2759 try:
2760 return nodes[index]
2761 except IndexError:
2762 if use_get:
2763 return None
2764 msg = 'Field "{key}" was present but the index "{index}" was invalid.'
2765 raise KeyError(msg.format(key=key, index=index))
def get_kvpair_element(
    self,
    item,  # type: ParagraphKey
    use_get=False,  # type: bool
):
    # type: (...) -> Optional[Deb822KeyValuePairElement]
    """Look up the key-value pair element a key refers to

    :param item: Field name, (name, index) pair or field name token.
    :param use_get: If True, a missing field or invalid index gives None
      instead of raising KeyError.
    :raises KeyError: If use_get is False and the field is absent.
    """
    key, index, name_token = _unpack_key(item)
    if use_get:
        nodes = self._kvpair_elements.get(key)
        if nodes is None:
            return None
    else:
        nodes = self._kvpair_elements[key]
    match = self._resolve_to_single_node(
        nodes, key, index, name_token, use_get=use_get
    )
    return None if match is None else match.value
2787 @staticmethod
2788 def _find_node_via_name_token(
2789 name_token, # type: Deb822FieldNameToken
2790 elements, # type: Iterable[KVPNode]
2791 ):
2792 # type: (...) -> Optional[KVPNode]
2793 # if we are given a name token, then it is non-ambiguous if we have exactly
2794 # that name token in our list of nodes. It will be an O(n) lookup but we
2795 # probably do not have that many duplicate fields (and even if do, it is not
2796 # exactly a valid file, so there little reason to optimize for it)
2797 for node in elements:
2798 if name_token is node.value.field_token:
2799 return node
2800 return None
def contains_kvpair_element(self, item):
    # type: (object) -> bool
    """Whether the paragraph holds a field matching the given key

    Objects that cannot be a key give False. An ambiguous key counts as
    present since the field evidently exists.
    """
    if isinstance(item, (str, tuple, Deb822FieldNameToken)):
        paragraph_key = cast("ParagraphKey", item)
        try:
            found = self.get_kvpair_element(paragraph_key, use_get=True)
        except AmbiguousDeb822FieldKeyError:
            return True
        return found is not None
    return False
def set_kvpair_element(self, key, value):
    # type: (ParagraphKey, Deb822KeyValuePairElement) -> None
    """Add or replace a key-value pair element

    :param key: Field name, (name, index) pair or field name token. A plain
      name replaces *all* occurrences of the field with the new value; an
      index or token replaces just that occurrence.
    :param value: The element to store; its field name must match the key.
    :raises ValueError: If the key does not match the value's field name, or
      a name token neither belongs to the value nor to an existing field.
    :raises KeyError: If a non-zero index is used for a field that does not
      exist.
    """
    key, index, name_token = _unpack_key(key)
    if name_token:
        if name_token is not value.field_token:
            original_nodes = self._kvpair_elements.get(value.field_name)
            original_node = None
            if original_nodes is not None:
                original_node = self._find_node_via_name_token(
                    name_token, original_nodes
                )

            if original_node is None:
                raise ValueError(
                    "Key is a Deb822FieldNameToken, but not *the*"
                    " Deb822FieldNameToken for the value nor the"
                    " Deb822FieldNameToken for an existing field in the paragraph"
                )
            # Primarily for mypy's sake
            assert original_nodes is not None
            # Rely on the index-based code below to handle update.
            index = original_nodes.index(original_node)
        key = value.field_name
    else:
        if key != value.field_name:
            raise ValueError(
                "Cannot insert value under a different field value than field name"
                " from its Deb822FieldNameToken implies"
            )
        # Use the string from the Deb822FieldNameToken as it is a _strI and has the same value
        # (memory optimization)
        key = value.field_name
    self._full_size_cache = None
    original_nodes = self._kvpair_elements.get(key)
    if original_nodes is None or not original_nodes:
        if index is not None and index != 0:
            msg = (
                "Cannot replace field ({key}, {index}) as the field does not exist"
                " in the first place.  Please index-less key or ({key}, 0) if you"
                " want to add the field."
            )
            raise KeyError(msg.format(key=key, index=index))
        # Brand new field: it must be adopted by this paragraph, exactly as
        # the replace path below does for its new value.
        value.parent_element = self
        node = self._kvpair_order.append(value)
        self._kvpair_elements.setdefault(key, []).append(node)
        return

    replace_all = False
    if index is None:
        replace_all = True
        node = original_nodes[0]
        if len(original_nodes) != 1:
            self._kvpair_elements[key] = [node]
    else:
        # We insist on there being an original node, which as a side effect ensures
        # you cannot add additional copies of the field.  This means that you cannot
        # make the problem worse.
        node = original_nodes[index]

    # Replace the value of the existing node plus do a little dance
    # for the parent element part.
    node.value.parent_element = None
    value.parent_element = self
    node.value = value

    if replace_all and len(original_nodes) != 1:
        # If we were in a replace-all mode, discard any remaining nodes
        for n in original_nodes[1:]:
            n.value.parent_element = None
            self._kvpair_order.remove_node(n)
def remove_kvpair_element(self, key):
    # type: (ParagraphKey) -> None
    """Remove a field (or one occurrence of it) from the paragraph

    :param key: Field name, (name, index) pair or field name token. A plain
      name removes every occurrence of the field; an index or token removes
      just that occurrence.
    :raises KeyError: If the field is absent, the name token does not belong
      to any occurrence of the field, or the index is invalid.
    """
    key, idx, name_token = _unpack_key(key)
    field_list = self._kvpair_elements[key]

    if name_token is None and idx is None:
        self._full_size_cache = None
        # Remove all case
        for node in field_list:
            node.value.parent_element = None
            self._kvpair_order.remove_node(node)
        del self._kvpair_elements[key]
        return

    if name_token is not None:
        # Indirection between original_node and node for mypy's sake
        original_node = self._find_node_via_name_token(name_token, field_list)
        if original_node is None:
            msg = 'The field "{key}" is present but key used to access it is not.'
            raise KeyError(msg.format(key=key))
        node = original_node
    else:
        assert idx is not None
        try:
            node = field_list[idx]
        except IndexError:
            # field_list is a list, so a bad index surfaces as IndexError;
            # callers of this mapping-style API expect a KeyError.
            msg = 'The field "{key}" is present, but the index "{idx}" was invalid.'
            raise KeyError(msg.format(key=key, idx=idx)) from None

    self._full_size_cache = None
    if len(field_list) == 1:
        del self._kvpair_elements[key]
    else:
        field_list.remove(node)
    node.value.parent_element = None
    self._kvpair_order.remove_node(node)
def sort_fields(self, key=None):
    # type: (Optional[Callable[[str], Any]]) -> None
    """Re-order all fields

    :param key: Provide a key function (same semantics as for sorted). Keep in
      mind that the module preserves the case of field names - in general,
      callers are recommended to use "lower()" to normalize the case. When
      omitted, default_field_sort_key is used.
    """
    # Binding the resolved key function to a fresh name lets mypy drop the
    # Optional part of the type inside the closure below.
    name_key = default_field_sort_key if key is None else key

    def _by_field_name(kvpair):
        # type: (Deb822KeyValuePairElement) -> Any
        return name_key(kvpair.field_name)

    # Only the pair that is currently last is inspected (hence the
    # unconditional break): its value is asked to add a final newline if one
    # is missing, and the size cache is reset when that reports a change.
    for trailing_kvpair in reversed(self._kvpair_order):
        if trailing_kvpair.value_element.add_final_newline_if_missing():
            self._full_size_cache = None
        break

    in_sorted_order = sorted(self._kvpair_order, key=_by_field_name)
    # Rebuild both containers from scratch in the new order.
    self._kvpair_elements = {}
    self._kvpair_order = LinkedList()
    self._init_kvpair_fields(in_sorted_order)
2953class Deb822FileElement(Deb822Element):
2954 """Represents the entire deb822 file"""
def __init__(self, token_and_elements):
    # type: (LinkedList[TokenOrElement]) -> None
    """Create the file element from its top-level tokens and elements

    :param token_and_elements: The parts of the file, in file order. The list
      is stored as-is (not copied).
    """
    super().__init__()
    self._token_and_elements = token_and_elements
    # Must run after the parts are stored.
    self._init_parent_of_parts()
@classmethod
def new_empty_file(cls):
    # type: () -> Deb822FileElement
    """Creates a new Deb822FileElement with no contents

    Note that a deb822 file must be non-empty to be considered valid
    """
    no_parts = LinkedList()
    return cls(no_parts)
@property
def is_valid_file(self):
    # type: () -> bool
    """Returns true if the file is valid

    Invalid elements include error elements (Deb822ErrorElement) but also
    issues such as paragraphs with duplicate fields or "empty" files
    (a valid deb822 file contains at least one paragraph).
    """
    saw_paragraph = False
    for paragraph in self:
        if not paragraph or paragraph.has_duplicate_fields:
            return False
        saw_paragraph = True

    # The error scan is only needed once the file has at least one paragraph.
    return saw_paragraph and self.find_first_error_element() is None
def find_first_error_element(self):
    # type: () -> Optional[Deb822ErrorElement]
    """Returns the first Deb822ErrorElement (or None) in the file"""
    error_elements = self.iter_recurse(only_element_or_token_type=Deb822ErrorElement)
    for first_error in error_elements:
        return first_error
    return None
def __iter__(self):
    # type: () -> Iterator[Deb822ParagraphElement]
    """Iterate over the paragraphs of the file (other parts are skipped)"""
    paragraphs = self.iter_parts_of_type(Deb822ParagraphElement)
    return iter(paragraphs)
def iter_parts(self):
    # type: () -> Iterable[TokenOrElement]
    """Yield every top-level token and element of the file in order"""
    for part in self._token_and_elements:
        yield part
def insert(self, idx, para):
    # type: (int, Deb822ParagraphElement) -> None
    """Inserts a paragraph into the file at the given "index" of paragraphs

    Note that if the index is between two paragraphs containing a "free
    floating" comment (e.g. paragraph/start-of-file, empty line, comment,
    empty line, paragraph) then it is unspecified which "side" of the
    comment the new paragraph will appear and this may change between
    versions of python-debian.

    An index at or beyond the number of paragraphs appends the paragraph.

        >>> original = '''
        ... Package: libfoo-dev
        ... Depends: libfoo1 (= ${binary:Version}), ${shlib:Depends}, ${misc:Depends}
        ... '''.lstrip()
        >>> deb822_file = parse_deb822_file(original.splitlines())
        >>> para1 = Deb822ParagraphElement.new_empty_paragraph()
        >>> para1["Source"] = "foo"
        >>> para1["Build-Depends"] = "debhelper-compat (= 13)"
        >>> para2 = Deb822ParagraphElement.new_empty_paragraph()
        >>> para2["Package"] = "libfoo1"
        >>> para2["Depends"] = "${shlib:Depends}, ${misc:Depends}"
        >>> deb822_file.insert(0, para1)
        >>> deb822_file.insert(1, para2)
        >>> expected = '''
        ... Source: foo
        ... Build-Depends: debhelper-compat (= 13)
        ...
        ... Package: libfoo1
        ... Depends: ${shlib:Depends}, ${misc:Depends}
        ...
        ... Package: libfoo-dev
        ... Depends: libfoo1 (= ${binary:Version}), ${shlib:Depends}, ${misc:Depends}
        ... '''.lstrip()
        >>> deb822_file.dump() == expected
        True
    """
    parts = self._token_and_elements
    self._full_size_cache = None

    anchor_node = None
    if idx == 0:
        # Special-case, if idx is 0, then we insert it before everything else.
        # This is mostly a cosmetic choice for corner cases involving free-floating
        # comments in the file.
        if not parts:
            self.append(para)
            return
        anchor_node = parts.head_node
    else:
        # Locate the paragraph that currently occupies position idx.
        paragraphs_seen = 0
        for node in parts.iter_nodes():
            if not isinstance(node.value, Deb822ParagraphElement):
                continue
            if paragraphs_seen == idx:
                anchor_node = node
                break
            paragraphs_seen += 1

    if anchor_node is None:
        # Empty list or idx after the last paragraph both degenerate into append
        self.append(para)
        return

    # Remember to inject the "separating" newline between two paragraphs.
    # The new paragraph then goes in front of that newline.
    nl_token = self._set_parent(Deb822WhitespaceToken("\n"))
    anchor_node = parts.insert_before(nl_token, anchor_node)
    parts.insert_before(self._set_parent(para), anchor_node)
def append(self, paragraph):
    # type: (Deb822ParagraphElement) -> None
    """Appends a paragraph to the file

    :raises ValueError: If the paragraph already belongs to this or to
      another file.

        >>> deb822_file = Deb822FileElement.new_empty_file()
        >>> para1 = Deb822ParagraphElement.new_empty_paragraph()
        >>> para1["Source"] = "foo"
        >>> para1["Build-Depends"] = "debhelper-compat (= 13)"
        >>> para2 = Deb822ParagraphElement.new_empty_paragraph()
        >>> para2["Package"] = "foo"
        >>> para2["Depends"] = "${shlib:Depends}, ${misc:Depends}"
        >>> deb822_file.append(para1)
        >>> deb822_file.append(para2)
        >>> expected = '''
        ... Source: foo
        ... Build-Depends: debhelper-compat (= 13)
        ...
        ... Package: foo
        ... Depends: ${shlib:Depends}, ${misc:Depends}
        ... '''.lstrip()
        >>> deb822_file.dump() == expected
        True
    """
    parts = self._token_and_elements
    last_part = parts.tail
    current_owner = paragraph.parent_element
    if current_owner is self:
        raise ValueError("Paragraph is already a part of this file")
    if current_owner is not None:
        raise ValueError("Paragraph is already part of another Deb822File")

    self._full_size_cache = None
    # We need a separating newline if there is not a whitespace token at the end of the file.
    # Note the special case where the file ends on a comment; here we insert a whitespace too
    # to be sure.  Otherwise, we would have to check that there is an empty line before that
    # comment and that is too much effort.
    if last_part and not isinstance(last_part, Deb822WhitespaceToken):
        separator = self._set_parent(Deb822WhitespaceToken("\n"))
        parts.append(separator)
    parts.append(self._set_parent(paragraph))
def remove(self, paragraph):
    # type: (Deb822ParagraphElement) -> None
    """Remove a paragraph from the file

    One adjacent whitespace token is removed along with the paragraph: the
    following part if it is a whitespace token, or - when the paragraph was
    the last part of the file - the preceding whitespace token.

    :raises ValueError: If the paragraph does not belong to this file.
    :raises RuntimeError: If the paragraph claims this file as its parent but
      is not among the file's parts.
    """
    if paragraph.parent_element is not self:
        raise ValueError("Paragraph is part of a different file")
    for node in self._token_and_elements.iter_nodes():
        if node.value is paragraph:
            break
    else:
        # The loop ran to completion, so the paragraph is not in the list.
        # Bail out here; otherwise the last node of the file would be
        # mistaken for the paragraph and removed instead.
        raise RuntimeError("unable to find paragraph")
    self._full_size_cache = None
    previous_node = node.previous_node
    next_node = node.next_node
    self._token_and_elements.remove_node(node)
    if next_node is None:
        if previous_node and isinstance(previous_node.value, Deb822WhitespaceToken):
            self._token_and_elements.remove_node(previous_node)
    else:
        if isinstance(next_node.value, Deb822WhitespaceToken):
            self._token_and_elements.remove_node(next_node)
    paragraph.parent_element = None
3140 def _set_parent(self, t):
3141 # type: (TE) -> TE
3142 t.parent_element = self
3143 return t
def position_in_parent(self, *, skip_leading_comments: bool = True) -> Position:
    """Return START_POSITION; skip_leading_comments is not consulted here"""
    # Recursive base-case
    start = START_POSITION
    return start
def position_in_file(self, *, skip_leading_comments: bool = True) -> Position:
    """Return START_POSITION; skip_leading_comments is not consulted here"""
    # By definition
    start = START_POSITION
    return start
@overload
def dump(
    self, fd  # type: IO[bytes]
):
    # type: (...) -> None
    pass


@overload
def dump(self):
    # type: () -> str
    pass


def dump(
    self, fd=None  # type: Optional[IO[bytes]]
):
    # type: (...) -> Optional[str]
    """Serialize the file by concatenating the text of all its tokens

    :param fd: Optional binary file object. When given, each token's text is
      written to it encoded as UTF-8 and None is returned.
    :return: The text of the whole file when fd is omitted, otherwise None.
    """
    all_tokens = self.iter_tokens()
    if fd is not None:
        for token in all_tokens:
            fd.write(token.text.encode("utf-8"))
        return None
    return "".join(token.text for token in all_tokens)
# Stream adapters created via combine_into_replacement(source_type, replacement_type).
# parse_deb822_file chains them in a fixed order; the error-token variant is
# applied last so earlier stages can still wrap error tokens themselves.
_combine_error_tokens_into_elements = combine_into_replacement(
    Deb822ErrorToken, Deb822ErrorElement
)
_combine_comment_tokens_into_elements = combine_into_replacement(
    Deb822CommentToken, Deb822CommentElement
)
_combine_vl_elements_into_value_elements = combine_into_replacement(
    Deb822ValueLineElement, Deb822ValueElement
)
# Paragraphs are built through Deb822ParagraphElement.from_kvpairs rather
# than by calling the class directly.
_combine_kvp_elements_into_paragraphs = combine_into_replacement(
    Deb822KeyValuePairElement,
    Deb822ParagraphElement,
    constructor=Deb822ParagraphElement.from_kvpairs,
)
def _parsed_value_render_factory(discard_comments):
    # type: (bool) -> Callable[[Deb822ParsedValueElement], str]
    """Select the Deb822ParsedValueElement text converter to render values with

    :param discard_comments: If truthy, pick the converter that leaves
      comments out; otherwise pick the plain text converter.
    """
    if discard_comments:
        return Deb822ParsedValueElement.convert_to_text_without_comments
    return Deb822ParsedValueElement.convert_to_text
# Ready-made list interpretations. Each pairs a tokenizer with a value parser,
# the value/separator classes, a factory for new separators and the render
# factory (presumably in the positional order ListInterpretation expects -
# see its definition).

# Values separated by whitespace.
LIST_SPACE_SEPARATED_INTERPRETATION = ListInterpretation(
    whitespace_split_tokenizer,
    _parse_whitespace_list_value,
    Deb822ParsedValueElement,
    Deb822SemanticallySignificantWhiteSpace,
    lambda: Deb822SpaceSeparatorToken(" "),
    _parsed_value_render_factory,
)
# Values separated by commas.
LIST_COMMA_SEPARATED_INTERPRETATION = ListInterpretation(
    comma_split_tokenizer,
    _parse_comma_list_value,
    Deb822ParsedValueElement,
    Deb822CommaToken,
    Deb822CommaToken,
    _parsed_value_render_factory,
)
# Comma-separated as well, but parsed with the uploaders-specific parser.
LIST_UPLOADERS_INTERPRETATION = ListInterpretation(
    comma_split_tokenizer,
    _parse_uploaders_list_value,
    Deb822ParsedValueElement,
    Deb822CommaToken,
    Deb822CommaToken,
    _parsed_value_render_factory,
)
def _non_end_of_line_token(v):
    # type: (TokenOrElement) -> bool
    """Predicate for "consume until the newline": False only for a newline token

    A newline token is a Deb822WhitespaceToken whose text is exactly "\\n".
    """
    is_newline = isinstance(v, Deb822WhitespaceToken) and v.text == "\n"
    return not is_newline
def _build_value_line(
    token_stream,  # type: Iterable[Union[TokenOrElement, Deb822CommentElement]]
):
    # type: (...) -> Iterable[Union[TokenOrElement, Deb822ValueLineElement]]
    """Parser helper - consumes tokens part of a Deb822ValueLineElement and turns them into one

    Tokens that do not belong to a value line are passed through unchanged.
    """
    buffered_stream = BufferingIterator(token_stream)

    # Deb822ValueLineElement is a bit tricky because of how we handle whitespace
    # and comments.
    #
    # In relation to comments, then only continuation lines can have comments.
    # If there is a comment before a "K: V" line, then the comment is associated
    # with the field rather than the value.
    #
    # On the whitespace front, then we separate syntactical mandatory whitespace
    # from optional whitespace.  As an example:
    #
    # """
    # # some comment associated with the Depends field
    # Depends:_foo_$
    # # some comment associated with the line containing "bar"
    # !________bar_$
    # """
    #
    # Where "$" and "!" represents mandatory whitespace (the newline and the first
    # space are required for the file to be parsed correctly), where as "_" is
    # "optional" whitespace (from a syntactical point of view).
    #
    # This distinction enable us to facilitate APIs for easy removal/normalization
    # of redundant whitespaces without having programmers worry about trashing
    # the file.

    comment_element = None
    continuation_line_token = None
    token = None  # type: Optional[TokenOrElement]

    for token in buffered_stream:
        start_of_value_entry = False
        if isinstance(token, Deb822ValueContinuationToken):
            continuation_line_token = token
            start_of_value_entry = True
            token = None
        elif isinstance(token, Deb822FieldSeparatorToken):
            start_of_value_entry = True
        elif isinstance(token, Deb822CommentElement):
            next_token = buffered_stream.peek()
            # If the next token is a continuation line token, then this comment
            # belong to a value and we might as well just start the value
            # parsing now.
            #
            # Note that we rely on this behaviour to avoid emitting the comment
            # token (failing to do so would cause the comment to appear twice
            # in the file).
            if isinstance(next_token, Deb822ValueContinuationToken):
                start_of_value_entry = True
                comment_element = token
                token = None
                # Use next with None to avoid raising StopIteration inside a generator
                # It won't happen, but pylint cannot see that, so we do this instead.
                continuation_line_token = cast(
                    "Deb822ValueContinuationToken", next(buffered_stream, None)
                )
                assert continuation_line_token is not None

        if token is not None:
            yield token
        if start_of_value_entry:
            tokens_in_value = list(buffered_stream.takewhile(_non_end_of_line_token))
            eol_token = cast("Deb822WhitespaceToken", next(buffered_stream, None))
            assert eol_token is None or eol_token.text == "\n"
            leading_whitespace = None
            trailing_whitespace = None
            # "Depends:\n foo" would cause tokens_in_value to be empty for the
            # first "value line" (the empty part between ":" and "\n")
            if tokens_in_value:
                # Another special-case, "Depends: \n foo" (i.e. space after colon)
                # should not introduce an IndexError
                if isinstance(tokens_in_value[-1], Deb822WhitespaceToken):
                    trailing_whitespace = cast(
                        "Deb822WhitespaceToken", tokens_in_value.pop()
                    )
                # Leading whitespace is the *first* remaining token, so that is
                # the one that must be tested before it is split off.
                if tokens_in_value and isinstance(
                    tokens_in_value[0], Deb822WhitespaceToken
                ):
                    leading_whitespace = cast(
                        "Deb822WhitespaceToken", tokens_in_value[0]
                    )
                    tokens_in_value = tokens_in_value[1:]
            yield Deb822ValueLineElement(
                comment_element,
                continuation_line_token,
                leading_whitespace,
                tokens_in_value,
                trailing_whitespace,
                eol_token,
            )
            comment_element = None
            continuation_line_token = None
def _build_field_with_value(
    token_stream,  # type: Iterable[Union[TokenOrElement, Deb822ValueElement]]
):
    # type: (...) -> Iterable[Union[TokenOrElement, Deb822KeyValuePairElement]]
    """Parser helper - combines field name, separator and value into key-value pairs

    A comment element directly in front of a field name becomes part of the
    key-value pair. Anything that is not part of a field is passed through
    unchanged. Malformed fields are emitted as Deb822ErrorElement; everything
    that was consumed from the stream is kept in the output.
    """
    buffered_stream = BufferingIterator(token_stream)
    for token_or_element in buffered_stream:
        start_of_field = False
        comment_element = None
        if isinstance(token_or_element, Deb822FieldNameToken):
            start_of_field = True
        elif isinstance(token_or_element, Deb822CommentElement):
            comment_element = token_or_element
            next_token = buffered_stream.peek()
            start_of_field = isinstance(next_token, Deb822FieldNameToken)
            if start_of_field:
                # Remember to consume the field token
                try:
                    token_or_element = next(buffered_stream)
                except StopIteration:  # pragma: no cover
                    raise AssertionError

        if start_of_field:
            field_name = token_or_element
            separator = next(buffered_stream, None)
            value_element = next(buffered_stream, None)
            if separator is None or value_element is None:
                # Early EOF - should not be possible with how the tokenizer works
                # right now, but now it is future-proof.
                if comment_element:
                    yield comment_element
                error_elements = [field_name]
                if separator is not None:
                    error_elements.append(separator)
                yield Deb822ErrorElement(error_elements)
                return

            if isinstance(separator, Deb822FieldSeparatorToken) and isinstance(
                value_element, Deb822ValueElement
            ):
                yield Deb822KeyValuePairElement(
                    comment_element,
                    cast("Deb822FieldNameToken", field_name),
                    separator,
                    value_element,
                )
            else:
                # We had a parse error, consume until the newline.
                #
                # The comment, separator and value were already taken off the
                # stream above; they must be emitted as well or that part of
                # the input would silently vanish from the parsed file.
                if comment_element:
                    yield comment_element
                error_tokens = [
                    field_name,
                    separator,
                    value_element,
                ]  # type: List[TokenOrElement]
                error_tokens.extend(buffered_stream.takewhile(_non_end_of_line_token))
                nl = buffered_stream.peek()
                # Take the newline as well if present
                if nl and isinstance(nl, Deb822NewlineAfterValueToken):
                    next(buffered_stream, None)
                    error_tokens.append(nl)
                yield Deb822ErrorElement(error_tokens)
        else:
            # Token is not part of a field, emit it as-is
            yield token_or_element
3395def _abort_on_error_tokens(sequence):
3396 # type: (Iterable[TokenOrElement]) -> Iterable[TokenOrElement]
3397 line_no = 1
3398 for token in sequence:
3399 # We are always called while the sequence consists entirely of tokens
3400 if token.is_error: 3400 ↛ 3401line 3400 didn't jump to line 3401, because the condition on line 3400 was never true
3401 error_as_text = token.convert_to_text().replace("\n", "\\n")
3402 raise SyntaxOrParseError(
3403 'Syntax or Parse error on or near line {line_no}: "{error_as_text}"'.format(
3404 error_as_text=error_as_text, line_no=line_no
3405 )
3406 )
3407 line_no += token.convert_to_text().count("\n")
3408 yield token
def parse_deb822_file(
    sequence,  # type: Union[Iterable[Union[str, bytes]], str]
    *,
    accept_files_with_error_tokens=False,  # type: bool
    accept_files_with_duplicated_fields=False,  # type: bool
    encoding="utf-8",  # type: str
):
    # type: (...) -> Deb822FileElement
    """Parse deb822 content into a Deb822FileElement

    :param sequence: An iterable over lines of str or bytes (an open file for
      reading will do). A single str or bytes value is also accepted and is
      split into lines with the line endings kept. If line endings are
      provided in the input, then they must be present on every line (except
      the last) and will be preserved as-is. If omitted and the content is
      at least 2 lines, then the parser will assume implicit newlines.
    :param accept_files_with_error_tokens: If True, files with critical syntax
      or parse errors will be returned as "successfully" parsed. Usually,
      working on files with this kind of error is not desirable as it is
      hard to make sense of such files (and they might in fact not be a deb822
      file at all). When set to False (the default) a ValueError is raised if
      there is a critical syntax or parse error.
      Note that duplicated fields in a paragraph are not considered a critical
      parse error by this parser as the implementation can gracefully cope
      with these. Use accept_files_with_duplicated_fields to determine if
      such files should be accepted.
    :param accept_files_with_duplicated_fields: If True, then
      files containing paragraphs with duplicated fields will be returned as
      "successfully" parsed even though they are invalid according to the
      specification. The paragraphs will prefer the first appearance of the
      field unless the caller explicitly requests otherwise (e.g., via
      Deb822ParagraphElement.configured_view). If False, then this method
      will raise a ValueError if any duplicated fields are seen inside any
      paragraph.
    :param encoding: The encoding to use (this is here to support Deb822-like
      APIs, new code should not use this parameter).
    """

    if isinstance(sequence, (str, bytes)):
        # Match the deb822 API.
        sequence = sequence.splitlines(True)

    tokens = tokenize_deb822_file(
        sequence, encoding=encoding
    )  # type: Iterable[TokenOrElement]
    if not accept_files_with_error_tokens:
        tokens = _abort_on_error_tokens(tokens)

    # The order of the stages is important here. As an example,
    # _build_value_line assumes that all comment tokens have been merged
    # into comment elements. Likewise, _build_field_with_value assumes
    # that value tokens (along with their comments) have been combined
    # into elements.
    #
    # Free-floating error tokens are combined into error elements by the
    # final stage. That is done last as it enables the earlier stages to
    # include error tokens in their own error elements if they discover
    # something is wrong.
    pipeline = (
        _combine_comment_tokens_into_elements,
        _build_value_line,
        _combine_vl_elements_into_value_elements,
        _build_field_with_value,
        _combine_kvp_elements_into_paragraphs,
        _combine_error_tokens_into_elements,
    )
    for stage in pipeline:
        tokens = stage(tokens)

    deb822_file = Deb822FileElement(LinkedList(tokens))

    if accept_files_with_duplicated_fields:
        return deb822_file

    for no, paragraph in enumerate(deb822_file):
        if not isinstance(paragraph, Deb822DuplicateFieldsParagraphElement):
            continue
        seen_names = set()
        for key in paragraph.keys():
            field_name, _, _ = _unpack_key(key)
            # assert for mypy
            assert isinstance(field_name, str)
            if field_name in seen_names:
                # Report the first field name that shows up a second time.
                msg = 'Duplicate field "{dup_field}" in paragraph number {no}'
                raise ValueError(msg.format(dup_field=field_name, no=no))
            seen_names.add(field_name)

    return deb822_file
if __name__ == "__main__":  # pragma: no cover
    # Executed as a script: run the doctests found in this module.
    from doctest import testmod

    testmod()