Coverage for src/debputy/lsp/vendoring/_deb822

1# -*- coding: utf-8 -*- vim: fileencoding=utf-8 :

3import collections.abc

4import contextlib

5import sys

6import textwrap

7import weakref

8from abc import ABC

9from types import TracebackType

10from weakref import ReferenceType

12from ._util import (

13 combine_into_replacement,

14 BufferingIterator,

15 len_check_iterator,

16)

17from .formatter import (

18 FormatterContentToken,

19 one_value_per_line_trailing_separator,

20 format_field,

21)

22from .locatable import Locatable, START_POSITION, Position, Range

23from .tokens import (

24 Deb822Token,

25 Deb822ValueToken,

26 Deb822SemanticallySignificantWhiteSpace,

27 Deb822SpaceSeparatorToken,

28 Deb822CommentToken,

29 Deb822WhitespaceToken,

30 Deb822ValueContinuationToken,

31 Deb822NewlineAfterValueToken,

32 Deb822CommaToken,

33 Deb822FieldNameToken,

34 Deb822FieldSeparatorToken,

35 Deb822ErrorToken,

36 tokenize_deb822_file,

37 comma_split_tokenizer,

38 whitespace_split_tokenizer,

39)

40from .types import AmbiguousDeb822FieldKeyError, SyntaxOrParseError

41from debian._util import (

42 resolve_ref,

43 LinkedList,

44 LinkedListNode,

45 OrderedSet,

46 _strI,

47 default_field_sort_key,

48)

50try:

51 from typing import (

52 Iterable,

53 Iterator,

54 List,

55 Union,

56 Dict,

57 Optional,

58 Callable,

59 Any,

60 Generic,

61 Type,

62 Tuple,

63 IO,

64 cast,

65 overload,

66 Mapping,

67 TYPE_CHECKING,

68 Sequence,

69 )

70 from debian._util import T

72 # for some reason, pylint does not see that Commentish is used in typing

73 from .types import ( # pylint: disable=unused-import

74 ST,

75 VE,

76 TE,

77 ParagraphKey,

78 TokenOrElement,

79 Commentish,

80 ParagraphKeyBase,

81 FormatterCallback,

82 )

84 if TYPE_CHECKING:

85 StreamingValueParser = Callable[

86 [Deb822Token, BufferingIterator[Deb822Token]], VE

87 ]

88 StrToValueParser = Callable[[str], Iterable[Union["Deb822Token", VE]]]

89 KVPNode = LinkedListNode["Deb822KeyValuePairElement"]

90 else:

91 StreamingValueParser = None

92 StrToValueParser = None

93 KVPNode = None

94except ImportError:

95 if not TYPE_CHECKING:

96 # pylint: disable=unnecessary-lambda-assignment

97 cast = lambda t, v: v

98 overload = lambda f: None

100

class ValueReference(Generic[TE]):
    """Handle to one value stored in a linked list backing a Deb822 field

    A reference makes it possible to rewrite a value "in-place" or to remove
    it after having inspected it.

    References can become invalid when the underlying container is changed
    (for instance, sorting a list of values generally invalidates every
    reference into that list).  Invalid use is reported when it is detected,
    but most of the time it will go unnoticed.

    To help with detection, the reference only keeps a *weak* reference to
    the list node.  That allows it to notice that the node has been garbage
    collected.  Keep in mind that the timeliness of garbage collection is
    implementation defined (e.g., pypy does not use ref-counting).
    """

    __slots__ = (
        "_node",
        "_render",
        "_value_factory",
        "_removal_handler",
        "_mutation_notifier",
    )

    def __init__(
        self,
        node,  # type: LinkedListNode[TE]
        render,  # type: Callable[[TE], str]
        value_factory,  # type: Callable[[str], TE]
        removal_handler,  # type: Callable[[LinkedListNode[TokenOrElement]], None]
        mutation_notifier,  # type: Optional[Callable[[], None]]
    ):
        # The attribute is set to None by remove(); until then it holds a weak
        # reference to the node.
        self._node = weakref.ref(
            node
        )  # type: Optional[ReferenceType[LinkedListNode[TE]]]
        self._render = render
        self._value_factory = value_factory
        self._removal_handler = removal_handler
        self._mutation_notifier = mutation_notifier

    def _resolve_node(self):
        # type: () -> LinkedListNode[TE]
        """Return the referenced node or raise RuntimeError if it is gone"""
        weak_node = self._node
        # The weak reference object itself being None means remove() was
        # called; a weak reference that resolves to None means the node was
        # garbage collected.  The two cases get distinct error messages.
        if weak_node is None:
            raise RuntimeError("Cannot use ValueReference after remove()")
        resolved = weak_node()
        if resolved is None:
            raise RuntimeError("ValueReference is invalid (garbage collected)")
        return resolved

    @property
    def value(self):
        # type: () -> str
        """Resolve the reference into a str"""
        node = self._resolve_node()
        return self._render(node.value)

    @value.setter
    def value(self, new_value):
        # type: (str) -> None
        """Update the reference value

        Updating the value via this method will *not* invalidate the reference
        (or other references to the same container).

        This can raise an exception if the new value does not follow the
        requirements for the referenced values. As an example, values in
        whitespace separated lists cannot contain spaces and would trigger an
        exception.
        """
        # Build the replacement before resolving the node so a rejected value
        # is reported before any validity problem with the reference.
        replacement = self._value_factory(new_value)
        self._resolve_node().value = replacement
        notifier = self._mutation_notifier
        if notifier is not None:
            notifier()

    @property
    def locatable(self):
        # type: () -> Locatable
        """Reference to a locatable that can be used to determine where this value is"""
        node = self._resolve_node()
        return node.value

    def remove(self):
        # type: () -> None
        """Remove the underlying value

        This will invalidate the ValueReference (and any other ValueReferences
        pointing to that exact value). The validity of other ValueReferences to
        that container remains unaffected.
        """
        node = self._resolve_node()
        self._removal_handler(cast("LinkedListNode[TokenOrElement]", node))
        # Mark the reference as "known removed" (see _resolve_node).
        self._node = None

195

196

if sys.version_info >= (3, 9) or TYPE_CHECKING:
    _Deb822ParsedTokenList_ContextManager = contextlib.AbstractContextManager[T]
else:
    # Python 3.5 - 3.8 compat: contextlib.AbstractContextManager cannot be
    # subscripted on these versions, and Python 3.5 is so old that it does
    # not have AbstractContextManager at all.  A small generic replacement
    # with the same default behaviour is therefore defined here.
    class _Deb822ParsedTokenList_ContextManager(Generic[T]):
        """Minimal generic context manager base for Python older than 3.9"""

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc_val, exc_tb):
            return None

211

212

class Deb822ParsedTokenList(
    Generic[VE, ST],
    _Deb822ParsedTokenList_ContextManager["Deb822ParsedTokenList[VE, ST]"],
):
    """List-like view of the values in one field of a Deb822 paragraph

    The list works on a linked list of tokens/elements (values of type VE,
    separators of type ST, plus whitespace and comments).  When used as a
    context manager, pending changes are written back to the key-value pair
    element on exit, provided the block did not raise.
    """

    def __init__(
        self,
        kvpair_element,  # type: 'Deb822KeyValuePairElement'
        interpreted_value_element,  # type: Deb822InterpretationProxyElement
        vtype,  # type: Type[VE]
        stype,  # type: Type[ST]
        str2value_parser,  # type: StrToValueParser[VE]
        default_separator_factory,  # type: Callable[[], ST]
        render,  # type: Callable[[VE], str]
    ):
        # type: (...) -> None
        self._kvpair_element = kvpair_element
        self._proxy_element = interpreted_value_element
        self._token_list = LinkedList(interpreted_value_element.parts)
        self._vtype = vtype
        self._stype = stype
        self._str2value_parser = str2value_parser
        self._default_separator_factory = default_separator_factory
        self._value_factory = _parser_to_value_factory(str2value_parser, vtype)
        self._render = render
        self._format_preserve_original_formatting = True
        self._formatter = (
            one_value_per_line_trailing_separator
        )  # type: FormatterCallback
        self._changed = False
        # Lazily computed by the _continuation_line_char property
        self.__continuation_line_char = None  # type: Optional[str]
        assert self._token_list

        if isinstance(self._token_list.tail, Deb822NewlineAfterValueToken):
            # We always remove the last newline (if present), because then
            # adding values will happen after the last value rather than on
            # a new line by default.
            #
            # On write, we always ensure the value ends on a newline (even
            # if it did not before). This is simpler and should be a
            # non-issue in practice.
            self._token_list.pop()

    def __iter__(self):
        # type: () -> Iterator[str]
        """Iterate over the values rendered as str"""
        for part in self.value_parts:
            yield self._render(part)

    def __bool__(self):
        # type: () -> bool
        """Whether iterating the list yields a first value that is not None"""
        first_rendered = next(iter(self), None)
        return first_rendered is not None

    def __exit__(
        self,
        exc_type,  # type: Optional[Type[BaseException]]
        exc_val,  # type: Optional[BaseException]
        exc_tb,  # type: Optional[TracebackType]
    ):
        # type: (...) -> Optional[bool]
        """Write pending changes back to the field unless an exception occurred"""
        if exc_type is None and self._changed:
            self._update_field()
        return super().__exit__(exc_type, exc_val, exc_tb)

    @property
    def value_parts(self):
        # type: () -> Iterator[VE]
        """Iterate over the value elements (skipping everything else)"""
        for part in self._token_list:
            if isinstance(part, self._vtype):
                yield part

    def _mark_changed(self):
        # type: () -> None
        """Record that the field must be rewritten when the list is finished"""
        self._changed = True

    def iter_value_references(self):
        # type: () -> Iterator[ValueReference[VE]]
        """Iterate over all values in the list (as ValueReferences)

        This is useful for doing inplace modification of the values or even
        streaming removal of field values. It is in general also more
        efficient when more than one value is updated or removed.
        """
        for node in self._token_list.iter_nodes():
            if not isinstance(node.value, self._vtype):
                continue
            yield ValueReference(
                cast("LinkedListNode[VE]", node),
                self._render,
                self._value_factory,
                self._remove_node,
                self._mark_changed,
            )

    def append_separator(self, space_after_separator=True):
        # type: (bool) -> None
        """Append the default separator, optionally followed by a space

        :param space_after_separator: If True, a single space is added after
          the separator. The parameter is ignored when the separator itself
          is whitespace.
        """
        separator_token = self._default_separator_factory()
        separator_is_whitespace = separator_token.is_whitespace
        # No extra space after a separator that is already whitespace.
        add_space = space_after_separator and not separator_is_whitespace

        self._changed = True
        self._append_continuation_line_token_if_necessary()
        self._token_list.append(separator_token)

        if add_space:
            self._token_list.append(Deb822WhitespaceToken(" "))

    def replace(self, orig_value, new_value):
        # type: (str, str) -> None
        """Replace the first instance of a value with another

        This method will *not* affect the validity of ValueReferences.

        :raises ValueError: If orig_value is not in the list
        """
        vtype = self._vtype
        for node in self._token_list.iter_nodes():
            if isinstance(node.value, vtype) and self._render(node.value) == orig_value:
                node.value = self._value_factory(new_value)
                self._changed = True
                return
        raise ValueError("list.replace(x, y): x not in list")

    def remove(self, value):
        # type: (str) -> None
        """Remove the first instance of a value

        Removal will invalidate ValueReferences to the value being removed.
        ValueReferences to other values will be unaffected.

        :raises ValueError: If the value is not in the list
        """
        vtype = self._vtype
        node_to_remove = None
        for node in self._token_list.iter_nodes():
            if isinstance(node.value, vtype) and self._render(node.value) == value:
                node_to_remove = node
                break
        if node_to_remove is None:
            raise ValueError("list.remove(x): x not in list")

        return self._remove_node(node_to_remove)

    def _remove_node(self, node_to_remove):
        # type: (LinkedListNode[TokenOrElement]) -> None
        """Unlink a value node together with its adjacent separator/whitespace"""
        vtype = self._vtype
        self._changed = True

        # We naively want to remove the node and every thing to the left of it
        # until the previous value. That is the basic idea for now (ignoring
        # special-cases for now).
        #
        # Example:
        #
        # """
        # Multiline-Keywords: bar[
        # # Comment about foo
        #                     foo]
        #                     baz
        # Keywords: bar[ foo] baz
        # Comma-List: bar[, foo], baz,
        # Multiline-Comma-List: bar[,
        # # Comment about foo
        #                       foo],
        #                       baz,
        # """
        #
        # Assuming we want to remove "foo" for the lists, the []-markers
        # show what we aim to remove. This has the nice side-effect of
        # preserving whether or not the value has a trailing separator.
        # Note that we do *not* attempt to repair missing separators but
        # it may fix duplicated separators by "accident".
        #
        # Now, there are three special cases to be aware of, where this
        # approach has shortcomings:
        #
        #  1) If foo is the only value (in which case, "delete everything"
        #     is the only option).
        #  2) If foo is the first value
        #  3) If foo is not the only value on the line and we see a comment
        #     inside the deletion range.
        #
        # For 2) + 3), we attempt to flip the range and delete every thing
        # after it (up to but excluding "baz") instead. This definitely
        # fixes 3), but 2) has yet another corner case, namely:
        #
        # """
        # Multiline-Comma-List: foo,
        # # Remark about bar
        #                       bar,
        # Another-Case: foo
        # # Remark, also we use leading separator
        #             , bar
        # """
        #
        # The options include:
        #
        #  A) Discard the comment - brain-dead simple
        #  B) Hoist the comment up to a field comment, but then what if the
        #     field already has a comment?
        #  C) Clear the first value line leaving just the newline and
        #     replace the separator before "bar" (if present) with a space.
        #     (leaving you with the value of the form "\n# ...\n      bar")
        #

        def _find_neighbour_value(nodes):
            # type: (Iterable[LinkedListNode[TokenOrElement]]) -> Tuple[Optional[LinkedListNode[TokenOrElement]], bool]
            # Walk away from the node being removed; return the first value
            # node found (or None) and whether a comment was passed on the way.
            saw_comment = False
            for candidate in nodes:
                token = candidate.value
                if isinstance(token, Deb822Token) and token.is_comment:
                    saw_comment = True
                    continue
                if isinstance(token, vtype):
                    return candidate, saw_comment
            return None, saw_comment

        first_value_on_lhs, comment_before_previous_value = _find_neighbour_value(
            node_to_remove.iter_previous(skip_current=True)
        )
        first_value_on_rhs, comment_before_next_value = _find_neighbour_value(
            node_to_remove.iter_next(skip_current=True)
        )

        if first_value_on_rhs is None and first_value_on_lhs is None:
            # This was the last value, just remove everything.
            self._token_list.clear()
            return

        if first_value_on_lhs is not None and not comment_before_previous_value:
            # Delete left
            delete_lhs_of_node = True
        elif first_value_on_rhs is not None and not comment_before_next_value:
            # Delete right
            delete_lhs_of_node = False
        else:
            # There is a comment on either side (or no value on one and a
            # comment on the other). Keep it simple, we just delete to
            # one side (preferring deleting to left if possible).
            delete_lhs_of_node = first_value_on_lhs is not None

        if delete_lhs_of_node:
            first_remain_lhs = first_value_on_lhs
            first_remain_rhs = node_to_remove.next_node
        else:
            first_remain_lhs = node_to_remove.previous_node
            first_remain_rhs = first_value_on_rhs

        # Actual deletion - with some manual labour to update HEAD/TAIL of
        # the list in case we do a "delete everything left/right this node".
        if first_remain_lhs is None:
            self._token_list.head_node = first_remain_rhs
        if first_remain_rhs is None:
            self._token_list.tail_node = first_remain_lhs
        LinkedListNode.link_nodes(first_remain_lhs, first_remain_rhs)

    def append(self, value):
        # type: (str) -> None
        """Convert a str into a value element and append it to the list"""
        self.append_value(self._value_factory(value))

    def append_value(self, vt):
        # type: (VE) -> None
        """Append a value element, adding a separator first when one is needed"""
        token_list = self._token_list
        if token_list:
            stype = self._stype
            vtype = self._vtype
            # Look backwards: reaching a value before any separator means the
            # new value has to be separated from it.
            needs_separator = False
            for existing in reversed(token_list):
                if isinstance(existing, vtype):
                    needs_separator = True
                    break
                if isinstance(existing, stype):
                    break

            if needs_separator:
                self.append_separator()
        else:
            # Looks nicer if there is a space before the very first value
            self._token_list.append(Deb822WhitespaceToken(" "))
        self._append_continuation_line_token_if_necessary()
        self._changed = True
        token_list.append(vt)

    def _previous_is_newline(self):
        # type: () -> bool
        """Whether the last token/element in the list ends on a newline"""
        tail = self._token_list.tail
        if tail is None:
            return False
        return tail.convert_to_text().endswith("\n")

    def append_newline(self):
        # type: () -> None
        """Append a newline token

        :raises ValueError: If the list already ends on a newline
        """
        if self._previous_is_newline():
            raise ValueError(
                "Cannot add a newline after a token that ends on a newline"
            )
        self._token_list.append(Deb822NewlineAfterValueToken())

    def append_comment(self, comment_text):
        # type: (str) -> None
        """Append a comment, starting a new line first when necessary"""
        if not self._previous_is_newline():
            self.append_newline()
        comment_token = Deb822CommentToken(_format_comment(comment_text))
        self._token_list.append(comment_token)

    @property
    def _continuation_line_char(self):
        # type: () -> str
        """The character used to start continuation lines (computed once)"""
        char = self.__continuation_line_char
        if char is None:
            # Use ' ' by default but match the existing field if possible.
            char = next(
                (
                    token.text
                    for token in self._token_list
                    if isinstance(token, Deb822ValueContinuationToken)
                ),
                " ",
            )
            self.__continuation_line_char = char
        return char

    def _append_continuation_line_token_if_necessary(self):
        # type: () -> None
        """Start a continuation line if the list currently ends on a newline"""
        if self._previous_is_newline():
            self._token_list.append(
                Deb822ValueContinuationToken(self._continuation_line_char)
            )

    def reformat_when_finished(self):
        # type: () -> None
        """Enable reformatting and force the field to be rewritten"""
        self._enable_reformatting()
        self._changed = True

    def _enable_reformatting(self):
        # type: () -> None
        self._format_preserve_original_formatting = False

    def no_reformatting_when_finished(self):
        # type: () -> None
        """Preserve the original formatting when the field is written"""
        self._format_preserve_original_formatting = True

    def value_formatter(
        self,
        formatter,  # type: FormatterCallback
        force_reformat=False,  # type: bool
    ):
        # type: (...) -> None
        """Use a custom formatter when formatting the value

        :param formatter: A formatter (see debian._deb822_repro.formatter.format_field
          for details)
        :param force_reformat: If True, always reformat the field even if there are
          no (other) changes performed.  By default, fields are only reformatted if
          they are changed.
        """
        self._formatter = formatter
        self._format_preserve_original_formatting = False
        if force_reformat:
            self._changed = True

    def clear(self):
        # type: () -> None
        """Like list.clear() - removes all content (including comments and spaces)"""
        if self._token_list:
            # Only a non-empty list counts as having been changed
            self._changed = True
        self._token_list.clear()

    def _iter_content_as_tokens(self):
        # type: () -> Iterable[Deb822Token]
        """Iterate over the content with elements expanded into their tokens"""
        for part in self._token_list:
            if isinstance(part, Deb822Element):
                yield from part.iter_tokens()
            else:
                yield part

    def _generate_reformatted_field_content(self):
        # type: () -> str
        """Render the field via format_field using the configured formatter"""
        separator_token = self._default_separator_factory()
        vtype = self._vtype
        stype = self._stype
        token_list = self._token_list

        def _token_iter():
            # type: () -> Iterator[FormatterContentToken]
            # Tokens that are neither comments nor separators are not passed
            # on to the formatter; non-token entries must be values.
            for part in token_list:
                if not isinstance(part, Deb822Token):
                    assert isinstance(part, vtype)
                    yield FormatterContentToken.value_token(part.convert_to_text())
                elif part.is_comment:
                    yield FormatterContentToken.comment_token(part.text)
                elif isinstance(part, stype):
                    yield FormatterContentToken.separator_token(part.text)

        return format_field(
            self._formatter,
            self._kvpair_element.field_name,
            FormatterContentToken.separator_token(separator_token.text),
            _token_iter(),
        )

    def _generate_field_content(self):
        # type: () -> str
        """Render the content as-is by concatenating the text of all tokens"""
        return "".join(token.text for token in self._iter_content_as_tokens())

    def _update_field(self):
        # type: () -> None
        """Replace the value element of the field with the current content

        :raises ValueError: If the content consists solely of whitespace and
          comments, ends on a comment, or does not parse as a valid field
        """
        kvpair_element = self._kvpair_element
        field_name = kvpair_element.field_name
        token_list = self._token_list
        tail = token_list.tail

        had_tokens = False
        has_content = False
        for token in self._iter_content_as_tokens():
            had_tokens = True
            if not (token.is_comment or token.is_whitespace):
                has_content = True
                break
        if had_tokens and not has_content:
            raise ValueError(
                "Field must be completely empty or have content "
                "(i.e. non-whitespace and non-comments)"
            )

        if tail is not None:
            if isinstance(tail, Deb822Token) and tail.is_comment:
                raise ValueError("Fields must not end on a comment")
            if not tail.convert_to_text().endswith("\n"):
                # Always end on a newline
                self.append_newline()

            if self._format_preserve_original_formatting:
                value_text = self._generate_field_content()
                text = ":".join((field_name, value_text))
            else:
                text = self._generate_reformatted_field_content()

            new_content = text.splitlines(keepends=True)
        else:
            # Special-case for the empty list which will be mapped to
            # an empty field.  Always end on a newline (avoids errors
            # if there is a field after this)
            new_content = [field_name + ":\n"]

        # As absurd as it might seem, it is easier to just use the parser to
        # construct the AST correctly
        deb822_file = parse_deb822_file(iter(new_content))
        error_token = deb822_file.find_first_error_element()
        if error_token:
            raise ValueError("Syntax error in new field value for " + field_name)
        paragraph = next(iter(deb822_file))
        assert isinstance(paragraph, Deb822NoDuplicateFieldsParagraphElement)
        new_kvpair_element = paragraph.get_kvpair_element(field_name)
        assert new_kvpair_element is not None
        kvpair_element.value_element = new_kvpair_element.value_element
        self._changed = False

    def sort_elements(
        self,
        *,
        key=None,  # type: Optional[Callable[[VE], Any]]
        reverse=False,  # type: bool
    ):
        # type: (...) -> None
        """Sort the elements (abstract values) in this list.

        This method will sort the logical values of the list.  It will
        attempt to preserve comments associated with a given value where
        possible.  Whether space and separators are preserved depends on
        the contents of the field as well as the formatting settings.

        Sorting (without reformatting) is likely to leave you with "awkward"
        whitespace. Therefore, you almost always want to apply reformatting
        such as the reformat_when_finished() method.

        Sorting will invalidate all ValueReferences.
        """
        vtype = self._vtype
        stype = self._stype

        def key_func(x):
            # type: (Tuple[VE, List[TokenOrElement]]) -> Any
            element = x[0]
            return key(element) if key else element.convert_to_text()

        # Pair every value with the tokens found from the first comment
        # preceding it up to (but excluding) the value itself.
        parts = []
        comment_start_node = None
        for node in self._token_list.iter_nodes():
            value = node.value
            if isinstance(value, Deb822Token) and value.is_comment:
                if comment_start_node is None:
                    comment_start_node = node
                continue

            if isinstance(value, vtype):
                comments = []
                if comment_start_node is not None:
                    for keep_node in comment_start_node.iter_next(skip_current=False):
                        if keep_node is node:
                            break
                        comments.append(keep_node.value)
                parts.append((value, comments))
                comment_start_node = None

        parts.sort(key=key_func, reverse=reverse)

        # Rebuild the token list from scratch in the sorted order
        self._changed = True
        self._token_list.clear()
        separator_is_space = self._default_separator_factory().is_whitespace

        first_value = True
        for value, comments in parts:
            if first_value:
                first_value = False
                if comments:
                    # While unlikely, there could be a separator between the comments.
                    # It would be in the way and we remove it.
                    comments = [x for x in comments if not isinstance(x, stype)]
                    # Comments cannot start the field, so inject a newline to
                    # work around that
                    self.append_newline()
            else:
                if not separator_is_space and not any(
                    isinstance(x, stype) for x in comments
                ):
                    # While unlikely, you can hide a comma between two comments and expect
                    # us to preserve it.  However, the more common case is that the separator
                    # appeared before the comments and was thus omitted (leaving us to re-add
                    # it here).
                    self.append_separator(space_after_separator=False)
                if comments:
                    self.append_newline()
                else:
                    self._token_list.append(Deb822WhitespaceToken(" "))

            self._token_list.extend(comments)
            self.append_value(value)

    def sort(
        self,
        *,
        key=None,  # type: Optional[Callable[[str], Any]]
        **kwargs,  # type: Any
    ):
        # type: (...) -> None
        """Sort the values (rendered as str) in this list.

        This method will sort the logical values of the list.  It will
        attempt to preserve comments associated with a given value where
        possible.  Whether space and separators are preserved depends on
        the contents of the field as well as the formatting settings.

        Sorting (without reformatting) is likely to leave you with "awkward"
        whitespace. Therefore, you almost always want to apply reformatting
        such as the reformat_when_finished() method.

        Sorting will invalidate all ValueReferences.
        """
        if key is not None:
            # Adapt the str-based key to the element-based key that
            # sort_elements expects.
            render = self._render
            kwargs["key"] = lambda vt: key(render(vt))
        self.sort_elements(**kwargs)

780

781

class Interpretation(Generic[T]):
    """Base class for interpretations of the value of a key-value pair"""

    def interpret(
        self,
        kvpair_element,  # type: Deb822KeyValuePairElement
        discard_comments_on_read=True,  # type: bool
    ):
        # type: (...) -> T
        """Interpret the value of kvpair_element (implemented by subclasses)"""
        raise NotImplementedError  # pragma: no cover

791

792

class GenericContentBasedInterpretation(Interpretation[T], Generic[T, VE]):
    """Interpretation that re-tokenizes the text content of the field value

    The value text is split by the tokenizer; every Deb822ValueToken is then
    handed to the value parser while all other tokens are passed through.
    """

    def __init__(
        self,
        tokenizer,  # type: Callable[[str], Iterable['Deb822Token']]
        value_parser,  # type: StreamingValueParser[VE]
    ):
        # type: (...) -> None
        super().__init__()
        self._tokenizer = tokenizer
        self._value_parser = value_parser

    def _high_level_interpretation(
        self,
        kvpair_element,  # type: Deb822KeyValuePairElement
        proxy_element,  # type: Deb822InterpretationProxyElement
        discard_comments_on_read=True,  # type: bool
    ):
        # type: (...) -> T
        """Build the final interpretation (implemented by subclasses)"""
        raise NotImplementedError  # pragma: no cover

    def _parse_stream(
        self, buffered_iterator  # type: BufferingIterator[Deb822Token]
    ):
        # type: (...) -> Iterable[Union[Deb822Token, VE]]
        """Replace value tokens in the stream with parsed value elements"""
        value_parser = self._value_parser
        for token in buffered_iterator:
            if not isinstance(token, Deb822ValueToken):
                yield token
                continue
            # The parser receives the iterator as well and may therefore
            # consume further tokens from it.
            yield value_parser(token, buffered_iterator)

    def _parse_kvpair(
        self, kvpair  # type: Deb822KeyValuePairElement
    ):
        # type: (...) -> Deb822InterpretationProxyElement
        """Parse the value of kvpair into a proxy element wrapping the parts"""
        value_element = kvpair.value_element
        content = value_element.convert_to_text()
        token_list = list(self._parse_str(content))  # type: List['TokenOrElement']
        return Deb822InterpretationProxyElement(value_element, token_list)

    def _parse_str(self, content):
        # type: (str) -> Iterable[Union[Deb822Token, VE]]
        """Tokenize and parse content, length-checking both stages"""
        content_len = len(content)
        checked_tokens = len_check_iterator(
            content,
            self._tokenizer(content),
            content_len=content_len,
        )
        parsed_stream = self._parse_stream(BufferingIterator(checked_tokens))
        yield from len_check_iterator(
            content,
            parsed_stream,
            content_len=content_len,
        )

    def interpret(
        self,
        kvpair_element,  # type: Deb822KeyValuePairElement
        discard_comments_on_read=True,  # type: bool
    ):
        # type: (...) -> T
        """Parse the field value and return its high level interpretation"""
        proxy_element = self._parse_kvpair(kvpair_element)
        return self._high_level_interpretation(
            kvpair_element,
            proxy_element,
            discard_comments_on_read=discard_comments_on_read,
        )

864

865

866def _parser_to_value_factory(

867 parser, # type: StrToValueParser[VE]

868 vtype, # type: Type[VE]

869):

870 # type: (...) -> Callable[[str], VE]

871 def _value_factory(v):

872 # type: (str) -> VE

873 if v == "": 873 ↛ 874line 873 didn't jump to line 874, because the condition on line 873 was never true

874 raise ValueError("The empty string is not a value")

875 token_iter = iter(parser(v))

876 t1 = next(token_iter, None) # type: Optional[Union[TokenOrElement]]

877 t2 = next(token_iter, None)

878 assert t1 is not None, (

879 'Bad parser - it returned None (or no TE) for "' + v + '"'

880 )

881 if t2 is not None: 881 ↛ 882line 881 didn't jump to line 882, because the condition on line 881 was never true

882 msg = textwrap.dedent(

883 """\

884 The input "{v}" should have been exactly one element, but the parser provided at

885 least two. This can happen with unnecessary leading/trailing whitespace

886 or including commas the value for a comma list.

887 """

888 ).format(v=v)

889 raise ValueError(msg)

890 if not isinstance(t1, vtype): 890 ↛ 891line 890 didn't jump to line 891, because the condition on line 890 was never true

891 if isinstance(t1, Deb822Token) and (t1.is_comment or t1.is_whitespace):

892 raise ValueError(

893 'The input "{v}" is whitespace or a comment: Expected a value'

894 )

895 msg = (

896 'The input "{v}" should have produced a element of type {vtype_name}, but'

897 " instead it produced {t1}"

898 )

899 raise ValueError(msg.format(v=v, vtype_name=vtype.__name__, t1=t1))

900

901 assert len(t1.convert_to_text()) == len(v), (

902 "Bad tokenizer - the token did not cover the input text"

903 " exactly ({t1_len} != {v_len}".format(

904 t1_len=len(t1.convert_to_text()), v_len=len(v)

905 )

906 )

907 return t1

908

909 return _value_factory

910

911

class ListInterpretation(
    GenericContentBasedInterpretation[Deb822ParsedTokenList[VE, ST], VE]
):
    """Interpretation that exposes a field value as a Deb822ParsedTokenList.

    The tokenizer and value parser are handled by the base class; this class
    only remembers the extra pieces needed to build the list view.
    """

    def __init__(
        self,
        tokenizer,  # type: Callable[[str], Iterable['Deb822Token']]
        value_parser,  # type: StreamingValueParser[VE]
        vtype,  # type: Type[VE]
        stype,  # type: Type[ST]
        default_separator_factory,  # type: Callable[[], ST]
        render_factory,  # type: Callable[[bool], Callable[[VE], str]]
    ):
        # type: (...) -> None
        super().__init__(tokenizer, value_parser)
        # Value and separator types, passed on to every list view.
        self._vtype, self._stype = vtype, stype
        self._default_separator_factory = default_separator_factory
        # Called with the discard-comments flag to obtain the renderer.
        self._render_factory = render_factory

    def _high_level_interpretation(
        self,
        kvpair_element,  # type: Deb822KeyValuePairElement
        proxy_element,  # type: Deb822InterpretationProxyElement
        discard_comments_on_read=True,  # type: bool
    ):
        # type: (...) -> Deb822ParsedTokenList[VE, ST]
        """Wrap the parsed proxy element in a Deb822ParsedTokenList."""
        constructor_args = (
            kvpair_element,
            proxy_element,
            self._vtype,
            self._stype,
            self._parse_str,
            self._default_separator_factory,
            self._render_factory(discard_comments_on_read),
        )
        return Deb822ParsedTokenList(*constructor_args)

948

949

def _parse_whitespace_list_value(token, _):
    # type: (Deb822Token, BufferingIterator[Deb822Token]) -> Deb822ParsedValueElement
    """Wrap a single token as a parsed value element.

    The second argument (the buffering iterator) is ignored.
    """
    single_token_value = [token]
    return Deb822ParsedValueElement(single_token_value)

953

954

def _is_comma_token(v):
    # type: (TokenOrElement) -> bool
    """Return True when ``v`` is a Deb822CommaToken.

    Used as the predicate for ``peek_find`` when scanning ahead for the next
    comma.
    """
    found_comma = isinstance(v, Deb822CommaToken)
    return found_comma

959

960

def _parse_comma_list_value(token, buffered_iterator):
    # type: (Deb822Token, BufferingIterator[Deb822Token]) -> Deb822ParsedValueElement
    """Collect the tokens of one value in a comma separated list.

    Starting from ``token``, the value runs up to (but not including) the next
    comma token; when there is no further comma it runs to the last value
    token in the remaining buffer. The tokens collected after ``token`` are
    consumed from ``buffered_iterator``.
    """
    parts = [token]
    offset_of_comma = buffered_iterator.peek_find(_is_comma_token)
    if offset_of_comma is None:
        # Last value in the list: take everything that is left, then drop
        # trailing tokens until the value ends on a value token.
        parts.extend(buffered_iterator.peek_buffer())
        while parts and not isinstance(parts[-1], Deb822ValueToken):
            parts.pop()
    else:
        # A comma follows; everything before it belongs to this value.
        parts.extend(buffered_iterator.peek_many(offset_of_comma - 1))

    # "- 1" because the initial token was already taken by the caller.
    buffered_iterator.consume_many(len(parts) - 1)
    return Deb822ParsedValueElement(parts)

977

978

def _parse_uploaders_list_value(token, buffered_iterator):
    # type: (Deb822Token, BufferingIterator[Deb822Token]) -> Deb822ParsedValueElement
    """Collect the tokens of one value in an Uploaders-style comma list.

    Starting from ``token``, tokens are gathered from ``buffered_iterator``
    until a comma is found whose closest preceding value token ends with ">",
    or until the buffer is exhausted (in which case trailing non-value tokens
    are trimmed). Commas that do not follow ">" are kept as part of the value.
    """

    # This is similar to _parse_comma_list_value *except* that there is an extra special
    # case. Namely comma only counts as a true separator if it follows ">"
    value_parts = [token]
    # -1 is only a "not None" start value so the loop body runs at least once.
    comma_offset = -1  # type: Optional[int]
    while comma_offset is not None:
        comma_offset = buffered_iterator.peek_find(_is_comma_token)
        if comma_offset is not None:
            # The value is followed by a comma. Verify that this is a terminating
            # comma (comma may appear in the name or email)
            #
            # We include value_parts[-1] to easily cope with the common case of
            # "foo <a@b.com>," where we will have 0 peeked element to examine.
            peeked_elements = [value_parts[-1]]
            peeked_elements.extend(buffered_iterator.peek_many(comma_offset - 1))
            comma_was_separator = False
            # Walk backwards from the comma to the closest value token.
            i = len(peeked_elements) - 1
            while i >= 0:
                # NOTE: this rebinds the ``token`` parameter; the original
                # token is already stored in value_parts.
                token = peeked_elements[i]
                if isinstance(token, Deb822ValueToken):
                    if token.text.endswith(">"):
                        # The comma terminates the value
                        value_parts.extend(buffered_iterator.consume_many(i))
                        assert isinstance(
                            value_parts[-1], Deb822ValueToken
                        ) and value_parts[-1].text.endswith(">"), "Got: " + str(
                            value_parts
                        )
                        comma_was_separator = True
                    break
                i -= 1
            if comma_was_separator:
                break
            # The comma is part of the value: take everything up to and
            # including it, then look for the next comma.
            value_parts.extend(buffered_iterator.consume_many(comma_offset))
            assert isinstance(value_parts[-1], Deb822CommaToken)
        else:
            # The value is the last value there is. Consume all remaining tokens
            # and then trim from the right.
            remaining_part = buffered_iterator.peek_buffer()
            consume_elements = len(remaining_part)
            value_parts.extend(remaining_part)
            while value_parts and not isinstance(value_parts[-1], Deb822ValueToken):
                value_parts.pop()
                # Trimmed tokens are left in the iterator, not consumed.
                consume_elements -= 1
            buffered_iterator.consume_many(consume_elements)

    return Deb822ParsedValueElement(value_parts)

1028

1029

class Deb822Element(Locatable):
    """Composite elements (consists of 1 or more tokens)"""

    __slots__ = ("_parent_element", "_full_size_cache", "__weakref__")

    def __init__(self):
        # type: () -> None
        # Weak reference to the parent (or None when detached).
        self._parent_element = None  # type: Optional[ReferenceType['Deb822Element']]
        # Cached result of size(); subclasses reset it when parts change.
        self._full_size_cache = None  # type: Optional[Range]

    def iter_parts(self):
        # type: () -> Iterable[TokenOrElement]
        """Iterate over the direct parts; must be provided by subclasses."""
        raise NotImplementedError  # pragma: no cover

    def iter_parts_of_type(self, only_element_or_token_type):
        # type: (Type[TE]) -> Iterable[TE]
        """Yield the direct parts that are instances of the given type."""
        yield from (
            candidate
            for candidate in self.iter_parts()
            if isinstance(candidate, only_element_or_token_type)
        )

    def iter_tokens(self):
        # type: () -> Iterable[Deb822Token]
        """Yield every token below this element, depth first."""
        for child in self.iter_parts():
            # Control check to catch bugs early
            assert child._parent_element is not None
            if not isinstance(child, Deb822Element):
                yield child
                continue
            yield from child.iter_tokens()

    def iter_recurse(
        self, *, only_element_or_token_type=None  # type: Optional[Type[TE]]
    ):
        # type: (...) -> Iterable[TE]
        """Yield parts recursively, optionally filtered by type.

        Nested elements are always descended into, also when they themselves
        do not match the filter.
        """
        wanted = only_element_or_token_type
        for child in self.iter_parts():
            if wanted is None or isinstance(child, wanted):
                yield cast("TE", child)
            if isinstance(child, Deb822Element):
                yield from child.iter_recurse(only_element_or_token_type=wanted)

    @property
    def is_error(self):
        # type: () -> bool
        return False

    @property
    def is_comment(self):
        # type: () -> bool
        return False

    @property
    def parent_element(self):
        # type: () -> Optional[Deb822Element]
        """The parent element as resolved from the stored weak reference."""
        return resolve_ref(self._parent_element)

    @parent_element.setter
    def parent_element(self, new_parent):
        # type: (Optional[Deb822Element]) -> None
        if new_parent is None:
            self._parent_element = None
        else:
            self._parent_element = weakref.ref(new_parent)

    def _init_parent_of_parts(self):
        # type: () -> None
        """Register this element as parent of each direct part."""
        for child in self.iter_parts():
            child.parent_element = self

    # Deliberately not a "text" property, to signal that it is not necessary cheap.
    def convert_to_text(self):
        # type: () -> str
        """Concatenate the text of every token below this element."""
        pieces = [tok.text for tok in self.iter_tokens()]
        return "".join(pieces)

    def clear_parent_if_parent(self, parent):
        # type: (Deb822Element) -> None
        """Detach from ``parent``, but only if it is the current parent."""
        if self.parent_element is parent:
            self._parent_element = None

    def size(self, *, skip_leading_comments: bool = True) -> Range:
        """Return the (cached) size computed from the sizes of all parts.

        ``skip_leading_comments`` is not consulted here; the size always
        covers every part.
        """
        cached = self._full_size_cache
        if cached is not None:
            return cached
        part_sizes = (p.size(skip_leading_comments=False) for p in self.iter_parts())
        cached = Range.from_position_and_sizes(START_POSITION, part_sizes)
        self._full_size_cache = cached
        return cached

1120

1121

class Deb822InterpretationProxyElement(Deb822Element):
    """Element that holds interpreted parts on behalf of a real element.

    Positions are answered by delegating to the real (parent) element.
    """

    __slots__ = ("parts",)

    def __init__(
        self, real_element: Deb822Element, parts: List[TokenOrElement]
    ) -> None:
        super().__init__()
        self.parent_element = real_element
        self.parts = parts
        # The parts now belong to the proxy rather than the real element.
        for owned_part in parts:
            owned_part.parent_element = self

    def iter_parts(self):
        # type: () -> Iterable[TokenOrElement]
        return iter(self.parts)

    def _require_parent(self) -> Deb822Element:
        # Resolve the weakly referenced real element or fail loudly.
        real_element = self.parent_element
        if real_element is None:
            raise RuntimeError("parent was garbage collected")
        return real_element

    def position_in_parent(self, *, skip_leading_comments: bool = True) -> Position:
        # ``skip_leading_comments`` is accepted but not forwarded.
        return self._require_parent().position_in_parent()

    def position_in_file(self, *, skip_leading_comments: bool = True) -> Position:
        # ``skip_leading_comments`` is accepted but not forwarded.
        return self._require_parent().position_in_file()

    def size(self, *, skip_leading_comments: bool = True) -> Range:
        # Same as parent except we never use a cache.
        return Range.from_position_and_sizes(
            START_POSITION,
            (part.size(skip_leading_comments=False) for part in self.iter_parts()),
        )

1155

1156

class Deb822ErrorElement(Deb822Element):
    """Element representing elements or tokens that are out of place

    Commonly, it will just be instances of Deb822ErrorToken, but it can be other
    things. As an example if a parser discovers out of order elements/tokens,
    it can bundle them in a Deb822ErrorElement to signal that the sequence of
    elements/tokens are invalid (even if the tokens themselves are valid).
    """

    __slots__ = ("_parts",)

    def __init__(self, parts):
        # type: (Sequence[TokenOrElement]) -> None
        super().__init__()
        # Frozen copy so later changes to the caller's sequence do not leak in.
        frozen_parts = tuple(parts)
        self._parts = frozen_parts
        self._init_parent_of_parts()

    def iter_parts(self):
        # type: () -> Iterable[TokenOrElement]
        for misplaced in self._parts:
            yield misplaced

    @property
    def is_error(self):
        # type: () -> bool
        return True

1182

1183

class Deb822ValueLineElement(Deb822Element):
    """Consists of one "line" of a value

    A line is made up of, in order: an optional comment element, an optional
    continuation line token, optional leading whitespace, the value parts,
    optional trailing whitespace and an optional newline token.
    """

    __slots__ = (
        "_comment_element",
        "_continuation_line_token",
        "_leading_whitespace_token",
        "_value_tokens",
        "_trailing_whitespace_token",
        "_newline_token",
    )

    def __init__(
        self,
        comment_element,  # type: Optional[Deb822CommentElement]
        continuation_line_token,  # type: Optional[Deb822ValueContinuationToken]
        leading_whitespace_token,  # type: Optional[Deb822WhitespaceToken]
        value_parts,  # type: List[TokenOrElement]
        trailing_whitespace_token,  # type: Optional[Deb822WhitespaceToken]
        # only optional if it is the last line of the file and the file does not
        # end with a newline.
        newline_token,  # type: Optional[Deb822WhitespaceToken]
    ):
        # type: (...) -> None
        """Create the line and register it as parent of all given parts.

        :raises ValueError: If a comment element is given without a
          continuation line token.
        """
        super().__init__()
        if comment_element is not None and continuation_line_token is None:
            raise ValueError("Only continuation lines can have comments")
        self._comment_element = comment_element  # type: Optional[Deb822CommentElement]
        self._continuation_line_token = continuation_line_token
        self._leading_whitespace_token = (
            leading_whitespace_token
        )  # type: Optional[Deb822WhitespaceToken]
        self._value_tokens = value_parts  # type: List[TokenOrElement]
        self._trailing_whitespace_token = trailing_whitespace_token
        self._newline_token = newline_token  # type: Optional[Deb822WhitespaceToken]
        self._init_parent_of_parts()

    @property
    def comment_element(self):
        # type: () -> Optional[Deb822CommentElement]
        return self._comment_element

    @property
    def continuation_line_token(self):
        # type: () -> Optional[Deb822ValueContinuationToken]
        return self._continuation_line_token

    @property
    def newline_token(self):
        # type: () -> Optional[Deb822WhitespaceToken]
        return self._newline_token

    def add_newline_if_missing(self):
        # type: () -> bool
        """Append a newline token when the line has none.

        :return: True if a newline token was added, False if one was
          already present.
        """
        if self._newline_token is None:
            self._newline_token = Deb822NewlineAfterValueToken()
            self._newline_token.parent_element = self
            # The line grew, so the cached size is no longer valid.
            self._full_size_cache = None
            return True
        return False

    def _iter_content_parts(self):
        # type: () -> Iterable[TokenOrElement]
        """Yield the leading whitespace, value parts and trailing whitespace."""
        if self._leading_whitespace_token:
            yield self._leading_whitespace_token
        yield from self._value_tokens
        if self._trailing_whitespace_token:
            yield self._trailing_whitespace_token

    def _iter_content_tokens(self):
        # type: () -> Iterable[Deb822Token]
        """Yield the tokens of the content parts, flattening nested elements."""
        for part in self._iter_content_parts():
            if isinstance(part, Deb822Element):
                yield from part.iter_tokens()
            else:
                yield part

    def convert_content_to_text(self):
        # type: () -> str
        """Return the text of the content parts.

        The comment, continuation line token and newline token are excluded.
        """
        if (
            len(self._value_tokens) == 1
            and not self._leading_whitespace_token
            and not self._trailing_whitespace_token
            and isinstance(self._value_tokens[0], Deb822Token)
        ):
            # By default, we get a single value spanning the entire line
            # (minus continuation line and newline, but we are supposed to
            # exclude those)
            return self._value_tokens[0].text

        return "".join(t.text for t in self._iter_content_tokens())

    def iter_parts(self):
        # type: () -> Iterable[TokenOrElement]
        """Yield all present parts of the line in order."""
        if self._comment_element:
            yield self._comment_element
        if self._continuation_line_token:
            yield self._continuation_line_token
        yield from self._iter_content_parts()
        if self._newline_token:
            yield self._newline_token

    def size(self, *, skip_leading_comments: bool = True) -> Range:
        """Return the size of the line, optionally without comment parts.

        With ``skip_leading_comments`` the size is recomputed on every call
        from the non-comment parts; otherwise the base class implementation
        (which caches) is used.
        """
        if skip_leading_comments:
            return Range.from_position_and_sizes(
                START_POSITION,
                (
                    p.size(skip_leading_comments=False)
                    for p in self.iter_parts()
                    if not p.is_comment
                ),
            )
        return super().size(skip_leading_comments=skip_leading_comments)

    def position_in_parent(self, *, skip_leading_comments: bool = True) -> Position:
        """Return the position of this line relative to its parent.

        With ``skip_leading_comments`` the position is adjusted using the
        positions of the non-comment parts.
        """
        base_pos = super().position_in_parent(skip_leading_comments=False)
        if skip_leading_comments:
            # NOTE(review): there is no ``break`` here, so the adjustment is
            # applied once per non-comment part rather than only for the
            # first one - confirm this is intended.
            for p in self.iter_parts():
                if p.is_comment:
                    continue
                non_comment_pos = p.position_in_parent(skip_leading_comments=False)
                base_pos = non_comment_pos.relative_to(base_pos)
        return base_pos

1307

1308

class Deb822ValueElement(Deb822Element):
    """The value of a field, stored as a sequence of value lines."""

    __slots__ = ("_value_entry_elements",)

    def __init__(self, value_entry_elements):
        # type: (Sequence[Deb822ValueLineElement]) -> None
        super().__init__()
        frozen_lines = tuple(value_entry_elements)
        self._value_entry_elements = (
            frozen_lines
        )  # type: Sequence[Deb822ValueLineElement]
        self._init_parent_of_parts()

    @property
    def value_lines(self):
        # type: () -> Sequence[Deb822ValueLineElement]
        """Read-only list of value entries"""
        return self._value_entry_elements

    def iter_parts(self):
        # type: () -> Iterable[TokenOrElement]
        for value_line in self._value_entry_elements:
            yield value_line

    def add_final_newline_if_missing(self):
        # type: () -> bool
        """Ensure the last value line ends with a newline token.

        :return: True if a newline was added to the last line; False if it
          already had one or there are no lines at all.
        """
        lines = self._value_entry_elements
        if not lines:
            return False
        newline_added = lines[-1].add_newline_if_missing()
        if newline_added:
            # The last line grew, so our own cached size is stale as well.
            self._full_size_cache = None
        return newline_added

1338

1339

class Deb822ParsedValueElement(Deb822Element):
    """A single parsed value made up of one or more tokens.

    The token list must start and end on a Deb822ValueToken. Two text forms
    are cached independently: the full text and the text with comment tokens
    left out.
    """

    __slots__ = ("_text_cached", "_text_no_comments_cached", "_token_list")

    def __init__(self, tokens):
        # type: (List[Deb822Token]) -> None
        """Create the element from ``tokens``.

        :raises ValueError: If the first or the last token is not a
          Deb822ValueToken.
        """
        super().__init__()
        self._token_list = tokens
        self._init_parent_of_parts()
        if not isinstance(tokens[0], Deb822ValueToken) or not isinstance(
            tokens[-1], Deb822ValueToken
        ):
            raise ValueError(
                self.__class__.__name__ + " MUST start and end on a Deb822ValueToken"
            )
        if len(tokens) == 1:
            # A single value token has no comments, so both text forms are
            # simply the token's text.
            token = tokens[0]
            self._text_cached = token.text  # type: Optional[str]
            self._text_no_comments_cached = token.text  # type: Optional[str]
        else:
            self._text_cached = None
            self._text_no_comments_cached = None

    def convert_to_text(self):
        # type: () -> str
        """Return the full text of the value (comments included)."""
        # Uses its own cache slot; sharing the "no comments" slot would make
        # the two conversions return each other's result.
        if self._text_cached is None:
            self._text_cached = super().convert_to_text()
        return self._text_cached

    def convert_to_text_without_comments(self):
        # type: () -> str
        """Return the text of the value with comment tokens left out."""
        if self._text_no_comments_cached is None:
            self._text_no_comments_cached = "".join(
                t.text for t in self.iter_tokens() if not t.is_comment
            )
        return self._text_no_comments_cached

    def iter_parts(self):
        # type: () -> Iterable[TokenOrElement]
        yield from self._token_list

1380

1381

class Deb822CommentElement(Deb822Element):
    """A block of one or more comment tokens.

    Supports ``len()`` and integer indexing over the comment tokens.
    """

    __slots__ = ("_comment_tokens",)

    def __init__(self, comment_tokens):
        # type: (Sequence[Deb822CommentToken]) -> None
        """Create the element from ``comment_tokens``.

        :raises ValueError: If no comment tokens are given.
        """
        super().__init__()
        self._comment_tokens = tuple(
            comment_tokens
        )  # type: Sequence[Deb822CommentToken]
        # Check the stored tuple rather than the argument: an exhausted or
        # empty iterator is truthy and would otherwise slip through.
        if not self._comment_tokens:  # pragma: no cover
            raise ValueError("Comment elements must have at least one comment token")
        self._init_parent_of_parts()

    @property
    def is_comment(self):
        # type: () -> bool
        return True

    def __len__(self):
        # type: () -> int
        return len(self._comment_tokens)

    def __getitem__(self, item):
        # type: (int) -> Deb822CommentToken
        return self._comment_tokens[item]

    def iter_parts(self):
        # type: () -> Iterable[TokenOrElement]
        yield from self._comment_tokens

1411

1412

class Deb822KeyValuePairElement(Deb822Element):
    """A field: optional comment, field name, separator and value."""

    __slots__ = (
        "_comment_element",
        "_field_token",
        "_separator_token",
        "_value_element",
    )

    def __init__(
        self,
        comment_element,  # type: Optional[Deb822CommentElement]
        field_token,  # type: Deb822FieldNameToken
        separator_token,  # type: Deb822FieldSeparatorToken
        value_element,  # type: Deb822ValueElement
    ):
        # type: (...) -> None
        super().__init__()
        self._comment_element = comment_element  # type: Optional[Deb822CommentElement]
        self._field_token = field_token  # type: Deb822FieldNameToken
        self._separator_token = separator_token  # type: Deb822FieldSeparatorToken
        self._value_element = value_element  # type: Deb822ValueElement
        self._init_parent_of_parts()

    @property
    def field_name(self):
        # type: () -> _strI
        """The text of the field name token."""
        name_token = self.field_token
        return name_token.text

    @property
    def field_token(self):
        # type: () -> Deb822FieldNameToken
        return self._field_token

    @property
    def value_element(self):
        # type: () -> Deb822ValueElement
        return self._value_element

    @value_element.setter
    def value_element(self, new_value):
        # type: (Deb822ValueElement) -> None
        # Replacing the value changes our size.
        self._full_size_cache = None
        previous_value = self._value_element
        previous_value.clear_parent_if_parent(self)
        self._value_element = new_value
        new_value.parent_element = self

    def interpret_as(
        self,
        interpreter,  # type: Interpretation[T]
        discard_comments_on_read=True,  # type: bool
    ):
        # type: (...) -> T
        """Interpret this field's value using the given interpretation."""
        interpreted = interpreter.interpret(
            self, discard_comments_on_read=discard_comments_on_read
        )
        return interpreted

    @property
    def comment_element(self):
        # type: () -> Optional[Deb822CommentElement]
        return self._comment_element

    @comment_element.setter
    def comment_element(self, value):
        # type: (Optional[Deb822CommentElement]) -> None
        self._full_size_cache = None
        if value is not None and not value[-1].text.endswith("\n"):
            raise ValueError("Field comments must end with a newline")
        previous_comment = self._comment_element
        if previous_comment:
            previous_comment.clear_parent_if_parent(self)
        if value is not None:
            value.parent_element = self
        self._comment_element = value

    def iter_parts(self):
        # type: () -> Iterable[TokenOrElement]
        leading_comment = self._comment_element
        if leading_comment:
            yield leading_comment
        yield self._field_token
        yield self._separator_token
        yield self._value_element

    def position_in_parent(
        self,
        *,
        skip_leading_comments: bool = True,
    ) -> Position:
        """Return the position in the parent.

        When comments are skipped and a comment is present, the position is
        adjusted by the position of the field name token.
        """
        position = super().position_in_parent(skip_leading_comments=False)
        if skip_leading_comments and self._comment_element:
            name_position = self._field_token.position_in_parent()
            position = name_position.relative_to(position)
        return position

    def size(self, *, skip_leading_comments: bool = True) -> Range:
        """Return the size, optionally leaving out comment parts."""
        if not skip_leading_comments:
            return super().size(skip_leading_comments=False)
        non_comment_sizes = (
            part.size(skip_leading_comments=False)
            for part in self.iter_parts()
            if not part.is_comment
        )
        return Range.from_position_and_sizes(START_POSITION, non_comment_sizes)

1518

1519

1520def _format_comment(c):

1521 # type: (str) -> str

1522 if c == "": 1522 ↛ 1524line 1522 didn't jump to line 1524, because the condition on line 1522 was never true

1523 # Special-case: Empty strings are mapped to an empty comment line

1524 return "#\n"

1525 if "\n" in c[:-1]: 1525 ↛ 1526line 1525 didn't jump to line 1526, because the condition on line 1525 was never true

1526 raise ValueError("Comment lines must not have embedded newlines")

1527 if not c.endswith("\n"): 1527 ↛ 1529line 1527 didn't jump to line 1529, because the condition on line 1527 was never false

1528 c = c.rstrip() + "\n"

1529 if not c.startswith("#"): 1529 ↛ 1531line 1529 didn't jump to line 1531, because the condition on line 1529 was never false

1530 c = "# " + c.lstrip()

1531 return c

1532

1533

1534def _unpack_key(

1535 item, # type: ParagraphKey

1536 raise_if_indexed=False, # type: bool

1537):

1538 # type: (...) -> Tuple[_strI, Optional[int], Optional[Deb822FieldNameToken]]

1539 index = None # type: Optional[int]

1540 name_token = None # type: Optional[Deb822FieldNameToken]

1541 if isinstance(item, tuple):

1542 key, index = item

1543 if raise_if_indexed: 1543 ↛ 1550line 1543 didn't jump to line 1550, because the condition on line 1543 was never false

1544 # Fudge "(key, 0)" into a "key" callers to defensively support

1545 # both paragraph styles with the same key.

1546 if index != 0: 1546 ↛ 1547line 1546 didn't jump to line 1547, because the condition on line 1546 was never true

1547 msg = 'Cannot resolve key "{key}" with index {index}. The key is not indexed'

1548 raise KeyError(msg.format(key=key, index=index))

1549 index = None

1550 key = _strI(key)

1551 else:

1552 index = None

1553 if isinstance(item, Deb822FieldNameToken): 1553 ↛ 1554line 1553 didn't jump to line 1554, because the condition on line 1553 was never true

1554 name_token = item

1555 key = name_token.text

1556 else:

1557 key = _strI(item)

1558

1559 return key, index, name_token

1560

1561

1562def _convert_value_lines_to_lines(

1563 value_lines, # type: Iterable[Deb822ValueLineElement]

1564 strip_comments, # type: bool

1565):

1566 # type: (...) -> Iterable[str]

1567 if not strip_comments: 1567 ↛ 1568line 1567 didn't jump to line 1568, because the condition on line 1567 was never true

1568 yield from (v.convert_to_text() for v in value_lines)

1569 else:

1570 for element in value_lines:

1571 yield "".join(x.text for x in element.iter_tokens() if not x.is_comment)

1572

1573

if sys.version_info >= (3, 9) or TYPE_CHECKING:
    # On Python 3.9+ (and for type checkers) the ABC can be subscripted
    # directly.
    _ParagraphMapping_Base = collections.abc.Mapping[ParagraphKey, T]
else:
    # Python 3.5 - 3.8 compat - we are not allowed to subscript the abc.Mapping
    # - use this little hack to work around it
    class _ParagraphMapping_Base(collections.abc.Mapping, Generic[T], ABC):
        pass

1581

1582

1583# Deb822ParagraphElement uses this Mixin (by having `_paragraph` return self).

1584# Therefore, the Mixin needs to call the "proper" methods on the paragraph to

1585# avoid doing infinite recursion.

class AutoResolvingMixin(Generic[T], _ParagraphMapping_Base[T]):
    """Mapping behaviour implemented on top of a paragraph element.

    Subclasses provide ``_paragraph`` and ``_interpret_value``.
    """

    @property
    def _auto_resolve_ambiguous_fields(self):
        # type: () -> bool
        return True

    @property
    def _paragraph(self):
        # type: () -> Deb822ParagraphElement
        raise NotImplementedError  # pragma: no cover

    def __len__(self):
        # type: () -> int
        paragraph = self._paragraph
        return paragraph.kvpair_count

    def __contains__(self, item):
        # type: (object) -> bool
        paragraph = self._paragraph
        return paragraph.contains_kvpair_element(item)

    def __iter__(self):
        # type: () -> Iterator[ParagraphKey]
        all_keys = self._paragraph.iter_keys()
        return iter(all_keys)

    def __getitem__(self, item):
        # type: (ParagraphKey) -> T
        lookup_key = item
        if self._auto_resolve_ambiguous_fields and isinstance(item, str):
            # A plain name is resolved to the field at index 0.
            lookup_key = (item, 0)
        kvpair = self._paragraph.get_kvpair_element(lookup_key)
        assert kvpair is not None
        return self._interpret_value(item, kvpair)

    def __delitem__(self, item):
        # type: (ParagraphKey) -> None
        paragraph = self._paragraph
        paragraph.remove_kvpair_element(item)

    def _interpret_value(self, key, value):
        # type: (ParagraphKey, Deb822KeyValuePairElement) -> T
        raise NotImplementedError  # pragma: no cover

1626

1627

1628# Deb822ParagraphElement uses this Mixin (by having `_paragraph` return self).

1629# Therefore, the Mixin needs to call the "proper" methods on the paragraph to

1630# avoid doing infinite recursion.

class Deb822ParagraphToStrWrapperMixin(AutoResolvingMixin[str], ABC):
    """Mixin exposing the fields of a paragraph as plain strings.

    The four ``_auto_*`` / ``_discard_*`` / ``_preserve_*`` properties are
    feature switches that subclasses may override; all default to True.
    """

    @property
    def _auto_map_initial_line_whitespace(self):
        # type: () -> bool
        return True

    @property
    def _discard_comments_on_read(self):
        # type: () -> bool
        return True

    @property
    def _auto_map_final_newline_in_multiline_values(self):
        # type: () -> bool
        return True

    @property
    def _preserve_field_comments_on_field_updates(self):
        # type: () -> bool
        return True

    def _convert_value_to_str(self, kvpair_element):
        # type: (Deb822KeyValuePairElement) -> str
        """Render the value of ``kvpair_element`` as a string.

        Depending on the feature switches, whitespace around the first line
        is stripped, comment tokens are left out and the final newline of a
        multi-line value is removed.
        """
        value_element = kvpair_element.value_element
        value_entries = value_element.value_lines
        if len(value_entries) == 1:
            # Special case single line entry (e.g. "Package: foo") as they never
            # have comments and we can do some parts more efficient.
            value_entry = value_entries[0]
            t = value_entry.convert_to_text()
            if self._auto_map_initial_line_whitespace:
                t = t.strip()
            return t

        if self._auto_map_initial_line_whitespace or self._discard_comments_on_read:
            converter = _convert_value_lines_to_lines(
                value_entries,
                self._discard_comments_on_read,
            )

            auto_map_space = self._auto_map_initial_line_whitespace

            # Because we know there are more than one line, we can unconditionally inject
            # the newline after the first line
            as_text = "".join(
                (line.strip() + "\n") if auto_map_space and i == 1 else line
                for i, line in enumerate(converter, start=1)
            )
        else:
            # No rewrite necessary.
            as_text = value_element.convert_to_text()

        # endswith() rather than as_text[-1]: the text can be empty (e.g. a
        # value without any lines) and indexing would raise IndexError.
        if self._auto_map_final_newline_in_multiline_values and as_text.endswith("\n"):
            as_text = as_text[:-1]
        return as_text

    def __setitem__(self, item, value):
        # type: (ParagraphKey, str) -> None
        """Set the field ``item`` to the string ``value``.

        :raises ValueError: If the value does not end with a newline when it
          reaches the raw-string path and the final-newline mapping is
          disabled.
        """
        keep_comments = (
            self._preserve_field_comments_on_field_updates
        )  # type: Optional[bool]
        comment = None
        if keep_comments and self._auto_resolve_ambiguous_fields:
            # For ambiguous fields, we have to resolve the original field as
            # the set_field_* methods do not cope with ambiguous fields. This
            # means we might as well clear the keep_comments flag as we have
            # resolved the comment.
            keep_comments = None
            key_lookup = item
            if isinstance(item, str):
                key_lookup = (item, 0)
            orig_kvpair = self._paragraph.get_kvpair_element(key_lookup, use_get=True)
            if orig_kvpair is not None:
                comment = orig_kvpair.comment_element

        if self._auto_map_initial_line_whitespace:
            # Values without any newline are simple single-line values.
            # (Values that merely end in a newline take the raw-string path
            # below, exactly as before.)
            if "\n" not in value:
                self._paragraph.set_field_to_simple_value(
                    item,
                    value.strip(),
                    preserve_original_field_comment=keep_comments,
                    field_comment=comment,
                )
                return
            # Regenerate the first line with normalized whitespace if necessary
            first_line, rest = value.split("\n", 1)
            if first_line and first_line[:1] not in ("\t", " "):
                value = "".join((" ", first_line.strip(), "\n", rest))
            else:
                value = "".join((first_line, "\n", rest))
        if not value.endswith("\n"):
            if not self._auto_map_final_newline_in_multiline_values:
                raise ValueError(
                    "Values must end with a newline (or be single line"
                    " values and use the auto whitespace mapping feature)"
                )
            value += "\n"
        self._paragraph.set_field_from_raw_string(
            item,
            value,
            preserve_original_field_comment=keep_comments,
            field_comment=comment,
        )

    def _interpret_value(self, key, value):
        # type: (ParagraphKey, Deb822KeyValuePairElement) -> str
        # mypy is a bit dense and cannot see that T == str
        return self._convert_value_to_str(value)

1744

1745

class AbstractDeb822ParagraphWrapper(AutoResolvingMixin[T], ABC):
    """Base for wrappers that hold a paragraph plus two read settings."""

    def __init__(
        self,
        paragraph,  # type: Deb822ParagraphElement
        *,
        auto_resolve_ambiguous_fields=False,  # type: bool
        discard_comments_on_read=True,  # type: bool
    ):
        # type: (...) -> None
        # Stored name-mangled; exposed through the properties below.
        self.__discard_comments_on_read = discard_comments_on_read
        self.__auto_resolve_ambiguous_fields = auto_resolve_ambiguous_fields
        self.__paragraph = paragraph

    @property
    def _paragraph(self):
        # type: () -> Deb822ParagraphElement
        """The wrapped paragraph element."""
        return self.__paragraph

    @property
    def _discard_comments_on_read(self):
        # type: () -> bool
        """The ``discard_comments_on_read`` setting given at construction."""
        return self.__discard_comments_on_read

    @property
    def _auto_resolve_ambiguous_fields(self):
        # type: () -> bool
        """The ``auto_resolve_ambiguous_fields`` setting given at construction."""
        return self.__auto_resolve_ambiguous_fields

1774

1775

class Deb822InterpretingParagraphWrapper(AbstractDeb822ParagraphWrapper[T]):
    """Paragraph view whose values are produced by an ``Interpretation``."""

    def __init__(
        self,
        paragraph,  # type: Deb822ParagraphElement
        interpretation,  # type: Interpretation[T]
        *,
        auto_resolve_ambiguous_fields=False,  # type: bool
        discard_comments_on_read=True,  # type: bool
    ):
        # type: (...) -> None
        """Set up the view.

        :param paragraph: The paragraph this view wraps.
        :param interpretation: Object whose ``interpret`` method is applied
          to each key/value pair element that is read through the view.
        :param auto_resolve_ambiguous_fields: Forwarded to the base class.
        :param discard_comments_on_read: Forwarded to the base class.
        """
        super().__init__(
            paragraph,
            auto_resolve_ambiguous_fields=auto_resolve_ambiguous_fields,
            discard_comments_on_read=discard_comments_on_read,
        )
        self._interpretation = interpretation

    def _interpret_value(self, key, value):
        # type: (ParagraphKey, Deb822KeyValuePairElement) -> T
        """Interpret ``value`` with the configured interpretation.

        ``key`` is not used by this implementation.
        """
        interpreted = self._interpretation.interpret(value)
        return interpreted

1797

1798

class Deb822DictishParagraphWrapper(
    AbstractDeb822ParagraphWrapper[str], Deb822ParagraphToStrWrapperMixin
):
    """Configurable ``str``-valued dict-like view of a paragraph.

    On top of the options of the base wrapper, this view stores three
    whitespace/comment handling options and exposes each of them through a
    read-only property.
    """

    def __init__(
        self,
        paragraph,  # type: Deb822ParagraphElement
        *,
        discard_comments_on_read=True,  # type: bool
        auto_map_initial_line_whitespace=True,  # type: bool
        auto_resolve_ambiguous_fields=False,  # type: bool
        preserve_field_comments_on_field_updates=True,  # type: bool
        auto_map_final_newline_in_multiline_values=True,  # type: bool
    ):
        # type: (...) -> None
        """Set up the view.

        :param paragraph: The paragraph this view wraps.
        :param discard_comments_on_read: Forwarded to the base class.
        :param auto_map_initial_line_whitespace: Stored and exposed via the
          ``_auto_map_initial_line_whitespace`` property.
        :param auto_resolve_ambiguous_fields: Forwarded to the base class.
        :param preserve_field_comments_on_field_updates: Stored and exposed
          via the ``_preserve_field_comments_on_field_updates`` property.
        :param auto_map_final_newline_in_multiline_values: Stored and exposed
          via the ``_auto_map_final_newline_in_multiline_values`` property.
        """
        super().__init__(
            paragraph,
            auto_resolve_ambiguous_fields=auto_resolve_ambiguous_fields,
            discard_comments_on_read=discard_comments_on_read,
        )
        self.__auto_map_initial_line_whitespace = auto_map_initial_line_whitespace
        self.__preserve_field_comments_on_field_updates = (
            preserve_field_comments_on_field_updates
        )
        self.__auto_map_final_newline_in_multiline_values = (
            auto_map_final_newline_in_multiline_values
        )

    @property
    def _auto_map_initial_line_whitespace(self):
        # type: () -> bool
        """The ``auto_map_initial_line_whitespace`` option of this view."""
        return self.__auto_map_initial_line_whitespace

    @property
    def _preserve_field_comments_on_field_updates(self):
        # type: () -> bool
        """The ``preserve_field_comments_on_field_updates`` option of this view."""
        return self.__preserve_field_comments_on_field_updates

    @property
    def _auto_map_final_newline_in_multiline_values(self):
        # type: () -> bool
        """The ``auto_map_final_newline_in_multiline_values`` option of this view."""
        return self.__auto_map_final_newline_in_multiline_values

1841

1842

1843class Deb822ParagraphElement(Deb822Element, Deb822ParagraphToStrWrapperMixin, ABC):

1844

@classmethod
def new_empty_paragraph(cls):
    # type: () -> Deb822ParagraphElement
    """Create a paragraph that has no fields yet.

    The result is always a Deb822NoDuplicateFieldsParagraphElement built
    from an empty element list and an empty OrderedSet.
    """
    no_kvpairs = []  # type: List[Deb822KeyValuePairElement]
    return Deb822NoDuplicateFieldsParagraphElement(no_kvpairs, OrderedSet())

1849

@classmethod
def from_dict(cls, mapping):
    # type: (Mapping[str, str]) -> Deb822ParagraphElement
    """Build a new paragraph from a mapping of field names to values.

    Starts from ``cls.new_empty_paragraph()`` and assigns each item of the
    mapping through item assignment, in the mapping's iteration order.

    :param mapping: Field name to field value pairs.
    :return: The newly created paragraph.
    """
    result = cls.new_empty_paragraph()
    for field_name, field_value in mapping.items():
        result[field_name] = field_value
    return result

1857

@classmethod
def from_kvpairs(cls, kvpair_elements):
    # type: (List[Deb822KeyValuePairElement]) -> Deb822ParagraphElement
    """Build a paragraph from a non-empty list of key/value pair elements.

    :param kvpair_elements: The key/value pair elements of the paragraph.
    :return: A Deb822NoDuplicateFieldsParagraphElement when every field
      name occurs once, otherwise a Deb822DuplicateFieldsParagraphElement.
    :raises ValueError: If ``kvpair_elements`` is empty.
    """
    if not kvpair_elements:
        raise ValueError(
            "A paragraph must consist of at least one field/value pair"
        )
    kvpair_order = OrderedSet(kv.field_name for kv in kvpair_elements)
    if len(kvpair_order) != len(kvpair_elements):
        # Fallback implementation, that can cope with the repeated field
        # names at the cost of complexity.
        return Deb822DuplicateFieldsParagraphElement(kvpair_elements)
    # Each field occurs at most once, which is good because that means it
    # is a valid paragraph and we can use the optimized implementation.
    return Deb822NoDuplicateFieldsParagraphElement(kvpair_elements, kvpair_order)

1876

@property
def has_duplicate_fields(self):
    # type: () -> bool
    """Tell whether this paragraph has duplicate fields

    This implementation always answers False.
    """
    return False

1882

def as_interpreted_dict_view(
    self,
    interpretation,  # type: Interpretation[T]
    *,
    auto_resolve_ambiguous_fields=True,  # type: bool
):
    # type: (...) -> Deb822InterpretingParagraphWrapper[T]
    r"""Provide a Dict-like view of the paragraph

    This method returns a dict-like object representing this paragraph and
    is useful for accessing fields in a given interpretation. It is possible
    to use multiple versions of this dict-like view with different interpretations
    on the same paragraph at the same time (for different fields).

        >>> example_deb822_paragraph = '''
        ... Package: foo
        ... # Field comment (because it becomes just before a field)
        ... Architecture: amd64
        ... # Inline comment (associated with the next line)
        ...               i386
        ... # We also support arm
        ...               arm64
        ...               armel
        ... '''
        >>> dfile = parse_deb822_file(example_deb822_paragraph.splitlines())
        >>> paragraph = next(iter(dfile))
        >>> list_view = paragraph.as_interpreted_dict_view(LIST_SPACE_SEPARATED_INTERPRETATION)
        >>> # With the defaults, you only deal with the semantic values
        >>> # - no leading or trailing whitespace on the first part of the value
        >>> list(list_view["Package"])
        ['foo']
        >>> with list_view["Architecture"] as arch_list:
        ...     orig_arch_list = list(arch_list)
        ...     arch_list.replace('i386', 'kfreebsd-amd64')
        >>> orig_arch_list
        ['amd64', 'i386', 'arm64', 'armel']
        >>> list(list_view["Architecture"])
        ['amd64', 'kfreebsd-amd64', 'arm64', 'armel']
        >>> print(paragraph.dump(), end='')
        Package: foo
        # Field comment (because it becomes just before a field)
        Architecture: amd64
        # Inline comment (associated with the next line)
                      kfreebsd-amd64
        # We also support arm
                      arm64
                      armel
        >>> # Format preserved and architecture replaced
        >>> with list_view["Architecture"] as arch_list:
        ...     # Prettify the result as sorting will cause awkward whitespace
        ...     arch_list.reformat_when_finished()
        ...     arch_list.sort()
        >>> print(paragraph.dump(), end='')
        Package: foo
        # Field comment (because it becomes just before a field)
        Architecture: amd64
        # We also support arm
                      arm64
                      armel
        # Inline comment (associated with the next line)
                      kfreebsd-amd64
        >>> list(list_view["Architecture"])
        ['amd64', 'arm64', 'armel', 'kfreebsd-amd64']
        >>> # Format preserved and architecture values sorted

    :param interpretation: Decides how the field values are interpreted.  As an example,
      use LIST_SPACE_SEPARATED_INTERPRETATION for fields such as Architecture in the
      debian/control file.
    :param auto_resolve_ambiguous_fields: This parameter is only relevant for paragraphs
      that contain the same field multiple times (these are generally invalid).  If the
      caller requests an ambiguous field from an invalid paragraph via a plain field name,
      the returned dict-like object will refuse to resolve the field (not knowing which
      version to pick).  This parameter (if set to True) instead changes the error into
      assuming the caller wants the *first* variant.
    """
    view = Deb822InterpretingParagraphWrapper(
        self,
        interpretation,
        auto_resolve_ambiguous_fields=auto_resolve_ambiguous_fields,
    )
    return view

1963

def configured_view(
    self,
    *,
    discard_comments_on_read=True,  # type: bool
    auto_map_initial_line_whitespace=True,  # type: bool
    auto_resolve_ambiguous_fields=True,  # type: bool
    preserve_field_comments_on_field_updates=True,  # type: bool
    auto_map_final_newline_in_multiline_values=True,  # type: bool
):
    # type: (...) -> Deb822DictishParagraphWrapper
    r"""Provide a Dict[str, str]-like view of this paragraph with non-standard parameters

    This method returns a dict-like object representing this paragraph that is
    optionally configured differently from the default view.

        >>> example_deb822_paragraph = '''
        ... Package: foo
        ... # Field comment (because it becomes just before a field)
        ... Depends: libfoo,
        ... # Inline comment (associated with the next line)
        ...          libbar,
        ... '''
        >>> dfile = parse_deb822_file(example_deb822_paragraph.splitlines())
        >>> paragraph = next(iter(dfile))
        >>> # With the defaults, you only deal with the semantic values
        >>> # - no leading or trailing whitespace on the first part of the value
        >>> paragraph["Package"]
        'foo'
        >>> # - no inline comments in multiline values (but whitespace will be present
        >>> #   subsequent lines.)
        >>> print(paragraph["Depends"])
        libfoo,
                 libbar,
        >>> paragraph['Foo'] = 'bar'
        >>> paragraph.get('Foo')
        'bar'
        >>> paragraph.get('Unknown-Field') is None
        True
        >>> # But you get asymmetric behaviour with set vs. get
        >>> paragraph['Foo'] = ' bar\n'
        >>> paragraph['Foo']
        'bar'
        >>> paragraph['Bar'] = ' bar\n#Comment\n another value\n'
        >>> # Note that the whitespace on the first line has been normalized.
        >>> print("Bar: " + paragraph['Bar'])
        Bar: bar
         another value
        >>> # The comment is present (in case you where wondering)
        >>> print(paragraph.get_kvpair_element('Bar').convert_to_text(), end='')
        Bar: bar
        #Comment
         another value
        >>> # On the other hand, you can choose to see the values as they are
        >>> # - We will just reset the paragraph as a "nothing up my sleeve"
        >>> dfile = parse_deb822_file(example_deb822_paragraph.splitlines())
        >>> paragraph = next(iter(dfile))
        >>> nonstd_dictview = paragraph.configured_view(
        ...     discard_comments_on_read=False,
        ...     auto_map_initial_line_whitespace=False,
        ...     # For paragraphs with duplicate fields, you can choose to get an error
        ...     # rather than the dict picking the first value available.
        ...     auto_resolve_ambiguous_fields=False,
        ...     auto_map_final_newline_in_multiline_values=False,
        ... )
        >>> # Because we have reset the state, Foo and Bar are no longer there.
        >>> 'Bar' not in paragraph and 'Foo' not in paragraph
        True
        >>> # We can now see the comments (discard_comments_on_read=False)
        >>> # (The leading whitespace in front of "libfoo" is due to
        >>> #  auto_map_initial_line_whitespace=False)
        >>> print(nonstd_dictview["Depends"], end='')
         libfoo,
        # Inline comment (associated with the next line)
                 libbar,
        >>> # And all the optional whitespace on the first value line
        >>> # (auto_map_initial_line_whitespace=False)
        >>> nonstd_dictview["Package"] == ' foo\n'
        True
        >>> # ... which will give you symmetric behaviour with set vs. get
        >>> nonstd_dictview['Foo'] = ' bar \n'
        >>> nonstd_dictview['Foo']
        ' bar \n'
        >>> nonstd_dictview['Bar'] = ' bar \n#Comment\n another value\n'
        >>> nonstd_dictview['Bar']
        ' bar \n#Comment\n another value\n'
        >>> # But then you get no help either.
        >>> try:
        ...     nonstd_dictview["Baz"] = "foo"
        ... except ValueError:
        ...     print("Rejected")
        Rejected
        >>> # With auto_map_initial_line_whitespace=False, you have to include minimum a newline
        >>> nonstd_dictview["Baz"] = "foo\n"
        >>> # The absence of leading whitespace gives you the terse variant at the expensive
        >>> # readability
        >>> paragraph.get_kvpair_element('Baz').convert_to_text()
        'Baz:foo\n'
        >>> # But because they are views, changes performed via one view is visible in the other
        >>> paragraph['Foo']
        'bar'
        >>> # The views show the values according to their own rules. Therefore, there is an
        >>> # asymmetric between paragraph['Foo'] and nonstd_dictview['Foo']
        >>> # Nevertheless, you can read or write the fields via either - enabling you to use
        >>> # the view that best suit your use-case for the given field.
        >>> 'Baz' in paragraph and nonstd_dictview.get('Baz') is not None
        True
        >>> # Deletion via the view also works
        >>> del nonstd_dictview['Baz']
        >>> 'Baz' not in paragraph and nonstd_dictview.get('Baz') is None
        True


    :param discard_comments_on_read: When getting a field value from the dict,
      this parameter decides how in-line comments are handled.  When setting
      the value, inline comments are still allowed and will be retained.
      However, keep in mind that this option makes getter and setter asymmetric
      as a "get" following a "set" with inline comments will omit the comments
      even if they are there (see the code example).
    :param auto_map_initial_line_whitespace: Special-case the first value line
      by trimming unnecessary whitespace leaving only the value.  For single-line
      values, all space including newline is pruned.  For multi-line values, the
      newline is preserved / needed to distinguish the first line from the
      following lines.  When setting a value, this option normalizes the
      whitespace of the initial line of the value field.
      Setting this option to True makes the dictionary behave more like the
      original Deb822 module.
    :param preserve_field_comments_on_field_updates: Whether to preserve the field
      comments when mutating the field.
    :param auto_resolve_ambiguous_fields: This parameter is only relevant for paragraphs
      that contain the same field multiple times (these are generally invalid).  If the
      caller requests an ambiguous field from an invalid paragraph via a plain field name,
      the returned dict-like object will refuse to resolve the field (not knowing which
      version to pick).  This parameter (if set to True) instead changes the error into
      assuming the caller wants the *first* variant.
    :param auto_map_final_newline_in_multiline_values: This parameter controls whether
      a multiline field will have / need a trailing newline. If True, the trailing
      newline is hidden on get and automatically added in set (if missing).
      Setting this option to True makes the dictionary behave more like the
      original Deb822 module.
    """
    view = Deb822DictishParagraphWrapper(
        self,
        discard_comments_on_read=discard_comments_on_read,
        auto_map_initial_line_whitespace=auto_map_initial_line_whitespace,
        auto_resolve_ambiguous_fields=auto_resolve_ambiguous_fields,
        preserve_field_comments_on_field_updates=preserve_field_comments_on_field_updates,
        auto_map_final_newline_in_multiline_values=auto_map_final_newline_in_multiline_values,
    )
    return view

2112

2113 @property

2114 def _paragraph(self):

2115 # type: () -> Deb822ParagraphElement

2116 return self

2117

def order_last(self, field):
    # type: (ParagraphKey) -> None
    """Re-order the given field so it is "last" in the paragraph

    This base implementation only raises NotImplementedError.
    """
    raise NotImplementedError  # pragma: no cover

2122

def order_first(self, field):
    # type: (ParagraphKey) -> None
    """Re-order the given field so it is "first" in the paragraph

    This base implementation only raises NotImplementedError.
    """
    raise NotImplementedError  # pragma: no cover

2127

def order_before(self, field, reference_field):
    # type: (ParagraphKey, ParagraphKey) -> None
    """Re-order the given field so it appears directly before the reference field in the paragraph

    The reference field must be present.

    This base implementation only raises NotImplementedError.

    :param field: The field to move.
    :param reference_field: The field that ``field`` is placed in front of.
    """
    raise NotImplementedError  # pragma: no cover

2134

def order_after(self, field, reference_field):
    # type: (ParagraphKey, ParagraphKey) -> None
    """Re-order the given field so it appears directly after the reference field in the paragraph

    The reference field must be present.

    This base implementation only raises NotImplementedError.

    :param field: The field to move.
    :param reference_field: The field that ``field`` is placed behind.
    """
    raise NotImplementedError  # pragma: no cover

2142

@property
def kvpair_count(self):
    # type: () -> int
    """Count of key/value pairs; this base version only raises NotImplementedError."""
    raise NotImplementedError  # pragma: no cover

2147

def iter_keys(self):
    # type: () -> Iterable[ParagraphKey]
    """Iterate over the paragraph keys; this base version only raises NotImplementedError."""
    raise NotImplementedError  # pragma: no cover

2151

def contains_kvpair_element(self, item):
    # type: (object) -> bool
    """Membership test for a key; this base version only raises NotImplementedError."""
    raise NotImplementedError  # pragma: no cover

2155

def get_kvpair_element(
    self,
    item,  # type: ParagraphKey
    use_get=False,  # type: bool
):
    # type: (...) -> Optional[Deb822KeyValuePairElement]
    """Look up the key/value pair element for ``item``.

    This base implementation only raises NotImplementedError.

    :param item: The paragraph key to look up.
    :param use_get: Lookup-mode flag interpreted by the concrete
      implementations.
    """
    raise NotImplementedError  # pragma: no cover

2163

def set_kvpair_element(self, key, value):
    # type: (ParagraphKey, Deb822KeyValuePairElement) -> None
    """Store ``value`` under ``key``; this base version only raises NotImplementedError."""
    raise NotImplementedError  # pragma: no cover

2167

def remove_kvpair_element(self, key):
    # type: (ParagraphKey) -> None
    """Remove the element for ``key``; this base version only raises NotImplementedError."""
    raise NotImplementedError  # pragma: no cover

2171

def sort_fields(
    self, key=None  # type: Optional[Callable[[str], Any]]
):
    # type: (...) -> None
    """Re-order all fields

    This base implementation only raises NotImplementedError.

    :param key: Provide a key function (same semantics as for sorted).  Keep in mind that
      the module preserves the case of field names - in general, callers are recommended
      to use "lower()" to normalize the case.
    """
    raise NotImplementedError  # pragma: no cover

2183

def set_field_to_simple_value(
    self,
    item,  # type: ParagraphKey
    simple_value,  # type: str
    *,
    preserve_original_field_comment=None,  # type: Optional[bool]
    field_comment=None,  # type: Optional[Commentish]
):
    # type: (...) -> None
    r"""Sets a field in this paragraph to a simple "word" or "phrase"

    In many cases, it is better for callers to just use the paragraph as
    if it was a dictionary.  However, this method does enable you to choose
    the field comment (if any), which can be a reason for using it.

    This is suitable for "simple" fields like "Package".  Example:

        >>> example_deb822_paragraph = '''
        ... Package: foo
        ... '''
        >>> dfile = parse_deb822_file(example_deb822_paragraph.splitlines())
        >>> p = next(iter(dfile))
        >>> p.set_field_to_simple_value("Package", "mscgen")
        >>> p.set_field_to_simple_value("Architecture", "linux-any kfreebsd-any",
        ...                             field_comment=['Only ported to linux and kfreebsd'])
        >>> p.set_field_to_simple_value("Priority", "optional")
        >>> print(p.dump(), end='')
        Package: mscgen
        # Only ported to linux and kfreebsd
        Architecture: linux-any kfreebsd-any
        Priority: optional
        >>> # Values are formatted nicely by default, but it does not work with
        >>> # multi-line values
        >>> p.set_field_to_simple_value("Foo", "bar\nbin\n")
        Traceback (most recent call last):
            ...
        ValueError: Cannot use set_field_to_simple_value for values with newlines

    :param item: Name of the field to set.  If the paragraph already
      contains the field, then it will be replaced.  If the field exists,
      then it will preserve its order in the paragraph.  Otherwise, it is
      added to the end of the paragraph.
      Note this can be a "paragraph key", which enables you to control
      *which* instance of a field is being replaced (in case of duplicate
      fields).
    :param simple_value: The text to use as the value.  The value must not
      contain newlines.  Leading and trailing whitespace will be stripped
      but space within the value is preserved.  The value cannot contain
      comments (i.e. if the "#" token appears in the value, then it is
      considered a value rather than the "start of a comment")
    :param preserve_original_field_comment: See the description for the
      parameter with the same name in the set_field_from_raw_string method.
    :param field_comment: See the description for the parameter with the same
      name in the set_field_from_raw_string method.
    :raises ValueError: If ``simple_value`` contains a newline.
    """
    if "\n" in simple_value:
        raise ValueError(
            "Cannot use set_field_to_simple_value for values with newlines"
        )

    # The trailing newline is required when other fields follow this one.
    # The single leading space is cosmetic (it separates the value from the
    # field separator); an empty value is written as just the newline.
    trimmed = simple_value.strip()
    raw_value = " " + trimmed + "\n" if trimmed else "\n"
    self.set_field_from_raw_string(
        item,
        raw_value,
        preserve_original_field_comment=preserve_original_field_comment,
        field_comment=field_comment,
    )

2259

def set_field_from_raw_string(
    self,
    item,  # type: ParagraphKey
    raw_string_value,  # type: str
    *,
    preserve_original_field_comment=None,  # type: Optional[bool]
    field_comment=None,  # type: Optional[Commentish]
):
    # type: (...) -> None
    """Sets a field in this paragraph to a given text value

    In many cases, it is better for callers to just use the paragraph as
    if it was a dictionary.  However, this method does enable you to choose
    the field comment (if any) and lets you have a higher degree of control
    over whitespace (on the first line), which can be a reason for using it.

    Example usage:

        >>> example_deb822_paragraph = '''
        ... Package: foo
        ... '''
        >>> dfile = parse_deb822_file(example_deb822_paragraph.splitlines())
        >>> p = next(iter(dfile))
        >>> raw_value = '''
        ... Build-Depends: debhelper-compat (= 12),
        ...                some-other-bd,
        ... # Comment
        ...                another-bd,
        ... '''.lstrip()  # Remove leading newline, but *not* the trailing newline
        >>> fname, new_value = raw_value.split(':', 1)
        >>> p.set_field_from_raw_string(fname, new_value)
        >>> print(p.dump(), end='')
        Package: foo
        Build-Depends: debhelper-compat (= 12),
                       some-other-bd,
        # Comment
                       another-bd,
        >>> # Format preserved

    :param item: Name of the field to set.  If the paragraph already
      contains the field, then it will be replaced.  Otherwise, it is
      added to the end of the paragraph.
      Note this can be a "paragraph key", which enables you to control
      *which* instance of a field is being replaced (in case of duplicate
      fields).
    :param raw_string_value: The text to use as the value.  The text must
      be valid deb822 syntax and is used *exactly* as it is given.
      Accordingly, multi-line values must include mandatory leading space
      on continuation lines, newlines after the value, etc. On the
      flip-side, any optional space or comments will be included.

      Note that the first line will *never* be read as a comment (if the
      first line of the value starts with a "#" then it will result
      in "Field-Name:#..." which is parsed as a value starting with "#"
      rather than a comment).
    :param preserve_original_field_comment: If True, then if there is an
      existing field and that has a comment, then the comment will remain
      after this operation.  This is the default if the `field_comment`
      parameter is omitted.
      Note that if the parameter is True and the item is ambiguous, this
      will raise an AmbiguousDeb822FieldKeyError.  When the parameter is
      omitted, the ambiguity is resolved automatically and if the resolved
      field has a comment then that will be preserved (assuming
      field_comment is None).
    :param field_comment: If not None, add or replace the comment for
      the field.  Each string in the list will become one comment
      line (inserted directly before the field name). Will appear in the
      same order as they do in the list.

      If you want complete control over the formatting of the comments,
      then ensure that each line start with "#" and end with "\\n" before
      the call.  Otherwise, leading/trailing whitespace is normalized
      and the missing "#"/"\\n" character is inserted.
    :raises ValueError: If both ``preserve_original_field_comment`` and
      ``field_comment`` are given, or if the new value is not acceptable
      (a line lacks its trailing newline, a continuation line starts with
      an invalid character, the last line is a comment, or the parser
      reports a syntax error).
    """

    new_content = []  # type: List[str]
    if preserve_original_field_comment is not None:
        if field_comment is not None:
            raise ValueError(
                'The "preserve_original_field_comment" conflicts with'
                ' "field_comment" parameter'
            )
    elif field_comment is not None:
        if not isinstance(field_comment, Deb822CommentElement):
            # Plain comment lines are emitted as text in front of the field
            # so the parser below attaches them to the new element.
            new_content.extend(_format_comment(x) for x in field_comment)
            field_comment = None
        preserve_original_field_comment = False

    field_name, _, _ = _unpack_key(item)

    try:
        original = self.get_kvpair_element(item, use_get=True)
    except AmbiguousDeb822FieldKeyError:
        if preserve_original_field_comment:
            # If we were asked to preserve the original comment, then we
            # require a strict lookup
            raise
        original = self.get_kvpair_element((field_name, 0), use_get=True)

    if preserve_original_field_comment is None:
        # We simplify preserve_original_field_comment after the lookup of the field.
        # Otherwise, we can get ambiguous key errors when updating an ambiguous field
        # when the caller did not explicitly ask for that behaviour.
        preserve_original_field_comment = True

    # If we already have the field, then preserve the original case
    cased_field_name = original.field_name if original else field_name
    raw_lines = ":".join((cased_field_name, raw_string_value)).splitlines(
        keepends=True
    )
    for line_no, line in enumerate(raw_lines, start=1):
        if not line.endswith("\n"):
            raise ValueError(
                "Line {i} in new value was missing trailing newline".format(i=line_no)
            )
        if line_no != 1 and not line.startswith((" ", "\t", "#")):
            msg = (
                "Line {i} in new value was invalid.  It must either start"
                ' with " " space (continuation line) or "#" (comment line).'
                ' The line started with "{line}"'
            )
            raise ValueError(msg.format(i=line_no, line=line[0]))
    if len(raw_lines) > 1 and raw_lines[-1].startswith("#"):
        raise ValueError("The last line in a value field cannot be a comment")
    new_content.extend(raw_lines)
    # As absurd as it might seem, it is easier to just use the parser to
    # construct the AST correctly
    deb822_file = parse_deb822_file(iter(new_content))
    if deb822_file.find_first_error_element():
        raise ValueError("Syntax error in new field value for " + field_name)
    paragraph = next(iter(deb822_file))
    assert isinstance(paragraph, Deb822NoDuplicateFieldsParagraphElement)
    value = paragraph.get_kvpair_element(field_name)
    assert value is not None
    if preserve_original_field_comment:
        if original:
            # Move (not copy) the comment element over to the new element.
            value.comment_element = original.comment_element
            original.comment_element = None
    elif field_comment is not None:
        value.comment_element = field_comment
    self.set_kvpair_element(item, value)

2403

@overload
def dump(
    self, fd  # type: IO[bytes]
):
    # type: (...) -> None
    pass

@overload
def dump(self):
    # type: () -> str
    pass

def dump(
    self, fd=None  # type: Optional[IO[bytes]]
):
    # type: (...) -> Optional[str]
    """Serialize the paragraph from the text of its tokens.

    :param fd: Optional binary file-like object.  When given, the text of
      each token is encoded as UTF-8 and written to it, one token at a time.
    :return: The concatenated token text when ``fd`` is omitted; otherwise
      None.
    """
    if fd is not None:
        for token in self.iter_tokens():
            fd.write(token.text.encode("utf-8"))
        return None
    return "".join(token.text for token in self.iter_tokens())

2425

2426

2427class Deb822NoDuplicateFieldsParagraphElement(Deb822ParagraphElement):

2428 """Paragraph implementation optimized for valid deb822 files

2429

2430 When there are no duplicated fields, we can use simpler and faster

2431 datastructures for common operations.

2432 """

2433

def __init__(
    self,
    kvpair_elements,  # type: List[Deb822KeyValuePairElement]
    kvpair_order,  # type: OrderedSet
):
    # type: (...) -> None
    """Index the elements by field name and record the field order.

    :param kvpair_elements: The key/value pair elements of the paragraph;
      they are stored in a dict keyed by their ``field_name``.
    :param kvpair_order: The ordered set of field names; it is stored as
      given (not copied).
    """
    super().__init__()
    by_field_name = {kv.field_name: kv for kv in kvpair_elements}
    self._kvpair_elements = by_field_name
    self._kvpair_order = kvpair_order
    self._init_parent_of_parts()

2444

@property
def kvpair_count(self):
    # type: () -> int
    """Number of key/value pair elements stored in this paragraph."""
    elements = self._kvpair_elements
    return len(elements)

2449

def order_last(self, field):
    # type: (ParagraphKey) -> None
    """Re-order the given field so it is "last" in the paragraph

    The key is unpacked with ``raise_if_indexed=True`` and the move is
    delegated to the field-order set.
    """
    field_name, _, _ = _unpack_key(field, raise_if_indexed=True)
    self._kvpair_order.order_last(field_name)

2455

def order_first(self, field):
    # type: (ParagraphKey) -> None
    """Re-order the given field so it is "first" in the paragraph

    The key is unpacked with ``raise_if_indexed=True`` and the move is
    delegated to the field-order set.
    """
    field_name, _, _ = _unpack_key(field, raise_if_indexed=True)
    self._kvpair_order.order_first(field_name)

2461

def order_before(self, field, reference_field):
    # type: (ParagraphKey, ParagraphKey) -> None
    """Re-order the given field so it appears directly before the reference field in the paragraph

    The reference field must be present.

    Both keys are unpacked with ``raise_if_indexed=True`` and the move is
    delegated to ``order_before`` of the field-order set.

    :param field: The field to move.
    :param reference_field: The field that ``field`` is placed in front of.
    """
    unpacked_field, _, _ = _unpack_key(field, raise_if_indexed=True)
    unpacked_ref_field, _, _ = _unpack_key(reference_field, raise_if_indexed=True)
    self._kvpair_order.order_before(unpacked_field, unpacked_ref_field)

2470

def order_after(self, field, reference_field):
    # type: (ParagraphKey, ParagraphKey) -> None
    """Re-order the given field so it appears directly after the reference field in the paragraph

    The reference field must be present.

    Both keys are unpacked with ``raise_if_indexed=True`` and the move is
    delegated to ``order_after`` of the field-order set.

    :param field: The field to move.
    :param reference_field: The field that ``field`` is placed behind.
    """
    unpacked_field, _, _ = _unpack_key(field, raise_if_indexed=True)
    unpacked_ref_field, _, _ = _unpack_key(reference_field, raise_if_indexed=True)
    self._kvpair_order.order_after(unpacked_field, unpacked_ref_field)

2480

2481 # Overload to narrow the type to just str.

2482 def __iter__(self):

2483 # type: () -> Iterator[str]

2484 return iter(str(k) for k in self._kvpair_order)

2485

def iter_keys(self):
    # type: () -> Iterable[str]
    """Yield each field name (converted with ``str``) in field order."""
    for field_name in self._kvpair_order:
        yield str(field_name)

2489

def remove_kvpair_element(self, key):
    # type: (ParagraphKey) -> None
    """Remove the field identified by ``key`` from the paragraph.

    The cached size is reset first; the field is then dropped from both the
    element dict and the field-order set.

    :raises KeyError: If the field is not present in the element dict.
    """
    self._full_size_cache = None
    field_name, _, _ = _unpack_key(key, raise_if_indexed=True)
    del self._kvpair_elements[field_name]
    self._kvpair_order.remove(field_name)

2496

def contains_kvpair_element(self, item):
    # type: (object) -> bool
    """Tell whether the paragraph has a field for ``item``.

    Anything that is not a str, a tuple or a Deb822FieldNameToken is
    answered with False without further inspection.
    """
    if isinstance(item, (str, tuple, Deb822FieldNameToken)):
        paragraph_key = cast("ParagraphKey", item)
        field_name, _, _ = _unpack_key(paragraph_key, raise_if_indexed=True)
        return field_name in self._kvpair_elements
    return False

2504

def get_kvpair_element(
    self,
    item,  # type: ParagraphKey
    use_get=False,  # type: bool
):
    # type: (...) -> Optional[Deb822KeyValuePairElement]
    """Look up the key/value pair element for the given key

    :param item: The key to look up. Unpacked with ``raise_if_indexed=True``.
    :param use_get: When true, a missing field yields `None` (`dict.get`
      semantics); otherwise a missing field raises `KeyError`.
    """
    field_key, _, _ = _unpack_key(item, raise_if_indexed=True)
    known_elements = self._kvpair_elements
    return known_elements.get(field_key) if use_get else known_elements[field_key]

2515

def set_kvpair_element(self, key, value):
    # type: (ParagraphKey, Deb822KeyValuePairElement) -> None
    """Store the key/value pair element under the given key

    An existing element for the same field is replaced and detached from the
    paragraph; a new field is added to the field order.

    :param key: The key to store the element under; it must agree with the
      field name (or be the field name token) of `value`.
    :param value: The key/value pair element to insert.
    :raises ValueError: If `key` does not match the field of `value`.
    """
    key, _, _ = _unpack_key(key, raise_if_indexed=True)
    if isinstance(key, Deb822FieldNameToken):
        if key is not value.field_token:
            raise ValueError(
                "Key is a Deb822FieldNameToken, but not *the* Deb822FieldNameToken"
                " for the value"
            )
    elif key != value.field_name:
        raise ValueError(
            "Cannot insert value under a different field value than field name"
            " from its Deb822FieldNameToken implies"
        )
    # Always index by the string owned by the field name token; that string
    # has to be kept in memory either way.
    key = value.field_name

    replaced_kvpair = self._kvpair_elements.get(key)
    self._full_size_cache = None
    self._kvpair_elements[key] = value
    self._kvpair_order.append(key)
    # Detach the old element first so that re-inserting the very same element
    # still ends with it attached to this paragraph.
    if replaced_kvpair is not None:
        replaced_kvpair.parent_element = None
    value.parent_element = self

2542

def sort_fields(self, key=None):
    # type: (Optional[Callable[[str], Any]]) -> None
    """Re-order all fields

    :param key: A key function (same semantics as for `sorted`). Keep in mind
      that the module preserves the case of field names - in general, callers
      are recommended to use "lower()" to normalize the case. When omitted,
      `default_field_sort_key` is used.
    """
    # Only the field that is currently last can lack its final newline; give
    # it one before it may end up in the middle of the paragraph.
    final_field_name = next(iter(reversed(self._kvpair_order)), None)
    if final_field_name is not None:
        final_kvpair = self._kvpair_elements[cast("_strI", final_field_name)]
        if final_kvpair.value_element.add_final_newline_if_missing():
            self._full_size_cache = None

    sort_key = default_field_sort_key if key is None else key
    self._kvpair_order = OrderedSet(sorted(self._kvpair_order, key=sort_key))

2561

def iter_parts(self):
    # type: () -> Iterable[TokenOrElement]
    """Lazily yield the key/value pair elements of the paragraph in field order"""
    for field_name in cast("Iterable[_strI]", self._kvpair_order):
        yield self._kvpair_elements[field_name]

2568

2569

class Deb822DuplicateFieldsParagraphElement(Deb822ParagraphElement):
    """Paragraph implementation that copes with a field appearing more than once

    All key/value pairs are kept in a linked list (`_kvpair_order`) in
    paragraph order. In addition, `_kvpair_elements` maps every field name to
    the list of linked-list nodes carrying that field, in their relative
    order, which is what makes `(name, index)` keys work.
    """

    def __init__(self, kvpair_elements):
        # type: (List[Deb822KeyValuePairElement]) -> None
        super().__init__()
        self._kvpair_order = LinkedList()  # type: LinkedList[Deb822KeyValuePairElement]
        self._kvpair_elements = {}  # type: Dict[_strI, List[KVPNode]]
        self._init_kvpair_fields(kvpair_elements)
        self._init_parent_of_parts()

    @property
    def has_duplicate_fields(self):
        # type: () -> bool
        """Whether at least one field name currently appears more than once"""
        # Most likely, the answer is "True" but if the caller "fixes" the problem
        # then this can return "False"
        return len(self._kvpair_order) > len(self._kvpair_elements)

    def _init_kvpair_fields(self, kvpairs):
        # type: (Iterable[Deb822KeyValuePairElement]) -> None
        """Populate the (empty) order list and per-field index from `kvpairs`"""
        assert not self._kvpair_order
        assert not self._kvpair_elements
        for kv in kvpairs:
            node = self._kvpair_order.append(kv)
            self._kvpair_elements.setdefault(kv.field_name, []).append(node)

    def _nodes_being_relocated(self, field):
        # type: (ParagraphKey) -> Tuple[List[KVPNode], List[KVPNode]]
        """Resolve `field` to the nodes that a re-order operation should move

        :returns: A tuple of (all nodes of the field, nodes to relocate). For
          a plain field name, both are the same list (bulk move); for an
          indexed key or a field name token, the second list holds exactly
          the one node the key denotes.
        :raises KeyError: If the field is not present.
        """
        key, index, name_token = _unpack_key(field)
        nodes = self._kvpair_elements[key]
        if name_token is None and index is None:
            return nodes, nodes
        single_node = self._resolve_to_single_node(nodes, key, index, name_token)
        assert single_node is not None
        return nodes, [single_node]

    def order_last(self, field):
        # type: (ParagraphKey) -> None
        """Re-order the given field so it is "last" in the paragraph"""
        nodes, nodes_being_relocated = self._nodes_being_relocated(field)
        assert len(nodes_being_relocated) == 1 or len(nodes) == len(
            nodes_being_relocated
        )

        kvpair_order = self._kvpair_order
        for node in nodes_being_relocated:
            if kvpair_order.tail_node is node:
                # Special case for relocating a single node that happens to be the last.
                continue
            kvpair_order.remove_node(node)
            # assertion for mypy
            assert kvpair_order.tail_node is not None
            kvpair_order.insert_node_after(node, kvpair_order.tail_node)

        if (
            len(nodes_being_relocated) == 1
            and nodes_being_relocated[0] is not nodes[-1]
        ):
            # Keep the per-field node list in sync with the new relative order.
            single_node = nodes_being_relocated[0]
            nodes.remove(single_node)
            nodes.append(single_node)

    def order_first(self, field):
        # type: (ParagraphKey) -> None
        """Re-order the given field so it is "first" in the paragraph"""
        nodes, nodes_being_relocated = self._nodes_being_relocated(field)
        assert len(nodes_being_relocated) == 1 or len(nodes) == len(
            nodes_being_relocated
        )

        kvpair_order = self._kvpair_order
        # Use "reversed" to preserve the relative order of the nodes assuming a
        # bulk reorder: every node is placed in front of the current head, so
        # the node that must end up first has to be moved last.
        for node in reversed(nodes_being_relocated):
            if kvpair_order.head_node is node:
                # Special case for relocating a single node that happens to be the first.
                continue
            kvpair_order.remove_node(node)
            # assertion for mypy
            assert kvpair_order.head_node is not None
            kvpair_order.insert_node_before(node, kvpair_order.head_node)

        if len(nodes_being_relocated) == 1 and nodes_being_relocated[0] is not nodes[0]:
            # Keep the per-field node list in sync with the new relative order.
            single_node = nodes_being_relocated[0]
            nodes.remove(single_node)
            nodes.insert(0, single_node)

    def order_before(self, field, reference_field):
        # type: (ParagraphKey, ParagraphKey) -> None
        """Re-order the given field so it appears directly before the reference field

        The reference field must be present. If the reference key is
        ambiguous, the first instance of the reference field is used.

        :raises ValueError: If the field would be ordered relative to itself.
        """
        nodes, nodes_being_relocated = self._nodes_being_relocated(field)
        assert len(nodes_being_relocated) == 1 or len(nodes) == len(
            nodes_being_relocated
        )
        # For "before" we always use the "first" variant as reference in case of doubt
        _, reference_nodes = self._nodes_being_relocated(reference_field)
        reference_node = reference_nodes[0]
        if reference_node in nodes_being_relocated:
            raise ValueError("Cannot re-order a field relative to itself")

        kvpair_order = self._kvpair_order
        for node in nodes_being_relocated:
            kvpair_order.remove_node(node)
            kvpair_order.insert_node_before(node, reference_node)

        if len(nodes_being_relocated) == 1 and len(nodes) > 1:
            # Regenerate the (new) relative field order.
            field_name = nodes_being_relocated[0].value.field_name
            self._regenerate_relative_kvapir_order(field_name)

    def order_after(self, field, reference_field):
        # type: (ParagraphKey, ParagraphKey) -> None
        """Re-order the given field so it appears directly after the reference field

        The reference field must be present. If the reference key is
        ambiguous, the last instance of the reference field is used.

        :raises ValueError: If the field would be ordered relative to itself.
        """
        nodes, nodes_being_relocated = self._nodes_being_relocated(field)
        assert len(nodes_being_relocated) == 1 or len(nodes) == len(
            nodes_being_relocated
        )
        _, reference_nodes = self._nodes_being_relocated(reference_field)
        # For "after" we always use the "last" variant as reference in case of doubt
        reference_node = reference_nodes[-1]
        if reference_node in nodes_being_relocated:
            raise ValueError("Cannot re-order a field relative to itself")

        kvpair_order = self._kvpair_order
        # Use "reversed" to preserve the relative order of the nodes assuming a bulk reorder
        for node in reversed(nodes_being_relocated):
            kvpair_order.remove_node(node)
            kvpair_order.insert_node_after(node, reference_node)

        if len(nodes_being_relocated) == 1 and len(nodes) > 1:
            # Regenerate the (new) relative field order.
            field_name = nodes_being_relocated[0].value.field_name
            self._regenerate_relative_kvapir_order(field_name)

    def _regenerate_relative_kvapir_order(self, field_name):
        # type: (_strI) -> None
        """Rebuild the node list of `field_name` from the current paragraph order"""
        self._kvpair_elements[field_name] = [
            node
            for node in self._kvpair_order.iter_nodes()
            if node.value.field_name == field_name
        ]

    def iter_parts(self):
        # type: () -> Iterable[TokenOrElement]
        """Yield the key/value pair elements in paragraph order"""
        yield from self._kvpair_order

    @property
    def kvpair_count(self):
        # type: () -> int
        """Number of key/value pairs in the paragraph (duplicates are counted)"""
        return len(self._kvpair_order)

    def iter_keys(self):
        # type: () -> Iterable[ParagraphKey]
        """Yield the field name of every key/value pair (duplicates repeat)"""
        yield from (kv.field_name for kv in self._kvpair_order)

    def _resolve_to_single_node(
        self,
        nodes,  # type: List[KVPNode]
        key,  # type: str
        index,  # type: Optional[int]
        name_token,  # type: Optional[Deb822FieldNameToken]
        use_get=False,  # type: bool
    ):
        # type: (...) -> Optional[KVPNode]
        """Pick the one node out of `nodes` that the (key, index, token) triple denotes

        :param nodes: All nodes of the field `key`.
        :param key: The field name (only used for error messages).
        :param index: Index into `nodes`, if the caller provided one.
        :param name_token: The field name token used as key, if any.
        :param use_get: When true, an invalid index yields `None` rather
          than raising `KeyError`.
        :raises AmbiguousDeb822FieldKeyError: If no index was given, `nodes`
          does not hold exactly one node and `name_token` does not identify
          one of them.
        :raises KeyError: If the index is invalid and `use_get` is false.
        """
        if index is None:
            if len(nodes) != 1:
                if name_token is not None:
                    node = self._find_node_via_name_token(name_token, nodes)
                    if node is not None:
                        return node
                msg = (
                    "Ambiguous key {key} - the field appears {res_len} times. Use"
                    " ({key}, index) to denote which instance of the field you want.  (Index"
                    " can be 0..{res_len_1} or e.g. -1 to denote the last field)"
                )
                raise AmbiguousDeb822FieldKeyError(
                    msg.format(key=key, res_len=len(nodes), res_len_1=len(nodes) - 1)
                )
            index = 0
        try:
            return nodes[index]
        except IndexError:
            if use_get:
                return None
            msg = 'Field "{key}" was present but the index "{index}" was invalid.'
            raise KeyError(msg.format(key=key, index=index))

    def get_kvpair_element(
        self,
        item,  # type: ParagraphKey
        use_get=False,  # type: bool
    ):
        # type: (...) -> Optional[Deb822KeyValuePairElement]
        """Look up the key/value pair element for the given key

        :param item: Field name, (field name, index) or field name token.
        :param use_get: When true, a missing field or invalid index yields
          `None` instead of raising `KeyError`.
        :raises AmbiguousDeb822FieldKeyError: If the key matches several
          instances of the field.
        """
        key, index, name_token = _unpack_key(item)
        if use_get:
            nodes = self._kvpair_elements.get(key)
            if nodes is None:
                return None
        else:
            nodes = self._kvpair_elements[key]
        node = self._resolve_to_single_node(
            nodes, key, index, name_token, use_get=use_get
        )
        if node is not None:
            return node.value
        return None

    @staticmethod
    def _find_node_via_name_token(
        name_token,  # type: Deb822FieldNameToken
        elements,  # type: Iterable[KVPNode]
    ):
        # type: (...) -> Optional[KVPNode]
        """Return the node whose element owns exactly `name_token` (or None)"""
        # if we are given a name token, then it is non-ambiguous if we have exactly
        # that name token in our list of nodes.  It will be an O(n) lookup but we
        # probably do not have that many duplicate fields (and even if do, it is not
        # exactly a valid file, so there little reason to optimize for it)
        for node in elements:
            if name_token is node.value.field_token:
                return node
        return None

    def contains_kvpair_element(self, item):
        # type: (object) -> bool
        """Tell whether the paragraph has a field for the given key

        An ambiguous key counts as present (the field exists, just more than
        once).
        """
        if not isinstance(item, (str, tuple, Deb822FieldNameToken)):
            return False
        item = cast("ParagraphKey", item)
        try:
            return self.get_kvpair_element(item, use_get=True) is not None
        except AmbiguousDeb822FieldKeyError:
            return True

    def set_kvpair_element(self, key, value):
        # type: (ParagraphKey, Deb822KeyValuePairElement) -> None
        """Add or replace a key/value pair element

        * With a plain field name, all existing instances of the field are
          replaced by `value` (which takes the position of the first one).
        * With an index or the field name token of an existing instance,
          only that instance is replaced.
        * If the field is not present, `value` is appended to the paragraph.

        :raises ValueError: If the key does not match the field of `value`.
        :raises KeyError: If an index other than 0 is used for a field that
          is not present.
        """
        key, index, name_token = _unpack_key(key)
        if name_token:
            if name_token is not value.field_token:
                original_nodes = self._kvpair_elements.get(value.field_name)
                original_node = None
                if original_nodes is not None:
                    original_node = self._find_node_via_name_token(
                        name_token, original_nodes
                    )

                if original_node is None:
                    raise ValueError(
                        "Key is a Deb822FieldNameToken, but not *the*"
                        " Deb822FieldNameToken for the value nor the"
                        " Deb822FieldNameToken for an existing field in the paragraph"
                    )
                # Primarily for mypy's sake
                assert original_nodes is not None
                # Rely on the index-based code below to handle update.
                index = original_nodes.index(original_node)
            key = value.field_name
        else:
            if key != value.field_name:
                raise ValueError(
                    "Cannot insert value under a different field value than field name"
                    " from its Deb822FieldNameToken implies"
                )
            # Use the string from the Deb822FieldNameToken as it is a _strI and has the same value
            # (memory optimization)
            key = value.field_name
        self._full_size_cache = None
        original_nodes = self._kvpair_elements.get(key)
        if not original_nodes:
            if index is not None and index != 0:
                msg = (
                    "Cannot replace field ({key}, {index}) as the field does not exist"
                    " in the first place.  Please index-less key or ({key}, 0) if you"
                    " want to add the field."
                )
                raise KeyError(msg.format(key=key, index=index))
            node = self._kvpair_order.append(value)
            self._kvpair_elements.setdefault(key, []).append(node)
            # A newly added element belongs to this paragraph just like a
            # replacement does.
            value.parent_element = self
            return

        replace_all = False
        if index is None:
            replace_all = True
            node = original_nodes[0]
            if len(original_nodes) != 1:
                self._kvpair_elements[key] = [node]
        else:
            # We insist on there being an original node, which as a side effect ensures
            # you cannot add additional copies of the field.  This means that you cannot
            # make the problem worse.
            node = original_nodes[index]

        # Replace the value of the existing node plus do a little dance
        # for the parent element part.
        node.value.parent_element = None
        value.parent_element = self
        node.value = value

        if replace_all and len(original_nodes) != 1:
            # If we were in a replace-all mode, discard any remaining nodes
            for n in original_nodes[1:]:
                n.value.parent_element = None
                self._kvpair_order.remove_node(n)

    def remove_kvpair_element(self, key):
        # type: (ParagraphKey) -> None
        """Remove one or all instances of a field

        A plain field name removes every instance of the field; an indexed
        key or a field name token removes only the instance it denotes.

        :raises KeyError: If the field is not present, or if the index / field
          name token does not match an instance of the field.
        """
        key, idx, name_token = _unpack_key(key)
        field_list = self._kvpair_elements[key]

        if name_token is None and idx is None:
            self._full_size_cache = None
            # Remove all case
            for node in field_list:
                node.value.parent_element = None
                self._kvpair_order.remove_node(node)
            del self._kvpair_elements[key]
            return

        if name_token is not None:
            # Indirection between original_node and node for mypy's sake
            original_node = self._find_node_via_name_token(name_token, field_list)
            if original_node is None:
                msg = 'The field "{key}" is present but key used to access it is not.'
                raise KeyError(msg.format(key=key))
            node = original_node
        else:
            assert idx is not None
            try:
                node = field_list[idx]
            except IndexError:
                # field_list is a list, so a bad index surfaces as IndexError;
                # it is reported as KeyError like any other unusable key.
                msg = 'The field "{key}" is present, but the index "{idx}" was invalid.'
                raise KeyError(msg.format(key=key, idx=idx))

        self._full_size_cache = None
        if len(field_list) == 1:
            del self._kvpair_elements[key]
        else:
            field_list.remove(node)
        node.value.parent_element = None
        self._kvpair_order.remove_node(node)

    def sort_fields(self, key=None):
        # type: (Optional[Callable[[str], Any]]) -> None
        """Re-order all fields

        :param key: Provide a key function (same semantics as for sorted).  Keep in mind that
          the module preserves the case of field names - in general, callers are recommended
          to use "lower()" to normalize the case.
        """

        if key is None:
            key = default_field_sort_key

        # Work around mypy that cannot seem to shred the Optional notion
        # without this little indirection
        key_impl = key

        def _actual_key(kvpair):
            # type: (Deb822KeyValuePairElement) -> Any
            return key_impl(kvpair.field_name)

        # Only the pair that is currently last can lack its final newline;
        # give it one before it may end up in the middle of the paragraph.
        for last_kvpair in reversed(self._kvpair_order):
            if last_kvpair.value_element.add_final_newline_if_missing():
                self._full_size_cache = None
            break

        sorted_kvpair_list = sorted(self._kvpair_order, key=_actual_key)
        self._kvpair_order = LinkedList()
        self._kvpair_elements = {}
        self._init_kvpair_fields(sorted_kvpair_list)

2951

2952

class Deb822FileElement(Deb822Element):
    """Represents the entire deb822 file"""

    def __init__(self, token_and_elements):
        # type: (LinkedList[TokenOrElement]) -> None
        super().__init__()
        self._token_and_elements = token_and_elements
        self._init_parent_of_parts()

    @classmethod
    def new_empty_file(cls):
        # type: () -> Deb822FileElement
        """Creates a new Deb822FileElement with no contents

        Note that a deb822 file must be non-empty to be considered valid
        """
        return cls(LinkedList())

    @property
    def is_valid_file(self):
        # type: () -> bool
        """Returns true if the file is valid

        Invalid elements include error elements (Deb822ErrorElement) but also
        issues such as paragraphs with duplicate fields or "empty" files
        (a valid deb822 file contains at least one paragraph).
        """
        had_paragraph = False
        for paragraph in self:
            had_paragraph = True
            if not paragraph or paragraph.has_duplicate_fields:
                return False

        if not had_paragraph:
            return False

        return self.find_first_error_element() is None

    def find_first_error_element(self):
        # type: () -> Optional[Deb822ErrorElement]
        """Returns the first Deb822ErrorElement (or None) in the file"""
        return next(
            iter(self.iter_recurse(only_element_or_token_type=Deb822ErrorElement)), None
        )

    def __iter__(self):
        # type: () -> Iterator[Deb822ParagraphElement]
        """Iterate over the paragraphs of the file"""
        return iter(self.iter_parts_of_type(Deb822ParagraphElement))

    def iter_parts(self):
        # type: () -> Iterable[TokenOrElement]
        """Yield all top-level tokens and elements of the file in order"""
        yield from self._token_and_elements

    def insert(self, idx, para):
        # type: (int, Deb822ParagraphElement) -> None
        """Inserts a paragraph into the file at the given "index" of paragraphs

        Note that if the index is between two paragraphs containing a "free
        floating" comment (e.g. paragraph/start-of-file, empty line, comment,
        empty line, paragraph) then it is unspecified which "side" of the
        comment the new paragraph will appear and this may change between
        versions of python-debian.


            >>> original = '''
            ... Package: libfoo-dev
            ... Depends: libfoo1 (= ${binary:Version}), ${shlib:Depends}, ${misc:Depends}
            ... '''.lstrip()
            >>> deb822_file = parse_deb822_file(original.splitlines())
            >>> para1 = Deb822ParagraphElement.new_empty_paragraph()
            >>> para1["Source"] = "foo"
            >>> para1["Build-Depends"] = "debhelper-compat (= 13)"
            >>> para2 = Deb822ParagraphElement.new_empty_paragraph()
            >>> para2["Package"] = "libfoo1"
            >>> para2["Depends"] = "${shlib:Depends}, ${misc:Depends}"
            >>> deb822_file.insert(0, para1)
            >>> deb822_file.insert(1, para2)
            >>> expected = '''
            ... Source: foo
            ... Build-Depends: debhelper-compat (= 13)
            ...
            ... Package: libfoo1
            ... Depends: ${shlib:Depends}, ${misc:Depends}
            ...
            ... Package: libfoo-dev
            ... Depends: libfoo1 (= ${binary:Version}), ${shlib:Depends}, ${misc:Depends}
            ... '''.lstrip()
            >>> deb822_file.dump() == expected
            True
        """

        anchor_node = None
        needs_newline = True
        self._full_size_cache = None
        if idx == 0:
            # Special-case, if idx is 0, then we insert it before everything else.
            # This is mostly a cosmetic choice for corner cases involving free-floating
            # comments in the file.
            if not self._token_and_elements:
                self.append(para)
                return
            anchor_node = self._token_and_elements.head_node
            needs_newline = bool(self._token_and_elements)
        else:
            # The anchor is the paragraph that currently has index `idx`.
            i = 0
            for node in self._token_and_elements.iter_nodes():
                entry = node.value
                if isinstance(entry, Deb822ParagraphElement):
                    i += 1
                    if idx == i - 1:
                        anchor_node = node
                        break

        if anchor_node is None:
            # Empty list or idx after the last paragraph both degenerate into append
            self.append(para)
        else:
            if needs_newline:
                # Remember to inject the "separating" newline between two paragraphs
                nl_token = self._set_parent(Deb822WhitespaceToken("\n"))
                anchor_node = self._token_and_elements.insert_before(
                    nl_token, anchor_node
                )
            self._token_and_elements.insert_before(self._set_parent(para), anchor_node)

    def append(self, paragraph):
        # type: (Deb822ParagraphElement) -> None
        """Appends a paragraph to the file

            >>> deb822_file = Deb822FileElement.new_empty_file()
            >>> para1 = Deb822ParagraphElement.new_empty_paragraph()
            >>> para1["Source"] = "foo"
            >>> para1["Build-Depends"] = "debhelper-compat (= 13)"
            >>> para2 = Deb822ParagraphElement.new_empty_paragraph()
            >>> para2["Package"] = "foo"
            >>> para2["Depends"] = "${shlib:Depends}, ${misc:Depends}"
            >>> deb822_file.append(para1)
            >>> deb822_file.append(para2)
            >>> expected = '''
            ... Source: foo
            ... Build-Depends: debhelper-compat (= 13)
            ...
            ... Package: foo
            ... Depends: ${shlib:Depends}, ${misc:Depends}
            ... '''.lstrip()
            >>> deb822_file.dump() == expected
            True

        :raises ValueError: If the paragraph already belongs to a file.
        """
        tail_element = self._token_and_elements.tail
        if paragraph.parent_element is not None:
            if paragraph.parent_element is self:
                raise ValueError("Paragraph is already a part of this file")
            raise ValueError("Paragraph is already part of another Deb822File")

        self._full_size_cache = None
        # We need a separating newline if there is not a whitespace token at the end of the file.
        # Note the special case where the file ends on a comment; here we insert a whitespace too
        # to be sure.  Otherwise, we would have to check that there is an empty line before that
        # comment and that is too much effort.
        if tail_element and not isinstance(tail_element, Deb822WhitespaceToken):
            self._token_and_elements.append(
                self._set_parent(Deb822WhitespaceToken("\n"))
            )
        self._token_and_elements.append(self._set_parent(paragraph))

    def remove(self, paragraph):
        # type: (Deb822ParagraphElement) -> None
        """Removes a paragraph (plus one adjacent separating whitespace token) from the file

        :raises ValueError: If the paragraph does not have this file as parent.
        :raises RuntimeError: If the paragraph claims this file as parent but
          is not among the file's elements.
        """
        if paragraph.parent_element is not self:
            raise ValueError("Paragraph is part of a different file")
        for node in self._token_and_elements.iter_nodes():
            if node.value is paragraph:
                break
        else:
            # Without the "else", a failed search would leave `node` bound to
            # the last node of the file, which would then be removed instead.
            raise RuntimeError("unable to find paragraph")
        self._full_size_cache = None
        previous_node = node.previous_node
        next_node = node.next_node
        self._token_and_elements.remove_node(node)
        # Also drop one neighbouring whitespace token: the following one if
        # there is a following node, otherwise the preceding one.
        if next_node is None:
            if previous_node and isinstance(previous_node.value, Deb822WhitespaceToken):
                self._token_and_elements.remove_node(previous_node)
        else:
            if isinstance(next_node.value, Deb822WhitespaceToken):
                self._token_and_elements.remove_node(next_node)
        paragraph.parent_element = None

    def _set_parent(self, t):
        # type: (TE) -> TE
        """Mark `t` as a child of this file and return it"""
        t.parent_element = self
        return t

    def position_in_parent(self, *, skip_leading_comments: bool = True) -> Position:
        # Recursive base-case
        return START_POSITION

    def position_in_file(self, *, skip_leading_comments: bool = True) -> Position:
        # By definition
        return START_POSITION

    @overload
    def dump(
        self, fd  # type: IO[bytes]
    ):
        # type: (...) -> None
        pass

    @overload
    def dump(self):
        # type: () -> str
        pass

    def dump(
        self, fd=None  # type: Optional[IO[bytes]]
    ):
        # type: (...) -> Optional[str]
        """Serialize the file

        :param fd: Optional binary file object. When given, the text of every
          token is written to it encoded as UTF-8 and None is returned.
          When omitted, the file content is returned as a `str`.
        """
        if fd is None:
            return "".join(t.text for t in self.iter_tokens())
        for token in self.iter_tokens():
            fd.write(token.text.encode("utf-8"))
        return None

3174

3175

# Parser helpers created via combine_into_replacement (imported from ._util).
# NOTE(review): presumably each helper rewrites a token/element stream so that
# instances of the first type are gathered into one instance of the second
# type - confirm against ._util.combine_into_replacement.
_combine_error_tokens_into_elements = combine_into_replacement(
    Deb822ErrorToken, Deb822ErrorElement
)
_combine_comment_tokens_into_elements = combine_into_replacement(
    Deb822CommentToken, Deb822CommentElement
)
_combine_vl_elements_into_value_elements = combine_into_replacement(
    Deb822ValueLineElement, Deb822ValueElement
)
# For paragraphs, Deb822ParagraphElement.from_kvpairs is passed as the
# constructor instead of relying on the replacement type itself.
_combine_kvp_elements_into_paragraphs = combine_into_replacement(
    Deb822KeyValuePairElement,
    Deb822ParagraphElement,
    constructor=Deb822ParagraphElement.from_kvpairs,
)

3190

3191

def _parsed_value_render_factory(discard_comments):
    # type: (bool) -> Callable[[Deb822ParsedValueElement], str]
    """Select the function used to turn a Deb822ParsedValueElement into text

    :param discard_comments: When true, the returned renderer is the variant
      that leaves comments out; otherwise the regular text conversion.
    """
    if discard_comments:
        return Deb822ParsedValueElement.convert_to_text_without_comments
    return Deb822ParsedValueElement.convert_to_text

3199

3200

# Ready-made ListInterpretation instances for list-like field values.
# NOTE(review): the meaning of the positional arguments is defined by
# ListInterpretation (not visible here); judging by the values they look like
# tokenizer, value parser, value element type, separator token type, factory
# for a default separator and render factory - confirm against the class.

# Values separated by whitespace; new separators are a single space.
LIST_SPACE_SEPARATED_INTERPRETATION = ListInterpretation(
    whitespace_split_tokenizer,
    _parse_whitespace_list_value,
    Deb822ParsedValueElement,
    Deb822SemanticallySignificantWhiteSpace,
    lambda: Deb822SpaceSeparatorToken(" "),
    _parsed_value_render_factory,
)
# Values separated by commas.
LIST_COMMA_SEPARATED_INTERPRETATION = ListInterpretation(
    comma_split_tokenizer,
    _parse_comma_list_value,
    Deb822ParsedValueElement,
    Deb822CommaToken,
    Deb822CommaToken,
    _parsed_value_render_factory,
)
# Same as the comma separated variant, except that values are parsed with
# _parse_uploaders_list_value.
LIST_UPLOADERS_INTERPRETATION = ListInterpretation(
    comma_split_tokenizer,
    _parse_uploaders_list_value,
    Deb822ParsedValueElement,
    Deb822CommaToken,
    Deb822CommaToken,
    _parsed_value_render_factory,
)

3225

3226

def _non_end_of_line_token(v):
    # type: (TokenOrElement) -> bool
    """Predicate that is true for everything except a whitespace token that is exactly a newline

    Used to consume tokens until the newline.
    """
    if isinstance(v, Deb822WhitespaceToken):
        return v.text != "\n"
    return True

3231

3232

def _build_value_line(
    token_stream,  # type: Iterable[Union[TokenOrElement, Deb822CommentElement]]
):
    # type: (...) -> Iterable[Union[TokenOrElement, Deb822ValueLineElement]]
    """Parser helper - consumes the tokens that make up one line of a value and
    turns them into a Deb822ValueLineElement

    This is a generator. Tokens that are not part of a value line are passed
    through unchanged. A value line starts after a field separator or at a
    continuation line token (optionally preceded by a comment element) and
    runs up to and including the newline token.

    :param token_stream: The tokens/elements to process.
    :returns: The stream with the tokens of every value line replaced by a
      Deb822ValueLineElement.
    """
    buffered_stream = BufferingIterator(token_stream)

    # Deb822ValueLineElement is a bit tricky because of how we handle whitespace
    # and comments.
    #
    # In relation to comments, then only continuation lines can have comments.
    # If there is a comment before a "K: V" line, then the comment is associated
    # with the field rather than the value.
    #
    # On the whitespace front, then we separate syntactical mandatory whitespace
    # from optional whitespace.  As an example:
    #
    # """
    # # some comment associated with the Depends field
    # Depends:_foo_$
    # # some comment associated with the line containing "bar"
    # !________bar_$
    # """
    #
    # Where "$" and "!" represents mandatory whitespace (the newline and the first
    # space are required for the file to be parsed correctly), where as "_" is
    # "optional" whitespace (from a syntactical point of view).
    #
    # This distinction enable us to facilitate APIs for easy removal/normalization
    # of redundant whitespaces without having programmers worry about trashing
    # the file.
    #
    #

    # State for the value line currently being assembled; reset after each
    # emitted Deb822ValueLineElement.
    comment_element = None
    continuation_line_token = None
    token = None  # type: Optional[TokenOrElement]

    for token in buffered_stream:
        start_of_value_entry = False
        if isinstance(token, Deb822ValueContinuationToken):
            # The continuation token becomes a part of the value line element,
            # so it must not be emitted on its own.
            continuation_line_token = token
            start_of_value_entry = True
            token = None
        elif isinstance(token, Deb822FieldSeparatorToken):
            # The separator itself is emitted below; the value line follows it.
            start_of_value_entry = True
        elif isinstance(token, Deb822CommentElement):
            next_token = buffered_stream.peek()
            # If the next token is a continuation line token, then this comment
            # belong to a value and we might as well just start the value
            # parsing now.
            #
            # Note that we rely on this behaviour to avoid emitting the comment
            # token (failing to do so would cause the comment to appear twice
            # in the file).
            if isinstance(next_token, Deb822ValueContinuationToken):
                start_of_value_entry = True
                comment_element = token
                token = None
                # Use next with None to avoid raising StopIteration inside a generator
                # It won't happen, but pylint cannot see that, so we do this instead.
                continuation_line_token = cast(
                    "Deb822ValueContinuationToken", next(buffered_stream, None)
                )
                assert continuation_line_token is not None

        if token is not None:
            yield token
        if start_of_value_entry:
            tokens_in_value = list(buffered_stream.takewhile(_non_end_of_line_token))
            # The newline can be absent (None) when the stream ends first.
            eol_token = cast("Deb822WhitespaceToken", next(buffered_stream, None))
            assert eol_token is None or eol_token.text == "\n"
            leading_whitespace = None
            trailing_whitespace = None
            # "Depends:\n foo" would cause tokens_in_value to be empty for the
            # first "value line" (the empty part between ":" and "\n")
            if tokens_in_value:
                # Another special-case, "Depends: \n foo" (i.e. space after colon)
                # should not introduce an IndexError
                if isinstance(tokens_in_value[-1], Deb822WhitespaceToken):
                    trailing_whitespace = cast(
                        "Deb822WhitespaceToken", tokens_in_value.pop()
                    )
                # NOTE(review): the condition tests tokens_in_value[-1] while the
                # body takes tokens_in_value[0] as the leading whitespace; this
                # looks like the test was meant for tokens_in_value[0].  Confirm
                # before changing, as it alters which tokens end up in the value.
                if tokens_in_value and isinstance(
                    tokens_in_value[-1], Deb822WhitespaceToken
                ):
                    leading_whitespace = cast(
                        "Deb822WhitespaceToken", tokens_in_value[0]
                    )
                    tokens_in_value = tokens_in_value[1:]
            yield Deb822ValueLineElement(
                comment_element,
                continuation_line_token,
                leading_whitespace,
                tokens_in_value,
                trailing_whitespace,
                eol_token,
            )
            comment_element = None
            continuation_line_token = None

3333

3334

def _build_field_with_value(
    token_stream,  # type: Iterable[Union[TokenOrElement, Deb822ValueElement]]
):
    # type: (...) -> Iterable[Union[TokenOrElement, Deb822KeyValuePairElement]]
    """Merge field name, separator and value into key-value pair elements

    For every Deb822FieldNameToken in the stream (optionally preceded by a
    Deb822CommentElement), the next two entries are pulled off the stream as
    the separator and the value.  Entries that are not part of a field are
    passed through untouched.

    :param token_stream: Stream of tokens and elements.  The caller in this
      module runs this stage after values have been combined into
      Deb822ValueElement instances.
    :returns: A generator over the stream where each well-formed field is
      replaced by a Deb822KeyValuePairElement and each malformed field by a
      Deb822ErrorElement.  Iteration stops early if the stream ends in the
      middle of a field.
    """
    buffered_stream = BufferingIterator(token_stream)
    for token_or_element in buffered_stream:
        start_of_field = False
        comment_element = None
        if isinstance(token_or_element, Deb822FieldNameToken):
            start_of_field = True
        elif isinstance(token_or_element, Deb822CommentElement):
            # A comment only belongs to a field when the field name follows
            # it directly; otherwise it is emitted as-is below.
            comment_element = token_or_element
            next_token = buffered_stream.peek()
            start_of_field = isinstance(next_token, Deb822FieldNameToken)
            if start_of_field:
                # Remember to consume the field token
                try:
                    token_or_element = next(buffered_stream)
                except StopIteration:  # pragma: no cover
                    raise AssertionError

        if not start_of_field:
            # Token is not part of a field, emit it as-is
            yield token_or_element
            continue

        field_name = token_or_element
        separator = next(buffered_stream, None)
        value_element = next(buffered_stream, None)
        if separator is None or value_element is None:
            # Early EOF - should not be possible with how the tokenizer works
            # right now, but now it is future-proof.
            if comment_element:
                yield comment_element
            error_elements = [field_name]
            if separator is not None:
                error_elements.append(separator)
            yield Deb822ErrorElement(error_elements)
            return

        if isinstance(separator, Deb822FieldSeparatorToken) and isinstance(
            value_element, Deb822ValueElement
        ):
            yield Deb822KeyValuePairElement(
                comment_element,
                cast("Deb822FieldNameToken", field_name),
                separator,
                value_element,
            )
            continue

        # We had a parse error.  Everything already pulled off the stream
        # (comment, field name, separator and value) must be emitted again;
        # otherwise that part of the input would silently vanish from the
        # parsed file.  The comment is emitted on its own, mirroring the
        # early EOF case above.
        if comment_element:
            yield comment_element
        error_tokens = [
            field_name,
            separator,
            value_element,
        ]  # type: List[TokenOrElement]
        # Consume until the newline.
        error_tokens.extend(buffered_stream.takewhile(_non_end_of_line_token))
        nl = buffered_stream.peek()
        # Take the newline as well if present
        if nl and isinstance(nl, Deb822NewlineAfterValueToken):
            next(buffered_stream, None)
            error_tokens.append(nl)
        yield Deb822ErrorElement(error_tokens)

3393

3394

def _abort_on_error_tokens(sequence):
    # type: (Iterable[TokenOrElement]) -> Iterable[TokenOrElement]
    """Pass tokens through unchanged, raising on the first error token

    The running line number is derived from the number of newlines in the
    text of the tokens seen so far and is included in the error message.

    :param sequence: The token stream to guard.  Every entry must provide
      `is_error` and `convert_to_text()`.
    :returns: A generator yielding the entries of `sequence` in order.
    :raises SyntaxOrParseError: When an entry flagged as an error is reached.
    """
    message_template = (
        'Syntax or Parse error on or near line {line_no}: "{error_as_text}"'
    )
    current_line = 1
    # We are always called while the sequence consists entirely of tokens
    for entry in sequence:
        if not entry.is_error:
            current_line += entry.convert_to_text().count("\n")
            yield entry
            continue
        # Escape newlines so the offending text stays on one line in the message
        escaped = entry.convert_to_text().replace("\n", "\\n")
        raise SyntaxOrParseError(
            message_template.format(error_as_text=escaped, line_no=current_line)
        )

3409

3410

def parse_deb822_file(
    sequence,  # type: Union[Iterable[Union[str, bytes]], str]
    *,
    accept_files_with_error_tokens=False,  # type: bool
    accept_files_with_duplicated_fields=False,  # type: bool
    encoding="utf-8",  # type: str
):
    # type: (...) -> Deb822FileElement
    """Parse deb822 content into a Deb822FileElement

    :param sequence: An iterable over lines of str or bytes (an open file for
      reading will do), or a single str/bytes value which is then split into
      lines.  If the lines carry line endings, every line except the last
      must have one and they are preserved as-is.  If line endings are
      omitted and the content is at least 2 lines, the parser assumes
      implicit newlines.
    :param accept_files_with_error_tokens: If True, files with critical syntax
      or parse errors are returned as "successfully" parsed.  Working on such
      files is usually not desirable, as it is hard to make sense of them
      (and they might not be deb822 files at all).  When False (the default),
      an error (SyntaxOrParseError; the original documentation describes it
      as a ValueError) is raised on a critical syntax or parse error.
      Note that duplicated fields in a paragraph are not considered a
      critical parse error, as the implementation can cope gracefully with
      them.  Use accept_files_with_duplicated_fields to decide whether such
      files should be accepted.
    :param accept_files_with_duplicated_fields: If True, files containing
      paragraphs with duplicated fields are returned as "successfully" parsed
      even though they are invalid according to the specification.  The
      paragraphs will prefer the first appearance of the field unless the
      caller explicitly requests otherwise (e.g., via
      Deb822ParagraphElement.configured_view).  If False, a ValueError is
      raised if any duplicated field is seen inside any paragraph.
    :param encoding: The encoding to use (this is here to support Deb822-like
      APIs, new code should not use this parameter).
    """

    if isinstance(sequence, (str, bytes)):
        # Match the deb822 API.
        sequence = sequence.splitlines(True)

    tokens = tokenize_deb822_file(
        sequence, encoding=encoding
    )  # type: Iterable[TokenOrElement]
    if not accept_files_with_error_tokens:
        tokens = _abort_on_error_tokens(tokens)

    # The order of the stages is important.  As an example, _build_value_line
    # assumes that all comment tokens have been merged into comment elements.
    # Likewise, _build_field_with_value assumes that value tokens (along with
    # their comments) have been combined into elements.
    stages = (
        _combine_comment_tokens_into_elements,
        _build_value_line,
        _combine_vl_elements_into_value_elements,
        _build_field_with_value,
        _combine_kvp_elements_into_paragraphs,
        # Free-floating error tokens are combined into error elements last,
        # so the earlier stages can include error tokens in their own error
        # elements if they discover something is wrong.
        _combine_error_tokens_into_elements,
    )
    for stage in stages:
        tokens = stage(tokens)

    parsed_file = Deb822FileElement(LinkedList(tokens))

    if accept_files_with_duplicated_fields:
        return parsed_file

    for index, paragraph in enumerate(parsed_file):
        if not isinstance(paragraph, Deb822DuplicateFieldsParagraphElement):
            continue
        seen_names = set()
        for key in paragraph.keys():
            name, _, _ = _unpack_key(key)
            # assert for mypy
            assert isinstance(name, str)
            if name in seen_names:
                msg = 'Duplicate field "{dup_field}" in paragraph number {no}'
                raise ValueError(msg.format(dup_field=name, no=index))
            seen_names.add(name)

    return parsed_file

3492

3493

if __name__ == "__main__":  # pragma: no cover
    # Run the doctests embedded in this module when it is executed directly.
    from doctest import testmod

    testmod()

Coverage for src/debputy/lsp/vendoring/_deb822_repro/parsing.py: 59%

1464 statements