diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 00:30:09 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 00:30:09 +0000 |
commit | 4dd6e896edac1096089a9cd54296266436ffec03 (patch) | |
tree | a3da8663c704704177ad944ae5a680c302f29f55 | |
parent | Initial commit. (diff) | |
download | ruamel.yaml-4dd6e896edac1096089a9cd54296266436ffec03.tar.xz ruamel.yaml-4dd6e896edac1096089a9cd54296266436ffec03.zip |
Adding upstream version 0.18.5.upstream/0.18.5
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
-rw-r--r-- | CHANGES | 1217 | ||||
-rw-r--r-- | LICENSE | 21 | ||||
-rw-r--r-- | MANIFEST.in | 3 | ||||
-rw-r--r-- | PKG-INFO | 430 | ||||
-rw-r--r-- | README.md | 396 | ||||
-rw-r--r-- | __init__.py | 56 | ||||
-rw-r--r-- | anchor.py | 18 | ||||
-rw-r--r-- | comments.py | 1175 | ||||
-rw-r--r-- | compat.py | 235 | ||||
-rw-r--r-- | composer.py | 228 | ||||
-rw-r--r-- | configobjwalker.py | 15 | ||||
-rw-r--r-- | constructor.py | 1723 | ||||
-rw-r--r-- | cyaml.py | 195 | ||||
-rw-r--r-- | docinfo.py | 67 | ||||
-rw-r--r-- | dumper.py | 218 | ||||
-rw-r--r-- | emitter.py | 1779 | ||||
-rw-r--r-- | error.py | 297 | ||||
-rw-r--r-- | events.py | 264 | ||||
-rw-r--r-- | loader.py | 90 | ||||
-rw-r--r-- | main.py | 1514 | ||||
-rw-r--r-- | nodes.py | 145 | ||||
-rw-r--r-- | parser.py | 860 | ||||
-rw-r--r-- | py.typed | 0 | ||||
-rw-r--r-- | pyproject.toml | 4 | ||||
-rw-r--r-- | reader.py | 275 | ||||
-rw-r--r-- | representer.py | 1127 | ||||
-rw-r--r-- | resolver.py | 390 | ||||
-rw-r--r-- | ruamel.yaml.egg-info/PKG-INFO | 430 | ||||
-rw-r--r-- | ruamel.yaml.egg-info/SOURCES.txt | 44 | ||||
-rw-r--r-- | ruamel.yaml.egg-info/dependency_links.txt | 1 | ||||
-rw-r--r-- | ruamel.yaml.egg-info/not-zip-safe | 1 | ||||
-rw-r--r-- | ruamel.yaml.egg-info/requires.txt | 10 | ||||
-rw-r--r-- | ruamel.yaml.egg-info/top_level.txt | 1 | ||||
-rw-r--r-- | scalarbool.py | 42 | ||||
-rw-r--r-- | scalarfloat.py | 103 | ||||
-rw-r--r-- | scalarint.py | 122 | ||||
-rw-r--r-- | scalarstring.py | 140 | ||||
-rw-r--r-- | scanner.py | 2363 | ||||
-rw-r--r-- | serializer.py | 231 | ||||
-rw-r--r-- | setup.cfg | 4 | ||||
-rw-r--r-- | setup.py | 941 | ||||
-rw-r--r-- | tag.py | 124 | ||||
-rw-r--r-- | timestamp.py | 58 | ||||
-rw-r--r-- | tokens.py | 379 | ||||
-rw-r--r-- | util.py | 257 |
45 files changed, 17993 insertions, 0 deletions
@@ -0,0 +1,1217 @@ +[0.18.5, 2023-11-03]: +- there is some indication that dependent packages have been pinned to use specific + (tested) and just install the latest even in Python versions that have end-of-life + +[0.18.4, 2023-11-01]: +- YAML() instance has a `doc_infos` attribute which is a cumulative list of DocInfo + instances (one for `load()`, one per document for `load_all()`). DocInfo instances + contain version information (requested, directive) and tag directive information +- fix issue that the YAML instance tags attribute was not reset between documents, + resulting in mixing of tag directives of multiple documents. Now only provides tag + directive information on latest document after loading. This means tags for dumping + must be set **again** after a document is loaded with the same instance. (because + of this tags will be removed in favour of a different mechanism in the future) +- fix issue with multiple document intermixing YAML 1.2 and YAML 1.1, the VersionedResolver + now resets +- fix issue with disappearing comment when next token was Tag (still can't have both + a comment before a tag and after a tag, before node) + +[0.18.3, 2023-10-29]: +- fix issue with spurious newline on first item after comment + nested block sequence +- additional links in the metadata on PyPI (Reported, with pointers how to fix, by + [Sorin](https://sourceforge.net/u/ssbarnea/profile/)). + +[0.18.2, 2023-10-24]: +- calling the deprecated functions now raises an `AttributeError` with the, somewhat + more informative, original warning message. Instead of calling `sys.exit(1)` + +[0.18.1, 2023-10-24]: +- calling the deprecated functions now always displays the warning message. 
(reported + by [Trend Lloyd](https://sourceforge.net/u/lathiat2/profile/)) + +[0.18.0, 2023-10-23]: +- the **functions** `scan`, `parse`, `compose`, `load`, `emit`, `serialize`, `dump` + and their variants (`_all`, `safe_`, `round_trip_`, etc) have been deprecated (the + same named **methods** on `YAML()` instances are, of course, still there). +- |- + `YAML(typ='unsafe')` now issues a `PendingDeprecationWarning`. This will become deprecated in the 0.18 series + (probably before the end of 2023). + You can use `YAML(typ='full')` to dump unregistered Python classes/functions. + For loading you'll have to register your classes/functions + if you want the old, unsafe, functionality. You can still load any tag, like `!!python/name:posix.system`, **safely** + with the (default) round-trip parser. +- fix for `bytes-like object is required not 'str' while dumping binary streams`. + This was reported, analysed and a fix provided by [Vit Zikmund](https://sourceforge.net/u/tlwhitec/profile/) + +[0.17.40, 2023-10-20]: +- flow style sets are now preserved ( `!!set {a, b, c}` ). Any values specified when + loading are dropped, including `!!null ""`. +- |- + potential workaround for issue 484: the long_description_content_type including the variant specification `CommonMark` + can result in problems on Azure. If you can install from `.tar.gz` using + `RUAMEL_NO_LONG_DESCRIPTION=1 pip install ruamel.yaml --no-binary :all:` then the long description, and its + offending type, are not included (in the METADATA). 
+ (Reported by [Coury Ditch](https://sourceforge.net/u/cmditch/profile/)) +- links in documentation update (reported by [David Hoese](https://sourceforge.net/u/daveydave400/profile/)) +- Added some `__repr__` for internally used classes + +[0.17.39, 2023-10-19]: +- update README generation, no code changes + +[0.17.36, 2023-10-19]: +- fixed issue 480, dumping of a loaded empty flow-style mapping with comment failed + (Reported by [Stéphane Brunner](https://sourceforge.net/u/stbrunner/profile/)) +- fixed issue 482, caused by DEFAULT_MAPPING_TAG having changed to being a `Tag()` + instance, not a string (reported by [yan12125](https://sourceforge.net/u/yan12125/profile/)) +- updated documentation to use mkdocs + +[0.17.35, 2023-10-04]: +- support for loading dataclasses with `InitVar` variables (some special coding was + necessary to get the, unexpected, default value in the corresponding instance + attribute ( example of usage in [this question](https://stackoverflow.com/q/77228378/1307905)) + +[0.17.34, 2023-10-03]: +- Python 3.12 also loads C version when using `typ='safe'` +- |- + initial support for loading invoking + `__post_init__()` on dataclasses that have that + method after loading a registered dataclass. + (Originally + [asked](https://stackoverflow.com/q/51529458/1307905) on + Stackoverflow by + [nyanpasu64](https://stackoverflow.com/users/2683842/nyanpasu64) + and as + [ticket](https://sourceforge.net/p/ruamel-yaml/tickets/355/) by + [Patrick Lehmann](https://sourceforge.net/u/paebbels/profile/)) + + ``` + @yaml.register_class + @dataclass + class ... 
+ ``` + + +[0.17.33, 2023-09-28]: +- added `flow_seq_start`, `flow_seq_end`, `flow_seq_separator`, `flow_map_start`, + `flow_map_end`, `flow_map_separator` **class** attributes to the `Emitter` class + so flow style output can more easily be influenced (based on [this answer](https://stackoverflow.com/a/76547814/1307905) + on a StackOverflow question by [Huw Walters](https://stackoverflow.com/users/291033/huw-walters)). + +[0.17.32, 2023-06-17]: +- fix issue with scanner getting stuck in infinite loop + +[0.17.31, 2023-05-31]: +- added tag.setter on `ScalarEvent` and on `Node`, that takes either a `Tag` instance, + or a str (reported by [Sorin Sbarnea](https://sourceforge.net/u/ssbarnea/profile/)) + +[0.17.30, 2023-05-30]: +- fix issue 467, caused by Tag instances not being hashable (reported by [Douglas + Raillard](https://bitbucket.org/%7Bcf052d92-a278-4339-9aa8-de41923bb556%7D/)) + +[0.17.29, 2023-05-30]: +- changed the internals of the tag property from a string to a class which allows + for preservation of the original handle and suffix. This should result in better + results using documents with %TAG directives, as well as preserving URI escapes + in tag suffixes. 
+ +[0.17.28, 2023-05-26]: +- |- + fix for issue 464: documents ending with document end marker + without final newline fail to load (reported by [Mariusz + Rusiniak](https://sourceforge.net/u/r2dan/profile/)) + +[0.17.27, 2023-05-25]: +- fix issue with inline mappings as value for merge keys (reported by Sirish on [StackOverflow](https://stackoverflow.com/q/76331049/1307905)) +- fix for 468, error inserting after accessing merge attribute on `CommentedMap` (reported + by [Bastien gerard](https://sourceforge.net/u/bagerard/)) +- fix for issue 461 pop + insert on same `CommentedMap` key throwing error (reported + by [John Thorvald Wodder II](https://sourceforge.net/u/jwodder/profile/)) + +[0.17.26, 2023-05-09]: +- fix for error on edge case for issue 459 + +[0.17.25, 2023-05-09]: +- fix for regression while dumping wrapped strings with too many backslashes removed + (issue 459, reported by [Lele Gaifax](https://sourceforge.net/u/lele/profile/)) + +[0.17.24, 2023-05-06]: +- rewrite of `CommentedMap.insert()`. If you have a merge key in the YAML document + for the mapping you insert to, the position value should be the one as you look + at the YAML input. This fixes issue 453 where other keys of a merged in mapping + would show up after an insert (reported by [Alex Miller](https://sourceforge.net/u/millerdevel/profile/)). + It also fixes a call to `.insert()` resulting into the merge key to move to be the + first key if it wasn't already and it is also now possible to insert a key before + a merge key (even if the first key in the mapping). +- fix (in the pure Python implementation including default) for issue 447. (reported + by [Jack Cherng](https://sourceforge.net/u/jfcherng/profile/), also brought up by + brent on [StackOverflow](https://stackoverflow.com/q/40072485/1307905)) + +[0.17.23, 2023-05-05]: +- fix 458, error on plain scalars starting with word longer than width. 
(reported + by [Kyle Larose](https://sourceforge.net/u/klarose/profile/)) +- fix for `.update()` no longer correctly handling keyword arguments (reported by + John Lin on [StackOverflow]( https://stackoverflow.com/q/76089100/1307905)) +- |- + fix issue 454: high Unicode (emojis) in quoted strings always + escaped (reported by [Michal + Čihař](https://sourceforge.net/u/nijel/profile/) based on a + question on StackOverflow). +- fix issue with emitter conservatively inserting extra backslashes in wrapped quoted + strings (reported by thebenman on [StackOverflow](https://stackoverflow.com/q/75631454/1307905)) + +[0.17.22, 2023-05-02]: +- fix issue 449 where the second exclamation marks got URL encoded (reported and fixing + PR provided by [John Stark](https://sourceforge.net/u/jods/profile/)) +- fix issue with indent != 2 and literal scalars with empty first line (reported by + wrdis on [StackOverflow](https://stackoverflow.com/q/75584262/1307905)) +- updated `__repr__` of CommentedMap, now that Python's dict is ordered -> no more + `ordereddict(list-of-tuples)` +- merge MR 4, handling OctalInt in YAML 1.1 (provided by [Jacob Floyd](https://sourceforge.net/u/cognifloyd/profile/)) +- fix loading of `!!float 42` (reported by Eric on [Stack overflow](https://stackoverflow.com/a/71555107/1307905)) +- line numbers are now set on `CommentedKeySeq` and `CommentedKeyMap` (which are created + if you have a sequence resp. mapping as the key in a mapping) +- |- + plain scalars: put single words longer than width on a line of + their own, instead of after the previous line (issue 427, reported + by [Antoine + Cotten](https://sourceforge.net/u/antoineco/profile/)). Caveat: + this currently results in a space ending the previous line. +- |- + fix for folded scalar part of 421: comments after ">" on first + line of folded scalars are now preserved (as were those in the + same position on literal scalars). Issue reported by Jacob Floyd. 
+- added stacklevel to warnings +- typing changed from Py2 compatible comments to Py3, removed various Py2-isms + +[0.17.21, 2022-02-12]: +- fix bug in calling `.compose()` method with `pathlib.Path` instance. + +[0.17.20, 2022-01-03]: +- fix error in microseconds while rounding datetime fractions >= 9999995 (reported + by [Luis Ferreira](https://sourceforge.net/u/ljmf00/)) + +[0.17.19, 2021-12-26]: +- fix mypy problems (reported by [Arun](https://sourceforge.net/u/arunppsg/profile/)) + +[0.17.18, 2021-12-24]: +- copy-paste error in folded scalar comment attachment (reported by [Stephan Geulette](https://sourceforge.net/u/sgeulette/profile/)) +- fix 411, indent error comment between key empty seq value (reported by [Guillermo + Julián](https://sourceforge.net/u/gjulianm/profile/)) + +[0.17.17, 2021-10-31]: +- extract timestamp matching/creation to util + +[0.17.16, 2021-08-28]: +- 398 also handle issue 397 when comment is newline + +[0.17.15, 2021-08-28]: +- fix issue 397, insert comment before key when a comment between key and value exists + (reported by [Bastien gerard](https://sourceforge.net/u/bagerard/)) + +[0.17.14, 2021-08-25]: +- fix issue 396, inserting key/val in merged-in dictionary (reported by [Bastien gerard](https://sourceforge.net/u/bagerard/)) + +[0.17.13, 2021-08-21]: +- minor fix in attr handling + +[0.17.12, 2021-08-21]: +- fix issue with anchor on registered class not preserved and those classes using + package attrs with `@attr.s()` (both reported by [ssph](https://sourceforge.net/u/sph/)) + +[0.17.11, 2021-08-19]: +- fix error baseclass for `DuplicateKeyError` (reported by [Łukasz Rogalski](https://sourceforge.net/u/lrogalski/)) +- fix typo in reader error message, causing `KeyError` during reader error (reported + by [MTU](https://sourceforge.net/u/mtu/)) + +[0.17.10, 2021-06-24]: +- fix issue 388, token with old comment structure != two elements (reported by [Dimitrios + Bariamis](https://sourceforge.net/u/dbdbc/)) + +[0.17.9, 2021-06-10]: 
+- fix issue with updating CommentedMap (reported by sri on [StackOverflow](https://stackoverflow.com/q/67911659/1307905)) + +[0.17.8, 2021-06-09]: +- fix for issue 387 where templated anchors on tagged object did get set resulting + in potential id reuse. (reported by [Artem Ploujnikov](https://sourceforge.net/u/flexthink/)) + +[0.17.7, 2021-05-31]: +- issue 385 also affected other deprecated loaders (reported via email by Oren Watson) + +[0.17.6, 2021-05-31]: +- merged type annotations update provided by [Jochen Sprickerhof](https://sourceforge.net/u/jspricke/) +- |- + fix for issue 385: deprecated round_trip_loader function not + working (reported by [Mike + Gouline](https://sourceforge.net/u/gouline/)) +- wasted a few hours getting rid of mypy warnings/errors + +[0.17.5, 2021-05-30]: +- fix for issue 384 `!!set` with aliased entry resulting in broken YAML on rt reported + by [William Kimball](https://sourceforge.net/u/william303/)) + +[0.17.4, 2021-04-07]: +- prevent (empty) comments from throwing assertion error (issue 351 reported by [William + Kimball](https://sourceforge.net/u/william303/)) comments (or empty line) will be + dropped + +[0.17.3, 2021-04-07]: +- fix for issue 382 caused by an error in a format string (reported by [William Kimball](https://sourceforge.net/u/william303/)) +- |- + allow expansion of aliases by setting `yaml.composer.return_alias = lambda s: copy.deepcopy(s)` + (as per [Stackoverflow answer](https://stackoverflow.com/a/66983530/1307905)) + +[0.17.2, 2021-03-29]: +- change -py2.py3-none-any.whl to -py3-none-any.whl, and remove 0.17.1 + +[0.17.1, 2021-03-29]: +- |- + added 'Programming Language :: Python :: 3 :: Only', and + removing 0.17.0 from PyPI (reported by [Alasdair + Nicol](https://sourceforge.net/u/alasdairnicol/)) + +[0.17.0, 2021-03-26]: +- removed because of incomplete classifiers +- this release no longer supports Python 2.7, most if not all Python 2 specific code + is removed. 
The 0.17.x series is the last to support Python 3.5 (this also allowed + for removal of the dependency on `ruamel.std.pathlib`) +- remove Python2 specific code branches and adaptations (u-strings) +- prepare % code for f-strings using `_F` +- allow PyOxidisation ([issue 324](https://sourceforge.net/p/ruamel-yaml/tickets/324/) + resp. [issue 171](https://github.com/indygreg/PyOxidizer/issues/171)) +- replaced Python 2 compatible enforcement of keyword arguments with '*' +- the old top level *functions* `load`, `safe_load`, `round_trip_load`, `dump`, `safe_dump`, + `round_trip_dump`, `scan`, `parse`, `compose`, `emit`, `serialize` as well as their + `_all` variants for multi-document streams, now issue a `PendingDeprecationWarning` + (e.g. when run from pytest, but also when Python is started with `-Wd`). Use the methods + on `YAML()`, which have been extended. +- |- + fix for issue 376: indentation changes could put literal/folded + scalar to start before the `#` column of a following comment. + Effectively making the comment part of the scalar in the output. 
+ (reported by [Bence Nagy](https://sourceforge.net/u/underyx/)) + +[0.16.13, 2021-03-05]: +- |- + fix for issue 359: could not update() CommentedMap with keyword + arguments (reported by [Steve + Franchak](https://sourceforge.net/u/binaryadder/)) +- |- + fix for issue 365: unable to dump mutated TimeStamp objects + (reported by [Anton Akmerov](https://sourceforge.net/u/akhmerov)) +- |- + fix for issue 371: unable to add comment without starting space + (reported by [Mark Grandi](https://sourceforge.net/u/mgrandi)) +- |- + fix for issue 373: recursive call to walk_tree not preserving + all params (reported by [eulores](https://sourceforge.net/u/eulores/)) +- a None value in a flow-style sequence is now dumped as `null` instead of `!!null + ''` (reported by mcarans on [StackOverflow](https://stackoverflow.com/a/66489600/1307905)) + +[0.16.12, 2020-09-04]: +- update links in doc + +[0.16.11, 2020-09-03]: +- workaround issue with setuptools 0.50 and importing pip (fix by [jaraco](https://github.com/pypa/setuptools/issues/2355#issuecomment-685159580)) + +[0.16.10, 2020-02-12]: +- (auto) updated image references in README to sourceforge + +[0.16.9, 2020-02-11]: +- update CHANGES + +[0.16.8, 2020-02-11]: +- update requirements so that ruamel.yaml.clib is installed for 3.8, as it has become + available (via manylinux builds) + +[0.16.7, 2020-01-30]: +- fix typechecking issue on TaggedScalar (reported by Jens Nielsen) +- fix error in dumping literal scalar in sequence with comments before element (reported + by [EJ Etherington](https://sourceforge.net/u/ejether/)) + +[0.16.6, 2020-01-20]: +- fix empty string mapping key roundtripping with preservation of quotes as `? ''` + (reported via email by Tomer Aharoni). 
+- fix incorrect state setting in class constructor (reported by [Douglas Raillard](https://bitbucket.org/%7Bcf052d92-a278-4339-9aa8-de41923bb556%7D/)) +- adjust deprecation warning test for Hashable, as that no longer warns (reported + by [Jason Montleon](https://bitbucket.org/%7B8f377d12-8d5b-4069-a662-00a2674fee4e%7D/)) + +[0.16.5, 2019-08-18]: +- allow for `YAML(typ=['unsafe', 'pytypes'])` + +[0.16.4, 2019-08-16]: +- fix output of TAG directives with `#` (reported by [Thomas Smith](https://bitbucket.org/%7Bd4c57a72-f041-4843-8217-b4d48b6ece2f%7D/)) + +[0.16.3, 2019-08-15]: +- split construct_object +- change stuff back to keep mypy happy +- move setting of version based on YAML directive to scanner, allowing to check for + file version during TAG directive scanning + +[0.16.2, 2019-08-15]: +- preserve YAML and TAG directives on roundtrip, correctly output `#` in URL for YAML + 1.2 (both reported by [Thomas Smith](https://bitbucket.org/%7Bd4c57a72-f041-4843-8217-b4d48b6ece2f%7D/)) + +[0.16.1, 2019-08-08]: +- Force the use of new version of ruamel.yaml.clib (reported by [Alex Joz](https://bitbucket.org/%7B9af55900-2534-4212-976c-61339b6ffe14%7D/)) +- Allow `#` in tag URI as these are allowed in YAML 1.2 (reported by [Thomas Smith](https://bitbucket.org/%7Bd4c57a72-f041-4843-8217-b4d48b6ece2f%7D/)) + +[0.16.0, 2019-07-25]: +- split of C source that generates `.so` file to [ruamel.yaml.clib]( https://pypi.org/project/ruamel.yaml.clib/) +- duplicate keys are now an error when working with the old API as well + +[0.15.100, 2019-07-17]: +- fixing issue with dumping deep-copied data from commented YAML, by providing both + the memo parameter to __deepcopy__, and by allowing startmarks to be compared on + their content (reported by `Theofilos Petsios <https://bitbucket.org/%7Be550bc5d-403d-4fda-820b-bebbe71796d3%7D/>`__) + +[0.15.99, 2019-07-12]: +- add `py.typed` to distribution, based on a PR submitted by `Michael Crusoe 
<https://bitbucket.org/%7Bc9fbde69-e746-48f5-900d-34992b7860c8%7D/>`__ +- merge PR 40 (also by Michael Crusoe) to more accurately specify repository in the + README (also reported in a misunderstood issue some time ago) + +[0.15.98, 2019-07-09]: +- regenerate ext/_ruamel_yaml.c with Cython version 0.29.12, needed for Python 3.8.0b2 + (reported by `John Vandenberg <https://bitbucket.org/%7B6d4e8487-3c97-4dab-a060-088ec50c682c%7D/>`__) + +[0.15.97, 2019-06-06]: +- regenerate ext/_ruamel_yaml.c with Cython version 0.29.10, needed for Python 3.8.0b1 +- regenerate ext/_ruamel_yaml.c with Cython version 0.29.9, needed for Python 3.8.0a4 + (reported by `Anthony Sottile <https://bitbucket.org/%7B569cc8ea-0d9e-41cb-94a4-19ea517324df%7D/>`__) + +[0.15.96, 2019-05-16]: +- fix failure to indent comments on round-trip anchored block style scalars in block + sequence (reported by `William Kimball <https://bitbucket.org/%7Bba35ed20-4bb0-46f8-bb5d-c29871e86a22%7D/>`__) + +[0.15.95, 2019-05-16]: +- fix failure to round-trip anchored scalars in block sequence (reported by `William + Kimball <https://bitbucket.org/%7Bba35ed20-4bb0-46f8-bb5d-c29871e86a22%7D/>`__) +- wheel files for Python 3.4 no longer provided (`Python 3.4 EOL 2019-03-18 <https://www.python.org/dev/peps/pep-0429/>`__) + +[0.15.94, 2019-04-23]: +- fix missing line-break after end-of-file comments not ending in line-break (reported + by `Philip Thompson <https://bitbucket.org/%7Be42ba205-0876-4151-bcbe-ccaea5bd13ce%7D/>`__) + +[0.15.93, 2019-04-21]: +- fix failure to parse empty implicit flow mapping key +- in YAML 1.1 plains scalars `y`, 'n', `Y`, and 'N' are now correctly recognised as + booleans and such strings dumped quoted (reported by `Marcel Bollmann <https://bitbucket.org/%7Bd8850921-9145-4ad0-ac30-64c3bd9b036d%7D/>`__) + +[0.15.92, 2019-04-16]: +- fix failure to parse empty implicit block mapping key (reported by `Nolan W <https://bitbucket.org/i2labs/>`__) + +[0.15.91, 2019-04-05]: +- allowing duplicate keys 
would not work for merge keys (reported by mamacdon on `StackOverflow + <https://stackoverflow.com/questions/55540686/>`__) + +[0.15.90, 2019-04-04]: +- fix issue with updating `CommentedMap` from list of tuples (reported by `Peter Henry + <https://bitbucket.org/mosbasik/>`__) + +[0.15.89, 2019-02-27]: +- fix for items with flow-mapping in block sequence output on single line (reported + by `Zahari Dim <https://bitbucket.org/zahari_dim/>`__) +- fix for safe dumping erroring in creation of representer error when dumping namedtuple + (reported and solution by `Jaakko Kantojärvi <https://bitbucket.org/raphendyr/>`__) + +[0.15.88, 2019-02-12]: +- fix inclusion of python code from the subpackage data (containing extra tests, reported + by `Florian Apolloner <https://bitbucket.org/apollo13/>`__) + +[0.15.87, 2019-01-22]: +- fix problem with empty lists and the code to reinsert merge keys (reported via email + by Zaloo) + +[0.15.86, 2019-01-16]: +- reinsert merge key in its old position (reported by grumbler on `Stackoverflow <https://stackoverflow.com/a/54206512/1307905>`__) +- fix for issue with non-ASCII anchor names (reported and fix provided by Dandaleon + Flux via email) +- fix for issue when parsing flow mapping value starting with colon (in pure Python + only) (reported by `FichteFoll <https://bitbucket.org/FichteFoll/>`__) + +[0.15.85, 2019-01-08]: +- the types used by `SafeConstructor` for mappings and sequences can now be set by + assigning to `XXXConstructor.yaml_base_dict_type` (and `..._list_type`), preventing + the need to copy two methods with 50+ lines that had `var = {}` hardcoded. (Implemented + to help solve a feature request by `Anthony Sottile <https://bitbucket.org/asottile/>`__ + in an easier way) + +[0.15.84, 2019-01-07]: +- fix for `CommentedMap.copy()` not returning `CommentedMap`, let alone copying comments + etc. 
(reported by `Anthony Sottile <https://bitbucket.org/asottile/>`__) + +[0.15.83, 2019-01-02]: +- fix for bug in roundtripping aliases used as key (reported via email by Zaloo) + +[0.15.82, 2018-12-28]: +- anchors and aliases on scalar int, float, string and bool are now preserved. Anchors + do not need a referring alias for these (reported by `Alex Harvey <https://bitbucket.org/alexharv074/>`__) +- anchors no longer lost on tagged objects when roundtripping (reported by `Zaloo + <https://bitbucket.org/zaloo/>`__) + +[0.15.81, 2018-12-06]: +- fix issue saving methods of metaclass derived classes (reported and fix provided + by `Douglas Raillard <https://bitbucket.org/DouglasRaillard/>`__) + +[0.15.80, 2018-11-26]: +- fix issue emitting BEL character when round-tripping invalid folded input (reported + by Isaac on `StackOverflow <https://stackoverflow.com/a/53471217/1307905>`__) + +[0.15.79, 2018-11-21]: +- fix issue with anchors nested deeper than alias (reported by gaFF on `StackOverflow + <https://stackoverflow.com/a/53397781/1307905>`__) + +[0.15.78, 2018-11-15]: +- fix setup issue for 3.8 (reported by `Sidney Kuyateh <https://bitbucket.org/autinerd/>`__) + +[0.15.77, 2018-11-09]: +- setting `yaml.sort_base_mapping_type_on_output = False`, will prevent explicit sorting + by keys in the base representer of mappings. Roundtrip already did not do this. + Usage only makes real sense for Python 3.6+ (feature request by `Sebastian Gerber + <https://bitbucket.org/spacemanspiff2007/>`__). +- implement Python version check in YAML metadata in `_test/test_z_data.py` + +[0.15.76, 2018-11-01]: +- fix issue with empty mapping and sequence loaded as flow-style (mapping reported + by `Min RK <https://bitbucket.org/minrk/>`__, sequence by `Maged Ahmed <https://bitbucket.org/maged2/>`__) + +[0.15.75, 2018-10-27]: +- fix issue with single '?' 
scalar (reported by `Terrance <https://bitbucket.org/OllieTerrance/>`__) +- fix issue with duplicate merge keys (prompted by `answering <https://stackoverflow.com/a/52852106/1307905>`__ + a `StackOverflow question <https://stackoverflow.com/q/52851168/1307905>`__ by `math + <https://stackoverflow.com/users/1355634/math>`__) + +[0.15.74, 2018-10-17]: +- fix dropping of comment on rt before sequence item that is sequence item (reported + by `Thorsten Kampe <https://bitbucket.org/thorstenkampe/>`__) + +[0.15.73, 2018-10-16]: +- fix irregular output on pre-comment in sequence within sequence (reported by `Thorsten + Kampe <https://bitbucket.org/thorstenkampe/>`__) +- allow non-compact (i.e. next line) dumping sequence/mapping within sequence. + +[0.15.72, 2018-10-06]: +- fix regression on explicit 1.1 loading with the C based scanner/parser (reported + by `Tomas Vavra <https://bitbucket.org/xtomik/>`__) + +[0.15.71, 2018-09-26]: +- fix regression where handcrafted CommentedMaps could not be initiated (reported + by `Dan Helfman <https://bitbucket.org/dhelfman/>`__) +- fix regression with non-root literal scalars that needed indent indicator (reported + by `Clark Breyman <https://bitbucket.org/clarkbreyman/>`__) +- tag:yaml.org,2002:python/object/apply now also uses __qualname__ on PY3 (reported + by `Douglas RAILLARD <https://bitbucket.org/DouglasRaillard/>`__) + +[0.15.70, 2018-09-21]: +- reverted CommentedMap and CommentedSeq to subclass ordereddict resp. list, reimplemented + merge maps so that both `dict(**commented_map_instance)` and JSON dumping works. + This also allows checking with `isinstance()` on `dict` resp. `list`. (Proposed + by `Stuart Berg <https://bitbucket.org/stuarteberg/>`__, with feedback from `blhsing + <https://stackoverflow.com/users/6890912/blhsing>`__ on `StackOverflow <https://stackoverflow.com/q/52314186/1307905>`__) + +[0.15.69, 2018-09-20]: +- fix issue with dump_all gobbling end-of-document comments on parsing (reported by + `Pierre B. 
<https://bitbucket.org/octplane/>`__) + +[0.15.68, 2018-09-20]: +- fix issue with parsable, but incorrect output with nested flow-style sequences (reported + by `Dougal Seeley <https://bitbucket.org/dseeley/>`__) +- fix issue with loading Python objects that have __setstate__ and recursion in parameters + (reported by `Douglas RAILLARD <https://bitbucket.org/DouglasRaillard/>`__) + +[0.15.67, 2018-09-19]: +- fix issue with extra space inserted with non-root literal strings (Issue reported + and PR with fix provided by `Naomi Seyfer <https://bitbucket.org/sixolet/>`__.) + +[0.15.66, 2018-09-07]: +- fix issue with fold indicating characters inserted in safe_load-ed folded strings + (reported by `Maximilian Hils <https://bitbucket.org/mhils/>`__). + +[0.15.65, 2018-09-07]: +- |- + fix issue #232 revert to throw ParserError for unexpected `]` + and `}` instead of IndexError. (Issue reported and PR with fix + provided by `Naomi Seyfer <https://bitbucket.org/sixolet/>`__.) +- added `key` and `reverse` parameter (suggested by Jannik Klemm via email) +- indent root level literal scalars that have directive or document end markers at + the beginning of a line + +[0.15.64, 2018-08-30]: +- |- + support round-trip of tagged sequences: `!Arg [a, {b: 1}]` +- |- + single entry mappings in flow sequences now written by default without quotes + set `yaml.brace_single_entry_mapping_in_flow_sequence=True` to force + getting `[a, {b: 1}, {c: {d: 2}}]` instead of the default `[a, b: 1, c: {d: 2}]` +- fix issue when roundtripping floats starting with a dot such as `.5` (reported by + `Harrison Gregg <https://bitbucket.org/HarrisonGregg/>`__) + +[0.15.63, 2018-08-29]: +- small fix only necessary for Windows users that don't use wheels. + +[0.15.62, 2018-08-29]: +- C based reader/scanner & emitter now allow setting of 1.2 as YAML version. 
** The + loading/dumping is still YAML 1.1 code**, so use the common subset of YAML 1.2 and + 1.1 (reported by `Ge Yang <https://bitbucket.org/yangge/>`__) + +[0.15.61, 2018-08-23]: +- support for round-tripping folded style scalars (initially requested by `Johnathan + Viduchinsky <https://bitbucket.org/johnathanvidu/>`__) +- update of C code +- speed up of scanning (~30% depending on the input) + +[0.15.60, 2018-08-18]: +- cleanup for mypy +- spurious print in library (reported by `Lele Gaifax <https://bitbucket.org/lele/>`__), + now automatically checked + +[0.15.59, 2018-08-17]: +- issue with C based loader and leading zeros (reported by `Tom Hamilton Stubber <https://bitbucket.org/TomHamiltonStubber/>`__) + +[0.15.58, 2018-08-17]: +- |- + simple mappings can now be used as keys when round-tripping:: + + {a: 1, b: 2}: hello world + + although using the obvious operations (del, popitem) on the key will + fail, you can mutilate it by going through its attributes. If you load the + above YAML in `d`, then changing the value is cumbersome: + + d = {CommentedKeyMap([('a', 1), ('b', 2)]): "goodbye"} + + and changing the key even more so: + + d[CommentedKeyMap([('b', 1), ('a', 2)])] = d.pop( + CommentedKeyMap([('a', 1), ('b', 2)])) + + (you can use a `dict` instead of a list of tuples (or ordereddict), but that might result + in a different order, of the keys of the key, in the output) +- check integers to dump with 1.2 patterns instead of 1.1 (reported by `Lele Gaifax + <https://bitbucket.org/lele/>`__) + + +[0.15.57, 2018-08-15]: +- Fix that CommentedSeq could no longer be used in adding or do a copy (reported by + `Christopher Wright <https://bitbucket.org/CJ-Wright4242/>`__) + +[0.15.56, 2018-08-15]: +- fix issue with `python -O` optimizing away code (reported, and detailed cause pinpointed, + by `Alex Grönholm <https://bitbucket.org/agronholm/>`__ + +[0.15.55, 2018-08-14]: + +- unmade `CommentedSeq` a subclass of `list`. 
It is now indirectly a subclass of the + standard `collections.abc.MutableSequence` (without .abc if you are still on Python2.7). + If you do `isinstance(yaml.load('[1, 2]'), list)`) anywhere in your code replace + `list` with `MutableSequence`. Directly, `CommentedSeq` is a subclass of the abstract + baseclass `ruamel.yaml.compat.MutableScliceableSequence`, with the result that *(extended) + slicing is supported on `CommentedSeq`*. (reported by `Stuart Berg <https://bitbucket.org/stuarteberg/>`__) +- duplicate keys (or their values) with non-ascii now correctly report in Python2, + instead of raising a Unicode error. (Reported by `Jonathan Pyle <https://bitbucket.org/jonathan_pyle/>`__) + +[0.15.54, 2018-08-13]: + +- fix issue where a comment could pop-up twice in the output (reported by `Mike Kazantsev + <https://bitbucket.org/mk_fg/>`__ and by `Nate Peterson <https://bitbucket.org/ndpete21/>`__) +- fix issue where JSON object (mapping) without spaces was not parsed properly (reported + by `Marc Schmidt <https://bitbucket.org/marcj/>`__) +- fix issue where comments after empty flow-style mappings were not emitted (reported + by `Qinfench Chen <https://bitbucket.org/flyin5ish/>`__) + +[0.15.53, 2018-08-12]: +- fix issue with flow style mapping with comments gobbled newline (reported by `Christopher + Lambert <https://bitbucket.org/XN137/>`__) +- fix issue where single '+' under YAML 1.2 was interpreted as integer, erroring out + (reported by `Jethro Yu <https://bitbucket.org/jcppkkk/>`__) + +[0.15.52, 2018-08-09]: +- added `.copy()` mapping representation for round-tripping (`CommentedMap`) to fix + incomplete copies of merged mappings (reported by `Will Richards <https://bitbucket.org/will_richards/>`__) +- Also unmade that class a subclass of ordereddict to solve incorrect behaviour for + `{**merged-mapping}` and `dict(**merged-mapping)` (reported by `Filip Matzner <https://bitbucket.org/FloopCZ/>`__) + +[0.15.51, 2018-08-08]: +- Fix method name dumps (were not 
dotted) and loads (reported by `Douglas Raillard + <https://bitbucket.org/DouglasRaillard/>`__) +- Fix spurious trailing white-space caused when the comment start column was no longer + reached and there was no actual EOL comment (e.g. following empty line) and doing + substitutions, or when quotes around scalars got dropped. (reported by `Thomas + Guillet <https://bitbucket.org/guillett/>`__) + +[0.15.50, 2018-08-05]: +- Allow `YAML()` as a context manager for output, thereby making it much easier to + generate multi-documents in a stream. +- Fix issue with incorrect type information for `load()` and `dump()` (reported by + `Jimbo Jim <https://bitbucket.org/jimbo1qaz/>`__) + +[0.15.49, 2018-08-05]: +- |- + fix preservation of leading newlines in root level literal style scalar, + and preserve comment after literal style indicator (`| # some comment`) + Both needed for round-tripping multi-doc streams in + `ryd <https://pypi.org/project/ryd/>`__. + +[0.15.48, 2018-08-03]: +- |- + housekeeping: `oitnb` for formatting, mypy 0.620 upgrade and conformity + +[0.15.47, 2018-07-31]: +- fix broken 3.6 manylinux1 (result of an unclean `build` (reported by `Roman Sichnyi + <https://bitbucket.org/rsichnyi-gl/>`__) + + +[0.15.46, 2018-07-29]: +- fixed DeprecationWarning for importing from `collections` on 3.7 (issue 210, reported + by `Reinoud Elhorst <https://bitbucket.org/reinhrst/>`__). It was `difficult to + find why tox/pytest did not report <https://stackoverflow.com/q/51573204/1307905>`__ + and as time consuming to actually `fix <https://stackoverflow.com/a/51573205/1307905>`__ + the tests. + +[0.15.45, 2018-07-26]: +- After adding failing test for `YAML.load_all(Path())`, remove StopIteration (PR + provided by `Zachary Buhman <https://bitbucket.org/buhman/>`__, also reported by + `Steven Hiscocks <https://bitbucket.org/sdhiscocks/>`__. 
+ +[0.15.44, 2018-07-14]: +- Correct loading plain scalars consisting of numerals only and starting with `0`, + when not explicitly specifying YAML version 1.1. This also fixes the issue about + dumping string `'019'` as plain scalars as reported by `Min RK <https://bitbucket.org/minrk/>`__, + that prompted this change. + +[0.15.43, 2018-07-12]: +- |- + merge PR33: Python2.7 on Windows is narrow, but has no + `sysconfig.get_config_var('Py_UNICODE_SIZE')`. (merge provided by + `Marcel Bargull <https://bitbucket.org/mbargull/>`__) + - `register_class()` now returns class (proposed by + `Mike Nerone <https://bitbucket.org/Manganeez/>`__) + +[0.15.42, 2018-07-01]: +- fix regression showing only on narrow Python 2.7 (py27mu) builds (with help from + `Marcel Bargull <https://bitbucket.org/mbargull/>`__ and `Colm O'Connor <>`__). +- run pre-commit `tox` on Python 2.7 wide and narrow, as well as 3.4/3.5/3.6/3.7/pypy + +[0.15.41, 2018-06-27]: +- add detection of C-compile failure (investigation prompted by `StackOverflow <https://stackoverflow.com/a/51057399/1307905>`__ + by `Emmanuel Blot <https://stackoverflow.com/users/8233409/emmanuel-blot>`__), which + was removed while no longer dependent on `libyaml`, C-extensions compilation still + needs a compiler though.
+ +[0.15.40, 2018-06-18]: +- added links to landing places as suggested in issue 190 by `KostisA <https://bitbucket.org/ankostis/>`__ +- |- + fixes issue #201: decoding unicode escaped tags on Python2, reported + by `Dan Abolafia <https://bitbucket.org/danabo/>`__ + +[0.15.39, 2018-06-16]: +- merge PR27 improving package startup time (and loading when regexp not actually + used), provided by `Marcel Bargull <https://bitbucket.org/mbargull/>`__ + +[0.15.38, 2018-06-13]: +- fix for losing precision when roundtripping floats by `Rolf Wojtech <https://bitbucket.org/asomov/>`__ +- fix for hardcoded dir separator not working for Windows by `Nuno André <https://bitbucket.org/nu_no/>`__ +- typo fix by `Andrey Somov <https://bitbucket.org/asomov/>`__ + +[0.15.37, 2018-03-21]: +- again trying to create installable files for 187 + +[0.15.36, 2018-02-07]: +- fix issue 187, incompatibility of C extension with 3.7 (reported by Daniel Blanchard) + +[0.15.35, 2017-12-03]: +- allow `None` as stream when specifying `transform` parameters to `YAML.dump()`. + This is useful if the transforming function doesn't return a meaningful value (inspired + by `StackOverflow <https://stackoverflow.com/q/47614862/1307905>`__ by `rsaw <https://stackoverflow.com/users/406281/rsaw>`__). + +[0.15.34, 2017-09-17]: +- fix for issue 157: CDumper not dumping floats (reported by Jan Smitka) + +[0.15.33, 2017-08-31]: +- support for "undefined" round-tripping tagged scalar objects (in addition to tagged + mapping object). Inspired by a use case presented by Matthew Patton on `StackOverflow + <https://stackoverflow.com/a/45967047/1307905>`__. +- |- + fix issue 148: replace cryptic error message when using `!!timestamp` with an + incorrectly formatted or non-scalar. Reported by FichteFoll. + +[0.15.32, 2017-08-21]: +- |- + allow setting `yaml.default_flow_style = None` (default: `False`) for for `typ='rt'`. 
+- fix for issue 149: multiplications on `ScalarFloat` now return `float` + +[0.15.31, 2017-08-15]: +- fix Comment dumping + +[0.15.30, 2017-08-14]: +- |- + fix for issue with "compact JSON" not parsing: `{"in":{},"out":{}}` + (reported on `StackOverflow <https://stackoverflow.com/q/45681626/1307905>`_ by + `mjalkio <https://stackoverflow.com/users/5130525/mjalkio>`_ + +[0.15.29, 2017-08-14]: +- |- + fix issue #51: different indents for mappings and sequences (reported by Alex Harvey) +- fix for flow sequence/mapping as element/value of block sequence with sequence-indent + minus dash-offset not equal two. + +[0.15.28, 2017-08-13]: +- |- + fix issue #61: merge of merge cannot be __repr__-ed (reported by Tal Liron) + +[0.15.27, 2017-08-13]: +- fix issue 62, YAML 1.2 allows `?` and `:` in plain scalars if non-ambigious (reported + by nowox) +- fix lists within lists which would make comments disappear + +[0.15.26, 2017-08-10]: +- fix for disappearing comment after empty flow sequence (reported by oit-tzhimmash) + +[0.15.25, 2017-08-09]: +- fix for problem with dumping (unloaded) floats (reported by eyenseo) + +[0.15.24, 2017-08-09]: +- added ScalarFloat which supports roundtripping of 23.1, 23.100, 42.00E+56, 0.0, + -0.0 etc. while keeping the format. Underscores in mantissas are not preserved/supported + (yet, is anybody using that?). +- (finally) fixed longstanding issue 23 (reported by `Antony Sottile <https://bitbucket.org/asottile/>`_), + now handling comment between block mapping key and value correctly +- warn on YAML 1.1 float input that is incorrect (triggered by invalid YAML provided + by Cecil Curry) +- |- + allow setting of boolean representation (`false`, `true`) by using: + `yaml.boolean_representation = [u'False', u'True']` + +[0.15.23, 2017-08-01]: +- fix for round_tripping integers on 2.7.X > sys.maxint (reported by ccatterina) + +[0.15.22, 2017-07-28]: +- fix for round_tripping singe excl. 
mark tags doubling (reported and fix by Jan Brezina) + +[0.15.21, 2017-07-25]: +- fix for writing unicode in new API, https://stackoverflow.com/a/45281922/1307905 + +[0.15.20, 2017-07-23]: +- wheels for windows including C extensions + +[0.15.19, 2017-07-13]: +- added object constructor for rt, decorator `yaml_object` to replace YAMLObject. +- fix for problem using load_all with Path() instance +- fix for load_all in combination with zero indent block style literal (`pure=True` + only!) + +[0.15.18, 2017-07-04]: +- missing `pure` attribute on `YAML` useful for implementing `!include` tag constructor + for `including YAML files in a YAML file <https://stackoverflow.com/a/44913652/1307905>`_ +- some documentation improvements +- trigger of doc build on new revision + +[0.15.17, 2017-07-03]: +- support for Unicode supplementary Plane **output** with allow_unicode (input was + already supported, triggered by `this <https://stackoverflow.com/a/44875714/1307905>`_ + Stack Overflow Q&A) + +[0.15.16, 2017-07-01]: +- minor typing issues (reported and fix provided by `Manvendra Singh <https://bitbucket.org/manu-chroma/>`_) +- small doc improvements + +[0.15.15, 2017-06-27]: +- fix for issue 135, typ='safe' not dumping in Python 2.7 (reported by Andrzej Ostrowski + <https://bitbucket.org/aostr123/>`_) + +[0.15.14, 2017-06-25]: +- setup.py: change ModuleNotFoundError to ImportError (reported and fix by Asley Drake) + +[0.15.13, 2017-06-24]: +- suppress duplicate key warning on mappings with merge keys (reported by Cameron + Sweeney) + +[0.15.12, 2017-06-24]: +- remove fatal dependency of setup.py on wheel package (reported by Cameron Sweeney) + +[0.15.11, 2017-06-24]: +- fix for issue 130, regression in nested merge keys (reported by `David Fee <https://bitbucket.org/dfee/>`_) + +[0.15.10, 2017-06-23]: +- top level PreservedScalarString not indented if not explicitly asked to +- remove Makefile (not very useful anyway) +- some mypy additions + +[0.15.9, 2017-06-16]: +- |- + 
fix for issue 127: tagged scalars were always quoted and seperated + by a newline when in a block sequence (reported and largely fixed by + `Tommy Wang <https://bitbucket.org/twang817/>`_) + +[0.15.8, 2017-06-15]: +- allow plug-in install via `install ruamel.yaml[jinja2]` + +[0.15.7, 2017-06-14]: +- add plug-in mechanism for load/dump pre resp. post-processing + +[0.15.6, 2017-06-10]: +- a set() with duplicate elements now throws error in rt loading +- support for toplevel column zero literal/folded scalar in explicit documents + +[0.15.5, 2017-06-08]: +- repeat `load()` on a single `YAML()` instance would fail. + +[0.15.4, 2017-06-08]: +- |- + `transform` parameter on dump that expects a function taking a + string and returning a string. This allows transformation of the output + before it is written to stream. +- some updates to the docs + +[0.15.3, 2017-06-07]: +- No longer try to compile C extensions on Windows. Compilation can be forced by setting + the environment variable `RUAMEL_FORCE_EXT_BUILD` to some value before starting + the `pip install`. + +[0.15.2, 2017-06-07]: +- update to conform to mypy 0.511:mypy --strict + +[0.15.1, 2017-06-07]: +- Any `duplicate keys <http://yaml.readthedocs.io/en/latest/api.html#duplicate-keys>`_ + in mappings generate an error (in the old API this change generates a warning until + 0.16) +- dependecy on ruamel.ordereddict for 2.7 now via extras_require + +[0.15.0, 2017-06-04]: +- it is now allowed to pass in a `pathlib.Path` as "stream" parameter to all load/dump + functions +- passing in a non-supported object (e.g. a string) as "stream" will result in a much + more meaningful YAMLStreamError. +- assigning a normal string value to an existing CommentedMap key or CommentedSeq + element will result in a value cast to the previous value's type if possible. 
+ +[0.14.12, 2017-05-14]: +- fix for issue 119, deepcopy not returning subclasses (reported and PR by Constantine + Evans <cevans@evanslabs.org>) + +[0.14.11, 2017-05-01]: +- fix for issue 103 allowing implicit documents after document end marker line (`...`) + in YAML 1.2 + +[0.14.10, 2017-04-26]: +- fix problem with emitting using cyaml + +[0.14.9, 2017-04-22]: +- remove dependency on `typing` while still supporting `mypy` (http://stackoverflow.com/a/43516781/1307905) +- fix unclarity in doc that stated 2.6 is supported (reported by feetdust) + +[0.14.8, 2017-04-19]: +- fix Text not available on 3.5.0 and 3.5.1, now proactively setting version guards + on all files (reported by `João Paulo Magalhães <https://bitbucket.org/jpmag/>`_) + +[0.14.7, 2017-04-18]: +- round trip of integers (decimal, octal, hex, binary) now preserve leading zero(s) + padding and underscores. Underscores are presumed to be at regular distances (i.e. + `0o12_345_67` dumps back as `0o1_23_45_67` as the space from the last digit to the + underscore before that is the determining factor). + +[0.14.6, 2017-04-14]: +- binary, octal and hex integers are now preserved by default. This was a known deficiency. + Working on this was prompted by the issue report (112) from devnoname120, as well + as the additional experience with `.replace()` on `scalarstring` classes. +- fix issues 114 cannot install on Buildozer (reported by mixmastamyk). Setting env. + var `RUAMEL_NO_PIP_INSTALL_CHECK` will suppress `pip`-check. 
+ +[0.14.5, 2017-04-04]: +- fix issue 109 None not dumping correctly at top level (reported by Andrea Censi) +- fix issue 110 .replace on Preserved/DoubleQuoted/SingleQuoted ScalarString would + give back "normal" string (reported by sandres23) + +[0.14.4, 2017-03-31]: +- fix readme + +[0.14.3, 2017-03-31]: +- fix for 0o52 not being a string in YAML 1.1 (reported on `StackOverflow Q&A 43138503><http://stackoverflow.com/a/43138503/1307905>`_ + by `Frank D <http://stackoverflow.com/users/7796630/frank-d>`_ + +[0.14.2, 2017-03-23]: +- fix for old default pip on Ubuntu 14.04 (reported by Sébastien Maccagnoni-Munch) + +[0.14.1, 2017-03-22]: +- fix Text not available on 3.5.0 and 3.5.1 (reported by Charles Bouchard-Légaré) + +[0.14.0, 2017-03-21]: +- updates for mypy --strict +- preparation for moving away from inheritance in Loader and Dumper, calls from e.g. + the Representer to the Serializer.serialize() are now done via the attribute .serializer.serialize(). + Usage of .serialize() outside of Serializer will be deprecated soon +- some extra tests on main.py functions + +[0.13.14, 2017-02-12]: +- fix for issue 97, clipped block scalar followed by empty lines and comment would + result in two CommentTokens of which the first was dropped. (reported by Colm O'Connor) + +[0.13.13, 2017-01-28]: +- fix for issue 96, prevent insertion of extra empty line if indented mapping entries + are separated by an empty line (reported by Derrick Sawyer) + +[0.13.11, 2017-01-23]: +- allow ':' in flow style scalars if not followed by space. Also don't quote such + scalar as this is no longer necessary. 
+- add python 3.6 manylinux wheel to PyPI + +[0.13.10, 2017-01-22]: +- fix for issue 93, insert spurious blank line before single line comment between + indented sequence elements (reported by Alex) + +[0.13.9, 2017-01-18]: +- fix for issue 92, wrong import name reported by the-corinthian + +[0.13.8, 2017-01-18]: +- fix for issue 91, when a compiler is unavailable reported by Maximilian Hils +- fix for deepcopy issue with TimeStamps not preserving 'T', reported on `StackOverflow + Q&A <http://stackoverflow.com/a/41577841/1307905>`_ by `Quuxplusone <http://stackoverflow.com/users/1424877/quuxplusone>`_ + +[0.13.7, 2016-12-27]: +- fix for issue 85, constructor.py importing unicode_literals caused mypy to fail + on 2.7 (reported by Peter Amstutz) + +[0.13.6, 2016-12-27]: +- fix for issue 83, collections.OrderedDict not representable by SafeRepresenter (reported + by Frazer McLean) + +[0.13.5, 2016-12-25]: +- fix for issue 84, deepcopy not properly working (reported by Peter Amstutz) + +[0.13.4, 2016-12-05]: +- another fix for issue 82, change to non-global resolver data broke implicit type + specification + +[0.13.3, 2016-12-05]: +- fix for issue 82, deepcopy not working (reported by code monk) + +[0.13.2, 2016-11-28]: +- fix for comments after empty (null) values (reported by dsw2127 and cokelaer) + +[0.13.1, 2016-11-22]: +- optimisations on memory usage when loading YAML from large files (py3 -50%, py2 + -85%) + +[0.13.0, 2016-11-20]: +- if `load()` or `load_all()` is called with only a single argument (stream or string) + a UnsafeLoaderWarning will be issued once. If appropriate you can surpress this + warning by filtering it. Explicitly supplying the `Loader=ruamel.yaml.Loader` argument, + will also prevent it from being issued. You should however consider using `safe_load()`, + `safe_load_all()` if your YAML input does not use tags. 
+- allow adding comments before and after keys (based on `StackOveflow Q&A <http://stackoverflow.com/a/40705671/1307905>`_ by + `msinn <http://stackoverflow.com/users/7185467/msinn>`_) + +[0.12.18, 2016-11-16]: +- another fix for numpy (re-reported independently by PaulG & Nathanial Burdic) + +[0.12.17, 2016-11-15]: +- only the RoundTripLoader included the Resolver that supports YAML 1.2 now all loaders + do (reported by mixmastamyk) + +[0.12.16, 2016-11-13]: +- allow dot char (and many others) in anchor name Fix issue 72 (reported by Shalon + Wood) +- |- + Slightly smarter behaviour dumping strings when no style is + specified. Single string scalars that start with single quotes + or have newlines now are dumped double quoted "'abc\nklm'" instead of + + '''abc + + klm''' + +[0.12.14, 2016-09-21]: +- preserve round-trip sequences that are mapping keys (prompted by stackoverflow question + 39595807 from Nowox) + +[0.12.13, 2016-09-15]: +- |- + Fix for issue #60 representation of CommentedMap with merge + keys incorrect (reported by Tal Liron) + +[0.12.11, 2016-09-06]: +- Fix issue 58 endless loop in scanning tokens (reported by Christopher Lambert) + +[0.12.10, 2016-09-05]: +- Make previous fix depend on unicode char width (32 bit unicode support is a problem + on MacOS reported by David Tagatac) + +[0.12.8, 2016-09-05]: +- To be ignored Unicode characters were not properly regex matched (no specific tests, + PR by Haraguroicha Hsu) + +[0.12.7, 2016-09-03]: +- fixing issue 54 empty lines with spaces (reported by Alex Harvey) + +[0.12.6, 2016-09-03]: +- fixing issue 46 empty lines between top-level keys were gobbled (but not between + sequence elements, nor between keys in netsted mappings (reported by Alex Harvey) + +[0.12.5, 2016-08-20]: +- |- + fixing issue 45 preserving datetime formatting (submitted by altuin) + Several formatting parameters are preserved with some normalisation: +- preserve 'T', 't' is replaced by 'T', multiple spaces between date and time 
reduced + to one. +- optional space before timezone is removed +- still using microseconds, but now rounded (.1234567 -> .123457) +- Z/-5/+01:00 preserved + +[0.12.4, 2016-08-19]: +- |- + Fix for issue 44: missing preserve_quotes keyword argument (reported by M. Crusoe) + +[0.12.3, 2016-08-17]: +- correct 'in' operation for merged CommentedMaps in round-trip mode (implementation + inspired by J.Ngo, but original not working for merges) +- iteration over round-trip loaded mappings, that contain merges. Also keys(), items(), + values() (Py3/Py2) and iterkeys(), iteritems(), itervalues(), viewkeys(), viewitems(), + viewvalues() (Py2) +- |- + reuse of anchor name now generates warning, not an error. Round-tripping such + anchors works correctly. This inherited PyYAML issue was brought to attention + by G. Coddut (and was long standing https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=515634) + suppressing the warning: + ``` + import warnings + from ruamel.yaml.error import ReusedAnchorWarning + warnings.simplefilter("ignore", ReusedAnchorWarning) + ``` + +[0.12.2, 2016-08-16]: +- minor improvements based on feedback from M. Crusoe https://bitbucket.org/ruamel/yaml/issues/42/ + +[0.12.0, 2016-08-16]: +- drop support for Python 2.6 +- include initial Type information (inspired by M. 
Crusoe) + +[0.11.15, 2016-08-07]: +- Change to prevent FutureWarning in NumPy, as reported by tgehring ("comparison to + None will result in an elementwise object comparison in the future") + +[0.11.14, 2016-07-06]: +- fix preserve_quotes missing on original Loaders (as reported by Leynos, bitbucket + issue 38) + +[0.11.13, 2016-07-06]: +- documentation only, automated linux wheels + +[0.11.12, 2016-07-06]: +- |- + added support for roundtrip of single/double quoted scalars using: + ruamel.yaml.round_trip_load(stream, preserve_quotes=True) + +[0.11.10, 2016-05-02]: +- added `.insert(pos, key, value, comment=None)` to CommentedMap + +[0.11.10, 2016-04-19]: + +- indent=2, block_seq_indent=2 works as expected + +[0.11.0, 2016-02-18]: +- RoundTripLoader loads 1.2 by default (no sexagesimals, 012 octals nor yes/no/on/off + booleans) + +[0.10.11, 2015-09-17]: +- Fix issue 13: dependency on libyaml to be installed for yaml.h + +[0.10.10, 2015-09-15]: +- Python 3.5 tested with tox +- pypy full test (old PyYAML tests failed on too many open file handles) + +[0.10.6-0.10.9, 2015-09-14]: +- Fix for issue 9 +- Fix for issue 11: double dump losing comments +- Include libyaml code +- move code from 'py' subdir for proper namespace packaging. + +[0.10.5, 2015-08-25]: +- preservation of newlines after block scalars. Contributed by Sam Thursfield. + +[0.10, 2015-06-22]: +- preservation of hand crafted anchor names ( not of the form "idNNN") +- preservation of map merges ( `<<` ) + +[0.9, 2015-04-18]: +- collections read in by the RoundTripLoader now have a `lc` property that can be + queried for line and column ( `lc.line` resp.
`lc.col`) + +[0.8, 2015-04-15]: +- bug fix for non-roundtrip save of ordereddict +- adding/replacing end of line comments on block style mappings/sequences + +[0.7.2, 2015-03-29]: +- support for end-of-line comments on flow style sequences and mappings + +[0.7.1, 2015-03-27]: +- RoundTrip capability of flow style sequences ( 'a: b, c, d' ) + +[0.7, 2015-03-26]: +- tests (currently failing) for inline sequence and non-standard spacing between block + sequence dash and scalar (Anthony Sottile) +- initial possibility (on list, i.e. CommentedSeq) to set the flow format explicitly +- RoundTrip capability of flow style sequences ( 'a: b, c, d' ) + +[0.6.1, 2015-03-15]: +- setup.py changed so ruamel.ordereddict no longer is a dependency if not on CPython + 2.x (used to test only for 2.x, which breaks pypy 2.5.0 reported by Anthony Sottile) + +[0.6, 2015-03-11]: +- basic support for scalars with preserved newlines +- html option for yaml command +- check if yaml C library is available before trying to compile C extension +- include unreleased change in PyYAML dd 20141128 + +[0.5, 2015-01-14]: +- move configobj -> YAML generator to own module +- added dependency on ruamel.base (based on feedback from Sess <leycec@gmail.com>) + +[0.4, 2014-11-25]: +- move comment classes in own module comments +- fix omap pre comment +- make !!omap and !!set take parameters.
There are still some restrictions: + - no comments before the !!tag +- extra tests + +[0.3, 2014-11-24]: +- fix value comment occurring as on previous line (looking like eol comment) +- INI conversion in yaml + tests +- (hidden) test in yaml for debugging with auto command +- fix for missing comment in middle of simple map + test + +[0.2, 2014-11-23]: +- add ext/_yaml.c etc to the source tree +- tests for yaml to work on 2.6/3.3/3.4 +- change install so that you can include ruamel.yaml instead of ruamel.yaml.py +- add "yaml" utility with initial subcommands (test rt, from json) + +[0.1, 2014-11-22]: +- merge py2 and py3 code bases +- remove support for 2.5/3.0/3.1/3.2 (this merge relies on u"" as available in 3.3 + and . imports not available in 2.5) +- tox.ini for 2.7/3.4/2.6/3.3 +- remove lib3/ and tests/lib3 directories and content +- commit +- correct --verbose for test application +- DATA=changed to be relative to __file__ of code +- DATA using os.sep +- remove os.path from imports as os is already imported +- have test_yaml.py exit with value 0 on success, 1 on failures, 2 on error +- added support for octal integers starting with '0o' keep support for 01234 as well + as 0o1234 +- commit +- |- + added test_roundtrip_data: + requires a .data file and .roundtrip (empty), yaml_load .data + and compare dump against original.
+- |- + fix grammar as per David Pursehouse: + https://bitbucket.org/xi/pyyaml/pull-request/5/fix-grammar-in-error-messages/diff +- http://www.json.org/ extra escaped char `\/` add .skip-ext as libyaml is not updated +- |- + David Fraser: Extract a method to represent keys in mappings, so that + a subclass can choose not to quote them, used in repesent_mapping + https://bitbucket.org/davidfraser/pyyaml/ +- add CommentToken and percolate through parser and composer and constructor +- add Comments to wrapped mapping and sequence constructs (not to scalars) +- generate YAML with comments +- initial README @@ -0,0 +1,21 @@ + The MIT License (MIT) + + Copyright (c) 2014-2023 Anthon van der Neut, Ruamel bvba + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. 
diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..1aa7798 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,3 @@ +include README.rst LICENSE CHANGES setup.py +prune ext* +prune clib* diff --git a/PKG-INFO b/PKG-INFO new file mode 100644 index 0000000..b4e9d9e --- /dev/null +++ b/PKG-INFO @@ -0,0 +1,430 @@ +Metadata-Version: 2.1 +Name: ruamel.yaml +Version: 0.18.5 +Summary: ruamel.yaml is a YAML parser/emitter that supports roundtrip preservation of comments, seq/map flow style, and map key order +Author: Anthon van der Neut +Author-email: a.van.der.neut@ruamel.eu +License: MIT license +Project-URL: Home, https://sourceforge.net/p/ruamel-yaml/ +Project-URL: Source, https://sourceforge.net/p/ruamel-yaml/code/ci/default/tree/ +Project-URL: Tracker, https://sourceforge.net/p/ruamel-yaml/tickets/ +Project-URL: Documentation, https://yaml.readthedocs.io/ +Keywords: yaml 1.2 parser round-trip preserve quotes order config +Classifier: Development Status :: 4 - Beta +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: MIT License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 3 :: Only +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Topic :: Software Development :: Libraries :: Python Modules +Classifier: Topic :: Text Processing :: Markup +Classifier: Typing :: Typed +Requires-Python: >=3.7 +Description-Content-Type: text/markdown; charset=UTF-8; variant=CommonMark +Provides-Extra: jinja2 +Provides-Extra: docs +License-File: LICENSE + +# ruamel.yaml + +`ruamel.yaml` is a YAML 1.2 loader/dumper 
package for Python. +<table class="docutils"> + <tr> <td>version</td> + <td>0.18.5</td> + </tr> + <tr> <td>updated</td> + <td>2023-11-03</td> + </tr> + <tr> <td>documentation</td> + <td><a href="https://yaml.readthedocs.io">https://yaml.readthedocs.io</a></td> + </tr> + <tr> <td>repository</td> + <td><a href="https://sourceforge.net/projects/ruamel-yaml">https://sourceforge.net/projects/ruamel-yaml</a></td> + </tr> + <tr> <td>pypi</td> + <td><a href="https://pypi.org/project/ruamel.yaml">https://pypi.org/project/ruamel.yaml</a></td> + </tr> +</table> + +As announced, in 0.18.0, the old PyYAML functions have been deprecated. +(`scan`, `parse`, `compose`, `load`, `emit`, `serialize`, `dump` and their variants +(`_all`, `safe_`, `round_trip_`, etc)). If you only read this after your program has +stopped working: I am sorry to hear that, but that also means you, or the person +developing your program, has not tested with warnings on (which is the recommendation +in PEP 565, and e.g. the default when using `pytest`). If you have troubles, explicitly use +``` +pip install "ruamel.yaml<0.18.0" +``` +or put something to that effect in your requirements, to give yourself +some time to solve the issue. + +There will be at least one more potentially breaking change in the 0.18 series: `YAML(typ='unsafe')` +now has a pending deprecation warning and is going to be deprecated, probably before the end of 2023. +If you only use it to dump, please use the new `YAML(typ='full')`, the result of that can be *safely* +loaded with a default instance `YAML()`, as that will get you inspectable, tagged, scalars, instead of +executed Python functions/classes. (You should probably add constructors for what you actually need, +but I do consider adding a `ruamel.yaml.unsafe` package that will re-add the `typ='unsafe'` option.) 
+*Please adjust/pin your dependencies accordingly if necessary.* + + +There seems to be a CVE on `ruamel.yaml`, stating that the `load()` function could be abused +because of unchecked input. `load()` was never the default function (that was `round_trip_load()` +before the new API came into existence). So the creator of that CVE was ill informed and +probably lazily assumed that since `ruamel.yaml` is a derivative of PyYAML (for which +a similar CVE exists), the same problem would still exist, without checking. +So the CVE was always inappropriate, now just more so, as the call +to the function `load()` with any input will terminate your program with an error message. If you +(have to) care about such things as this CVE, my recommendation is to stop using Python +completely, as `pickle.load()` can be abused in the same way as `load()` (and unlike `load()` +is only documented to be unsafe, without development-time warning). + +Version 0.17.21 was the last one tested to be working on Python 3.5 and 3.6<BR> +The 0.16.13 release was the last that was tested to be working on Python 2.7. + + +There are two extra plug-in packages +(`ruamel.yaml.bytes` and `ruamel.yaml.string`) +for those not wanting to do the streaming to a +`io.BytesIO/StringIO` buffer themselves. + +If your package uses `ruamel.yaml` and is not listed on PyPI, drop me an +email, preferably with some information on how you use the package (or a +link to the repository) and I'll keep you informed when the status of +the API is stable enough to make the transition. 
+ +<pre> + <a href="overview/#overview">Overview</a> + + <a href="install/#installing">Installing</a> + <a href="install/#optional-requirements">Optional requirements</a> + + <a href="basicuse/#basic-usage">Basic Usage</a> + <a href="basicuse/#load-and-dump">Load and dump </a> + <a href="basicuse/#more-examples">More examples</a> + + <a href="dumpcls/#working-with-python-classes">Working with Python classes</a> + <a href="dumpcls/#dumping-python-classes">Dumping Python classes</a> + <a href="dumpcls/#dataclass">Dataclass</a> + + <a href="detail/#details">Details</a> + <a href="detail/#indentation-of-block-sequences">Indentation of block sequences</a> + <a href="detail/#inconsistently-indented-yaml">Inconsistently indented YAML</a> + <a href="detail/#indenting-using-typsafe">Indenting using `typ="safe"`</a> + <a href="detail/#positioning-in-top-level-mappings-prefixing">Positioning ':' in top level mappings, prefixing ':'</a> + <a href="detail/#document-version-support">Document version support</a> + <a href="detail/#round-trip-including-comments">Round trip including comments</a> + <a href="detail/#config-file-formats">Config file formats</a> + <a href="detail/#extending">Extending</a> + <a href="detail/#smartening">Smartening</a> + + <a href="example/#examples">Examples</a> + <a href="example/#output-of-dump-as-a-string">Output of `dump()` as a string</a> + + <a href="api/#departure-from-previous-api">Departure from previous API</a> + <a href="api/#loading">Loading</a> + <a href="api/#duplicate-keys">Duplicate keys</a> + <a href="api/#dumping-a-multi-document-yaml-stream">Dumping a multi-document YAML stream</a> + <a href="api/#dumping">Dumping</a> + <a href="api/#controls">Controls</a> + <a href="api/#transparent-usage-of-new-and-old-api">Transparent usage of new and old API</a> + <a href="api/#reason-for-api-change">Reason for API change</a> + + <a href="pyyaml/#differences-with-pyyaml">Differences with PyYAML</a> + <a 
href="pyyaml/#defaulting-to-yaml-12-support">Defaulting to YAML 1.2 support</a> + <a href="pyyaml/#py2py3-reintegration">PY2/PY3 reintegration</a> + <a href="pyyaml/#fixes">Fixes</a> + <a href="pyyaml/#testing">Testing</a> + <a href="pyyaml/#api">API</a> + + <a href="contributing/#contributing">Contributing</a> + <a href="contributing/#documentation">Documentation</a> + <a href="contributing/#code">Code</a> + <a href="contributing/#flake">Flake</a> + <a href="contributing/#toxpytest">Tox/pytest</a> + <a href="contributing/#typingmypy">Typing/mypy</a> + <a href="contributing/#generated-files">Generated files</a> + <a href="contributing/#vulnerabilities">Vulnerabilities</a> +</pre> + + +[![image](https://readthedocs.org/projects/yaml/badge/?version=latest)](https://yaml.readthedocs.org/en/latest?badge=latest)[![image](https://bestpractices.coreinfrastructure.org/projects/1128/badge)](https://bestpractices.coreinfrastructure.org/projects/1128) +[![image](https://sourceforge.net/p/ruamel-yaml/code/ci/default/tree/_doc/_static/license.svg?format=raw)](https://opensource.org/licenses/MIT) +[![image](https://sourceforge.net/p/ruamel-yaml/code/ci/default/tree/_doc/_static/pypi.svg?format=raw)](https://pypi.org/project/ruamel.yaml/) +[![image](https://sourceforge.net/p/oitnb/code/ci/default/tree/_doc/_static/oitnb.svg?format=raw)](https://pypi.org/project/oitnb/) +[![image](http://www.mypy-lang.org/static/mypy_badge.svg)](http://mypy-lang.org/) + +# ChangeLog + +0.18.5 (2023-11-03): + +- there is some indication that dependent packages have been pinned to use specific (tested) and just install the latest even in Python versions that have end-of-life + +0.18.4 (2023-11-01): + +- YAML() instance has a `doc_infos` attribute which is a cumulative list of DocInfo instances (one for `load()`, one per document for `load_all()`). 
DocInfo instances contain version information (requested, directive) and tag directive information +- fix issue that the YAML instance tags attribute was not reset between documents, resulting in mixing of tag directives of multiple documents. Now only provides tag directive information on latest document after loading. This means tags for dumping must be set **again** after a document is loaded with the same instance. (because of this tags will be removed in favour of a different mechanism in the future) +- fix issue with multiple document intermixing YAML 1.2 and YAML 1.1, the VersionedResolver now resets +- fix issue with disappearing comment when next token was Tag (still can't have both a comment before a tag and after a tag, before node) + +0.18.3 (2023-10-29): + +- fix issue with spurious newline on first item after comment + nested block sequence +- additional links in the metadata on PyPI (Reported, with pointers how to fix, by [Sorin](https://sourceforge.net/u/ssbarnea/profile/)). + +0.18.2 (2023-10-24): + +- calling the deprecated functions now raises an `AttributeError` with the, somewhat more informative, original warning message, instead of calling `sys.exit(1)` + +0.18.1 (2023-10-24): + +- calling the deprecated functions now always displays the warning message. (reported by [Trend Lloyd](https://sourceforge.net/u/lathiat2/profile/)) + +0.18.0 (2023-10-23): + +- the **functions** `scan`, `parse`, `compose`, `load`, `emit`, `serialize`, `dump` and their variants (`_all`, `safe_`, `round_trip_`, etc) have been deprecated (the same named **methods** on `YAML()` instances are, of course, still there). +- `YAML(typ='unsafe')` now issues a `PendingDeprecationWarning`. This will become deprecated in the 0.18 series +(probably before the end of 2023). +You can use `YAML(typ='full')` to dump unregistered Python classes/functions. +For loading you'll have to register your classes/functions +if you want the old, unsafe, functionality. 
You can still load any tag, like `!!python/name:posix.system`, **safely** +with the (default) round-trip parser. +- fix for `bytes-like object is required not 'str' while dumping binary streams`. This was reported, analysed and a fix provided by [Vit Zikmund](https://sourceforge.net/u/tlwhitec/profile/) + +0.17.40 (2023-10-20): + +- flow style sets are now preserved (`!!set {a, b, c}`). Any values specified when loading are dropped, including `!!null ""`. +- potential workaround for issue 484: the long_description_content_type including the variant specification `CommonMark` +can result in problems on Azure. If you can install from `.tar.gz` using +`RUAMEL_NO_LONG_DESCRIPTION=1 pip install ruamel.yaml --no-binary :all:` then the long description, and its +offending type, are not included (in the METADATA). +(Reported by [Coury Ditch](https://sourceforge.net/u/cmditch/profile/)) +- links in documentation update (reported by [David Hoese](https://sourceforge.net/u/daveydave400/profile/)) +- Added some `__repr__` for internally used classes + +0.17.39 (2023-10-19): + +- update README generation, no code changes + +0.17.36 (2023-10-19): + +- fixed issue 480, dumping of a loaded empty flow-style mapping with comment failed (Reported by [Stéphane Brunner](https://sourceforge.net/u/stbrunner/profile/)) +- fixed issue 482, caused by DEFAULT_MAPPING_TAG having changed to being a `Tag()` instance, not a string (reported by [yan12125](https://sourceforge.net/u/yan12125/profile/)) +- updated documentation to use mkdocs + +0.17.35 (2023-10-04): + +- support for loading dataclasses with `InitVar` variables (some special coding was necessary to get the, unexpected, default value in the corresponding instance attribute (example of usage in [this question](https://stackoverflow.com/q/77228378/1307905))) + +0.17.34 (2023-10-03): + +- Python 3.12 also loads C version when using `typ='safe'` +- initial support for invoking +`__post_init__()` on dataclasses that have that 
+method after loading a registered dataclass. +(Originally +[asked](https://stackoverflow.com/q/51529458/1307905) on +Stackoverflow by +[nyanpasu64](https://stackoverflow.com/users/2683842/nyanpasu64) +and as +[ticket](https://sourceforge.net/p/ruamel-yaml/tickets/355/) by +[Patrick Lehmann](https://sourceforge.net/u/paebbels/profile/)) + +``` +@yaml.register_class +@dataclass +class ... +``` + +0.17.33 (2023-09-28): + +- added `flow_seq_start`, `flow_seq_end`, `flow_seq_separator`, `flow_map_start`, `flow_map_end`, `flow_map_separator` **class** attributes to the `Emitter` class so flow style output can more easily be influenced (based on [this answer](https://stackoverflow.com/a/76547814/1307905) on a StackOverflow question by [Huw Walters](https://stackoverflow.com/users/291033/huw-walters)). + +0.17.32 (2023-06-17): + +- fix issue with scanner getting stuck in infinite loop + +0.17.31 (2023-05-31): + +- added tag.setter on `ScalarEvent` and on `Node`, that takes either a `Tag` instance, or a str (reported by [Sorin Sbarnea](https://sourceforge.net/u/ssbarnea/profile/)) + +0.17.30 (2023-05-30): + +- fix issue 467, caused by Tag instances not being hashable (reported by [Douglas Raillard](https://bitbucket.org/%7Bcf052d92-a278-4339-9aa8-de41923bb556%7D/)) + +0.17.29 (2023-05-30): + +- changed the internals of the tag property from a string to a class which allows for preservation of the original handle and suffix. This should result in better results using documents with %TAG directives, as well as preserving URI escapes in tag suffixes. 
+ +0.17.28 (2023-05-26): + +- fix for issue 464: documents ending with document end marker +without final newline fail to load (reported by [Mariusz +Rusiniak](https://sourceforge.net/u/r2dan/profile/)) + +0.17.27 (2023-05-25): + +- fix issue with inline mappings as value for merge keys (reported by Sirish on [StackOverflow](https://stackoverflow.com/q/76331049/1307905)) +- fix for 468, error inserting after accessing merge attribute on `CommentedMap` (reported by [Bastien gerard](https://sourceforge.net/u/bagerard/)) +- fix for issue 461 pop + insert on same `CommentedMap` key throwing error (reported by [John Thorvald Wodder II](https://sourceforge.net/u/jwodder/profile/)) + +0.17.26 (2023-05-09): + +- fix for error on edge cage for issue 459 + +0.17.25 (2023-05-09): + +- fix for regression while dumping wrapped strings with too many backslashes removed (issue 459, reported by [Lele Gaifax](https://sourceforge.net/u/lele/profile/)) + +0.17.24 (2023-05-06): + +- rewrite of `CommentedMap.insert()`. If you have a merge key in the YAML document for the mapping you insert to, the position value should be the one as you look at the YAML input. This fixes issue 453 where other keys of a merged in mapping would show up after an insert (reported by [Alex Miller](https://sourceforge.net/u/millerdevel/profile/)). It also fixes a call to `.insert()` resulting into the merge key to move to be the first key if it wasn't already and it is also now possible to insert a key before a merge key (even if the fist key in the mapping). +- fix (in the pure Python implementation including default) for issue 447. (reported by [Jack Cherng](https://sourceforge.net/u/jfcherng/profile/), also brought up by brent on [StackOverflow](https://stackoverflow.com/q/40072485/1307905)) + +0.17.23 (2023-05-05): + +- fix 458, error on plain scalars starting with word longer than width. 
(reported by [Kyle Larose](https://sourceforge.net/u/klarose/profile/)) +- fix for `.update()` no longer correctly handling keyword arguments (reported by John Lin on [StackOverflow]( https://stackoverflow.com/q/76089100/1307905)) +- fix issue 454: high Unicode (emojis) in quoted strings always +escaped (reported by [Michal +Čihař](https://sourceforge.net/u/nijel/profile/) based on a +question on StackOverflow). +- fix issue with emitter conservatively inserting extra backslashes in wrapped quoted strings (reported by thebenman on [StackOverflow](https://stackoverflow.com/q/75631454/1307905)) + +0.17.22 (2023-05-02): + +- fix issue 449 where the second exclamation marks got URL encoded (reported and fixing PR provided by [John Stark](https://sourceforge.net/u/jods/profile/)) +- fix issue with indent != 2 and literal scalars with empty first line (reported by wrdis on [StackOverflow](https://stackoverflow.com/q/75584262/1307905)) +- updated `__repr__` of CommentedMap, now that Python's dict is ordered -> no more `ordereddict(list-of-tuples)` +- merge MR 4, handling OctalInt in YAML 1.1 (provided by [Jacob Floyd](https://sourceforge.net/u/cognifloyd/profile/)) +- fix loading of `!!float 42` (reported by Eric on [Stack overflow](https://stackoverflow.com/a/71555107/1307905)) +- line numbers are now set on `CommentedKeySeq` and `CommentedKeyMap` (which are created if you have a sequence resp. mapping as the key in a mapping) +- plain scalars: put single words longer than width on a line of +their own, instead of after the previous line (issue 427, reported +by [Antoine +Cotten](https://sourceforge.net/u/antoineco/profile/)). Caveat: +this currently results in a space ending the previous line. +- fix for folded scalar part of 421: comments after ">" on first +line of folded scalars are now preserved (as were those in the +same position on literal scalars). Issue reported by Jacob Floyd. 
+- added stacklevel to warnings +- typing changed from Py2 compatible comments to Py3, removed various Py2-isms + +0.17.21 (2022-02-12): + +- fix bug in calling `.compose()` method with `pathlib.Path` instance. + +0.17.20 (2022-01-03): + +- fix error in microseconds while rounding datetime fractions >= 9999995 (reported by [Luis Ferreira](https://sourceforge.net/u/ljmf00/)) + +0.17.19 (2021-12-26): + +- fix mypy problems (reported by [Arun](https://sourceforge.net/u/arunppsg/profile/)) + +0.17.18 (2021-12-24): + +- copy-paste error in folded scalar comment attachment (reported by [Stephan Geulette](https://sourceforge.net/u/sgeulette/profile/)) +- fix 411, indent error comment between key empty seq value (reported by [Guillermo Julián](https://sourceforge.net/u/gjulianm/profile/)) + +0.17.17 (2021-10-31): + +- extract timestamp matching/creation to util + +0.17.16 (2021-08-28): + +- 398 also handle issue 397 when comment is newline + +0.17.15 (2021-08-28): + +- fix issue 397, insert comment before key when a comment between key and value exists (reported by [Bastien gerard](https://sourceforge.net/u/bagerard/)) + +0.17.14 (2021-08-25): + +- fix issue 396, inserting key/val in merged-in dictionary (reported by [Bastien gerard](https://sourceforge.net/u/bagerard/)) + +0.17.13 (2021-08-21): + +- minor fix in attr handling + +0.17.12 (2021-08-21): + +- fix issue with anchor on registered class not preserved and those classes using package attrs with `@attr.s()` (both reported by [ssph](https://sourceforge.net/u/sph/)) + +0.17.11 (2021-08-19): + +- fix error baseclass for `DuplicateKeyError` (reported by [Łukasz Rogalski](https://sourceforge.net/u/lrogalski/)) +- fix typo in reader error message, causing `KeyError` during reader error (reported by [MTU](https://sourceforge.net/u/mtu/)) + +0.17.10 (2021-06-24): + +- fix issue 388, token with old comment structure != two elements (reported by [Dimitrios Bariamis](https://sourceforge.net/u/dbdbc/)) + +0.17.9 (2021-06-10): 
+ +- fix issue with updating CommentedMap (reported by sri on [StackOverflow](https://stackoverflow.com/q/67911659/1307905)) + +0.17.8 (2021-06-09): + +- fix for issue 387 where templated anchors on tagged object did get set resulting in potential id reuse. (reported by [Artem Ploujnikov](https://sourceforge.net/u/flexthink/)) + +0.17.7 (2021-05-31): + +- issue 385 also affected other deprecated loaders (reported via email by Oren Watson) + +0.17.6 (2021-05-31): + +- merged type annotations update provided by [Jochen Sprickerhof](https://sourceforge.net/u/jspricke/) +- fix for issue 385: deprecated round_trip_loader function not +working (reported by [Mike +Gouline](https://sourceforge.net/u/gouline/)) +- wasted a few hours getting rid of mypy warnings/errors + +0.17.5 (2021-05-30): + +- fix for issue 384 `!!set` with aliased entry resulting in broken YAML on rt reported by [William Kimball](https://sourceforge.net/u/william303/)) + +0.17.4 (2021-04-07): + +- prevent (empty) comments from throwing assertion error (issue 351 reported by [William Kimball](https://sourceforge.net/u/william303/)) comments (or empty line) will be dropped + +0.17.3 (2021-04-07): + +- fix for issue 382 caused by an error in a format string (reported by [William Kimball](https://sourceforge.net/u/william303/)) +- allow expansion of aliases by setting `yaml.composer.return_alias = lambda s: copy.deepcopy(s)` +(as per [Stackoverflow answer](https://stackoverflow.com/a/66983530/1307905)) + +0.17.2 (2021-03-29): + +- change -py2.py3-none-any.whl to -py3-none-any.whl, and remove 0.17.1 + +0.17.1 (2021-03-29): + +- added 'Programming Language :: Python :: 3 :: Only', and +removing 0.17.0 from PyPI (reported by [Alasdair +Nicol](https://sourceforge.net/u/alasdairnicol/)) + +0.17.0 (2021-03-26): + +- removed because of incomplete classifiers +- this release no longer supports Python 2.7, most if not all Python 2 specific code is removed. 
The 0.17.x series is the last to support Python 3.5 (this also allowed for removal of the dependency on `ruamel.std.pathlib`) +- remove Python2 specific code branches and adaptations (u-strings) +- prepare % code for f-strings using `_F` +- allow PyOxidisation ([issue 324](https://sourceforge.net/p/ruamel-yaml/tickets/324/) resp. [issue 171](https://github.com/indygreg/PyOxidizer/issues/171)) +- replaced Python 2 compatible enforcement of keyword arguments with '*' +- the old top level *functions* `load`, `safe_load`, `round_trip_load`, `dump`, `safe_dump`, `round_trip_dump`, `scan`, `parse`, `compose`, `emit`, `serialize` as well as their `_all` variants for multi-document streams, now issue a `PendingDeprecationning` (e.g. when run from pytest, but also Python is started with `-Wd`). Use the methods on `YAML()`, which have been extended. +- fix for issue 376: indentation changes could put literal/folded +scalar to start before the `#` column of a following comment. +Effectively making the comment part of the scalar in the output. +(reported by [Bence Nagy](https://sourceforge.net/u/underyx/)) + +------------------------------------------------------------------------ + +For older changes see the file +[CHANGES](https://sourceforge.net/p/ruamel-yaml/code/ci/default/tree/CHANGES) diff --git a/README.md b/README.md new file mode 100644 index 0000000..bf3d8cc --- /dev/null +++ b/README.md @@ -0,0 +1,396 @@ +# ruamel.yaml + +`ruamel.yaml` is a YAML 1.2 loader/dumper package for Python. 
+<table class="docutils"> + <tr> <td>version</td> + <td>0.18.5</td> + </tr> + <tr> <td>updated</td> + <td>2023-11-03</td> + </tr> + <tr> <td>documentation</td> + <td><a href="https://yaml.readthedocs.io">https://yaml.readthedocs.io</a></td> + </tr> + <tr> <td>repository</td> + <td><a href="https://sourceforge.net/projects/ruamel-yaml">https://sourceforge.net/projects/ruamel-yaml</a></td> + </tr> + <tr> <td>pypi</td> + <td><a href="https://pypi.org/project/ruamel.yaml">https://pypi.org/project/ruamel.yaml</a></td> + </tr> +</table> + +As announced, in 0.18.0, the old PyYAML functions have been deprecated. +(`scan`, `parse`, `compose`, `load`, `emit`, `serialize`, `dump` and their variants +(`_all`, `safe_`, `round_trip_`, etc)). If you only read this after your program has +stopped working: I am sorry to hear that, but that also means you, or the person +developing your program, has not tested with warnings on (which is the recommendation +in PEP 565, and e.g. the default when using `pytest`). If you have troubles, explicitly use +``` +pip install "ruamel.yaml<0.18.0" +``` +or put something to that effect in your requirements, to give yourself +some time to solve the issue. + +There will be at least one more potentially breaking change in the 0.18 series: `YAML(typ='unsafe')` +now has a pending deprecation warning and is going to be deprecated, probably before the end of 2023. +If you only use it to dump, please use the new `YAML(typ='full')`, the result of that can be *safely* +loaded with a default instance `YAML()`, as that will get you inspectable, tagged, scalars, instead of +executed Python functions/classes. (You should probably add constructors for what you actually need, +but I do consider adding a `ruamel.yaml.unsafe` package that will re-add the `typ='unsafe'` option.) +*Please adjust/pin your dependencies accordingly if necessary.* + + +There seems to be a CVE on `ruamel.yaml`, stating that the `load()` function could be abused +because of unchecked input. 
`load()` was never the default function (that was `round_trip_load()` +before the new API came into existence). So the creator of that CVE was ill informed and +probably lazily assumed that since `ruamel.yaml` is a derivative of PyYAML (for which +a similar CVE exists), the same problem would still exist, without checking. +So the CVE was always inappropriate, now just more so, as the call +to the function `load()` with any input will terminate your program with an error message. If you +(have to) care about such things as this CVE, my recommendation is to stop using Python +completely, as `pickle.load()` can be abused in the same way as `load()` (and unlike `load()` +is only documented to be unsafe, without development-time warning). + +Version 0.17.21 was the last one tested to be working on Python 3.5 and 3.6<BR> +The 0.16.13 release was the last that was tested to be working on Python 2.7. + + +There are two extra plug-in packages +(`ruamel.yaml.bytes` and `ruamel.yaml.string`) +for those not wanting to do the streaming to a +`io.BytesIO/StringIO` buffer themselves. + +If your package uses `ruamel.yaml` and is not listed on PyPI, drop me an +email, preferably with some information on how you use the package (or a +link to the repository) and I'll keep you informed when the status of +the API is stable enough to make the transition. 
+ +<pre> + <a href="overview/#overview">Overview</a> + + <a href="install/#installing">Installing</a> + <a href="install/#optional-requirements">Optional requirements</a> + + <a href="basicuse/#basic-usage">Basic Usage</a> + <a href="basicuse/#load-and-dump">Load and dump </a> + <a href="basicuse/#more-examples">More examples</a> + + <a href="dumpcls/#working-with-python-classes">Working with Python classes</a> + <a href="dumpcls/#dumping-python-classes">Dumping Python classes</a> + <a href="dumpcls/#dataclass">Dataclass</a> + + <a href="detail/#details">Details</a> + <a href="detail/#indentation-of-block-sequences">Indentation of block sequences</a> + <a href="detail/#inconsistently-indented-yaml">Inconsistently indented YAML</a> + <a href="detail/#indenting-using-typsafe">Indenting using `typ="safe"`</a> + <a href="detail/#positioning-in-top-level-mappings-prefixing">Positioning ':' in top level mappings, prefixing ':'</a> + <a href="detail/#document-version-support">Document version support</a> + <a href="detail/#round-trip-including-comments">Round trip including comments</a> + <a href="detail/#config-file-formats">Config file formats</a> + <a href="detail/#extending">Extending</a> + <a href="detail/#smartening">Smartening</a> + + <a href="example/#examples">Examples</a> + <a href="example/#output-of-dump-as-a-string">Output of `dump()` as a string</a> + + <a href="api/#departure-from-previous-api">Departure from previous API</a> + <a href="api/#loading">Loading</a> + <a href="api/#duplicate-keys">Duplicate keys</a> + <a href="api/#dumping-a-multi-document-yaml-stream">Dumping a multi-document YAML stream</a> + <a href="api/#dumping">Dumping</a> + <a href="api/#controls">Controls</a> + <a href="api/#transparent-usage-of-new-and-old-api">Transparent usage of new and old API</a> + <a href="api/#reason-for-api-change">Reason for API change</a> + + <a href="pyyaml/#differences-with-pyyaml">Differences with PyYAML</a> + <a 
href="pyyaml/#defaulting-to-yaml-12-support">Defaulting to YAML 1.2 support</a> + <a href="pyyaml/#py2py3-reintegration">PY2/PY3 reintegration</a> + <a href="pyyaml/#fixes">Fixes</a> + <a href="pyyaml/#testing">Testing</a> + <a href="pyyaml/#api">API</a> + + <a href="contributing/#contributing">Contributing</a> + <a href="contributing/#documentation">Documentation</a> + <a href="contributing/#code">Code</a> + <a href="contributing/#flake">Flake</a> + <a href="contributing/#toxpytest">Tox/pytest</a> + <a href="contributing/#typingmypy">Typing/mypy</a> + <a href="contributing/#generated-files">Generated files</a> + <a href="contributing/#vulnerabilities">Vulnerabilities</a> +</pre> + + +[![image](https://readthedocs.org/projects/yaml/badge/?version=latest)](https://yaml.readthedocs.org/en/latest?badge=latest)[![image](https://bestpractices.coreinfrastructure.org/projects/1128/badge)](https://bestpractices.coreinfrastructure.org/projects/1128) +[![image](https://sourceforge.net/p/ruamel-yaml/code/ci/default/tree/_doc/_static/license.svg?format=raw)](https://opensource.org/licenses/MIT) +[![image](https://sourceforge.net/p/ruamel-yaml/code/ci/default/tree/_doc/_static/pypi.svg?format=raw)](https://pypi.org/project/ruamel.yaml/) +[![image](https://sourceforge.net/p/oitnb/code/ci/default/tree/_doc/_static/oitnb.svg?format=raw)](https://pypi.org/project/oitnb/) +[![image](http://www.mypy-lang.org/static/mypy_badge.svg)](http://mypy-lang.org/) + +# ChangeLog + +0.18.5 (2023-11-03): + +- there is some indication that dependent packages have been pinned to use specific (tested) and just install the latest even in Python versions that have end-of-life + +0.18.4 (2023-11-01): + +- YAML() instance has a `doc_infos` attribute which is a cumulative list of DocInfo instances (one for `load()`, one per document for `load_all()`). 
DocInfo instances contain version information (requested, directive) and tag directive information +- fix issue that the YAML instance tags attribute was not reset between documents, resulting in mixing of tag directives of multiple documents. Now only provides tag directive information on latest document after loading. This means tags for dumping must be set **again** after a document is loaded with the same instance. (because of this tags will be removed in favour of a different mechanism in the future) +- fix issue with multiple document intermixing YAML 1.2 and YAML 1.1, the VersionedResolver now resets +- fix issue with disappearing comment when next token was Tag (still can't have both a comment before a tag and after a tag, before node) + +0.18.3 (2023-10-29): + +- fix issue with spurious newline on first item after comment + nested block sequence +- additional links in the metadata on PyPI (Reported, with pointers how to fix, by [Sorin](https://sourceforge.net/u/ssbarnea/profile/)). + +0.18.2 (2023-10-24): + +- calling the deprecated functions now raises an `AttributeError` with the, somewhat more informative, original warning message, instead of calling `sys.exit(1)` + +0.18.1 (2023-10-24): + +- calling the deprecated functions now always displays the warning message. (reported by [Trend Lloyd](https://sourceforge.net/u/lathiat2/profile/)) + +0.18.0 (2023-10-23): + +- the **functions** `scan`, `parse`, `compose`, `load`, `emit`, `serialize`, `dump` and their variants (`_all`, `safe_`, `round_trip_`, etc) have been deprecated (the same named **methods** on `YAML()` instances are, of course, still there). +- `YAML(typ='unsafe')` now issues a `PendingDeprecationWarning`. This will become deprecated in the 0.18 series +(probably before the end of 2023). +You can use `YAML(typ='full')` to dump unregistered Python classes/functions. +For loading you'll have to register your classes/functions +if you want the old, unsafe, functionality. 
You can still load any tag, like `!!python/name:posix.system`, **safely** +with the (default) round-trip parser. +- fix for `bytes-like object is required not 'str' while dumping binary streams`. This was reported, analysed and a fix provided by [Vit Zikmund](https://sourceforge.net/u/tlwhitec/profile/) + +0.17.40 (2023-10-20): + +- flow style sets are now preserved (`!!set {a, b, c}`). Any values specified when loading are dropped, including `!!null ""`. +- potential workaround for issue 484: the long_description_content_type including the variant specification `CommonMark` +can result in problems on Azure. If you can install from `.tar.gz` using +`RUAMEL_NO_LONG_DESCRIPTION=1 pip install ruamel.yaml --no-binary :all:` then the long description, and its +offending type, are not included (in the METADATA). +(Reported by [Coury Ditch](https://sourceforge.net/u/cmditch/profile/)) +- links in documentation update (reported by [David Hoese](https://sourceforge.net/u/daveydave400/profile/)) +- Added some `__repr__` for internally used classes + +0.17.39 (2023-10-19): + +- update README generation, no code changes + +0.17.36 (2023-10-19): + +- fixed issue 480, dumping of a loaded empty flow-style mapping with comment failed (Reported by [Stéphane Brunner](https://sourceforge.net/u/stbrunner/profile/)) +- fixed issue 482, caused by DEFAULT_MAPPING_TAG having changed to being a `Tag()` instance, not a string (reported by [yan12125](https://sourceforge.net/u/yan12125/profile/)) +- updated documentation to use mkdocs + +0.17.35 (2023-10-04): + +- support for loading dataclasses with `InitVar` variables (some special coding was necessary to get the, unexpected, default value in the corresponding instance attribute (example of usage in [this question](https://stackoverflow.com/q/77228378/1307905))) + +0.17.34 (2023-10-03): + +- Python 3.12 also loads C version when using `typ='safe'` +- initial support for invoking +`__post_init__()` on dataclasses that have that 
+method after loading a registered dataclass. +(Originally +[asked](https://stackoverflow.com/q/51529458/1307905) on +Stackoverflow by +[nyanpasu64](https://stackoverflow.com/users/2683842/nyanpasu64) +and as +[ticket](https://sourceforge.net/p/ruamel-yaml/tickets/355/) by +[Patrick Lehmann](https://sourceforge.net/u/paebbels/profile/)) + +``` +@yaml.register_class +@dataclass +class ... +``` + +0.17.33 (2023-09-28): + +- added `flow_seq_start`, `flow_seq_end`, `flow_seq_separator`, `flow_map_start`, `flow_map_end`, `flow_map_separator` **class** attributes to the `Emitter` class so flow style output can more easily be influenced (based on [this answer](https://stackoverflow.com/a/76547814/1307905) on a StackOverflow question by [Huw Walters](https://stackoverflow.com/users/291033/huw-walters)). + +0.17.32 (2023-06-17): + +- fix issue with scanner getting stuck in infinite loop + +0.17.31 (2023-05-31): + +- added tag.setter on `ScalarEvent` and on `Node`, that takes either a `Tag` instance, or a str (reported by [Sorin Sbarnea](https://sourceforge.net/u/ssbarnea/profile/)) + +0.17.30 (2023-05-30): + +- fix issue 467, caused by Tag instances not being hashable (reported by [Douglas Raillard](https://bitbucket.org/%7Bcf052d92-a278-4339-9aa8-de41923bb556%7D/)) + +0.17.29 (2023-05-30): + +- changed the internals of the tag property from a string to a class which allows for preservation of the original handle and suffix. This should result in better results using documents with %TAG directives, as well as preserving URI escapes in tag suffixes. 
+ +0.17.28 (2023-05-26): + +- fix for issue 464: documents ending with document end marker +without final newline fail to load (reported by [Mariusz +Rusiniak](https://sourceforge.net/u/r2dan/profile/)) + +0.17.27 (2023-05-25): + +- fix issue with inline mappings as value for merge keys (reported by Sirish on [StackOverflow](https://stackoverflow.com/q/76331049/1307905)) +- fix for 468, error inserting after accessing merge attribute on `CommentedMap` (reported by [Bastien gerard](https://sourceforge.net/u/bagerard/)) +- fix for issue 461 pop + insert on same `CommentedMap` key throwing error (reported by [John Thorvald Wodder II](https://sourceforge.net/u/jwodder/profile/)) + +0.17.26 (2023-05-09): + +- fix for error on edge case for issue 459 + +0.17.25 (2023-05-09): + +- fix for regression while dumping wrapped strings with too many backslashes removed (issue 459, reported by [Lele Gaifax](https://sourceforge.net/u/lele/profile/)) + +0.17.24 (2023-05-06): + +- rewrite of `CommentedMap.insert()`. If you have a merge key in the YAML document for the mapping you insert to, the position value should be the one as you look at the YAML input. This fixes issue 453 where other keys of a merged in mapping would show up after an insert (reported by [Alex Miller](https://sourceforge.net/u/millerdevel/profile/)). It also fixes a call to `.insert()` resulting in the merge key moving to be the first key if it wasn't already and it is also now possible to insert a key before a merge key (even if the first key in the mapping). +- fix (in the pure Python implementation including default) for issue 447. (reported by [Jack Cherng](https://sourceforge.net/u/jfcherng/profile/), also brought up by brent on [StackOverflow](https://stackoverflow.com/q/40072485/1307905)) + +0.17.23 (2023-05-05): + +- fix 458, error on plain scalars starting with word longer than width. 
(reported by [Kyle Larose](https://sourceforge.net/u/klarose/profile/)) +- fix for `.update()` no longer correctly handling keyword arguments (reported by John Lin on [StackOverflow]( https://stackoverflow.com/q/76089100/1307905)) +- fix issue 454: high Unicode (emojis) in quoted strings always +escaped (reported by [Michal +Čihař](https://sourceforge.net/u/nijel/profile/) based on a +question on StackOverflow). +- fix issue with emitter conservatively inserting extra backslashes in wrapped quoted strings (reported by thebenman on [StackOverflow](https://stackoverflow.com/q/75631454/1307905)) + +0.17.22 (2023-05-02): + +- fix issue 449 where the second exclamation marks got URL encoded (reported and fixing PR provided by [John Stark](https://sourceforge.net/u/jods/profile/)) +- fix issue with indent != 2 and literal scalars with empty first line (reported by wrdis on [StackOverflow](https://stackoverflow.com/q/75584262/1307905)) +- updated `__repr__` of CommentedMap, now that Python's dict is ordered -> no more `ordereddict(list-of-tuples)` +- merge MR 4, handling OctalInt in YAML 1.1 (provided by [Jacob Floyd](https://sourceforge.net/u/cognifloyd/profile/)) +- fix loading of `!!float 42` (reported by Eric on [Stack overflow](https://stackoverflow.com/a/71555107/1307905)) +- line numbers are now set on `CommentedKeySeq` and `CommentedKeyMap` (which are created if you have a sequence resp. mapping as the key in a mapping) +- plain scalars: put single words longer than width on a line of +their own, instead of after the previous line (issue 427, reported +by [Antoine +Cotten](https://sourceforge.net/u/antoineco/profile/)). Caveat: +this currently results in a space ending the previous line. +- fix for folded scalar part of 421: comments after ">" on first +line of folded scalars are now preserved (as were those in the +same position on literal scalars). Issue reported by Jacob Floyd. 
+- added stacklevel to warnings +- typing changed from Py2 compatible comments to Py3, removed various Py2-isms + +0.17.21 (2022-02-12): + +- fix bug in calling `.compose()` method with `pathlib.Path` instance. + +0.17.20 (2022-01-03): + +- fix error in microseconds while rounding datetime fractions >= 9999995 (reported by [Luis Ferreira](https://sourceforge.net/u/ljmf00/)) + +0.17.19 (2021-12-26): + +- fix mypy problems (reported by [Arun](https://sourceforge.net/u/arunppsg/profile/)) + +0.17.18 (2021-12-24): + +- copy-paste error in folded scalar comment attachment (reported by [Stephan Geulette](https://sourceforge.net/u/sgeulette/profile/)) +- fix 411, indent error comment between key empty seq value (reported by [Guillermo Julián](https://sourceforge.net/u/gjulianm/profile/)) + +0.17.17 (2021-10-31): + +- extract timestamp matching/creation to util + +0.17.16 (2021-08-28): + +- 398 also handle issue 397 when comment is newline + +0.17.15 (2021-08-28): + +- fix issue 397, insert comment before key when a comment between key and value exists (reported by [Bastien gerard](https://sourceforge.net/u/bagerard/)) + +0.17.14 (2021-08-25): + +- fix issue 396, inserting key/val in merged-in dictionary (reported by [Bastien gerard](https://sourceforge.net/u/bagerard/)) + +0.17.13 (2021-08-21): + +- minor fix in attr handling + +0.17.12 (2021-08-21): + +- fix issue with anchor on registered class not preserved and those classes using package attrs with `@attr.s()` (both reported by [ssph](https://sourceforge.net/u/sph/)) + +0.17.11 (2021-08-19): + +- fix error baseclass for `DuplicateKeyError` (reported by [Łukasz Rogalski](https://sourceforge.net/u/lrogalski/)) +- fix typo in reader error message, causing `KeyError` during reader error (reported by [MTU](https://sourceforge.net/u/mtu/)) + +0.17.10 (2021-06-24): + +- fix issue 388, token with old comment structure != two elements (reported by [Dimitrios Bariamis](https://sourceforge.net/u/dbdbc/)) + +0.17.9 (2021-06-10): 
+ +- fix issue with updating CommentedMap (reported by sri on [StackOverflow](https://stackoverflow.com/q/67911659/1307905)) + +0.17.8 (2021-06-09): + +- fix for issue 387 where templated anchors on tagged object did get set resulting in potential id reuse. (reported by [Artem Ploujnikov](https://sourceforge.net/u/flexthink/)) + +0.17.7 (2021-05-31): + +- issue 385 also affected other deprecated loaders (reported via email by Oren Watson) + +0.17.6 (2021-05-31): + +- merged type annotations update provided by [Jochen Sprickerhof](https://sourceforge.net/u/jspricke/) +- fix for issue 385: deprecated round_trip_loader function not +working (reported by [Mike +Gouline](https://sourceforge.net/u/gouline/)) +- wasted a few hours getting rid of mypy warnings/errors + +0.17.5 (2021-05-30): + +- fix for issue 384 `!!set` with aliased entry resulting in broken YAML on rt (reported by [William Kimball](https://sourceforge.net/u/william303/)) + +0.17.4 (2021-04-07): + +- prevent (empty) comments from throwing assertion error (issue 351 reported by [William Kimball](https://sourceforge.net/u/william303/)). Such comments (or empty lines) will be dropped + +0.17.3 (2021-04-07): + +- fix for issue 382 caused by an error in a format string (reported by [William Kimball](https://sourceforge.net/u/william303/)) +- allow expansion of aliases by setting `yaml.composer.return_alias = lambda s: copy.deepcopy(s)` +(as per [Stackoverflow answer](https://stackoverflow.com/a/66983530/1307905)) + +0.17.2 (2021-03-29): + +- change -py2.py3-none-any.whl to -py3-none-any.whl, and remove 0.17.1 + +0.17.1 (2021-03-29): + +- added 'Programming Language :: Python :: 3 :: Only', and +removing 0.17.0 from PyPI (reported by [Alasdair +Nicol](https://sourceforge.net/u/alasdairnicol/)) + +0.17.0 (2021-03-26): + +- removed because of incomplete classifiers +- this release no longer supports Python 2.7, most if not all Python 2 specific code is removed. 
The 0.17.x series is the last to support Python 3.5 (this also allowed for removal of the dependency on `ruamel.std.pathlib`) +- remove Python2 specific code branches and adaptations (u-strings) +- prepare % code for f-strings using `_F` +- allow PyOxidisation ([issue 324](https://sourceforge.net/p/ruamel-yaml/tickets/324/) resp. [issue 171](https://github.com/indygreg/PyOxidizer/issues/171)) +- replaced Python 2 compatible enforcement of keyword arguments with '*' +- the old top level *functions* `load`, `safe_load`, `round_trip_load`, `dump`, `safe_dump`, `round_trip_dump`, `scan`, `parse`, `compose`, `emit`, `serialize` as well as their `_all` variants for multi-document streams, now issue a `PendingDeprecationWarning` (e.g. when run from pytest, but also when Python is started with `-Wd`). Use the methods on `YAML()`, which have been extended. +- fix for issue 376: indentation changes could put literal/folded +scalar to start before the `#` column of a following comment, +effectively making the comment part of the scalar in the output. 
+(reported by [Bence Nagy](https://sourceforge.net/u/underyx/)) + +------------------------------------------------------------------------ + +For older changes see the file +[CHANGES](https://sourceforge.net/p/ruamel-yaml/code/ci/default/tree/CHANGES) diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..f8cfa43 --- /dev/null +++ b/__init__.py @@ -0,0 +1,56 @@ +# coding: utf-8 + +if False: # MYPY + from typing import Dict, Any # NOQA + +_package_data = dict( + full_package_name='ruamel.yaml', + version_info=(0, 18, 5), + __version__='0.18.5', + version_timestamp='2023-11-03 08:54:26', + author='Anthon van der Neut', + author_email='a.van.der.neut@ruamel.eu', + description='ruamel.yaml is a YAML parser/emitter that supports roundtrip preservation of comments, seq/map flow style, and map key order', # NOQA + entry_points=None, + since=2014, + extras_require={ + ':platform_python_implementation=="CPython" and python_version<"3.13"': ['ruamel.yaml.clib>=0.2.7'], # NOQA + 'jinja2': ['ruamel.yaml.jinja2>=0.2'], + 'docs': ['ryd', 'mercurial>5.7'], + }, + classifiers=[ + 'Programming Language :: Python :: 3 :: Only', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', + 'Programming Language :: Python :: Implementation :: CPython', + 'Topic :: Software Development :: Libraries :: Python Modules', + 'Topic :: Text Processing :: Markup', + 'Typing :: Typed', + ], + keywords='yaml 1.2 parser round-trip preserve quotes order config', + read_the_docs='yaml', + supported=[(3, 7)], # minimum + tox=dict( + env='*', + fl8excl='_test/lib,branch_default', + ), + # universal=True, + python_requires='>=3.7', +) # type: Dict[Any, Any] + + +version_info = _package_data['version_info'] +__version__ = _package_data['__version__'] + +try: + from .cyaml import * # 
NOQA + + __with_libyaml__ = True +except (ImportError, ValueError): # for Jython + __with_libyaml__ = False + +from ruamel.yaml.main import * # NOQA diff --git a/anchor.py b/anchor.py new file mode 100644 index 0000000..1eb1480 --- /dev/null +++ b/anchor.py @@ -0,0 +1,18 @@ +# coding: utf-8 + +from typing import Any, Dict, Optional, List, Union, Optional, Iterator # NOQA + +anchor_attrib = '_yaml_anchor' + + +class Anchor: + __slots__ = 'value', 'always_dump' + attrib = anchor_attrib + + def __init__(self) -> None: + self.value = None + self.always_dump = False + + def __repr__(self) -> Any: + ad = ', (always dump)' if self.always_dump else "" + return f'Anchor({self.value!r}{ad})' diff --git a/comments.py b/comments.py new file mode 100644 index 0000000..843b329 --- /dev/null +++ b/comments.py @@ -0,0 +1,1175 @@ +# coding: utf-8 + +""" +stuff to deal with comments and formatting on dict/list/ordereddict/set +these are not really related, formatting could be factored out as +a separate base +""" + +import sys +import copy + + +from ruamel.yaml.compat import ordereddict +from ruamel.yaml.compat import MutableSliceableSequence, nprintf # NOQA +from ruamel.yaml.scalarstring import ScalarString +from ruamel.yaml.anchor import Anchor +from ruamel.yaml.tag import Tag + +from collections.abc import MutableSet, Sized, Set, Mapping + +from typing import Any, Dict, Optional, List, Union, Optional, Iterator # NOQA + +# fmt: off +__all__ = ['CommentedSeq', 'CommentedKeySeq', + 'CommentedMap', 'CommentedOrderedMap', + 'CommentedSet', 'comment_attrib', 'merge_attrib', + 'TaggedScalar', + 'C_POST', 'C_PRE', 'C_SPLIT_ON_FIRST_BLANK', 'C_BLANK_LINE_PRESERVE_SPACE', + ] +# fmt: on + +# splitting of comments by the scanner +# an EOLC (End-Of-Line Comment) is preceded by some token +# an FLC (Full Line Comment) is a comment not preceded by a token, i.e. # is +# the first non-blank on line +# a BL is a blank line i.e. 
empty or spaces/tabs only +# bits 0 and 1 are combined, you can choose only one +C_POST = 0b00 +C_PRE = 0b01 +C_SPLIT_ON_FIRST_BLANK = 0b10 # as C_POST, but if blank line then C_PRE all lines before +# first blank goes to POST even if no following real FLC +# (first blank -> first of post) +# 0b11 -> reserved for future use +C_BLANK_LINE_PRESERVE_SPACE = 0b100 +# C_EOL_PRESERVE_SPACE2 = 0b1000 + + +class IDX: + # temporary auto increment, so rearranging is easier + def __init__(self) -> None: + self._idx = 0 + + def __call__(self) -> Any: + x = self._idx + self._idx += 1 + return x + + def __str__(self) -> Any: + return str(self._idx) + + +cidx = IDX() + +# more or less in order of subjective expected likelihood +# the _POST and _PRE ones are lists themselves +C_VALUE_EOL = C_ELEM_EOL = cidx() +C_KEY_EOL = cidx() +C_KEY_PRE = C_ELEM_PRE = cidx() # note this is not value +C_VALUE_POST = C_ELEM_POST = cidx() # note this is not value +C_VALUE_PRE = cidx() +C_KEY_POST = cidx() +C_TAG_EOL = cidx() +C_TAG_POST = cidx() +C_TAG_PRE = cidx() +C_ANCHOR_EOL = cidx() +C_ANCHOR_POST = cidx() +C_ANCHOR_PRE = cidx() + + +comment_attrib = '_yaml_comment' +format_attrib = '_yaml_format' +line_col_attrib = '_yaml_line_col' +merge_attrib = '_yaml_merge' + + +class Comment: + # using sys.getsizeof tested the Comment objects, __slots__ makes them bigger + # and adding self.end did not matter + __slots__ = 'comment', '_items', '_post', '_pre' + attrib = comment_attrib + + def __init__(self, old: bool = True) -> None: + self._pre = None if old else [] # type: ignore + self.comment = None # [post, [pre]] + # map key (mapping/omap/dict) or index (sequence/list) to a list of + # dict: post_key, pre_key, post_value, pre_value + # list: pre item, post item + self._items: Dict[Any, Any] = {} + # self._start = [] # should not put these on first item + self._post: List[Any] = [] # end of document comments + + def __str__(self) -> str: + if bool(self._post): + end = ',\n end=' + str(self._post) + 
else: + end = "" + return f'Comment(comment={self.comment},\n items={self._items}{end})' + + def _old__repr__(self) -> str: + if bool(self._post): + end = ',\n end=' + str(self._post) + else: + end = "" + try: + ln = max([len(str(k)) for k in self._items]) + 1 + except ValueError: + ln = '' # type: ignore + it = ' '.join([f'{str(k) + ":":{ln}} {v}\n' for k, v in self._items.items()]) + if it: + it = '\n ' + it + ' ' + return f'Comment(\n start={self.comment},\n items={{{it}}}{end})' + + def __repr__(self) -> str: + if self._pre is None: + return self._old__repr__() + if bool(self._post): + end = ',\n end=' + repr(self._post) + else: + end = "" + try: + ln = max([len(str(k)) for k in self._items]) + 1 + except ValueError: + ln = '' # type: ignore + it = ' '.join([f'{str(k) + ":":{ln}} {v}\n' for k, v in self._items.items()]) + if it: + it = '\n ' + it + ' ' + return f'Comment(\n pre={self.pre},\n items={{{it}}}{end})' + + @property + def items(self) -> Any: + return self._items + + @property + def end(self) -> Any: + return self._post + + @end.setter + def end(self, value: Any) -> None: + self._post = value + + @property + def pre(self) -> Any: + return self._pre + + @pre.setter + def pre(self, value: Any) -> None: + self._pre = value + + def get(self, item: Any, pos: Any) -> Any: + x = self._items.get(item) + if x is None or len(x) < pos: + return None + return x[pos] # can be None + + def set(self, item: Any, pos: Any, value: Any) -> Any: + x = self._items.get(item) + if x is None: + self._items[item] = x = [None] * (pos + 1) + else: + while len(x) <= pos: + x.append(None) + assert x[pos] is None + x[pos] = value + + def __contains__(self, x: Any) -> Any: + # test if a substring is in any of the attached comments + if self.comment: + if self.comment[0] and x in self.comment[0].value: + return True + if self.comment[1]: + for c in self.comment[1]: + if x in c.value: + return True + for value in self.items.values(): + if not value: + continue + for c in value: + if 
c and x in c.value: + return True + if self.end: + for c in self.end: + if x in c.value: + return True + return False + + +# to distinguish key from None +class NotNone: + pass # NOQA + + +class Format: + __slots__ = ('_flow_style',) + attrib = format_attrib + + def __init__(self) -> None: + self._flow_style: Any = None + + def set_flow_style(self) -> None: + self._flow_style = True + + def set_block_style(self) -> None: + self._flow_style = False + + def flow_style(self, default: Optional[Any] = None) -> Any: + """if default (the flow_style) is None, the flow style tacked on to + the object explicitly will be taken. If that is None as well the + default flow style rules the format down the line, or the type + of the constituent values (simple -> flow, map/list -> block)""" + if self._flow_style is None: + return default + return self._flow_style + + def __repr__(self) -> str: + return f'Format({self._flow_style})' + + +class LineCol: + """ + line and column information wrt document, values start at zero (0) + """ + + attrib = line_col_attrib + + def __init__(self) -> None: + self.line = None + self.col = None + self.data: Optional[Dict[Any, Any]] = None + + def add_kv_line_col(self, key: Any, data: Any) -> None: + if self.data is None: + self.data = {} + self.data[key] = data + + def key(self, k: Any) -> Any: + return self._kv(k, 0, 1) + + def value(self, k: Any) -> Any: + return self._kv(k, 2, 3) + + def _kv(self, k: Any, x0: Any, x1: Any) -> Any: + if self.data is None: + return None + data = self.data[k] + return data[x0], data[x1] + + def item(self, idx: Any) -> Any: + if self.data is None: + return None + return self.data[idx][0], self.data[idx][1] + + def add_idx_line_col(self, key: Any, data: Any) -> None: + if self.data is None: + self.data = {} + self.data[key] = data + + def __repr__(self) -> str: + return f'LineCol({self.line}, {self.col})' + + +class CommentedBase: + @property + def ca(self): + # type: () -> Any + if not hasattr(self, Comment.attrib): 
+ setattr(self, Comment.attrib, Comment()) + return getattr(self, Comment.attrib) + + def yaml_end_comment_extend(self, comment: Any, clear: bool = False) -> None: + if comment is None: + return + if clear or self.ca.end is None: + self.ca.end = [] + self.ca.end.extend(comment) + + def yaml_key_comment_extend(self, key: Any, comment: Any, clear: bool = False) -> None: + r = self.ca._items.setdefault(key, [None, None, None, None]) + if clear or r[1] is None: + if comment[1] is not None: + assert isinstance(comment[1], list) + r[1] = comment[1] + else: + r[1].extend(comment[0]) + r[0] = comment[0] + + def yaml_value_comment_extend(self, key: Any, comment: Any, clear: bool = False) -> None: + r = self.ca._items.setdefault(key, [None, None, None, None]) + if clear or r[3] is None: + if comment[1] is not None: + assert isinstance(comment[1], list) + r[3] = comment[1] + else: + r[3].extend(comment[0]) + r[2] = comment[0] + + def yaml_set_start_comment(self, comment: Any, indent: Any = 0) -> None: + """overwrites any preceding comment lines on an object + expects comment to be without `#` and possible have multiple lines + """ + from .error import CommentMark + from .tokens import CommentToken + + pre_comments = self._yaml_clear_pre_comment() # type: ignore + if comment[-1] == '\n': + comment = comment[:-1] # strip final newline if there + start_mark = CommentMark(indent) + for com in comment.split('\n'): + c = com.strip() + if len(c) > 0 and c[0] != '#': + com = '# ' + com + pre_comments.append(CommentToken(com + '\n', start_mark)) + + def yaml_set_comment_before_after_key( + self, + key: Any, + before: Any = None, + indent: Any = 0, + after: Any = None, + after_indent: Any = None, + ) -> None: + """ + expects comment (before/after) to be without `#` and possible have multiple lines + """ + from ruamel.yaml.error import CommentMark + from ruamel.yaml.tokens import CommentToken + + def comment_token(s: Any, mark: Any) -> Any: + # handle empty lines as having no comment + 
return CommentToken(('# ' if s else "") + s + '\n', mark) + + if after_indent is None: + after_indent = indent + 2 + if before and (len(before) > 1) and before[-1] == '\n': + before = before[:-1] # strip final newline if there + if after and after[-1] == '\n': + after = after[:-1] # strip final newline if there + start_mark = CommentMark(indent) + c = self.ca.items.setdefault(key, [None, [], None, None]) + if before is not None: + if c[1] is None: + c[1] = [] + if before == '\n': + c[1].append(comment_token("", start_mark)) # type: ignore + else: + for com in before.split('\n'): + c[1].append(comment_token(com, start_mark)) # type: ignore + if after: + start_mark = CommentMark(after_indent) + if c[3] is None: + c[3] = [] + for com in after.split('\n'): + c[3].append(comment_token(com, start_mark)) # type: ignore + + @property + def fa(self) -> Any: + """format attribute + + set_flow_style()/set_block_style()""" + if not hasattr(self, Format.attrib): + setattr(self, Format.attrib, Format()) + return getattr(self, Format.attrib) + + def yaml_add_eol_comment( + self, comment: Any, key: Optional[Any] = NotNone, column: Optional[Any] = None, + ) -> None: + """ + there is a problem as eol comments should start with ' #' + (but at the beginning of the line the space doesn't have to be before + the #. 
The column index is for the # mark + """ + from .tokens import CommentToken + from .error import CommentMark + + if column is None: + try: + column = self._yaml_get_column(key) + except AttributeError: + column = 0 + if comment[0] != '#': + comment = '# ' + comment + if column is None: + if comment[0] == '#': + comment = ' ' + comment + column = 0 + start_mark = CommentMark(column) + ct = [CommentToken(comment, start_mark), None] + self._yaml_add_eol_comment(ct, key=key) + + @property + def lc(self) -> Any: + if not hasattr(self, LineCol.attrib): + setattr(self, LineCol.attrib, LineCol()) + return getattr(self, LineCol.attrib) + + def _yaml_set_line_col(self, line: Any, col: Any) -> None: + self.lc.line = line + self.lc.col = col + + def _yaml_set_kv_line_col(self, key: Any, data: Any) -> None: + self.lc.add_kv_line_col(key, data) + + def _yaml_set_idx_line_col(self, key: Any, data: Any) -> None: + self.lc.add_idx_line_col(key, data) + + @property + def anchor(self) -> Any: + if not hasattr(self, Anchor.attrib): + setattr(self, Anchor.attrib, Anchor()) + return getattr(self, Anchor.attrib) + + def yaml_anchor(self) -> Any: + if not hasattr(self, Anchor.attrib): + return None + return self.anchor + + def yaml_set_anchor(self, value: Any, always_dump: bool = False) -> None: + self.anchor.value = value + self.anchor.always_dump = always_dump + + @property + def tag(self) -> Any: + if not hasattr(self, Tag.attrib): + setattr(self, Tag.attrib, Tag()) + return getattr(self, Tag.attrib) + + def yaml_set_ctag(self, value: Tag) -> None: + setattr(self, Tag.attrib, value) + + def copy_attributes(self, t: Any, memo: Any = None) -> Any: + """ + copies the YAML related attributes, not e.g. 
.values + returns target + """ + # fmt: off + for a in [Comment.attrib, Format.attrib, LineCol.attrib, Anchor.attrib, + Tag.attrib, merge_attrib]: + if hasattr(self, a): + if memo is not None: + setattr(t, a, copy.deepcopy(getattr(self, a, memo))) + else: + setattr(t, a, getattr(self, a)) + return t + # fmt: on + + def _yaml_add_eol_comment(self, comment: Any, key: Any) -> None: + raise NotImplementedError + + def _yaml_get_pre_comment(self) -> Any: + raise NotImplementedError + + def _yaml_get_column(self, key: Any) -> Any: + raise NotImplementedError + + +class CommentedSeq(MutableSliceableSequence, list, CommentedBase): # type: ignore + __slots__ = (Comment.attrib, '_lst') + + def __init__(self, *args: Any, **kw: Any) -> None: + list.__init__(self, *args, **kw) + + def __getsingleitem__(self, idx: Any) -> Any: + return list.__getitem__(self, idx) + + def __setsingleitem__(self, idx: Any, value: Any) -> None: + # try to preserve the scalarstring type if setting an existing key to a new value + if idx < len(self): + if ( + isinstance(value, str) + and not isinstance(value, ScalarString) + and isinstance(self[idx], ScalarString) + ): + value = type(self[idx])(value) + list.__setitem__(self, idx, value) + + def __delsingleitem__(self, idx: Any = None) -> Any: + list.__delitem__(self, idx) + self.ca.items.pop(idx, None) # might not be there -> default value + for list_index in sorted(self.ca.items): + if list_index < idx: + continue + self.ca.items[list_index - 1] = self.ca.items.pop(list_index) + + def __len__(self) -> int: + return list.__len__(self) + + def insert(self, idx: Any, val: Any) -> None: + """the comments after the insertion have to move forward""" + list.insert(self, idx, val) + for list_index in sorted(self.ca.items, reverse=True): + if list_index < idx: + break + self.ca.items[list_index + 1] = self.ca.items.pop(list_index) + + def extend(self, val: Any) -> None: + list.extend(self, val) + + def __eq__(self, other: Any) -> bool: + return 
list.__eq__(self, other) + + def _yaml_add_comment(self, comment: Any, key: Optional[Any] = NotNone) -> None: + if key is not NotNone: + self.yaml_key_comment_extend(key, comment) + else: + self.ca.comment = comment + + def _yaml_add_eol_comment(self, comment: Any, key: Any) -> None: + self._yaml_add_comment(comment, key=key) + + def _yaml_get_columnX(self, key: Any) -> Any: + return self.ca.items[key][0].start_mark.column + + def _yaml_get_column(self, key: Any) -> Any: + column = None + sel_idx = None + pre, post = key - 1, key + 1 + if pre in self.ca.items: + sel_idx = pre + elif post in self.ca.items: + sel_idx = post + else: + # self.ca.items is not ordered + for row_idx, _k1 in enumerate(self): + if row_idx >= key: + break + if row_idx not in self.ca.items: + continue + sel_idx = row_idx + if sel_idx is not None: + column = self._yaml_get_columnX(sel_idx) + return column + + def _yaml_get_pre_comment(self) -> Any: + pre_comments: List[Any] = [] + if self.ca.comment is None: + self.ca.comment = [None, pre_comments] + else: + pre_comments = self.ca.comment[1] + return pre_comments + + def _yaml_clear_pre_comment(self) -> Any: + pre_comments: List[Any] = [] + if self.ca.comment is None: + self.ca.comment = [None, pre_comments] + else: + self.ca.comment[1] = pre_comments + return pre_comments + + def __deepcopy__(self, memo: Any) -> Any: + res = self.__class__() + memo[id(self)] = res + for k in self: + res.append(copy.deepcopy(k, memo)) + self.copy_attributes(res, memo=memo) + return res + + def __add__(self, other: Any) -> Any: + return list.__add__(self, other) + + def sort(self, key: Any = None, reverse: bool = False) -> None: + if key is None: + tmp_lst = sorted(zip(self, range(len(self))), reverse=reverse) + list.__init__(self, [x[0] for x in tmp_lst]) + else: + tmp_lst = sorted( + zip(map(key, list.__iter__(self)), range(len(self))), reverse=reverse, + ) + list.__init__(self, [list.__getitem__(self, x[1]) for x in tmp_lst]) + itm = self.ca.items + 
self.ca._items = {} + for idx, x in enumerate(tmp_lst): + old_index = x[1] + if old_index in itm: + self.ca.items[idx] = itm[old_index] + + def __repr__(self) -> Any: + return list.__repr__(self) + + +class CommentedKeySeq(tuple, CommentedBase): # type: ignore + """This primarily exists to be able to roundtrip keys that are sequences""" + + def _yaml_add_comment(self, comment: Any, key: Optional[Any] = NotNone) -> None: + if key is not NotNone: + self.yaml_key_comment_extend(key, comment) + else: + self.ca.comment = comment + + def _yaml_add_eol_comment(self, comment: Any, key: Any) -> None: + self._yaml_add_comment(comment, key=key) + + def _yaml_get_columnX(self, key: Any) -> Any: + return self.ca.items[key][0].start_mark.column + + def _yaml_get_column(self, key: Any) -> Any: + column = None + sel_idx = None + pre, post = key - 1, key + 1 + if pre in self.ca.items: + sel_idx = pre + elif post in self.ca.items: + sel_idx = post + else: + # self.ca.items is not ordered + for row_idx, _k1 in enumerate(self): + if row_idx >= key: + break + if row_idx not in self.ca.items: + continue + sel_idx = row_idx + if sel_idx is not None: + column = self._yaml_get_columnX(sel_idx) + return column + + def _yaml_get_pre_comment(self) -> Any: + pre_comments: List[Any] = [] + if self.ca.comment is None: + self.ca.comment = [None, pre_comments] + else: + pre_comments = self.ca.comment[1] + return pre_comments + + def _yaml_clear_pre_comment(self) -> Any: + pre_comments: List[Any] = [] + if self.ca.comment is None: + self.ca.comment = [None, pre_comments] + else: + self.ca.comment[1] = pre_comments + return pre_comments + + +class CommentedMapView(Sized): + __slots__ = ('_mapping',) + + def __init__(self, mapping: Any) -> None: + self._mapping = mapping + + def __len__(self) -> int: + count = len(self._mapping) + return count + + +class CommentedMapKeysView(CommentedMapView, Set): # type: ignore + __slots__ = () + + @classmethod + def _from_iterable(self, it: Any) -> Any: + return 
set(it) + + def __contains__(self, key: Any) -> Any: + return key in self._mapping + + def __iter__(self) -> Any: + # yield from self._mapping # not in py27, pypy + # for x in self._mapping._keys(): + for x in self._mapping: + yield x + + +class CommentedMapItemsView(CommentedMapView, Set): # type: ignore + __slots__ = () + + @classmethod + def _from_iterable(self, it: Any) -> Any: + return set(it) + + def __contains__(self, item: Any) -> Any: + key, value = item + try: + v = self._mapping[key] + except KeyError: + return False + else: + return v == value + + def __iter__(self) -> Any: + for key in self._mapping._keys(): + yield (key, self._mapping[key]) + + +class CommentedMapValuesView(CommentedMapView): + __slots__ = () + + def __contains__(self, value: Any) -> Any: + for key in self._mapping: + if value == self._mapping[key]: + return True + return False + + def __iter__(self) -> Any: + for key in self._mapping._keys(): + yield self._mapping[key] + + +class CommentedMap(ordereddict, CommentedBase): + __slots__ = (Comment.attrib, '_ok', '_ref') + + def __init__(self, *args: Any, **kw: Any) -> None: + self._ok: MutableSet[Any] = set() # own keys + self._ref: List[CommentedMap] = [] + ordereddict.__init__(self, *args, **kw) + + def _yaml_add_comment( + self, comment: Any, key: Optional[Any] = NotNone, value: Optional[Any] = NotNone, + ) -> None: + """values is set to key to indicate a value attachment of comment""" + if key is not NotNone: + self.yaml_key_comment_extend(key, comment) + return + if value is not NotNone: + self.yaml_value_comment_extend(value, comment) + else: + self.ca.comment = comment + + def _yaml_add_eol_comment(self, comment: Any, key: Any) -> None: + """add on the value line, with value specified by the key""" + self._yaml_add_comment(comment, value=key) + + def _yaml_get_columnX(self, key: Any) -> Any: + return self.ca.items[key][2].start_mark.column + + def _yaml_get_column(self, key: Any) -> Any: + column = None + sel_idx = None + pre, 
def update(self, *vals: Any, **kw: Any) -> None:
    """Update the mapping like `dict.update` and register the new keys as own keys.

    `vals` may hold one mapping or one iterable of key/value pairs; `kw`
    holds extra key/value pairs.  Every key that is set this way is added to
    `self._ok`, the set of keys that belong to this mapping itself.
    """
    try:
        ordereddict.update(self, *vals, **kw)
    except TypeError:
        # probably a dict that is used
        for x in vals[0]:
            self[x] = vals[0][x]
    if vals:
        try:
            self._ok.update(vals[0].keys())  # type: ignore
        except AttributeError:
            # assume one argument that is a list/tuple of two element lists/tuples
            for x in vals[0]:
                self._ok.add(x[0])
    if kw:
        # Iterating the dict yields every keyword name as a whole.  Unpacking
        # the names (`*kw.keys()`) would pass each name as a separate iterable
        # and thereby register its individual characters instead.
        self._ok.update(kw)  # type: ignore
def get(self, key: Any, default: Any = None) -> Any:
    """Return the value for `key`, or `default` when the lookup fails.

    The lookup goes through `self.__getitem__`.  Any ordinary exception
    raised by it (not only KeyError, e.g. also the TypeError for an
    unhashable key) results in `default` being returned.
    """
    try:
        return self.__getitem__(key)
    # `Exception` instead of a bare except: KeyboardInterrupt and SystemExit
    # must not be turned into a silently returned default
    except Exception:  # NOQA
        return default
def copy(self) -> Any:
    """Return a shallow copy of the same type, including the copied attributes."""
    clone = type(self)()
    # filled item by item; the original author notes that update() doesn't work here
    for key, value in self._items():
        clone[key] = value
    self.copy_attributes(clone)
    return clone
@classmethod
def fromkeys(cls, keys: Any, v: Any = None) -> Any:
    """Create a new map in which every key from `keys` has the value `v`.

    A classmethod receives the class as its first argument.  Without the
    `cls` parameter the class itself ended up in `keys` and the call failed
    with a TypeError.
    """
    return cls(dict.fromkeys(keys, v))
def discard(self, value: Any) -> None:
    """Remove an element.  Do not raise an exception if absent."""
    # pop with a default: `del self.odict[value]` raises KeyError for an
    # element that is not in the set, which discard must not do
    self.odict.pop(value, None)
class ordereddict(OrderedDict):  # type: ignore
    """OrderedDict that offers positional insertion if the base class has none."""

    if not hasattr(OrderedDict, 'insert'):

        def insert(self, pos: int, key: Any, value: Any) -> None:
            """Insert `key: value` so that it ends up at index `pos`.

            A `pos` at or beyond the end appends; a negative `pos` counts
            from the end, as for `list.insert`.  A key that is already
            present is moved to the new position and gets the new value.
            """
            if key in self:
                # otherwise the old entry would be re-added by the rebuild
                # below and could overwrite the new value
                del self[key]
            size = len(self)
            if pos < 0:
                # a negative index never matched during the rebuild below,
                # so the new key was silently dropped
                pos = max(0, size + pos)
            if pos >= size:
                self[key] = value
                return
            od = ordereddict()
            od.update(self)
            for k in od:
                del self[k]
            # re-add everything in the old order, slotting the new key in
            for index, old_key in enumerate(od):
                if pos == index:
                    self[key] = value
                self[old_key] = od[old_key]
class Nprint:
    """Debug printer that does nothing unless the module level `_debug` is set.

    Output goes to `sys.stdout`, or is appended to `file_name` when one is
    given.  `set_max_print` arms a countdown after which the process exits.
    """

    def __init__(self, file_name: Any = None) -> None:
        self._max_print: Any = None
        self._count: Any = None
        self._file_name = file_name

    def __call__(self, *args: Any, **kw: Any) -> None:
        """Print `args` like `print` would, to stdout or the configured file."""
        if not bool(_debug):
            return
        out = sys.stdout if self._file_name is None else open(self._file_name, 'a')
        try:
            dbgprint = print  # to fool checking for print statements by dv utility
            kw1 = kw.copy()
            kw1['file'] = out
            dbgprint(*args, **kw1)
            out.flush()
            if self._max_print is not None:
                if self._count is None:
                    self._count = self._max_print
                self._count -= 1
                if self._count == 0:
                    dbgprint('forced exit\n')
                    traceback.print_stack()
                    out.flush()
                    sys.exit(0)
        finally:
            # also reached on the forced exit and when printing fails, so the
            # log file opened above does not stay open
            if self._file_name:
                out.close()

    def set_max_print(self, i: int) -> None:
        """Exit the process after `i` more printed messages."""
        self._max_print = i
        self._count = None

    def fp(self, mode: str = 'a') -> Any:
        """Return stdout, or the configured file freshly opened with `mode`.

        The caller is responsible for closing a returned file.
        """
        out = sys.stdout if self._file_name is None else open(self._file_name, mode)
        return out
class MutableSliceableSequence(collections.abc.MutableSequence):  # type: ignore
    """MutableSequence that implements slicing on top of single item hooks.

    Subclasses provide `__getsingleitem__`, `__setsingleitem__` and
    `__delsingleitem__` (plus `__len__` and `insert`); this class maps slice
    access onto those.
    """

    __slots__ = ()

    def __getitem__(self, index: Any) -> Any:
        """Return one item, or a new instance of the same type for a slice."""
        if not isinstance(index, slice):
            return self.__getsingleitem__(index)
        return type(self)([self[i] for i in range(*index.indices(len(self)))])  # type: ignore

    def __setitem__(self, index: Any, value: Any) -> None:
        """Assign one item, or the items of `value` to a slice.

        A slice without step may change the length.  A slice with a step
        needs exactly as many items as it selects, else TypeError is raised.
        """
        if not isinstance(index, slice):
            return self.__setsingleitem__(index, value)
        # Materialise first: raises TypeError for a non-iterable, supports
        # one-shot iterables and is safe when `value` is this sequence itself.
        items = list(value)
        # nprint(index.start, index.stop, index.step, index.indices(len(self)))
        if index.step is None:
            # Normalise the bounds before deleting.  A negative start used
            # as-is after the deletion would refer to a different position.
            start, stop, _ = index.indices(len(self))
            if stop < start:
                stop = start
            del self[start:stop]
            for elem in reversed(items):
                self.insert(start, elem)
        else:
            range_parms = index.indices(len(self))
            # len(range(...)) is also correct for negative steps
            nr_assigned_items = len(range(*range_parms))
            # need to test before changing, in case TypeError is caught
            if nr_assigned_items < len(items):
                raise TypeError(
                    f'too many elements in value {nr_assigned_items} < {len(items)}',
                )
            elif nr_assigned_items > len(items):
                raise TypeError(
                    f'not enough elements in value {nr_assigned_items} > {len(items)}',
                )
            for idx, i in enumerate(range(*range_parms)):
                self[i] = items[idx]

    def __delitem__(self, index: Any) -> None:
        """Delete one item, or all items selected by a slice."""
        if not isinstance(index, slice):
            return self.__delsingleitem__(index)
        # nprint(index.start, index.stop, index.step, index.indices(len(self)))
        # Highest index first, so a deletion never shifts an item that still
        # has to go.  Reversing the range is not enough for negative steps.
        for i in sorted(range(*index.indices(len(self))), reverse=True):
            del self[i]

    @abstractmethod
    def __getsingleitem__(self, index: Any) -> Any:
        raise IndexError

    @abstractmethod
    def __setsingleitem__(self, index: Any, value: Any) -> None:
        raise IndexError

    @abstractmethod
    def __delsingleitem__(self, index: Any) -> None:
        raise IndexError
def get_single_node(self) -> Any:
    """Compose the only document of the stream; return None for an empty stream.

    Raises ComposerError when another document follows the first one.
    """
    self.parser.get_event()  # discard STREAM-START
    stream_is_empty = self.parser.check_event(StreamEndEvent)
    document: Any = None if stream_is_empty else self.compose_document()
    if self.parser.check_event(StreamEndEvent):
        self.parser.get_event()  # discard STREAM-END
        return document
    # anything other than the end of the stream means a second document
    event = self.parser.get_event()
    raise ComposerError(
        'expected a single document in the stream',
        document.start_mark,
        'but found another document',
        event.start_mark,
    )
def compose_sequence_node(self, anchor: Any) -> Any:
    """Consume the events of one sequence and return the SequenceNode built from them.

    The tag is taken from the start event, or resolved via the resolver when
    the event has no tag (or only '!').  A node with an anchor is stored in
    `self.anchors` under that anchor.
    """
    opening = self.parser.get_event()
    tag = opening.ctag
    if tag is None or str(tag) == '!':
        tag = self.resolver.resolve(SequenceNode, None, opening.implicit)
        assert not isinstance(tag, str)
    node = SequenceNode(
        tag,
        [],
        opening.start_mark,
        None,
        flow_style=opening.flow_style,
        comment=opening.comment,
        anchor=anchor,
    )
    if anchor is not None:
        # stored before the children are composed
        self.anchors[anchor] = node
    position = 0
    while not self.parser.check_event(SequenceEndEvent):
        node.value.append(self.compose_node(node, position))
        position += 1
    closing = self.parser.get_event()
    if node.flow_style is True and closing.comment is not None:
        if node.comment is not None:
            x = node.flow_style
            nprint(
                f'Warning: unexpected end_event commment in sequence node {x}',
            )
        # the comment of the end event replaces the one from the start event
        node.comment = closing.comment
    node.end_mark = closing.end_mark
    self.check_end_doc_comment(closing, node)
    return node
def check_end_doc_comment(self, end_event: Any, node: Any) -> None:
    """Move the second comment element of `end_event` to the end of `node.comment`.

    Nothing happens when the end event has no comment or its element 1 is
    empty.
    """
    if not (end_event.comment and end_event.comment[1]):
        return
    # pre comments on an end_event, no following to move to
    if node.comment is None:
        node.comment = [None, None]
    assert not isinstance(node, ScalarEvent)
    # this is a post comment on a mapping node, add as third element
    # in the list
    node.comment.append(end_event.comment[1])
    end_event.comment[1] = None
MutableMapping + +# fmt: off +from ruamel.yaml.error import (MarkedYAMLError, MarkedYAMLFutureWarning, + MantissaNoDotYAML1_1Warning) +from ruamel.yaml.nodes import * # NOQA +from ruamel.yaml.nodes import (SequenceNode, MappingNode, ScalarNode) +from ruamel.yaml.compat import (builtins_module, # NOQA + nprint, nprintf, version_tnf) +from ruamel.yaml.compat import ordereddict + +from ruamel.yaml.tag import Tag +from ruamel.yaml.comments import * # NOQA +from ruamel.yaml.comments import (CommentedMap, CommentedOrderedMap, CommentedSet, + CommentedKeySeq, CommentedSeq, TaggedScalar, + CommentedKeyMap, + C_KEY_PRE, C_KEY_EOL, C_KEY_POST, + C_VALUE_PRE, C_VALUE_EOL, C_VALUE_POST, + ) +from ruamel.yaml.scalarstring import (SingleQuotedScalarString, DoubleQuotedScalarString, + LiteralScalarString, FoldedScalarString, + PlainScalarString, ScalarString) +from ruamel.yaml.scalarint import ScalarInt, BinaryInt, OctalInt, HexInt, HexCapsInt +from ruamel.yaml.scalarfloat import ScalarFloat +from ruamel.yaml.scalarbool import ScalarBoolean +from ruamel.yaml.timestamp import TimeStamp +from ruamel.yaml.util import timestamp_regexp, create_timestamp + +from typing import Any, Dict, List, Set, Iterator, Union, Optional # NOQA + + +__all__ = ['BaseConstructor', 'SafeConstructor', 'Constructor', + 'ConstructorError', 'RoundTripConstructor'] +# fmt: on + + +class ConstructorError(MarkedYAMLError): + pass + + +class DuplicateKeyFutureWarning(MarkedYAMLFutureWarning): + pass + + +class DuplicateKeyError(MarkedYAMLError): + pass + + +class BaseConstructor: + + yaml_constructors = {} # type: Dict[Any, Any] + yaml_multi_constructors = {} # type: Dict[Any, Any] + + def __init__(self, preserve_quotes: Optional[bool] = None, loader: Any = None) -> None: + self.loader = loader + if self.loader is not None and getattr(self.loader, '_constructor', None) is None: + self.loader._constructor = self + self.loader = loader + self.yaml_base_dict_type = dict + self.yaml_base_list_type = list + 
def get_data(self) -> Any:
    """Construct and return the next document, or None when the composer has none."""
    if not self.composer.check_node():
        return None
    return self.construct_document(self.composer.get_node())
def construct_object(self, node: Any, deep: bool = False) -> Any:
    """Return the object for `node`, constructing it on first use.

    deep is True when creating an object/mapping recursively,
    in that case want the underlying elements available during construction

    Finished objects are cached in `self.constructed_objects`.  A node that
    is still being constructed is listed in `self.recursive_objects`, and
    its entry there is returned instead of constructing it a second time.
    """
    if node in self.constructed_objects:
        return self.constructed_objects[node]
    if node in self.recursive_objects:
        # Checked before `deep_construct` is touched: returning from here
        # after switching the flag on would leave it stuck at True.
        return self.recursive_objects[node]
        # raise ConstructorError(
        #     None, None, 'found unconstructable recursive node', node.start_mark
        # )
    if deep:
        old_deep = self.deep_construct
        self.deep_construct = True
    self.recursive_objects[node] = None
    try:
        data = self.construct_non_recursive_object(node)
        self.constructed_objects[node] = data
    finally:
        # also undone when construction raises, so neither a stale
        # placeholder nor a stuck deep flag is left behind
        del self.recursive_objects[node]
        if deep:
            self.deep_construct = old_deep
    return data
def construct_mapping(self, node: Any, deep: bool = False) -> Any:
    """Construct a mapping of type `self.yaml_base_dict_type` from a MappingNode.

    deep is True when creating an object/mapping recursively,
    in that case want the underlying elements available during construction

    Keys are always constructed deep; a key that is a list is converted to a
    tuple so that it can be used as a key.  Raises ConstructorError when
    `node` is not a MappingNode or when a key is not hashable.
    """
    if not isinstance(node, MappingNode):
        raise ConstructorError(
            None, None, f'expected a mapping node, but found {node.id!s}', node.start_mark,
        )
    total_mapping = self.yaml_base_dict_type()
    # Each todo entry is (key/value node pairs, check for duplicate keys).
    # With a merge, the merged pairs are handled first and the node's own
    # pairs second, so own values overwrite merged ones in the final update;
    # no duplicate key check is done in that case.
    if getattr(node, 'merge', None) is not None:
        todo = [(node.merge, False), (node.value, False)]
    else:
        todo = [(node.value, True)]
    for values, check in todo:
        # a fresh mapping per pass, merged into the result after the pass
        mapping: Dict[Any, Any] = self.yaml_base_dict_type()
        for key_node, value_node in values:
            # keys can be list -> deep
            key = self.construct_object(key_node, deep=True)
            # lists are not hashable, but tuples are
            if not isinstance(key, Hashable):
                if isinstance(key, list):
                    key = tuple(key)
            # checked again: only a list key was converted above
            if not isinstance(key, Hashable):
                raise ConstructorError(
                    'while constructing a mapping',
                    node.start_mark,
                    'found unhashable key',
                    key_node.start_mark,
                )

            value = self.construct_object(value_node, deep=deep)
            if check:
                # check_mapping_key returns False for a duplicate key, in
                # which case the earlier value is kept
                if self.check_mapping_key(node, key_node, mapping, key, value):
                    mapping[key] = value
            else:
                mapping[key] = value
        total_mapping.update(mapping)
    return total_mapping
+ """, + ] + if self.allow_duplicate_keys is None: + warnings.warn(DuplicateKeyFutureWarning(*args), stacklevel=1) + else: + raise DuplicateKeyError(*args) + + def construct_pairs(self, node: Any, deep: bool = False) -> Any: + if not isinstance(node, MappingNode): + raise ConstructorError( + None, None, f'expected a mapping node, but found {node.id!s}', node.start_mark, + ) + pairs = [] + for key_node, value_node in node.value: + key = self.construct_object(key_node, deep=deep) + value = self.construct_object(value_node, deep=deep) + pairs.append((key, value)) + return pairs + + # ToDo: putting stuff on the class makes it global, consider making this to work on an + # instance variable once function load is dropped. + @classmethod + def add_constructor(cls, tag: Any, constructor: Any) -> Any: + if isinstance(tag, Tag): + tag = str(tag) + if 'yaml_constructors' not in cls.__dict__: + cls.yaml_constructors = cls.yaml_constructors.copy() + ret_val = cls.yaml_constructors.get(tag, None) + cls.yaml_constructors[tag] = constructor + return ret_val + + @classmethod + def add_multi_constructor(cls, tag_prefix: Any, multi_constructor: Any) -> None: + if 'yaml_multi_constructors' not in cls.__dict__: + cls.yaml_multi_constructors = cls.yaml_multi_constructors.copy() + cls.yaml_multi_constructors[tag_prefix] = multi_constructor + + @classmethod + def add_default_constructor( + cls, tag: str, method: Any = None, tag_base: str = 'tag:yaml.org,2002:', + ) -> None: + if not tag.startswith('tag:'): + if method is None: + method = 'construct_yaml_' + tag + tag = tag_base + tag + cls.add_constructor(tag, getattr(cls, method)) + + +class SafeConstructor(BaseConstructor): + def construct_scalar(self, node: Any) -> Any: + if isinstance(node, MappingNode): + for key_node, value_node in node.value: + if key_node.tag == 'tag:yaml.org,2002:value': + return self.construct_scalar(value_node) + return BaseConstructor.construct_scalar(self, node) + + def flatten_mapping(self, node: Any) -> 
Any: + """ + This implements the merge key feature http://yaml.org/type/merge.html + by inserting keys from the merge dict/list of dicts if not yet + available in this node + """ + merge: List[Any] = [] + index = 0 + while index < len(node.value): + key_node, value_node = node.value[index] + if key_node.tag == 'tag:yaml.org,2002:merge': + if merge: # double << key + if self.allow_duplicate_keys: + del node.value[index] + index += 1 + continue + args = [ + 'while constructing a mapping', + node.start_mark, + f'found duplicate key "{key_node.value}"', + key_node.start_mark, + """ + To suppress this check see: + http://yaml.readthedocs.io/en/latest/api.html#duplicate-keys + """, + """\ + Duplicate keys will become an error in future releases, and are errors + by default when using the new API. + """, + ] + if self.allow_duplicate_keys is None: + warnings.warn(DuplicateKeyFutureWarning(*args), stacklevel=1) + else: + raise DuplicateKeyError(*args) + del node.value[index] + if isinstance(value_node, MappingNode): + self.flatten_mapping(value_node) + merge.extend(value_node.value) + elif isinstance(value_node, SequenceNode): + submerge = [] + for subnode in value_node.value: + if not isinstance(subnode, MappingNode): + raise ConstructorError( + 'while constructing a mapping', + node.start_mark, + f'expected a mapping for merging, but found {subnode.id!s}', + subnode.start_mark, + ) + self.flatten_mapping(subnode) + submerge.append(subnode.value) + submerge.reverse() + for value in submerge: + merge.extend(value) + else: + raise ConstructorError( + 'while constructing a mapping', + node.start_mark, + 'expected a mapping or list of mappings for merging, ' + f'but found {value_node.id!s}', + value_node.start_mark, + ) + elif key_node.tag == 'tag:yaml.org,2002:value': + key_node.tag = 'tag:yaml.org,2002:str' + index += 1 + else: + index += 1 + if bool(merge): + node.merge = merge # separate merge keys to be able to update without duplicate + node.value = merge + node.value + 
# Members of SafeConstructor (its class statement appears earlier in the file).


def construct_mapping(self, node: Any, deep: bool = False) -> Any:
    """Construct a mapping after resolving its merge (``<<``) keys.

    deep is True when creating an object/mapping recursively,
    in that case want the underlying elements available during construction.

    Only mapping nodes are flattened; any other node is handed to the base
    implementation unchanged, which raises ConstructorError for it.
    """
    if isinstance(node, MappingNode):
        self.flatten_mapping(node)
    return BaseConstructor.construct_mapping(self, node, deep=deep)


def construct_yaml_null(self, node: Any) -> Any:
    """Return None for a null scalar.

    construct_scalar() is still called so that a non-scalar node is
    rejected; its result is deliberately discarded.
    """
    self.construct_scalar(node)
    return None


# YAML 1.2 spec doesn't mention yes/no etc any more, 1.1 does
# Keys are lower case; construct_yaml_bool lower-cases the scalar before lookup.
bool_values = {
    'yes': True,
    'no': False,
    'y': True,
    'n': False,
    'true': True,
    'false': False,
    'on': True,
    'off': False,
}


def construct_yaml_bool(self, node: Any) -> bool:
    """Map a boolean scalar to True/False, ignoring case.

    Raises KeyError when the lower-cased scalar is not a key of bool_values.
    """
    value = self.construct_scalar(node)
    return self.bool_values[value.lower()]


def construct_yaml_int(self, node: Any) -> int:
    """Convert an integer scalar to int.

    Underscores are dropped and an optional leading sign is honoured.
    Recognised forms, in order of precedence:

    * ``0``                       -> 0
    * ``0b...``/``0x...``/``0o...`` -> binary / hexadecimal / octal
    * leading ``0`` (only when processing YAML 1.1) -> octal
    * ``a:b:c`` (only when processing YAML 1.1)     -> base 60
    * anything else               -> decimal

    Raises ConstructorError when nothing is left after removing the
    underscores and the sign; int() raises ValueError for malformed digits.
    """
    raw = self.construct_scalar(node)
    value_s = raw.replace('_', "")
    sign = +1
    if value_s.startswith('-'):
        sign = -1
    if value_s.startswith(('+', '-')):
        value_s = value_s[1:]
    if not value_s:
        # an empty (or sign/underscore only) scalar used to escape as a bare
        # IndexError from value_s[0]; report it with the position in the input
        raise ConstructorError(
            None, None, f'failed to construct integer from "{raw}"', node.start_mark,
        )
    if value_s == '0':
        return 0
    if value_s.startswith('0b'):
        return sign * int(value_s[2:], 2)
    if value_s.startswith('0x'):
        return sign * int(value_s[2:], 16)
    if value_s.startswith('0o'):
        return sign * int(value_s[2:], 8)
    if self.resolver.processing_version == (1, 1):
        if value_s[0] == '0':
            # YAML 1.1 style octal: a leading zero without the 'o'
            return sign * int(value_s, 8)
        if ':' in value_s:
            # sexagesimal: fold the parts left to right, most significant first
            value = 0
            for part in value_s.split(':'):
                value = value * 60 + int(part)
            return sign * value
    return sign * int(value_s)


# positive infinity, returned (with sign applied) for the '.inf' scalar
inf_value = float('inf')
nan_value = -inf_value / inf_value  # Trying to make a quiet NaN (like C99).
+ + def construct_yaml_float(self, node: Any) -> float: + value_so = self.construct_scalar(node) + value_s = value_so.replace('_', "").lower() + sign = +1 + if value_s[0] == '-': + sign = -1 + if value_s[0] in '+-': + value_s = value_s[1:] + if value_s == '.inf': + return sign * self.inf_value + elif value_s == '.nan': + return self.nan_value + elif self.resolver.processing_version != (1, 2) and ':' in value_s: + digits = [float(part) for part in value_s.split(':')] + digits.reverse() + base = 1 + value = 0.0 + for digit in digits: + value += digit * base + base *= 60 + return sign * value + else: + if self.resolver.processing_version != (1, 2) and 'e' in value_s: + # value_s is lower case independent of input + mantissa, exponent = value_s.split('e') + if '.' not in mantissa: + warnings.warn(MantissaNoDotYAML1_1Warning(node, value_so), stacklevel=1) + return sign * float(value_s) + + def construct_yaml_binary(self, node: Any) -> Any: + try: + value = self.construct_scalar(node).encode('ascii') + except UnicodeEncodeError as exc: + raise ConstructorError( + None, + None, + f'failed to convert base64 data into ascii: {exc!s}', + node.start_mark, + ) + try: + return base64.decodebytes(value) + except binascii.Error as exc: + raise ConstructorError( + None, None, f'failed to decode base64 data: {exc!s}', node.start_mark, + ) + + timestamp_regexp = timestamp_regexp # moved to util 0.17.17 + + def construct_yaml_timestamp(self, node: Any, values: Any = None) -> Any: + if values is None: + try: + match = self.timestamp_regexp.match(node.value) + except TypeError: + match = None + if match is None: + raise ConstructorError( + None, + None, + f'failed to construct timestamp from "{node.value}"', + node.start_mark, + ) + values = match.groupdict() + return create_timestamp(**values) + + def construct_yaml_omap(self, node: Any) -> Any: + # Note: we do now check for duplicate keys + omap = ordereddict() + yield omap + if not isinstance(node, SequenceNode): + raise 
ConstructorError( + 'while constructing an ordered map', + node.start_mark, + f'expected a sequence, but found {node.id!s}', + node.start_mark, + ) + for subnode in node.value: + if not isinstance(subnode, MappingNode): + raise ConstructorError( + 'while constructing an ordered map', + node.start_mark, + f'expected a mapping of length 1, but found {subnode.id!s}', + subnode.start_mark, + ) + if len(subnode.value) != 1: + raise ConstructorError( + 'while constructing an ordered map', + node.start_mark, + f'expected a single mapping item, but found {len(subnode.value):d} items', + subnode.start_mark, + ) + key_node, value_node = subnode.value[0] + key = self.construct_object(key_node) + assert key not in omap + value = self.construct_object(value_node) + omap[key] = value + + def construct_yaml_pairs(self, node: Any) -> Any: + # Note: the same code as `construct_yaml_omap`. + pairs: List[Any] = [] + yield pairs + if not isinstance(node, SequenceNode): + raise ConstructorError( + 'while constructing pairs', + node.start_mark, + f'expected a sequence, but found {node.id!s}', + node.start_mark, + ) + for subnode in node.value: + if not isinstance(subnode, MappingNode): + raise ConstructorError( + 'while constructing pairs', + node.start_mark, + f'expected a mapping of length 1, but found {subnode.id!s}', + subnode.start_mark, + ) + if len(subnode.value) != 1: + raise ConstructorError( + 'while constructing pairs', + node.start_mark, + f'expected a single mapping item, but found {len(subnode.value):d} items', + subnode.start_mark, + ) + key_node, value_node = subnode.value[0] + key = self.construct_object(key_node) + value = self.construct_object(value_node) + pairs.append((key, value)) + + def construct_yaml_set(self, node: Any) -> Any: + data: Set[Any] = set() + yield data + value = self.construct_mapping(node) + data.update(value) + + def construct_yaml_str(self, node: Any) -> Any: + value = self.construct_scalar(node) + return value + + def construct_yaml_seq(self, 
node: Any) -> Any: + data: List[Any] = self.yaml_base_list_type() + yield data + data.extend(self.construct_sequence(node)) + + def construct_yaml_map(self, node: Any) -> Any: + data: Dict[Any, Any] = self.yaml_base_dict_type() + yield data + value = self.construct_mapping(node) + data.update(value) + + def construct_yaml_object(self, node: Any, cls: Any) -> Any: + data = cls.__new__(cls) + yield data + if hasattr(data, '__setstate__'): + state = self.construct_mapping(node, deep=True) + data.__setstate__(state) + else: + state = self.construct_mapping(node) + data.__dict__.update(state) + + def construct_undefined(self, node: Any) -> None: + raise ConstructorError( + None, + None, + f'could not determine a constructor for the tag {node.tag!r}', + node.start_mark, + ) + + +for tag in 'null bool int float binary timestamp omap pairs set str seq map'.split(): + SafeConstructor.add_default_constructor(tag) + +SafeConstructor.add_constructor(None, SafeConstructor.construct_undefined) + + +class Constructor(SafeConstructor): + def construct_python_str(self, node: Any) -> Any: + return self.construct_scalar(node) + + def construct_python_unicode(self, node: Any) -> Any: + return self.construct_scalar(node) + + def construct_python_bytes(self, node: Any) -> Any: + try: + value = self.construct_scalar(node).encode('ascii') + except UnicodeEncodeError as exc: + raise ConstructorError( + None, + None, + f'failed to convert base64 data into ascii: {exc!s}', + node.start_mark, + ) + try: + return base64.decodebytes(value) + except binascii.Error as exc: + raise ConstructorError( + None, None, f'failed to decode base64 data: {exc!s}', node.start_mark, + ) + + def construct_python_long(self, node: Any) -> int: + val = self.construct_yaml_int(node) + return val + + def construct_python_complex(self, node: Any) -> Any: + return complex(self.construct_scalar(node)) + + def construct_python_tuple(self, node: Any) -> Any: + return tuple(self.construct_sequence(node)) + + def 
find_python_module(self, name: Any, mark: Any) -> Any: + if not name: + raise ConstructorError( + 'while constructing a Python module', + mark, + 'expected non-empty name appended to the tag', + mark, + ) + try: + __import__(name) + except ImportError as exc: + raise ConstructorError( + 'while constructing a Python module', + mark, + f'cannot find module {name!r} ({exc!s})', + mark, + ) + return sys.modules[name] + + def find_python_name(self, name: Any, mark: Any) -> Any: + if not name: + raise ConstructorError( + 'while constructing a Python object', + mark, + 'expected non-empty name appended to the tag', + mark, + ) + if '.' in name: + lname = name.split('.') + lmodule_name = lname + lobject_name: List[Any] = [] + while len(lmodule_name) > 1: + lobject_name.insert(0, lmodule_name.pop()) + module_name = '.'.join(lmodule_name) + try: + __import__(module_name) + # object_name = '.'.join(object_name) + break + except ImportError: + continue + else: + module_name = builtins_module + lobject_name = [name] + try: + __import__(module_name) + except ImportError as exc: + raise ConstructorError( + 'while constructing a Python object', + mark, + f'cannot find module {module_name!r} ({exc!s})', + mark, + ) + module = sys.modules[module_name] + object_name = '.'.join(lobject_name) + obj = module + while lobject_name: + if not hasattr(obj, lobject_name[0]): + + raise ConstructorError( + 'while constructing a Python object', + mark, + f'cannot find {object_name!r} in the module {module.__name__!r}', + mark, + ) + obj = getattr(obj, lobject_name.pop(0)) + return obj + + def construct_python_name(self, suffix: Any, node: Any) -> Any: + value = self.construct_scalar(node) + if value: + raise ConstructorError( + 'while constructing a Python name', + node.start_mark, + f'expected the empty value, but found {value!r}', + node.start_mark, + ) + return self.find_python_name(suffix, node.start_mark) + + def construct_python_module(self, suffix: Any, node: Any) -> Any: + value = 
self.construct_scalar(node) + if value: + raise ConstructorError( + 'while constructing a Python module', + node.start_mark, + f'expected the empty value, but found {value!r}', + node.start_mark, + ) + return self.find_python_module(suffix, node.start_mark) + + def make_python_instance( + self, suffix: Any, node: Any, args: Any = None, kwds: Any = None, newobj: bool = False, + ) -> Any: + if not args: + args = [] + if not kwds: + kwds = {} + cls = self.find_python_name(suffix, node.start_mark) + if newobj and isinstance(cls, type): + return cls.__new__(cls, *args, **kwds) + else: + return cls(*args, **kwds) + + def set_python_instance_state(self, instance: Any, state: Any) -> None: + if hasattr(instance, '__setstate__'): + instance.__setstate__(state) + else: + slotstate: Dict[Any, Any] = {} + if isinstance(state, tuple) and len(state) == 2: + state, slotstate = state + if hasattr(instance, '__dict__'): + instance.__dict__.update(state) + elif state: + slotstate.update(state) + for key, value in slotstate.items(): + setattr(instance, key, value) + + def construct_python_object(self, suffix: Any, node: Any) -> Any: + # Format: + # !!python/object:module.name { ... state ... } + instance = self.make_python_instance(suffix, node, newobj=True) + self.recursive_objects[node] = instance + yield instance + deep = hasattr(instance, '__setstate__') + state = self.construct_mapping(node, deep=deep) + self.set_python_instance_state(instance, state) + + def construct_python_object_apply( + self, suffix: Any, node: Any, newobj: bool = False, + ) -> Any: + # Format: + # !!python/object/apply # (or !!python/object/new) + # args: [ ... arguments ... ] + # kwds: { ... keywords ... } + # state: ... state ... + # listitems: [ ... listitems ... ] + # dictitems: { ... dictitems ... } + # or short format: + # !!python/object/apply [ ... arguments ... 
] + # The difference between !!python/object/apply and !!python/object/new + # is how an object is created, check make_python_instance for details. + if isinstance(node, SequenceNode): + args = self.construct_sequence(node, deep=True) + kwds: Dict[Any, Any] = {} + state: Dict[Any, Any] = {} + listitems: List[Any] = [] + dictitems: Dict[Any, Any] = {} + else: + value = self.construct_mapping(node, deep=True) + args = value.get('args', []) + kwds = value.get('kwds', {}) + state = value.get('state', {}) + listitems = value.get('listitems', []) + dictitems = value.get('dictitems', {}) + instance = self.make_python_instance(suffix, node, args, kwds, newobj) + if bool(state): + self.set_python_instance_state(instance, state) + if bool(listitems): + instance.extend(listitems) + if bool(dictitems): + for key in dictitems: + instance[key] = dictitems[key] + return instance + + def construct_python_object_new(self, suffix: Any, node: Any) -> Any: + return self.construct_python_object_apply(suffix, node, newobj=True) + + @classmethod + def add_default_constructor( + cls, tag: str, method: Any = None, tag_base: str = 'tag:yaml.org,2002:python/', + ) -> None: + if not tag.startswith('tag:'): + if method is None: + method = 'construct_yaml_' + tag + tag = tag_base + tag + cls.add_constructor(tag, getattr(cls, method)) + + +Constructor.add_constructor('tag:yaml.org,2002:python/none', Constructor.construct_yaml_null) + +Constructor.add_constructor('tag:yaml.org,2002:python/bool', Constructor.construct_yaml_bool) + +Constructor.add_constructor('tag:yaml.org,2002:python/str', Constructor.construct_python_str) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/unicode', Constructor.construct_python_unicode, +) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/bytes', Constructor.construct_python_bytes, +) + +Constructor.add_constructor('tag:yaml.org,2002:python/int', Constructor.construct_yaml_int) + +Constructor.add_constructor('tag:yaml.org,2002:python/long', 
Constructor.construct_python_long) + +Constructor.add_constructor('tag:yaml.org,2002:python/float', Constructor.construct_yaml_float) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/complex', Constructor.construct_python_complex, +) + +Constructor.add_constructor('tag:yaml.org,2002:python/list', Constructor.construct_yaml_seq) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/tuple', Constructor.construct_python_tuple, +) +# for tag in 'bool str unicode bytes int long float complex tuple'.split(): +# Constructor.add_default_constructor(tag) + +Constructor.add_constructor('tag:yaml.org,2002:python/dict', Constructor.construct_yaml_map) + +Constructor.add_multi_constructor( + 'tag:yaml.org,2002:python/name:', Constructor.construct_python_name, +) + +Constructor.add_multi_constructor( + 'tag:yaml.org,2002:python/module:', Constructor.construct_python_module, +) + +Constructor.add_multi_constructor( + 'tag:yaml.org,2002:python/object:', Constructor.construct_python_object, +) + +Constructor.add_multi_constructor( + 'tag:yaml.org,2002:python/object/apply:', Constructor.construct_python_object_apply, +) + +Constructor.add_multi_constructor( + 'tag:yaml.org,2002:python/object/new:', Constructor.construct_python_object_new, +) + + +class RoundTripConstructor(SafeConstructor): + """need to store the comments on the node itself, + as well as on the items + """ + + def comment(self, idx: Any) -> Any: + assert self.loader.comment_handling is not None + x = self.scanner.comments[idx] + x.set_assigned() + return x + + def comments(self, list_of_comments: Any, idx: Optional[Any] = None) -> Any: + # hand in the comment and optional pre, eol, post segment + if list_of_comments is None: + return [] + if idx is not None: + if list_of_comments[idx] is None: + return [] + list_of_comments = list_of_comments[idx] + for x in list_of_comments: + yield self.comment(x) + + def construct_scalar(self, node: Any) -> Any: + if not isinstance(node, ScalarNode): + raise 
ConstructorError( + None, None, f'expected a scalar node, but found {node.id!s}', node.start_mark, + ) + + if node.style == '|' and isinstance(node.value, str): + lss = LiteralScalarString(node.value, anchor=node.anchor) + if self.loader and self.loader.comment_handling is None: + if node.comment and node.comment[1]: + lss.comment = node.comment[1][0] # type: ignore + else: + # NEWCMNT + if node.comment is not None and node.comment[1]: + # nprintf('>>>>nc1', node.comment) + # EOL comment after | + lss.comment = self.comment(node.comment[1][0]) # type: ignore + return lss + if node.style == '>' and isinstance(node.value, str): + fold_positions: List[int] = [] + idx = -1 + while True: + idx = node.value.find('\a', idx + 1) + if idx < 0: + break + fold_positions.append(idx - len(fold_positions)) + fss = FoldedScalarString(node.value.replace('\a', ''), anchor=node.anchor) + if self.loader and self.loader.comment_handling is None: + if node.comment and node.comment[1]: + fss.comment = node.comment[1][0] # type: ignore + else: + # NEWCMNT + if node.comment is not None and node.comment[1]: + # nprintf('>>>>nc2', node.comment) + # EOL comment after > + fss.comment = self.comment(node.comment[1][0]) # type: ignore + if fold_positions: + fss.fold_pos = fold_positions # type: ignore + return fss + elif bool(self._preserve_quotes) and isinstance(node.value, str): + if node.style == "'": + return SingleQuotedScalarString(node.value, anchor=node.anchor) + if node.style == '"': + return DoubleQuotedScalarString(node.value, anchor=node.anchor) + # if node.ctag: + # data2 = TaggedScalar() + # data2.value = node.value + # data2.style = node.style + # data2.yaml_set_ctag(node.ctag) + # if node.anchor: + # from ruamel.yaml.serializer import templated_id + + # if not templated_id(node.anchor): + # data2.yaml_set_anchor(node.anchor, always_dump=True) + # return data2 + if node.anchor: + return PlainScalarString(node.value, anchor=node.anchor) + return node.value + + def 
construct_yaml_int(self, node: Any) -> Any: + width: Any = None + value_su = self.construct_scalar(node) + try: + sx = value_su.rstrip('_') + underscore: Any = [len(sx) - sx.rindex('_') - 1, False, False] + except ValueError: + underscore = None + except IndexError: + underscore = None + value_s = value_su.replace('_', "") + sign = +1 + if value_s[0] == '-': + sign = -1 + if value_s[0] in '+-': + value_s = value_s[1:] + if value_s == '0': + return 0 + elif value_s.startswith('0b'): + if self.resolver.processing_version > (1, 1) and value_s[2] == '0': + width = len(value_s[2:]) + if underscore is not None: + underscore[1] = value_su[2] == '_' + underscore[2] = len(value_su[2:]) > 1 and value_su[-1] == '_' + return BinaryInt( + sign * int(value_s[2:], 2), + width=width, + underscore=underscore, + anchor=node.anchor, + ) + elif value_s.startswith('0x'): + # default to lower-case if no a-fA-F in string + if self.resolver.processing_version > (1, 1) and value_s[2] == '0': + width = len(value_s[2:]) + hex_fun: Any = HexInt + for ch in value_s[2:]: + if ch in 'ABCDEF': # first non-digit is capital + hex_fun = HexCapsInt + break + if ch in 'abcdef': + break + if underscore is not None: + underscore[1] = value_su[2] == '_' + underscore[2] = len(value_su[2:]) > 1 and value_su[-1] == '_' + return hex_fun( + sign * int(value_s[2:], 16), + width=width, + underscore=underscore, + anchor=node.anchor, + ) + elif value_s.startswith('0o'): + if self.resolver.processing_version > (1, 1) and value_s[2] == '0': + width = len(value_s[2:]) + if underscore is not None: + underscore[1] = value_su[2] == '_' + underscore[2] = len(value_su[2:]) > 1 and value_su[-1] == '_' + return OctalInt( + sign * int(value_s[2:], 8), + width=width, + underscore=underscore, + anchor=node.anchor, + ) + elif self.resolver.processing_version != (1, 2) and value_s[0] == '0': + return OctalInt( + sign * int(value_s, 8), width=width, underscore=underscore, anchor=node.anchor, + ) + elif 
self.resolver.processing_version != (1, 2) and ':' in value_s: + digits = [int(part) for part in value_s.split(':')] + digits.reverse() + base = 1 + value = 0 + for digit in digits: + value += digit * base + base *= 60 + return sign * value + elif self.resolver.processing_version > (1, 1) and value_s[0] == '0': + # not an octal, an integer with leading zero(s) + if underscore is not None: + # cannot have a leading underscore + underscore[2] = len(value_su) > 1 and value_su[-1] == '_' + return ScalarInt(sign * int(value_s), width=len(value_s), underscore=underscore) + elif underscore: + # cannot have a leading underscore + underscore[2] = len(value_su) > 1 and value_su[-1] == '_' + return ScalarInt( + sign * int(value_s), width=None, underscore=underscore, anchor=node.anchor, + ) + elif node.anchor: + return ScalarInt(sign * int(value_s), width=None, anchor=node.anchor) + else: + return sign * int(value_s) + + def construct_yaml_float(self, node: Any) -> Any: + def leading_zeros(v: Any) -> int: + lead0 = 0 + idx = 0 + while idx < len(v) and v[idx] in '0.': + if v[idx] == '0': + lead0 += 1 + idx += 1 + return lead0 + + # underscore = None + m_sign: Any = False + value_so = self.construct_scalar(node) + value_s = value_so.replace('_', "").lower() + sign = +1 + if value_s[0] == '-': + sign = -1 + if value_s[0] in '+-': + m_sign = value_s[0] + value_s = value_s[1:] + if value_s == '.inf': + return sign * self.inf_value + if value_s == '.nan': + return self.nan_value + if self.resolver.processing_version != (1, 2) and ':' in value_s: + digits = [float(part) for part in value_s.split(':')] + digits.reverse() + base = 1 + value = 0.0 + for digit in digits: + value += digit * base + base *= 60 + return sign * value + if 'e' in value_s: + try: + mantissa, exponent = value_so.split('e') + exp = 'e' + except ValueError: + mantissa, exponent = value_so.split('E') + exp = 'E' + if self.resolver.processing_version != (1, 2): + # value_s is lower case independent of input + if '.' 
not in mantissa: + warnings.warn(MantissaNoDotYAML1_1Warning(node, value_so), stacklevel=1) + lead0 = leading_zeros(mantissa) + width = len(mantissa) + prec = mantissa.find('.') + if m_sign: + width -= 1 + e_width = len(exponent) + e_sign = exponent[0] in '+-' + # nprint('sf', width, prec, m_sign, exp, e_width, e_sign) + return ScalarFloat( + sign * float(value_s), + width=width, + prec=prec, + m_sign=m_sign, + m_lead0=lead0, + exp=exp, + e_width=e_width, + e_sign=e_sign, + anchor=node.anchor, + ) + width = len(value_so) + # you can't use index, !!float 42 would be a float without a dot + prec = value_so.find('.') + lead0 = leading_zeros(value_so) + return ScalarFloat( + sign * float(value_s), + width=width, + prec=prec, + m_sign=m_sign, + m_lead0=lead0, + anchor=node.anchor, + ) + + def construct_yaml_str(self, node: Any) -> Any: + if node.ctag.handle: + value = self.construct_unknown(node) + else: + value = self.construct_scalar(node) + if isinstance(value, ScalarString): + return value + return value + + def construct_rt_sequence(self, node: Any, seqtyp: Any, deep: bool = False) -> Any: + if not isinstance(node, SequenceNode): + raise ConstructorError( + None, + None, + f'expected a sequence node, but found {node.id!s}', + node.start_mark, + ) + ret_val = [] + if self.loader and self.loader.comment_handling is None: + if node.comment: + seqtyp._yaml_add_comment(node.comment[:2]) + if len(node.comment) > 2: + # this happens e.g. 
if you have a sequence element that is a flow-style + # mapping and that has no EOL comment but a following commentline or + # empty line + seqtyp.yaml_end_comment_extend(node.comment[2], clear=True) + else: + # NEWCMNT + if node.comment: + nprintf('nc3', node.comment) + if node.anchor: + from ruamel.yaml.serializer import templated_id + + if not templated_id(node.anchor): + seqtyp.yaml_set_anchor(node.anchor) + for idx, child in enumerate(node.value): + if child.comment: + seqtyp._yaml_add_comment(child.comment, key=idx) + child.comment = None # if moved to sequence remove from child + ret_val.append(self.construct_object(child, deep=deep)) + seqtyp._yaml_set_idx_line_col( + idx, [child.start_mark.line, child.start_mark.column], + ) + return ret_val + + def flatten_mapping(self, node: Any) -> Any: + """ + This implements the merge key feature http://yaml.org/type/merge.html + by inserting keys from the merge dict/list of dicts if not yet + available in this node + """ + + def constructed(value_node: Any) -> Any: + # If the contents of a merge are defined within the + # merge marker, then they won't have been constructed + # yet. But if they were already constructed, we need to use + # the existing object. 
+ if value_node in self.constructed_objects: + value = self.constructed_objects[value_node] + else: + value = self.construct_object(value_node, deep=True) + return value + + # merge = [] + merge_map_list: List[Any] = [] + index = 0 + while index < len(node.value): + key_node, value_node = node.value[index] + if key_node.tag == 'tag:yaml.org,2002:merge': + if merge_map_list: # double << key + if self.allow_duplicate_keys: + del node.value[index] + index += 1 + continue + args = [ + 'while constructing a mapping', + node.start_mark, + f'found duplicate key "{key_node.value}"', + key_node.start_mark, + """ + To suppress this check see: + http://yaml.readthedocs.io/en/latest/api.html#duplicate-keys + """, + """\ + Duplicate keys will become an error in future releases, and are errors + by default when using the new API. + """, + ] + if self.allow_duplicate_keys is None: + warnings.warn(DuplicateKeyFutureWarning(*args), stacklevel=1) + else: + raise DuplicateKeyError(*args) + del node.value[index] + if isinstance(value_node, MappingNode): + merge_map_list.append((index, constructed(value_node))) + # self.flatten_mapping(value_node) + # merge.extend(value_node.value) + elif isinstance(value_node, SequenceNode): + # submerge = [] + for subnode in value_node.value: + if not isinstance(subnode, MappingNode): + raise ConstructorError( + 'while constructing a mapping', + node.start_mark, + f'expected a mapping for merging, but found {subnode.id!s}', + subnode.start_mark, + ) + merge_map_list.append((index, constructed(subnode))) + # self.flatten_mapping(subnode) + # submerge.append(subnode.value) + # submerge.reverse() + # for value in submerge: + # merge.extend(value) + else: + raise ConstructorError( + 'while constructing a mapping', + node.start_mark, + 'expected a mapping or list of mappings for merging, ' + f'but found {value_node.id!s}', + value_node.start_mark, + ) + elif key_node.tag == 'tag:yaml.org,2002:value': + key_node.tag = 'tag:yaml.org,2002:str' + index += 1 + 
else: + index += 1 + return merge_map_list + # if merge: + # node.value = merge + node.value + + def _sentinel(self) -> None: + pass + + def construct_mapping(self, node: Any, maptyp: Any, deep: bool = False) -> Any: # type: ignore # NOQA + if not isinstance(node, MappingNode): + raise ConstructorError( + None, None, f'expected a mapping node, but found {node.id!s}', node.start_mark, + ) + merge_map = self.flatten_mapping(node) + # mapping = {} + if self.loader and self.loader.comment_handling is None: + if node.comment: + maptyp._yaml_add_comment(node.comment[:2]) + if len(node.comment) > 2: + maptyp.yaml_end_comment_extend(node.comment[2], clear=True) + else: + # NEWCMNT + if node.comment: + # nprintf('nc4', node.comment, node.start_mark) + if maptyp.ca.pre is None: + maptyp.ca.pre = [] + for cmnt in self.comments(node.comment, 0): + maptyp.ca.pre.append(cmnt) + if node.anchor: + from ruamel.yaml.serializer import templated_id + + if not templated_id(node.anchor): + maptyp.yaml_set_anchor(node.anchor) + last_key, last_value = None, self._sentinel + for key_node, value_node in node.value: + # keys can be list -> deep + key = self.construct_object(key_node, deep=True) + # lists are not hashable, but tuples are + if not isinstance(key, Hashable): + if isinstance(key, MutableSequence): + key_s = CommentedKeySeq(key) + if key_node.flow_style is True: + key_s.fa.set_flow_style() + elif key_node.flow_style is False: + key_s.fa.set_block_style() + key_s._yaml_set_line_col(key.lc.line, key.lc.col) # type: ignore + key = key_s + elif isinstance(key, MutableMapping): + key_m = CommentedKeyMap(key) + if key_node.flow_style is True: + key_m.fa.set_flow_style() + elif key_node.flow_style is False: + key_m.fa.set_block_style() + key_m._yaml_set_line_col(key.lc.line, key.lc.col) # type: ignore + key = key_m + if not isinstance(key, Hashable): + raise ConstructorError( + 'while constructing a mapping', + node.start_mark, + 'found unhashable key', + key_node.start_mark, + ) + 
value = self.construct_object(value_node, deep=deep) + if self.check_mapping_key(node, key_node, maptyp, key, value): + if self.loader and self.loader.comment_handling is None: + if key_node.comment and len(key_node.comment) > 4 and key_node.comment[4]: + if last_value is None: + key_node.comment[0] = key_node.comment.pop(4) + maptyp._yaml_add_comment(key_node.comment, value=last_key) + else: + key_node.comment[2] = key_node.comment.pop(4) + maptyp._yaml_add_comment(key_node.comment, key=key) + key_node.comment = None + if key_node.comment: + maptyp._yaml_add_comment(key_node.comment, key=key) + if value_node.comment: + maptyp._yaml_add_comment(value_node.comment, value=key) + else: + # NEWCMNT + if key_node.comment: + nprintf('nc5a', key, key_node.comment) + if key_node.comment[0]: + maptyp.ca.set(key, C_KEY_PRE, key_node.comment[0]) + if key_node.comment[1]: + maptyp.ca.set(key, C_KEY_EOL, key_node.comment[1]) + if key_node.comment[2]: + maptyp.ca.set(key, C_KEY_POST, key_node.comment[2]) + if value_node.comment: + nprintf('nc5b', key, value_node.comment) + if value_node.comment[0]: + maptyp.ca.set(key, C_VALUE_PRE, value_node.comment[0]) + if value_node.comment[1]: + maptyp.ca.set(key, C_VALUE_EOL, value_node.comment[1]) + if value_node.comment[2]: + maptyp.ca.set(key, C_VALUE_POST, value_node.comment[2]) + maptyp._yaml_set_kv_line_col( + key, + [ + key_node.start_mark.line, + key_node.start_mark.column, + value_node.start_mark.line, + value_node.start_mark.column, + ], + ) + maptyp[key] = value + last_key, last_value = key, value # could use indexing + # do this last, or <<: before a key will prevent insertion in instances + # of collections.OrderedDict (as they have no __contains__ + if merge_map: + maptyp.add_yaml_merge(merge_map) + + def construct_setting(self, node: Any, typ: Any, deep: bool = False) -> Any: + if not isinstance(node, MappingNode): + raise ConstructorError( + None, None, f'expected a mapping node, but found {node.id!s}', node.start_mark, + ) 
+ if self.loader and self.loader.comment_handling is None: + if node.comment: + typ._yaml_add_comment(node.comment[:2]) + if len(node.comment) > 2: + typ.yaml_end_comment_extend(node.comment[2], clear=True) + else: + # NEWCMNT + if node.comment: + nprintf('nc6', node.comment) + if node.anchor: + from ruamel.yaml.serializer import templated_id + + if not templated_id(node.anchor): + typ.yaml_set_anchor(node.anchor) + for key_node, value_node in node.value: + # keys can be list -> deep + key = self.construct_object(key_node, deep=True) + # lists are not hashable, but tuples are + if not isinstance(key, Hashable): + if isinstance(key, list): + key = tuple(key) + if not isinstance(key, Hashable): + raise ConstructorError( + 'while constructing a mapping', + node.start_mark, + 'found unhashable key', + key_node.start_mark, + ) + # construct but should be null + value = self.construct_object(value_node, deep=deep) # NOQA + self.check_set_key(node, key_node, typ, key) + if self.loader and self.loader.comment_handling is None: + if key_node.comment: + typ._yaml_add_comment(key_node.comment, key=key) + if value_node.comment: + typ._yaml_add_comment(value_node.comment, value=key) + else: + # NEWCMNT + if key_node.comment: + nprintf('nc7a', key_node.comment) + if value_node.comment: + nprintf('nc7b', value_node.comment) + typ.add(key) + + def construct_yaml_seq(self, node: Any) -> Iterator[CommentedSeq]: + data = CommentedSeq() + data._yaml_set_line_col(node.start_mark.line, node.start_mark.column) + # if node.comment: + # data._yaml_add_comment(node.comment) + yield data + data.extend(self.construct_rt_sequence(node, data)) + self.set_collection_style(data, node) + + def construct_yaml_map(self, node: Any) -> Iterator[CommentedMap]: + data = CommentedMap() + data._yaml_set_line_col(node.start_mark.line, node.start_mark.column) + yield data + self.construct_mapping(node, data, deep=True) + self.set_collection_style(data, node) + + def set_collection_style(self, data: Any, 
def construct_yaml_object(self, node: Any, cls: Any) -> Any:
    """Two-step constructor for an instance of ``cls`` built from a mapping node.

    The bare instance is created with ``cls.__new__`` (so ``__init__`` is not
    run at that point) and yielded first; its state is restored once the
    generator is resumed.  State is restored in one of three ways, tried in
    this order:

    * ``__setstate__`` if the instance has one (mapping constructed deep),
    * dataclass handling: every mapping entry is set as an attribute, except
      ``InitVar`` fields that have a default, which are passed to
      ``__post_init__`` instead (when that method exists),
    * otherwise ``__init__(**state)`` for classes with ``__attrs_attrs__``,
      or a plain ``__dict__`` update.

    Finally a non-templated anchor on the node is stored on the instance.
    """
    from dataclasses import is_dataclass, InitVar, MISSING

    data = cls.__new__(cls)
    yield data
    if hasattr(data, '__setstate__'):
        restored = SafeConstructor.construct_mapping(self, node, deep=True)
        data.__setstate__(restored)
    elif is_dataclass(data):
        mapping = SafeConstructor.construct_mapping(self, node)
        # in 3.7, InitVar is a singleton, hence the identity test next to isinstance
        init_var_defaults = {
            fld.name: fld.default
            for fld in data.__dataclass_fields__.values()
            if (isinstance(fld.type, InitVar) or fld.type is InitVar)
            and fld.default is not MISSING
        }
        for attr, value in mapping.items():
            if attr in init_var_defaults:
                # handed to __post_init__ below, not stored on the instance
                continue
            setattr(data, attr, value)
        post_init = getattr(data, '__post_init__', None)
        if post_init is not None:
            kw = {
                name: mapping.get(name, default)
                for name, default in init_var_defaults.items()
            }
            post_init(**kw)
    else:
        state = SafeConstructor.construct_mapping(self, node)
        if hasattr(data, '__attrs_attrs__'):  # issue 394
            data.__init__(**state)
        else:
            data.__dict__.update(state)
    if not node.anchor:
        return
    from ruamel.yaml.serializer import templated_id
    from ruamel.yaml.anchor import Anchor

    if templated_id(node.anchor):
        return
    if hasattr(data, Anchor.attrib):
        a = getattr(data, Anchor.attrib)
    else:
        a = Anchor()
        setattr(data, Anchor.attrib, a)
    a.value = node.anchor
def construct_unknown(
    self, node: Any,
) -> Iterator[Union[CommentedMap, TaggedScalar, CommentedSeq]]:
    """Fallback two-step constructor for nodes with a tag that has no constructor.

    Depending on the node type a ``CommentedMap``, ``TaggedScalar`` or
    ``CommentedSeq`` that carries the node's tag (``node.ctag``) is yielded.
    After the generator is resumed a non-templated anchor is set, and for
    mappings and sequences the content is constructed.

    Raises:
        ConstructorError: when the node is not a mapping, scalar or sequence
            node, or when building the replacement object fails.  In the
            latter case the original exception is attached as ``__cause__``.
    """
    failure = None
    try:
        if isinstance(node, MappingNode):
            data = CommentedMap()
            data._yaml_set_line_col(node.start_mark.line, node.start_mark.column)
            if node.flow_style is True:
                data.fa.set_flow_style()
            elif node.flow_style is False:
                data.fa.set_block_style()
            data.yaml_set_ctag(node.ctag)
            yield data
            if node.anchor:
                from ruamel.yaml.serializer import templated_id

                if not templated_id(node.anchor):
                    data.yaml_set_anchor(node.anchor)
            self.construct_mapping(node, data)
            return
        elif isinstance(node, ScalarNode):
            data2 = TaggedScalar()
            data2.value = self.construct_scalar(node)
            data2.style = node.style
            data2.yaml_set_ctag(node.ctag)
            yield data2
            if node.anchor:
                from ruamel.yaml.serializer import templated_id

                if not templated_id(node.anchor):
                    data2.yaml_set_anchor(node.anchor, always_dump=True)
            return
        elif isinstance(node, SequenceNode):
            data3 = CommentedSeq()
            data3._yaml_set_line_col(node.start_mark.line, node.start_mark.column)
            if node.flow_style is True:
                data3.fa.set_flow_style()
            elif node.flow_style is False:
                data3.fa.set_block_style()
            data3.yaml_set_ctag(node.ctag)
            yield data3
            if node.anchor:
                from ruamel.yaml.serializer import templated_id

                if not templated_id(node.anchor):
                    data3.yaml_set_anchor(node.anchor)
            data3.extend(self.construct_sequence(node))
            return
    except Exception as exc:
        # Best effort: every ordinary failure is reported below as "no
        # constructor for this tag".  Only Exception is caught, so that
        # GeneratorExit (closing this generator at one of the yields),
        # KeyboardInterrupt and SystemExit are no longer turned into a
        # ConstructorError.
        failure = exc
    raise ConstructorError(
        None,
        None,
        f'could not determine a constructor for the tag {node.tag!r}',
        node.start_mark,
    ) from failure
return create_timestamp(**values) + # return SafeConstructor.construct_yaml_timestamp(self, node, values) + for part in ['t', 'tz_sign', 'tz_hour', 'tz_minute']: + if values[part]: + break + else: + return create_timestamp(**values) + # return SafeConstructor.construct_yaml_timestamp(self, node, values) + dd = create_timestamp(**values) # this has delta applied + delta = None + if values['tz_sign']: + tz_hour = int(values['tz_hour']) + minutes = values['tz_minute'] + tz_minute = int(minutes) if minutes else 0 + delta = datetime.timedelta(hours=tz_hour, minutes=tz_minute) + if values['tz_sign'] == '-': + delta = -delta + # should check for None and solve issue 366 should be tzinfo=delta) + # isinstance(datetime.datetime.now, datetime.date) is true) + if isinstance(dd, datetime.datetime): + data = TimeStamp( + dd.year, dd.month, dd.day, dd.hour, dd.minute, dd.second, dd.microsecond, + ) + else: + # ToDo: make this into a DateStamp? + data = TimeStamp(dd.year, dd.month, dd.day, 0, 0, 0, 0) + return data + if delta: + data._yaml['delta'] = delta + tz = values['tz_sign'] + values['tz_hour'] + if values['tz_minute']: + tz += ':' + values['tz_minute'] + data._yaml['tz'] = tz + else: + if values['tz']: # no delta + data._yaml['tz'] = values['tz'] + if values['t']: + data._yaml['t'] = True + return data + + def construct_yaml_sbool(self, node: Any) -> Union[bool, ScalarBoolean]: + b = SafeConstructor.construct_yaml_bool(self, node) + if node.anchor: + return ScalarBoolean(b, anchor=node.anchor) + return b + + +RoundTripConstructor.add_default_constructor('bool', method='construct_yaml_sbool') + +for tag in 'null int float binary timestamp omap pairs set str seq map'.split(): + RoundTripConstructor.add_default_constructor(tag) + +RoundTripConstructor.add_constructor(None, RoundTripConstructor.construct_unknown) diff --git a/cyaml.py b/cyaml.py new file mode 100644 index 0000000..3f15ffc --- /dev/null +++ b/cyaml.py @@ -0,0 +1,195 @@ +# coding: utf-8 + +from _ruamel_yaml 
import CParser, CEmitter # type: ignore + +from ruamel.yaml.constructor import Constructor, BaseConstructor, SafeConstructor +from ruamel.yaml.representer import Representer, SafeRepresenter, BaseRepresenter +from ruamel.yaml.resolver import Resolver, BaseResolver + + +from typing import Any, Union, Optional # NOQA +from ruamel.yaml.compat import StreamTextType, StreamType, VersionType # NOQA + +__all__ = ['CBaseLoader', 'CSafeLoader', 'CLoader', 'CBaseDumper', 'CSafeDumper', 'CDumper'] + + +# this includes some hacks to solve the usage of resolver by lower level +# parts of the parser + + +class CBaseLoader(CParser, BaseConstructor, BaseResolver): # type: ignore + def __init__( + self, + stream: StreamTextType, + version: Optional[VersionType] = None, + preserve_quotes: Optional[bool] = None, + ) -> None: + CParser.__init__(self, stream) + self._parser = self._composer = self + BaseConstructor.__init__(self, loader=self) + BaseResolver.__init__(self, loadumper=self) + # self.descend_resolver = self._resolver.descend_resolver + # self.ascend_resolver = self._resolver.ascend_resolver + # self.resolve = self._resolver.resolve + + +class CSafeLoader(CParser, SafeConstructor, Resolver): # type: ignore + def __init__( + self, + stream: StreamTextType, + version: Optional[VersionType] = None, + preserve_quotes: Optional[bool] = None, + ) -> None: + CParser.__init__(self, stream) + self._parser = self._composer = self + SafeConstructor.__init__(self, loader=self) + Resolver.__init__(self, loadumper=self) + # self.descend_resolver = self._resolver.descend_resolver + # self.ascend_resolver = self._resolver.ascend_resolver + # self.resolve = self._resolver.resolve + + +class CLoader(CParser, Constructor, Resolver): # type: ignore + def __init__( + self, + stream: StreamTextType, + version: Optional[VersionType] = None, + preserve_quotes: Optional[bool] = None, + ) -> None: + CParser.__init__(self, stream) + self._parser = self._composer = self + Constructor.__init__(self, 
class CBaseDumper(CEmitter, BaseRepresenter, BaseResolver):  # type: ignore
    """Dumper built from ``CEmitter``, ``BaseRepresenter`` and ``BaseResolver``."""

    def __init__(
        self: Any,
        stream: StreamType,
        default_style: Any = None,
        default_flow_style: Any = None,
        canonical: Optional[bool] = None,
        indent: Optional[int] = None,
        width: Optional[int] = None,
        allow_unicode: Optional[bool] = None,
        line_break: Any = None,
        encoding: Any = None,
        explicit_start: Optional[bool] = None,
        explicit_end: Optional[bool] = None,
        version: Any = None,
        tags: Any = None,
        block_seq_indent: Any = None,
        top_level_colon_align: Any = None,
        prefix_colon: Any = None,
    ) -> None:
        """Initialise the emitter, representer and resolver parts in that order.

        ``stream`` and the formatting/document options are handed to
        ``CEmitter``; ``default_style`` and ``default_flow_style`` go to the
        representer.  ``block_seq_indent``, ``top_level_colon_align`` and
        ``prefix_colon`` are accepted, matching the signature of the
        pure-Python dumpers, but are not passed on to any base class here.
        """
        # NOQA
        CEmitter.__init__(
            self,
            stream,
            canonical=canonical,
            indent=indent,
            width=width,
            encoding=encoding,
            allow_unicode=allow_unicode,
            line_break=line_break,
            explicit_start=explicit_start,
            explicit_end=explicit_end,
            version=version,
            tags=tags,
        )
        # this single object plays the emitter, serializer and representer role
        self._emitter = self._serializer = self._representer = self
        BaseRepresenter.__init__(
            self,
            default_style=default_style,
            default_flow_style=default_flow_style,
            dumper=self,
        )
        BaseResolver.__init__(self, loadumper=self)
class CDumper(CEmitter, Representer, Resolver):  # type: ignore
    """Dumper built from ``CEmitter``, ``Representer`` and ``Resolver``."""

    def __init__(
        self: Any,
        stream: StreamType,
        default_style: Any = None,
        default_flow_style: Any = None,
        canonical: Optional[bool] = None,
        indent: Optional[int] = None,
        width: Optional[int] = None,
        allow_unicode: Optional[bool] = None,
        line_break: Any = None,
        encoding: Any = None,
        explicit_start: Optional[bool] = None,
        explicit_end: Optional[bool] = None,
        version: Any = None,
        tags: Any = None,
        block_seq_indent: Any = None,
        top_level_colon_align: Any = None,
        prefix_colon: Any = None,
    ) -> None:
        """Initialise the emitter, representer and resolver parts in that order.

        ``stream`` and the formatting/document options are handed to
        ``CEmitter``; ``default_style`` and ``default_flow_style`` go to the
        representer.  ``block_seq_indent``, ``top_level_colon_align`` and
        ``prefix_colon`` are accepted, matching the signature of the
        pure-Python dumpers, but are not passed on to any base class here.
        """
        # NOQA
        CEmitter.__init__(
            self,
            stream,
            canonical=canonical,
            indent=indent,
            width=width,
            encoding=encoding,
            allow_unicode=allow_unicode,
            line_break=line_break,
            explicit_start=explicit_start,
            explicit_end=explicit_end,
            version=version,
            tags=tags,
        )
        # this single object plays the emitter, serializer and representer role
        self._emitter = self._serializer = self._representer = self
        # NOTE(review): unlike CBaseDumper, no dumper=/loadumper= is passed to
        # the representer and resolver here; kept as is - confirm whether the
        # difference is intentional.
        Representer.__init__(
            self, default_style=default_style, default_flow_style=default_flow_style,
        )
        Resolver.__init__(self)
@dataclass(order=True, frozen=True)
class Version:
    """A YAML version as a (major, minor) pair; immutable and ordered field by field."""

    major: int
    minor: int


def _require(condition: bool, message: str) -> None:
    """Raise ``AssertionError(message)`` unless ``condition`` holds.

    Used instead of the ``assert`` statement so the check is still performed
    when Python runs with ``-O`` (which strips ``assert``), while callers keep
    seeing the same exception type as before.
    """
    if not condition:
        raise AssertionError(message)


def version(
    major: int | str | Tuple[int, int] | None,
    minor: Optional[int] = None,
) -> Optional[Version]:
    """Build a ``Version`` from one of the accepted spellings.

    Args:
        major: ``None`` (no version), a string of the form ``'<major>.<minor>'``,
            a ``(major, minor)`` tuple, or the major number itself.
        minor: the minor number; only allowed (and then required) when
            ``major`` is given as a plain number.

    Returns:
        ``None`` when ``major`` is ``None``, otherwise the ``Version``.

    Raises:
        AssertionError: if ``minor`` is combined with a ``None``/string/tuple
            ``major``, if the string or tuple does not have exactly two
            parts, or if ``minor`` is missing for a plain ``major``.
        ValueError: if a part of a version string is not an integer.
    """
    if major is None:
        _require(minor is None, 'minor cannot be specified without major')
        return None
    if isinstance(major, str):
        _require(minor is None, 'minor cannot be combined with a version string')
        parts = major.split('.')
        _require(len(parts) == 2, f'expected "<major>.<minor>", got {major!r}')
        return Version(int(parts[0]), int(parts[1]))
    if isinstance(major, tuple):
        _require(minor is None, 'minor cannot be combined with a version tuple')
        _require(len(major) == 2, f'expected a (major, minor) tuple, got {major!r}')
        major, minor = major
    _require(minor is not None, 'minor version is missing')
    return Version(major, minor)


@dataclass(frozen=True)
class Tag:
    """A handle/prefix pair, immutable."""

    handle: str
    prefix: str


@dataclass
class DocInfo:
    """
    Store document information, can be used for analysis of a loaded YAML document
    requested_version: if explicitly set before load
    doc_version: from %YAML directive
    tags: from %TAG directives in scanned order
    """
    requested_version: Optional[Version] = None
    doc_version: Optional[Version] = None
    tags: list[Tag] = field(default_factory=list)
'RoundTripDumper'] + + +class BaseDumper(Emitter, Serializer, BaseRepresenter, BaseResolver): + def __init__( + self: Any, + stream: StreamType, + default_style: Any = None, + default_flow_style: Any = None, + canonical: Optional[bool] = None, + indent: Optional[int] = None, + width: Optional[int] = None, + allow_unicode: Optional[bool] = None, + line_break: Any = None, + encoding: Any = None, + explicit_start: Optional[bool] = None, + explicit_end: Optional[bool] = None, + version: Any = None, + tags: Any = None, + block_seq_indent: Any = None, + top_level_colon_align: Any = None, + prefix_colon: Any = None, + ) -> None: + # NOQA + Emitter.__init__( + self, + stream, + canonical=canonical, + indent=indent, + width=width, + allow_unicode=allow_unicode, + line_break=line_break, + block_seq_indent=block_seq_indent, + dumper=self, + ) + Serializer.__init__( + self, + encoding=encoding, + explicit_start=explicit_start, + explicit_end=explicit_end, + version=version, + tags=tags, + dumper=self, + ) + BaseRepresenter.__init__( + self, + default_style=default_style, + default_flow_style=default_flow_style, + dumper=self, + ) + BaseResolver.__init__(self, loadumper=self) + + +class SafeDumper(Emitter, Serializer, SafeRepresenter, Resolver): + def __init__( + self, + stream: StreamType, + default_style: Any = None, + default_flow_style: Any = None, + canonical: Optional[bool] = None, + indent: Optional[int] = None, + width: Optional[int] = None, + allow_unicode: Optional[bool] = None, + line_break: Any = None, + encoding: Any = None, + explicit_start: Optional[bool] = None, + explicit_end: Optional[bool] = None, + version: Any = None, + tags: Any = None, + block_seq_indent: Any = None, + top_level_colon_align: Any = None, + prefix_colon: Any = None, + ) -> None: + # NOQA + Emitter.__init__( + self, + stream, + canonical=canonical, + indent=indent, + width=width, + allow_unicode=allow_unicode, + line_break=line_break, + block_seq_indent=block_seq_indent, + dumper=self, + ) + 
Serializer.__init__( + self, + encoding=encoding, + explicit_start=explicit_start, + explicit_end=explicit_end, + version=version, + tags=tags, + dumper=self, + ) + SafeRepresenter.__init__( + self, + default_style=default_style, + default_flow_style=default_flow_style, + dumper=self, + ) + Resolver.__init__(self, loadumper=self) + + +class Dumper(Emitter, Serializer, Representer, Resolver): + def __init__( + self, + stream: StreamType, + default_style: Any = None, + default_flow_style: Any = None, + canonical: Optional[bool] = None, + indent: Optional[int] = None, + width: Optional[int] = None, + allow_unicode: Optional[bool] = None, + line_break: Any = None, + encoding: Any = None, + explicit_start: Optional[bool] = None, + explicit_end: Optional[bool] = None, + version: Any = None, + tags: Any = None, + block_seq_indent: Any = None, + top_level_colon_align: Any = None, + prefix_colon: Any = None, + ) -> None: + # NOQA + Emitter.__init__( + self, + stream, + canonical=canonical, + indent=indent, + width=width, + allow_unicode=allow_unicode, + line_break=line_break, + block_seq_indent=block_seq_indent, + dumper=self, + ) + Serializer.__init__( + self, + encoding=encoding, + explicit_start=explicit_start, + explicit_end=explicit_end, + version=version, + tags=tags, + dumper=self, + ) + Representer.__init__( + self, + default_style=default_style, + default_flow_style=default_flow_style, + dumper=self, + ) + Resolver.__init__(self, loadumper=self) + + +class RoundTripDumper(Emitter, Serializer, RoundTripRepresenter, VersionedResolver): + def __init__( + self, + stream: StreamType, + default_style: Any = None, + default_flow_style: Optional[bool] = None, + canonical: Optional[int] = None, + indent: Optional[int] = None, + width: Optional[int] = None, + allow_unicode: Optional[bool] = None, + line_break: Any = None, + encoding: Any = None, + explicit_start: Optional[bool] = None, + explicit_end: Optional[bool] = None, + version: Any = None, + tags: Any = None, + 
block_seq_indent: Any = None, + top_level_colon_align: Any = None, + prefix_colon: Any = None, + ) -> None: + # NOQA + Emitter.__init__( + self, + stream, + canonical=canonical, + indent=indent, + width=width, + allow_unicode=allow_unicode, + line_break=line_break, + block_seq_indent=block_seq_indent, + top_level_colon_align=top_level_colon_align, + prefix_colon=prefix_colon, + dumper=self, + ) + Serializer.__init__( + self, + encoding=encoding, + explicit_start=explicit_start, + explicit_end=explicit_end, + version=version, + tags=tags, + dumper=self, + ) + RoundTripRepresenter.__init__( + self, + default_style=default_style, + default_flow_style=default_flow_style, + dumper=self, + ) + VersionedResolver.__init__(self, loader=self) diff --git a/emitter.py b/emitter.py new file mode 100644 index 0000000..1d58be2 --- /dev/null +++ b/emitter.py @@ -0,0 +1,1779 @@ +# coding: utf-8 + +# Emitter expects events obeying the following grammar: +# stream ::= STREAM-START document* STREAM-END +# document ::= DOCUMENT-START node DOCUMENT-END +# node ::= SCALAR | sequence | mapping +# sequence ::= SEQUENCE-START node* SEQUENCE-END +# mapping ::= MAPPING-START (node node)* MAPPING-END + +import sys +from ruamel.yaml.error import YAMLError, YAMLStreamError +from ruamel.yaml.events import * # NOQA + +# fmt: off +from ruamel.yaml.compat import nprint, dbg, DBG_EVENT, \ + check_anchorname_char, nprintf # NOQA +# fmt: on + + +from typing import Any, Dict, List, Union, Text, Tuple, Optional # NOQA +from ruamel.yaml.compat import StreamType # NOQA + +__all__ = ['Emitter', 'EmitterError'] + + +class EmitterError(YAMLError): + pass + + +class ScalarAnalysis: + def __init__( + self, + scalar: Any, + empty: Any, + multiline: Any, + allow_flow_plain: bool, + allow_block_plain: bool, + allow_single_quoted: bool, + allow_double_quoted: bool, + allow_block: bool, + ) -> None: + self.scalar = scalar + self.empty = empty + self.multiline = multiline + self.allow_flow_plain = allow_flow_plain + 
class Indents:
    """Stack of ``(indent, seq)`` pairs, replacement for the list based stack of None/int."""

    def __init__(self) -> None:
        self.values: List[Tuple[Any, bool]] = []

    def append(self, val: Any, seq: Any) -> None:
        """Push indent ``val`` together with its sequence flag ``seq``."""
        entry = (val, seq)
        self.values.append(entry)

    def pop(self) -> Any:
        """Remove the top entry and return only its indent value."""
        indent, _seq = self.values.pop()
        return indent

    def seq_seq(self) -> bool:
        """Return True when both of the two topmost entries have a truthy seq flag."""
        if len(self.values) < 2:
            return False
        return bool(self.values[-2][1] and self.values[-1][1])

    def last_seq(self) -> bool:
        """Return the seq flag of the entry below the top one, False if there is none.

        That is the seq(uence) value for the element added before the last one
        in increase_indent().
        """
        return self.values[-2][1] if len(self.values) >= 2 else False

    def seq_flow_align(
        self, seq_indent: int, column: int, pre_comment: Optional[bool] = False,
    ) -> int:
        """Return the number of extra spaces needed because of the dash."""
        depth = len(self.values)
        top_is_seq = depth >= 2 and self.values[-1][1]
        if not top_is_seq and (depth == 0 or not pre_comment):
            return 0
        base = self.values[-1][0]
        if base is None:
            base = 0
        if pre_comment:
            return base + seq_indent  # type: ignore
        # -1 for the dash
        return base + seq_indent - column - 1  # type: ignore

    def __len__(self) -> int:
        return len(self.values)
flow_map_start = '{'
flow_map_end = '}'
flow_map_separator = ','

def __init__(
    self,
    stream: StreamType,
    canonical: Any = None,
    indent: Optional[int] = None,
    width: Optional[int] = None,
    allow_unicode: Optional[bool] = None,
    line_break: Any = None,
    block_seq_indent: Optional[int] = None,
    top_level_colon_align: Optional[bool] = None,
    prefix_colon: Any = None,
    brace_single_entry_mapping_in_flow_sequence: Optional[bool] = None,
    dumper: Any = None,
) -> None:
    # NOQA
    """Set up the emitter state machine and its formatting preferences.

    `indent` is only honoured for the sequence/map indent when it lies
    strictly between 1 and 10, `width` only when it exceeds twice the chosen
    indent, and `line_break` only when it is one of '\\r', '\\n' or '\\r\\n';
    otherwise the defaults 2, 80 and '\\n' are used.
    """
    self.dumper = dumper
    # register with the dumper unless it already has an emitter attached
    if dumper is not None and getattr(dumper, '_emitter', None) is None:
        dumper._emitter = self
    self.stream = stream

    # The encoding can be overridden by STREAM-START.
    self.encoding: Optional[Text] = None
    self.allow_space_break = None

    # State machine: the handler for the next event plus a stack of
    # handlers to return to once a nested structure is finished.
    self.states: List[Any] = []
    self.state: Any = self.expect_stream_start

    # Event being processed and the queue of events not yet processed.
    self.events: List[Any] = []
    self.event: Any = None

    # Current indentation level and the stack of enclosing levels.
    self.indents = Indents()
    self.indent: Optional[int] = None

    # One '{' or '[' entry per unclosed flow collection; an empty list
    # means block context.
    self.flow_context: List[Text] = []

    # Context flags for the node currently being emitted.
    self.root_context = False
    self.sequence_context = False
    self.mapping_context = False
    self.simple_key_context = False

    # Bookkeeping about the last emitted character: position, whether it
    # was whitespace, and whether it was an indention character
    # (indentation space, '-', '?', or ':').
    self.line = 0
    self.column = 0
    self.whitespace = True
    self.indention = True
    self.compact_seq_seq = True  # dash after dash
    self.compact_seq_map = True  # key after dash
    # self.compact_ms = False   # dash after key, only when explicit key with ?
    self.no_newline: Optional[bool] = None  # set if directly after `- `

    # Whether the document requires an explicit document end indicator.
    self.open_ended = False

    # Colon handling: an optional prefix is put in front of the plain colon.
    self.colon = ':'
    if prefix_colon is None:
        self.prefixed_colon = self.colon
    else:
        self.prefixed_colon = prefix_colon + self.colon
    # single entry mappings in flow sequence
    self.brace_single_entry_mapping_in_flow_sequence = (
        brace_single_entry_mapping_in_flow_sequence  # NOQA
    )

    # Formatting details.
    self.canonical = canonical
    self.allow_unicode = allow_unicode
    # set to False to get "\Uxxxxxxxx" for non-basic unicode like emojis
    self.unicode_supplementary = sys.maxunicode > 0xFFFF
    self.sequence_dash_offset = block_seq_indent or 0
    self.top_level_colon_align = top_level_colon_align
    self.best_sequence_indent = indent if indent and 1 < indent < 10 else 2
    self.requested_indent = indent  # specific for literal zero indent
    self.best_map_indent = self.best_sequence_indent
    self.best_width = (
        width if width and width > self.best_sequence_indent * 2 else 80
    )
    self.best_line_break: Any = (
        line_break if line_break in ['\r', '\n', '\r\n'] else '\n'
    )

    # Tag prefixes.
    self.tag_prefixes: Any = None

    # Prepared anchor and tag.
    self.prepared_anchor: Any = None
    self.prepared_tag: Any = None

    # Scalar analysis and style.
    self.analysis: Any = None
    self.style: Any = None

    self.scalar_after_indicator = True  # write a scalar on the same line as `---`

    self.alt_null = 'null'

@property
def stream(self) -> Any:
    """The output stream; raises YAMLStreamError when none has been set."""
    try:
        current = self._stream
    except AttributeError:
        raise YAMLStreamError('output stream needs to be specified')
    return current

@stream.setter
def stream(self, val: Any) -> None:
    # None leaves the stream unset; anything else must be writable
    if val is not None:
        if not hasattr(val, 'write'):
            raise YAMLStreamError('stream argument needs to have a write() method')
        self._stream = val

@property
def serializer(self) -> Any:
    """The dumper's serializer, or this object if the lookup fails."""
    try:
        attr_name = 'serializer' if hasattr(self.dumper, 'typ') else '_serializer'
        return getattr(self.dumper, attr_name)
    except AttributeError:
        return self  # cyaml

@property
def flow_level(self) -> int:
    """Number of currently unclosed flow collections."""
    return len(self.flow_context)

def dispose(self) -> None:
    """Reset the state attributes (to clear self-references)."""
    self.states, self.state = [], None

def emit(self, event: Any) -> None:
    """Queue `event` and run state handlers while enough events are queued."""
    if dbg(DBG_EVENT):
        nprint(event)
    self.events.append(event)
    while True:
        if self.need_more_events():
            break
        self.event = self.events.pop(0)
        self.state()
        self.event = None

# In some cases, we wait for a few next events before emitting.
+ + def need_more_events(self) -> bool: + if not self.events: + return True + event = self.events[0] + if isinstance(event, DocumentStartEvent): + return self.need_events(1) + elif isinstance(event, SequenceStartEvent): + return self.need_events(2) + elif isinstance(event, MappingStartEvent): + return self.need_events(3) + else: + return False + + def need_events(self, count: int) -> bool: + level = 0 + for event in self.events[1:]: + if isinstance(event, (DocumentStartEvent, CollectionStartEvent)): + level += 1 + elif isinstance(event, (DocumentEndEvent, CollectionEndEvent)): + level -= 1 + elif isinstance(event, StreamEndEvent): + level = -1 + if level < 0: + return False + return len(self.events) < count + 1 + + def increase_indent( + self, flow: bool = False, sequence: Optional[bool] = None, indentless: bool = False, + ) -> None: + self.indents.append(self.indent, sequence) + if self.indent is None: # top level + if flow: + # self.indent = self.best_sequence_indent if self.indents.last_seq() else \ + # self.best_map_indent + # self.indent = self.best_sequence_indent + self.indent = self.requested_indent + else: + self.indent = 0 + elif not indentless: + self.indent += ( + self.best_sequence_indent if self.indents.last_seq() else self.best_map_indent + ) + # if self.indents.last_seq(): + # if self.indent == 0: # top level block sequence + # self.indent = self.best_sequence_indent - self.sequence_dash_offset + # else: + # self.indent += self.best_sequence_indent + # else: + # self.indent += self.best_map_indent + + # States. + + # Stream handlers. 

def expect_stream_start(self) -> None:
    """Handle the StreamStartEvent that must open every event stream.

    Raises EmitterError for any other event.
    """
    if isinstance(self.event, StreamStartEvent):
        # only take the event's encoding if the stream does not declare one
        if self.event.encoding and not hasattr(self.stream, 'encoding'):
            self.encoding = self.event.encoding
        self.write_stream_start()
        self.state = self.expect_first_document_start
    else:
        raise EmitterError(f'expected StreamStartEvent, but got {self.event!s}')

def expect_nothing(self) -> None:
    """State after the stream end: any further event is an error."""
    raise EmitterError(f'expected nothing, but got {self.event!s}')

# Document handlers.

def expect_first_document_start(self) -> Any:
    """Handle the start of the first document in the stream."""
    return self.expect_document_start(first=True)

def expect_document_start(self, first: bool = False) -> None:
    """Handle a DocumentStartEvent or the StreamEndEvent.

    For a document start the version/tag directives are written and, unless
    the document may start implicitly, the '---' indicator. For a stream end
    the stream is finished. Raises EmitterError for any other event.
    """
    if isinstance(self.event, DocumentStartEvent):
        if (self.event.version or self.event.tags) and self.open_ended:
            # close the previous open-ended document before any directive
            self.write_indicator('...', True)
            self.write_indent()
        if self.event.version:
            version_text = self.prepare_version(self.event.version)
            self.write_version_directive(version_text)
        self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy()
        if self.event.tags:
            handles = sorted(self.event.tags.keys())
            for handle in handles:
                prefix = self.event.tags[handle]
                self.tag_prefixes[prefix] = handle
                handle_text = self.prepare_tag_handle(handle)
                prefix_text = self.prepare_tag_prefix(prefix)
                self.write_tag_directive(handle_text, prefix_text)
        # '---' may only be left out for a first document without
        # directives, that is not explicit, canonical or empty
        implicit = (
            first
            and not self.event.explicit
            and not self.canonical
            and not self.event.version
            and not self.event.tags
            and not self.check_empty_document()
        )
        if not implicit:
            self.write_indent()
            self.write_indicator('---', True)
            if self.canonical:
                self.write_indent()
        self.state = self.expect_document_root
    elif isinstance(self.event, StreamEndEvent):
        if self.open_ended:
            self.write_indicator('...', True)
            self.write_indent()
        self.write_stream_end()
        self.state = self.expect_nothing
    else:
        raise EmitterError(f'expected DocumentStartEvent, but got {self.event!s}')

def expect_document_end(self) -> None:
    """Handle a DocumentEndEvent, writing '...' when it is explicit.

    Raises EmitterError for any other event.
    """
    if isinstance(self.event, DocumentEndEvent):
        self.write_indent()
        if self.event.explicit:
            self.write_indicator('...', True)
            self.write_indent()
        self.flush_stream()
        self.state = self.expect_document_start
    else:
        raise EmitterError(f'expected DocumentEndEvent, but got {self.event!s}')

def expect_document_root(self) -> None:
    """Emit the root node; afterwards the document end is expected."""
    self.states.append(self.expect_document_end)
    self.expect_node(root=True)

# Node handlers.

def expect_node(
    self,
    root: bool = False,
    sequence: bool = False,
    mapping: bool = False,
    simple_key: bool = False,
) -> None:
    """Dispatch the current event to the alias/scalar/collection handler.

    The keyword arguments record the context the node appears in. Raises
    EmitterError when the event is neither an alias, a scalar nor a
    collection start.
    """
    self.root_context = root
    self.sequence_context = sequence  # not used in PyYAML
    force_flow_indent = False
    self.mapping_context = mapping
    self.simple_key_context = simple_key
    if isinstance(self.event, AliasEvent):
        self.expect_alias()
    elif isinstance(self.event, (ScalarEvent, CollectionStartEvent)):
        if (
            self.process_anchor('&')
            and isinstance(self.event, ScalarEvent)
            and self.sequence_context
        ):
            self.sequence_context = False
        if (
            root
            and isinstance(self.event, ScalarEvent)
            and not self.scalar_after_indicator
        ):
            self.write_indent()
        self.process_tag()
        if isinstance(self.event, ScalarEvent):
            # nprint('@', self.indention, self.no_newline, self.column)
            self.expect_scalar()
        elif isinstance(self.event, SequenceStartEvent):
            i2, n2 = self.indention, self.no_newline  # NOQA
            if self.event.comment:
                if self.event.flow_style is False:
                    if self.write_post_comment(self.event):
                        self.indention = False
                        self.no_newline = True
                if self.event.flow_style:
                    # remember the column, it is restored after the comment
                    column = self.column
                if self.write_pre_comment(self.event):
                    if self.event.flow_style:
                        # force_flow_indent = True
                        force_flow_indent = not self.indents.values[-1][1]
                    self.indention = i2
                    self.no_newline = not self.indention
                if self.event.flow_style:
                    self.column = column
            if (
                self.flow_level
                or self.canonical
                or self.event.flow_style
                or self.check_empty_sequence()
            ):
                self.expect_flow_sequence(force_flow_indent)
            else:
                self.expect_block_sequence()
                if self.indents.seq_seq():
                    # - -
                    self.indention = True
                    self.no_newline = False
        elif isinstance(self.event, MappingStartEvent):
            if self.event.flow_style is False and self.event.comment:
                self.write_post_comment(self.event)
            if self.event.comment and self.event.comment[1]:
                self.write_pre_comment(self.event)
                if self.event.flow_style and self.indents.values:
                    force_flow_indent = not self.indents.values[-1][1]
            if (
                self.flow_level
                or self.canonical
                or self.event.flow_style
                or self.check_empty_mapping()
            ):
                self.expect_flow_mapping(
                    single=self.event.nr_items == 1, force_flow_indent=force_flow_indent,
                )
            else:
                self.expect_block_mapping()
    else:
        # the f-prefix was missing here, so the event was never interpolated
        raise EmitterError(f'expected NodeEvent, but got {self.event!s}')

def expect_alias(self) -> None:
    """Write an alias ('*anchor') and return to the previous state.

    Raises EmitterError when the alias event carries no anchor.
    """
    if self.event.anchor is None:
        raise EmitterError('anchor is not specified for alias')
    self.process_anchor('*')
    self.state = self.states.pop()

def expect_scalar(self) -> None:
    """Write a scalar inside a temporary indent level, then pop the state."""
    self.increase_indent(flow=True)
    self.process_scalar()
    self.indent = self.indents.pop()
    self.state = self.states.pop()

# Flow sequence handlers.

def expect_flow_sequence(self, force_flow_indent: Optional[bool] = False) -> None:
    """Open a flow sequence: write '[' and push a new indent level.

    With `force_flow_indent` the indent is increased before the alignment
    is computed, otherwise after the opening bracket has been written.
    """
    if force_flow_indent:
        self.increase_indent(flow=True, sequence=True)
    padding = self.indents.seq_flow_align(
        self.best_sequence_indent, self.column, force_flow_indent,
    )
    opener = ' ' * padding + self.flow_seq_start
    self.write_indicator(opener, True, whitespace=True)
    if not force_flow_indent:
        self.increase_indent(flow=True, sequence=True)
    self.flow_context.append('[')
    self.state = self.expect_first_flow_sequence_item

def expect_first_flow_sequence_item(self) -> None:
    """Handle the first item of a flow sequence, or its immediate end."""
    if not isinstance(self.event, SequenceEndEvent):
        if self.canonical or self.column > self.best_width:
            self.write_indent()
        self.states.append(self.expect_flow_sequence_item)
        self.expect_node(sequence=True)
        return
    # the sequence is empty
    self.indent = self.indents.pop()
    popped = self.flow_context.pop()
    assert popped == '['
    self.write_indicator(self.flow_seq_end, False)
    if self.event.comment and self.event.comment[0]:
        # eol comment on empty flow sequence
        self.write_post_comment(self.event)
    elif self.flow_level == 0:
        self.write_line_break()
    self.state = self.states.pop()

def expect_flow_sequence_item(self) -> None:
    """Handle a subsequent flow sequence item, or the end of the sequence."""
    if not isinstance(self.event, SequenceEndEvent):
        self.write_indicator(self.flow_seq_separator, False)
        if self.canonical or self.column > self.best_width:
            self.write_indent()
        self.states.append(self.expect_flow_sequence_item)
        self.expect_node(sequence=True)
        return
    self.indent = self.indents.pop()
    popped = self.flow_context.pop()
    assert popped == '['
    if self.canonical:
        # ToDo: so-39595807, maybe add a space to the flow_seq_separator
        # and strip the last space, if space then indent, else do not
        # not sure that [1,2,3] is a valid YAML seq
        self.write_indicator(self.flow_seq_separator, False)
        self.write_indent()
    self.write_indicator(self.flow_seq_end, False)
    if self.event.comment and self.event.comment[0]:
        # eol comment on flow sequence
        self.write_post_comment(self.event)
    else:
        self.no_newline = False
    self.state = self.states.pop()

# Flow mapping handlers.

def expect_flow_mapping(
    self, single: Optional[bool] = False, force_flow_indent: Optional[bool] = False,
) -> None:
    """Open a flow mapping: write '{' (or nothing) and push an indent level.

    The brace is left out for a single-entry mapping directly inside a flow
    sequence, unless canonical output or
    `brace_single_entry_mapping_in_flow_sequence` asks for it.
    """
    if force_flow_indent:
        self.increase_indent(flow=True, sequence=False)
    padding = self.indents.seq_flow_align(
        self.best_sequence_indent, self.column, force_flow_indent,
    )
    braceless = (
        single
        and self.flow_level
        and self.flow_context[-1] == '['
        and not self.canonical
        and not self.brace_single_entry_mapping_in_flow_sequence
    )
    # single map item with flow context, no curly braces necessary
    map_init = '' if braceless else self.flow_map_start
    self.write_indicator(' ' * padding + map_init, True, whitespace=True)
    self.flow_context.append(map_init)
    if not force_flow_indent:
        self.increase_indent(flow=True, sequence=False)
    self.state = self.expect_first_flow_mapping_key

def expect_first_flow_mapping_key(self) -> None:
    """Handle the first key of a flow mapping, or its immediate end."""
    if isinstance(self.event, MappingEndEvent):
        self.indent = self.indents.pop()
        popped = self.flow_context.pop()
        assert popped == '{'  # empty flow mapping
        self.write_indicator(self.flow_map_end, False)
        if self.event.comment and self.event.comment[0]:
            # eol comment on empty mapping
            self.write_post_comment(self.event)
        elif self.flow_level == 0:
            self.write_line_break()
        self.state = self.states.pop()
        return
    if self.canonical or self.column > self.best_width:
        self.write_indent()
    if self.canonical or not self.check_simple_key():
        # explicit key
        self.write_indicator('?', True)
        self.states.append(self.expect_flow_mapping_value)
        self.expect_node(mapping=True)
    else:
        self.states.append(self.expect_flow_mapping_simple_value)
        self.expect_node(mapping=True, simple_key=True)

def expect_flow_mapping_key(self) -> None:
    """Handle a subsequent flow mapping key, or the end of the mapping."""
    if isinstance(self.event, MappingEndEvent):
        self.indent = self.indents.pop()
        popped = self.flow_context.pop()
        assert popped in ['{', '']
        if self.canonical:
            self.write_indicator(self.flow_map_separator, False)
            self.write_indent()
        if popped != '':
            self.write_indicator(self.flow_map_end, False)
        if self.event.comment and self.event.comment[0]:
            # eol comment on flow mapping, never reached on empty mappings
            self.write_post_comment(self.event)
        else:
            self.no_newline = False
        self.state = self.states.pop()
        return
    self.write_indicator(self.flow_map_separator, False)
    if self.canonical or self.column > self.best_width:
        self.write_indent()
    if self.canonical or not self.check_simple_key():
        # explicit key
        self.write_indicator('?', True)
        self.states.append(self.expect_flow_mapping_value)
        self.expect_node(mapping=True)
    else:
        self.states.append(self.expect_flow_mapping_simple_value)
        self.expect_node(mapping=True, simple_key=True)

def expect_flow_mapping_simple_value(self) -> None:
    """Write the colon after a simple key and emit the value node."""
    style = getattr(self.event, 'style', '?')
    if style != '-':  # suppress for flow style sets
        self.write_indicator(self.prefixed_colon, False)
    self.states.append(self.expect_flow_mapping_key)
    self.expect_node(mapping=True)

def expect_flow_mapping_value(self) -> None:
    """Write the colon after an explicit ('?') key and emit the value node."""
    if self.canonical or self.column > self.best_width:
        self.write_indent()
    self.write_indicator(self.prefixed_colon, True)
    self.states.append(self.expect_flow_mapping_key)
    self.expect_node(mapping=True)

# Block sequence handlers.

def expect_block_sequence(self) -> None:
    """Open a block sequence and push a new indent level.

    Inside a mapping the sequence is indentless when the last emitted
    character was not an indention character.
    """
    indentless = not self.indention if self.mapping_context else False
    if not self.compact_seq_seq and self.column != 0:
        self.write_line_break()
    self.increase_indent(flow=False, sequence=True, indentless=indentless)
    self.state = self.expect_first_block_sequence_item

def expect_first_block_sequence_item(self) -> Any:
    """Handle the first item of a block sequence."""
    return self.expect_block_sequence_item(first=True)

def expect_block_sequence_item(self, first: bool = False) -> None:
    """Write the '-' for the next block sequence item, or close the sequence."""
    at_end = not first and isinstance(self.event, SequenceEndEvent)
    # pre comments are written in both cases; on the end event these are
    # final comments on a block list e.g. empty line
    if self.event.comment and self.event.comment[1]:
        self.write_pre_comment(self.event)
    if at_end:
        self.indent = self.indents.pop()
        self.state = self.states.pop()
        self.no_newline = False
        return
    keep_on_line = self.no_newline if self.column == 0 else False
    self.write_indent()
    dash = ' ' * self.sequence_dash_offset + '-'
    self.write_indicator(dash, True, indention=True)
    if keep_on_line or self.sequence_dash_offset + 2 > self.best_sequence_indent:
        self.no_newline = True
    self.states.append(self.expect_block_sequence_item)
    self.expect_node(sequence=True)

# Block mapping handlers.

def expect_block_mapping(self) -> None:
    """Open a block mapping and push a new indent level."""
    if not self.mapping_context and not (self.compact_seq_map or self.column == 0):
        self.write_line_break()
    self.increase_indent(flow=False, sequence=False)
    self.state = self.expect_first_block_mapping_key

def expect_first_block_mapping_key(self) -> None:
    """Handle the first key of a block mapping."""
    return self.expect_block_mapping_key(first=True)

def expect_block_mapping_key(self, first: Any = False) -> None:
    """Emit the next block mapping key, or close the mapping."""
    if not first and isinstance(self.event, MappingEndEvent):
        if self.event.comment and self.event.comment[1]:
            # final comments from a doc
            self.write_pre_comment(self.event)
        self.indent = self.indents.pop()
        self.state = self.states.pop()
    else:
        if self.event.comment and self.event.comment[1]:
            # final comments from a doc
            self.write_pre_comment(self.event)
        self.write_indent()
        if self.check_simple_key():
            if not isinstance(
                self.event, (SequenceStartEvent, MappingStartEvent),
            ):  # sequence keys
                try:
                    if self.event.style == '?':
                        self.write_indicator('?', True, indention=True)
                except AttributeError:  # aliases have no style
                    pass
            self.states.append(self.expect_block_mapping_simple_value)
            self.expect_node(mapping=True, simple_key=True)
            # test on style for alias in !!set
            if isinstance(self.event, AliasEvent) and not self.event.style == '?':
                self.stream.write(' ')
        else:
            self.write_indicator('?', True, indention=True)
            self.states.append(self.expect_block_mapping_value)
            self.expect_node(mapping=True)

def expect_block_mapping_simple_value(self) -> None:
    """Write the colon after a simple key and emit the value node.

    No colon is written when the current event has style '?'.
    """
    if getattr(self.event, 'style', None) != '?':
        # prefix = ''
        if self.indent == 0 and self.top_level_colon_align is not None:
            # write non-prefixed colon
            c = ' ' * (self.top_level_colon_align - self.column) + self.colon
        else:
            c = self.prefixed_colon
        self.write_indicator(c, False)
    self.states.append(self.expect_block_mapping_key)
    self.expect_node(mapping=True)

def expect_block_mapping_value(self) -> None:
    """Write the colon after an explicit ('?') key and emit the value node."""
    self.write_indent()
    self.write_indicator(self.prefixed_colon, True, indention=True)
    self.states.append(self.expect_block_mapping_key)
    self.expect_node(mapping=True)

# Checkers.

def check_empty_sequence(self) -> bool:
    """True if the current sequence start is directly followed by its end."""
    return (
        isinstance(self.event, SequenceStartEvent)
        and bool(self.events)
        and isinstance(self.events[0], SequenceEndEvent)
    )

def check_empty_mapping(self) -> bool:
    """True if the current mapping start is directly followed by its end."""
    return (
        isinstance(self.event, MappingStartEvent)
        and bool(self.events)
        and isinstance(self.events[0], MappingEndEvent)
    )

def check_empty_document(self) -> bool:
    """True if the document consists of one plain, untagged empty scalar."""
    if not isinstance(self.event, DocumentStartEvent) or not self.events:
        return False
    event = self.events[0]
    return (
        isinstance(event, ScalarEvent)
        and event.anchor is None
        and event.tag is None
        and event.implicit
        and event.value == ""
    )

def check_simple_key(self) -> bool:
    """Tell whether the current event can be written as a simple key.

    The anchor, tag and scalar are prepared/analysed as a side effect, and
    their combined length must stay below MAX_SIMPLE_KEY_LENGTH.
    """
    length = 0
    if isinstance(self.event, NodeEvent) and self.event.anchor is not None:
        if self.prepared_anchor is None:
            self.prepared_anchor = self.prepare_anchor(self.event.anchor)
        length += len(self.prepared_anchor)
    if (
        isinstance(self.event, (ScalarEvent, CollectionStartEvent))
        and self.event.tag is not None
    ):
        if self.prepared_tag is None:
            self.prepared_tag = self.prepare_tag(self.event.ctag)
        length += len(self.prepared_tag)
    if isinstance(self.event, ScalarEvent):
        if self.analysis is None:
            self.analysis = self.analyze_scalar(self.event.value)
        length += len(self.analysis.scalar)
    return length < self.MAX_SIMPLE_KEY_LENGTH and (
        isinstance(self.event, AliasEvent)
        or (isinstance(self.event, SequenceStartEvent) and self.event.flow_style is True)
        or (isinstance(self.event, MappingStartEvent) and self.event.flow_style is True)
        or (
            isinstance(self.event, ScalarEvent)
            # if there is an explicit style for an empty string, it is a simple key
            and not (self.analysis.empty and self.style and self.style not in '\'"')
            and not self.analysis.multiline
        )
        or self.check_empty_sequence()
        or self.check_empty_mapping()
    )

# Anchor, Tag, and Scalar processors.

def process_anchor(self, indicator: Any) -> bool:
    """Write the event's anchor prefixed by `indicator` ('&' or '*').

    Returns False when the event has no anchor, True otherwise.
    """
    if self.event.anchor is None:
        self.prepared_anchor = None
        return False
    if self.prepared_anchor is None:
        self.prepared_anchor = self.prepare_anchor(self.event.anchor)
    if self.prepared_anchor:
        self.write_indicator(indicator + self.prepared_anchor, True)
        # issue 288
        self.no_newline = False
    self.prepared_anchor = None
    return True

def process_tag(self) -> None:
    """Write the tag of the current event unless it may stay implicit.

    Raises EmitterError when a tag has to be written but none is specified.
    """
    tag = self.event.tag
    if isinstance(self.event, ScalarEvent):
        if self.style is None:
            self.style = self.choose_scalar_style()
        if (
            self.event.value == ''
            and self.style == "'"
            and tag == 'tag:yaml.org,2002:null'
            and self.alt_null is not None
        ):
            # substitute the alternative null text and redo the style choice
            self.event.value = self.alt_null
            self.analysis = None
            self.style = self.choose_scalar_style()
        if (not self.canonical or tag is None) and (
            (self.style == "" and self.event.implicit[0])
            or (self.style != "" and self.event.implicit[1])
        ):
            self.prepared_tag = None
            return
        if self.event.implicit[0] and tag is None:
            tag = '!'
            self.prepared_tag = None
    else:
        if (not self.canonical or tag is None) and self.event.implicit:
            self.prepared_tag = None
            return
    if tag is None:
        raise EmitterError('tag is not specified')
    if self.prepared_tag is None:
        self.prepared_tag = self.prepare_tag(self.event.ctag)
    if self.prepared_tag:
        self.write_indicator(self.prepared_tag, True)
        if (
            self.sequence_context
            and not self.flow_level
            and isinstance(self.event, ScalarEvent)
        ):
            self.no_newline = True
    self.prepared_tag = None

def choose_scalar_style(self) -> Any:
    """Pick the style for the current scalar event.

    Returns "" for plain, "'" or '"' for quoted, '|' or '>' for block
    scalars, or None for an empty value whose ctag handle is '!!'.
    """
    # issue 449 needs this otherwise emits single quoted empty string
    if self.event.value == '' and self.event.ctag.handle == '!!':
        return None
    if self.analysis is None:
        self.analysis = self.analyze_scalar(self.event.value)
    if self.event.style == '"' or self.canonical:
        return '"'
    if (not self.event.style or self.event.style == '?' or self.event.style == '-') and (
        self.event.implicit[0] or not self.event.implicit[2]
    ):
        if not (
            self.simple_key_context and (self.analysis.empty or self.analysis.multiline)
        ) and (
            self.flow_level
            and self.analysis.allow_flow_plain
            or (not self.flow_level and self.analysis.allow_block_plain)
        ):
            return ""
    if self.event.style == '-':
        return ""
    self.analysis.allow_block = True
    if self.event.style and self.event.style in '|>':
        if (
            not self.flow_level
            and not self.simple_key_context
            and self.analysis.allow_block
        ):
            return self.event.style
    if not self.event.style and self.analysis.allow_double_quoted:
        if "'" in self.event.value or '\n' in self.event.value:
            return '"'
    if not self.event.style or self.event.style == "'":
        if self.analysis.allow_single_quoted and not (
            self.simple_key_context and self.analysis.multiline
        ):
            return "'"
    return '"'

def process_scalar(self) -> None:
    """Write the current scalar in its chosen style, then its post comment."""
    if self.analysis is None:
        self.analysis = self.analyze_scalar(self.event.value)
    if self.style is None:
        self.style = self.choose_scalar_style()
    split = not self.simple_key_context
    # if self.analysis.multiline and split    \
    #         and (not self.style or self.style in '\'\"'):
    #     self.write_indent()
    # nprint('xx', self.sequence_context, self.flow_level)
    if self.sequence_context and not self.flow_level:
        self.write_indent()
    if self.style == '"':
        self.write_double_quoted(self.analysis.scalar, split)
    elif self.style == "'":
        self.write_single_quoted(self.analysis.scalar, split)
    elif self.style == '>':
        try:
            cmx = self.event.comment[1][0]
        except (IndexError, TypeError):
            cmx = ""
        self.write_folded(self.analysis.scalar, cmx)
        if (
            self.event.comment
            and self.event.comment[0]
            and self.event.comment[0].column >= self.indent
        ):
            # comment following a folded scalar must dedent (issue 376)
            self.event.comment[0].column = self.indent - 1  # type: ignore
    elif self.style == '|':
        # self.write_literal(self.analysis.scalar, self.event.comment)
        try:
            cmx = self.event.comment[1][0]
        except (IndexError, TypeError):
            cmx = ""
        self.write_literal(self.analysis.scalar, cmx)
        if (
            self.event.comment
            and self.event.comment[0]
            and self.event.comment[0].column >= self.indent
        ):
            # comment following a literal scalar must dedent (issue 376)
            self.event.comment[0].column = self.indent - 1  # type: ignore
    else:
        self.write_plain(self.analysis.scalar, split)
    self.analysis = None
    self.style = None
    if self.event.comment:
        self.write_post_comment(self.event)

# Analyzers.

def prepare_version(self, version: Any) -> Any:
    """Return the 'major.minor' text for a %YAML directive.

    Raises EmitterError when the major version is not 1.
    """
    major, minor = version
    if major != 1:
        raise EmitterError(f'unsupported YAML version: {major:d}.{minor:d}')
    return f'{major:d}.{minor:d}'

def prepare_tag_handle(self, handle: Any) -> Any:
    """Validate a tag handle and return it unchanged.

    Raises EmitterError when the handle is empty, is not delimited by '!',
    or contains characters other than ASCII alphanumerics, '-' and '_'.
    """
    if not handle:
        raise EmitterError('tag handle must not be empty')
    if handle[0] != '!' or handle[-1] != '!':
        raise EmitterError(f"tag handle must start and end with '!': {handle!r}")
    for ch in handle[1:-1]:
        if not ('0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' or ch in '-_'):
            raise EmitterError(f'invalid character {ch!r} in the tag handle: {handle!r}')
    return handle

def prepare_tag_prefix(self, prefix: Any) -> Any:
    """Return `prefix` with every disallowed character percent-escaped.

    Disallowed characters are escaped as the %XX sequences of their UTF-8
    bytes. Raises EmitterError when the prefix is empty.
    """
    if not prefix:
        raise EmitterError('tag prefix must not be empty')
    chunks: List[Any] = []
    start = end = 0
    if prefix[0] == '!':
        end = 1
    ch_set = "-;/?:@&=+$,_.~*'()[]"
    if self.dumper:
        version = getattr(self.dumper, 'version', (1, 2))
        if version is None or version >= (1, 2):
            ch_set += '#'
    while end < len(prefix):
        ch = prefix[end]
        if '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' or ch in ch_set:
            end += 1
        else:
            if start < end:
                chunks.append(prefix[start:end])
            start = end = end + 1
            # escape per UTF-8 byte; formatting ord(ch) directly produced
            # invalid escapes such as %20AC for non-ASCII characters
            for byte in ch.encode('utf-8'):
                chunks.append(f'%{byte:02X}')
    if start < end:
        chunks.append(prefix[start:end])
    return "".join(chunks)

def prepare_tag(self, tag: Any) -> Any:
    """Return the text to write for `tag`.

    A matching prefix from `self.tag_prefixes` is replaced by its handle;
    without one the verbatim form '!<...>' is used. Disallowed characters
    in the suffix are escaped as the %XX sequences of their UTF-8 bytes.
    Raises EmitterError when the tag is empty.
    """
    if not tag:
        raise EmitterError('tag must not be empty')
    tag = str(tag)
    if tag == '!' or tag == '!!':
        return tag
    handle = None
    suffix = tag
    prefixes = sorted(self.tag_prefixes.keys())
    # no break: the last matching prefix in sorted order wins
    for prefix in prefixes:
        if tag.startswith(prefix) and (prefix == '!' or len(prefix) < len(tag)):
            handle = self.tag_prefixes[prefix]
            suffix = tag[len(prefix) :]
    chunks: List[Any] = []
    start = end = 0
    ch_set = "-;/?:@&=+$,_.~*'()[]"
    if self.dumper:
        version = getattr(self.dumper, 'version', (1, 2))
        if version is None or version >= (1, 2):
            ch_set += '#'
    while end < len(suffix):
        ch = suffix[end]
        if (
            '0' <= ch <= '9'
            or 'A' <= ch <= 'Z'
            or 'a' <= ch <= 'z'
            or ch in ch_set
            or (ch == '!' and handle != '!')
        ):
            end += 1
        else:
            if start < end:
                chunks.append(suffix[start:end])
            start = end = end + 1
            # escape per UTF-8 byte; formatting ord(ch) directly produced
            # invalid escapes such as %20AC for non-ASCII characters
            for byte in ch.encode('utf-8'):
                chunks.append(f'%{byte:02X}')
    if start < end:
        chunks.append(suffix[start:end])
    suffix_text = "".join(chunks)
    if handle:
        return f'{handle!s}{suffix_text!s}'
    else:
        return f'!<{suffix_text!s}>'

def prepare_anchor(self, anchor: Any) -> Any:
    """Validate an anchor name and return it unchanged.

    Raises EmitterError when the anchor is empty or contains a character
    rejected by check_anchorname_char.
    """
    if not anchor:
        raise EmitterError('anchor must not be empty')
    for ch in anchor:
        if not check_anchorname_char(ch):
            raise EmitterError(f'invalid character {ch!r} in the anchor: {anchor!r}')
    return anchor

def analyze_scalar(self, scalar: Any) -> Any:
    """Scan `scalar` and return a ScalarAnalysis of the styles it allows."""
    # Empty scalar is a special case.
    if not scalar:
        return ScalarAnalysis(
            scalar=scalar,
            empty=True,
            multiline=False,
            allow_flow_plain=False,
            allow_block_plain=True,
            allow_single_quoted=True,
            allow_double_quoted=True,
            allow_block=False,
        )

    # Indicators and special characters.
    block_indicators = False
    flow_indicators = False
    line_breaks = False
    special_characters = False

    # Important whitespace combinations.
    leading_space = False
    leading_break = False
    trailing_space = False
    trailing_break = False
    break_space = False
    space_break = False

    # Check document indicators.
    if scalar.startswith('---') or scalar.startswith('...'):
        block_indicators = True
        flow_indicators = True

    # First character or preceded by a whitespace.
    preceded_by_whitespace = True

    # Last character or followed by a whitespace.
    followed_by_whitespace = len(scalar) == 1 or scalar[1] in '\0 \t\r\n\x85\u2028\u2029'

    # The previous character is a space.
    previous_space = False

    # The previous character is a break.
    previous_break = False

    index = 0
    while index < len(scalar):
        ch = scalar[index]

        # Check for indicators.
        if index == 0:
            # Leading indicators are special characters.
            if ch in '#,[]{}&*!|>\'"%@`':
                flow_indicators = True
                block_indicators = True
            if ch in '?:':  # ToDo
                if self.serializer.use_version == (1, 1):
                    flow_indicators = True
                elif len(scalar) == 1:  # single character
                    flow_indicators = True
                if followed_by_whitespace:
                    block_indicators = True
            if ch == '-' and followed_by_whitespace:
                flow_indicators = True
                block_indicators = True
        else:
            # Some indicators cannot appear within a scalar as well.
            if ch in ',[]{}':  # http://yaml.org/spec/1.2/spec.html#id2788859
                flow_indicators = True
            if ch == '?' and self.serializer.use_version == (1, 1):
                flow_indicators = True
            if ch == ':':
                if followed_by_whitespace:
                    flow_indicators = True
                    block_indicators = True
            if ch == '#' and preceded_by_whitespace:
                flow_indicators = True
                block_indicators = True

        # Check for line breaks, special, and unicode characters.
        if ch in '\n\x85\u2028\u2029':
            line_breaks = True
        if not (ch == '\n' or '\x20' <= ch <= '\x7E'):
            if (
                ch == '\x85'
                or '\xA0' <= ch <= '\uD7FF'
                or '\uE000' <= ch <= '\uFFFD'
                or (self.unicode_supplementary and ('\U00010000' <= ch <= '\U0010FFFF'))
            ) and ch != '\uFEFF':
                # unicode_characters = True
                if not self.allow_unicode:
                    special_characters = True
            else:
                special_characters = True

        # Detect important whitespace combinations.
        if ch == ' ':
            if index == 0:
                leading_space = True
            if index == len(scalar) - 1:
                trailing_space = True
            if previous_break:
                break_space = True
            previous_space = True
            previous_break = False
        elif ch in '\n\x85\u2028\u2029':
            if index == 0:
                leading_break = True
            if index == len(scalar) - 1:
                trailing_break = True
            if previous_space:
                space_break = True
            previous_space = False
            previous_break = True
        else:
            previous_space = False
            previous_break = False

        # Prepare for the next character.
        index += 1
        preceded_by_whitespace = ch in '\0 \t\r\n\x85\u2028\u2029'
        followed_by_whitespace = (
            index + 1 >= len(scalar) or scalar[index + 1] in '\0 \t\r\n\x85\u2028\u2029'
        )

    # Let's decide what styles are allowed.
    allow_flow_plain = True
    allow_block_plain = True
    allow_single_quoted = True
    allow_double_quoted = True
    allow_block = True

    # Leading and trailing whitespaces are bad for plain scalars.
    if leading_space or leading_break or trailing_space or trailing_break:
        allow_flow_plain = allow_block_plain = False

    # We do not permit trailing spaces for block scalars.
    if trailing_space:
        allow_block = False

    # Spaces at the beginning of a new line are only acceptable for block
    # scalars.
    if break_space:
        allow_flow_plain = allow_block_plain = allow_single_quoted = False

    # Spaces followed by breaks, as well as special character are only
    # allowed for double quoted scalars.
    if special_characters:
        allow_flow_plain = allow_block_plain = allow_single_quoted = allow_block = False
    elif space_break:
        allow_flow_plain = allow_block_plain = allow_single_quoted = False
        if not self.allow_space_break:
            allow_block = False

    # Although the plain scalar writer supports breaks, we never emit
    # multiline plain scalars.
    if line_breaks:
        allow_flow_plain = allow_block_plain = False

    # Flow indicators are forbidden for flow plain scalars.
    if flow_indicators:
        allow_flow_plain = False

    # Block indicators are forbidden for block plain scalars.
    if block_indicators:
        allow_block_plain = False

    return ScalarAnalysis(
        scalar=scalar,
        empty=False,
        multiline=line_breaks,
        allow_flow_plain=allow_flow_plain,
        allow_block_plain=allow_block_plain,
        allow_single_quoted=allow_single_quoted,
        allow_double_quoted=allow_double_quoted,
        allow_block=allow_block,
    )

# Writers.
+ + def flush_stream(self) -> None: + if hasattr(self.stream, 'flush'): + self.stream.flush() + + def write_stream_start(self) -> None: + # Write BOM if needed. + if self.encoding and self.encoding.startswith('utf-16'): + self.stream.write('\uFEFF'.encode(self.encoding)) + + def write_stream_end(self) -> None: + self.flush_stream() + + def write_indicator( + self, + indicator: Any, + need_whitespace: Any, + whitespace: bool = False, + indention: bool = False, + ) -> None: + if self.whitespace or not need_whitespace: + data = indicator + else: + data = ' ' + indicator + self.whitespace = whitespace + self.indention = self.indention and indention + self.column += len(data) + self.open_ended = False + if bool(self.encoding): + data = data.encode(self.encoding) + self.stream.write(data) + + def write_indent(self) -> None: + indent = self.indent or 0 + if ( + not self.indention + or self.column > indent + or (self.column == indent and not self.whitespace) + ): + if bool(self.no_newline): + self.no_newline = False + else: + self.write_line_break() + if self.column < indent: + self.whitespace = True + data = ' ' * (indent - self.column) + self.column = indent + if self.encoding: + data = data.encode(self.encoding) # type: ignore + self.stream.write(data) + + def write_line_break(self, data: Any = None) -> None: + if data is None: + data = self.best_line_break + self.whitespace = True + self.indention = True + self.line += 1 + self.column = 0 + if bool(self.encoding): + data = data.encode(self.encoding) + self.stream.write(data) + + def write_version_directive(self, version_text: Any) -> None: + data: Any = f'%YAML {version_text!s}' + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.write_line_break() + + def write_tag_directive(self, handle_text: Any, prefix_text: Any) -> None: + data: Any = f'%TAG {handle_text!s} {prefix_text!s}' + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + 
def write_single_quoted(self, text: Any, split: Any = True) -> None:
    """Write *text* to the stream as a single-quoted YAML scalar.

    The text is scanned one character at a time (plus one extra pass with
    ``ch is None`` marking the end). ``spaces`` and ``breaks`` record what
    kind of character the previous position held, and ``start`` marks the
    beginning of the segment that has not been written yet.

    *split* allows folding at a single space once the current column has
    passed ``self.best_width``.
    """
    if self.root_context:
        # A requested indent forces the scalar onto its own line; a non-zero
        # value additionally indents that line.
        if self.requested_indent is not None:
            self.write_line_break()
            if self.requested_indent != 0:
                self.write_indent()
    self.write_indicator("'", True)
    spaces = False
    breaks = False
    start = end = 0
    while end <= len(text):
        ch = None
        if end < len(text):
            ch = text[end]
        if spaces:
            # End of a run of spaces.
            if ch is None or ch != ' ':
                if (
                    start + 1 == end
                    and self.column > self.best_width
                    and split
                    and start != 0
                    and end != len(text)
                ):
                    # Exactly one interior space past the preferred width:
                    # fold here instead of writing the space.
                    self.write_indent()
                else:
                    data = text[start:end]
                    self.column += len(data)
                    if bool(self.encoding):
                        data = data.encode(self.encoding)
                    self.stream.write(data)
                start = end
        elif breaks:
            # End of a run of line breaks.
            if ch is None or ch not in '\n\x85\u2028\u2029':
                # A run that starts with '\n' gets one additional break
                # before the run itself is written.
                if text[start] == '\n':
                    self.write_line_break()
                for br in text[start:end]:
                    if br == '\n':
                        # No argument: write_line_break picks its default.
                        self.write_line_break()
                    else:
                        self.write_line_break(br)
                self.write_indent()
                start = end
        else:
            # Ordinary characters: flush the pending segment when a space,
            # a break, a quote or the end of the text is reached.
            if ch is None or ch in ' \n\x85\u2028\u2029' or ch == "'":
                if start < end:
                    data = text[start:end]
                    self.column += len(data)
                    if bool(self.encoding):
                        data = data.encode(self.encoding)
                    self.stream.write(data)
                    start = end
        if ch == "'":
            # A single quote inside the scalar is written doubled.
            data = "''"
            self.column += 2
            if bool(self.encoding):
                data = data.encode(self.encoding)
            self.stream.write(data)
            start = end + 1
        if ch is not None:
            spaces = ch == ' '
            breaks = ch in '\n\x85\u2028\u2029'
        end += 1
    self.write_indicator("'", False)
self.requested_indent != 0: + self.write_indent() + self.write_indicator('"', True) + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if ( + ch is None + or ch in '"\\\x85\u2028\u2029\uFEFF' + or not ( + '\x20' <= ch <= '\x7E' + or ( + self.allow_unicode + and ( + ('\xA0' <= ch <= '\uD7FF') + or ('\uE000' <= ch <= '\uFFFD') + or ('\U00010000' <= ch <= '\U0010FFFF') + ) + ) + ) + ): + if start < end: + data = text[start:end] + self.column += len(data) + if bool(self.encoding): + data = data.encode(self.encoding) + self.stream.write(data) + start = end + if ch is not None: + if ch in self.ESCAPE_REPLACEMENTS: + data = '\\' + self.ESCAPE_REPLACEMENTS[ch] + elif ch <= '\xFF': + data = '\\x%02X' % ord(ch) + elif ch <= '\uFFFF': + data = '\\u%04X' % ord(ch) + else: + data = '\\U%08X' % ord(ch) + self.column += len(data) + if bool(self.encoding): + data = data.encode(self.encoding) + self.stream.write(data) + start = end + 1 + if ( + 0 < end < len(text) - 1 + and (ch == ' ' or start >= end) + and self.column + (end - start) > self.best_width + and split + ): + # SO https://stackoverflow.com/a/75634614/1307905 + # data = text[start:end] + u'\\' # <<< replaced with following six lines + need_backquote = True + if len(text) > end: + try: + space_pos = text.index(' ', end) + if ( + '"' not in text[end:space_pos] + and "'" not in text[end:space_pos] + and text[space_pos + 1] != ' ' + and text[end - 1 : end + 1] != ' ' + ): + need_backquote = False + except (ValueError, IndexError): + pass + data = text[start:end] + ('\\' if need_backquote else '') + if start < end: + start = end + self.column += len(data) + if bool(self.encoding): + data = data.encode(self.encoding) + self.stream.write(data) + self.write_indent() + self.whitespace = False + self.indention = False + if text[start] == ' ': + if not need_backquote: + # remove leading space it will load from the newline + start += 1 + # data = u'\\' # <<< replaced with following line 
def determine_block_hints(self, text: Any) -> Any:
    """Compute the block scalar header hints for *text*.

    Returns a ``(hints, indent, indicator)`` tuple: ``hints`` is the string
    appended after the ``|`` / ``>`` indicator by the callers, ``indent`` is
    0 or 2, and ``indicator`` is ``''``, ``'-'`` or ``'+'``.
    """
    indent = 0
    indicator = ''
    hints = ''
    if text:
        if text[0] in ' \n\x85\u2028\u2029':
            # Text starting with a space or break gets an explicit
            # indentation digit in the hints.
            indent = 2
            hints += str(indent)
        elif self.root_context:
            # Search for '\n---' or '\n...' followed by a space, CR or LF.
            # A hit sets ``indent`` but, unlike the branch above, does not
            # add a digit to ``hints``.
            for end in ['\n---', '\n...']:
                pos = 0
                while True:
                    pos = text.find(end, pos)
                    if pos == -1:
                        break
                    try:
                        if text[pos + 4] in ' \r\n':
                            break
                    except IndexError:
                        # Marker sits at the very end of the text; keep
                        # searching from the next position.
                        pass
                    pos += 1
                if pos > -1:
                    break
            if pos > 0:
                indent = 2
        if text[-1] not in '\n\x85\u2028\u2029':
            # No trailing break.
            indicator = '-'
        elif len(text) == 1 or text[-2] in '\n\x85\u2028\u2029':
            # Text is a single break, or ends with more than one break.
            indicator = '+'
    hints += indicator
    return hints, indent, indicator
def write_literal(self, text: Any, comment: Any = None) -> None:
    """Write *text* as a literal (``|``) block scalar.

    *comment* is appended to the block header only when it is a ``str``;
    any other value is ignored. The header hints come from
    ``determine_block_hints``; a ``'+'`` indicator marks the stream as
    open ended.
    """
    hints, _indent, _indicator = self.determine_block_hints(text)
    if not isinstance(comment, str):
        comment = ''
    self.write_indicator('|' + hints + comment, True)
    if _indicator == '+':
        self.open_ended = True
    self.write_line_break()
    breaks = True
    start = end = 0
    while end <= len(text):
        ch = None
        if end < len(text):
            ch = text[end]
        if breaks:
            # End of a run of line breaks (or the very start of the text).
            if ch is None or ch not in '\n\x85\u2028\u2029':
                for br in text[start:end]:
                    if br == '\n':
                        self.write_line_break()
                    else:
                        self.write_line_break(br)
                if ch is not None:
                    if self.root_context:
                        idnx = self.indent if self.indent is not None else 0
                        data = ' ' * (_indent + idnx)
                        # Encode like every other write in this method, so a
                        # byte stream never receives a str.
                        if bool(self.encoding):
                            data = data.encode(self.encoding)
                        self.stream.write(data)
                    else:
                        self.write_indent()
                start = end
        else:
            # End of a run of ordinary characters: flush the line content.
            if ch is None or ch in '\n\x85\u2028\u2029':
                data = text[start:end]
                if bool(self.encoding):
                    data = data.encode(self.encoding)
                self.stream.write(data)
                if ch is None:
                    self.write_line_break()
                start = end
        if ch is not None:
            breaks = ch in '\n\x85\u2028\u2029'
        end += 1
def write_comment(self, comment: Any, pre: bool = False) -> None:
    """Write a comment token to the stream.

    For a non-*pre* comment one trailing newline is stripped from the value
    and a line break is written afterwards instead. The comment is padded
    with spaces towards its original column, ``comment.start_mark.column``.
    """
    value = comment.value
    # endswith() is safe for an empty comment value, unlike value[-1].
    if not pre and value.endswith('\n'):
        value = value[:-1]
    # original column position of the comment
    col = comment.start_mark.column
    if comment.value and comment.value.startswith('\n'):
        # never inject extra spaces if the comment starts with a newline
        # and not a real comment (e.g. an empty line following a key-value)
        col = self.column
    # NOTE: the previous code had a branch for ``col < self.column + 1`` that
    # evaluated a bare ``ValueError`` (no ``raise``) and therefore did nothing.
    # It is dropped rather than turned into a raise: raising would force a
    # leading space onto comments that start at column 0.

    # at least one space if the current column >= the start column of the
    # comment, but not at the start of a line
    nr_spaces = col - self.column
    if self.column and value.strip() and nr_spaces < 1 and value[0] != '\n':
        nr_spaces = 1
    value = ' ' * nr_spaces + value
    try:
        if bool(self.encoding):
            value = value.encode(self.encoding)
    except UnicodeDecodeError:
        pass
    self.stream.write(value)
    if not pre:
        self.write_line_break()
class StreamMark:
    """Position in an input stream: source name, absolute index, line and column.

    ``line`` and ``column`` are stored zero-based; ``__str__`` reports them
    one-based. Defining ``__eq__`` without ``__hash__`` leaves instances
    unhashable, as before.
    """

    __slots__ = 'name', 'index', 'line', 'column'

    def __init__(self, name: Any, index: int, line: int, column: int) -> None:
        self.name = name
        self.index = index
        self.line = line
        self.column = column

    def __str__(self) -> Any:
        where = f' in "{self.name!s}", line {self.line + 1:d}, column {self.column + 1:d}'
        return where

    def __eq__(self, other: Any) -> bool:
        """Compare by line, column, name and index.

        Any object exposing those four attributes is comparable. For anything
        else ``NotImplemented`` is returned, so ``mark == 5`` evaluates to
        ``False`` and no ``AttributeError`` is raised.
        """
        try:
            if self.line != other.line or self.column != other.column:
                return False
            if self.name != other.name or self.index != other.index:
                return False
        except AttributeError:
            return NotImplemented
        return True

    def __ne__(self, other: Any) -> bool:
        result = self.__eq__(other)
        # Propagate NotImplemented untouched; negating it is an error.
        if result is NotImplemented:
            return result
        return not result
class MarkedYAMLError(YAMLError):
    """YAML error that carries a context and a problem, each with an optional mark."""

    def __init__(
        self,
        context: Any = None,
        context_mark: Any = None,
        problem: Any = None,
        problem_mark: Any = None,
        note: Any = None,
        warn: Any = None,
    ) -> None:
        # warn is accepted but ignored
        self.context, self.context_mark = context, context_mark
        self.problem, self.problem_mark = problem, problem_mark
        self.note = note

    def __str__(self) -> Any:
        """Join context, marks, problem and dedented note, one item per line."""
        ctx_mark = self.context_mark
        prob_mark = self.problem_mark
        parts: List[str] = []
        if self.context is not None:
            parts.append(self.context)
        if ctx_mark is not None:
            # The context mark is only shown when it adds information, i.e.
            # when there is no problem/problem mark or they point elsewhere.
            adds_information = (
                self.problem is None
                or prob_mark is None
                or ctx_mark.name != prob_mark.name
                or ctx_mark.line != prob_mark.line
                or ctx_mark.column != prob_mark.column
            )
            if adds_information:
                parts.append(str(ctx_mark))
        if self.problem is not None:
            parts.append(self.problem)
        if prob_mark is not None:
            parts.append(str(prob_mark))
        if self.note:
            parts.append(textwrap.dedent(self.note))
        return '\n'.join(parts)
+Alternatively include the following in your code: + + import warnings + warnings.simplefilter('ignore', ruamel.yaml.error.UnsafeLoaderWarning) + +In most other cases you should consider using 'safe_load(stream)'""" + pass + + +warnings.simplefilter('once', UnsafeLoaderWarning) + + +class MantissaNoDotYAML1_1Warning(YAMLWarning): + def __init__(self, node: Any, flt_str: Any) -> None: + self.node = node + self.flt = flt_str + + def __str__(self) -> Any: + line = self.node.start_mark.line + col = self.node.start_mark.column + return f""" +In YAML 1.1 floating point values should have a dot ('.') in their mantissa. +See the Floating-Point Language-Independent Type for YAML™ Version 1.1 specification +( http://yaml.org/type/float.html ). This dot is not required for JSON nor for YAML 1.2 + +Correct your float: "{self.flt}" on line: {line}, column: {col} + +or alternatively include the following in your code: + + import warnings + warnings.simplefilter('ignore', ruamel.yaml.error.MantissaNoDotYAML1_1Warning) + +""" + + +warnings.simplefilter('once', MantissaNoDotYAML1_1Warning) + + +class YAMLFutureWarning(Warning): + pass + + +class MarkedYAMLFutureWarning(YAMLFutureWarning): + def __init__( + self, + context: Any = None, + context_mark: Any = None, + problem: Any = None, + problem_mark: Any = None, + note: Any = None, + warn: Any = None, + ) -> None: + self.context = context + self.context_mark = context_mark + self.problem = problem + self.problem_mark = problem_mark + self.note = note + self.warn = warn + + def __str__(self) -> Any: + lines: List[str] = [] + if self.context is not None: + lines.append(self.context) + + if self.context_mark is not None and ( + self.problem is None + or self.problem_mark is None + or self.context_mark.name != self.problem_mark.name + or self.context_mark.line != self.problem_mark.line + or self.context_mark.column != self.problem_mark.column + ): + lines.append(str(self.context_mark)) + if self.problem is not None: + 
class Event:
    """Base class of all events; stores the start/end marks and a comment."""

    __slots__ = 'start_mark', 'end_mark', 'comment'
    crepr = 'Unspecified Event'

    def __init__(
        self, start_mark: Any = None, end_mark: Any = None, comment: Any = CommentCheck,
    ) -> None:
        self.start_mark = start_mark
        self.end_mark = end_mark
        # CommentCheck is the "argument not supplied" default; it is stored
        # as None.
        if comment is CommentCheck:
            comment = None
        self.comment = comment

    def __repr__(self) -> Any:
        """Return ``ClassName(value, key=..., comment=...)``.

        Only attributes that exist on the instance and are not None are
        listed. With ``SHOW_LINES`` set, the line:column of both marks is
        appended.
        """
        arguments: List[str] = []
        if hasattr(self, 'value'):
            # if you use repr(getattr(self, 'value')) then flake8 complains about
            # abuse of getattr with a constant. When you change to self.value
            # then mypy throws an error
            arguments.append(repr(self.value))
        for key in ['anchor', 'tag', 'implicit', 'flow_style', 'style']:
            v = getattr(self, key, None)
            if v is not None:
                arguments.append(f'{key!s}={v!r}')
        if self.comment not in [None, CommentCheck]:
            arguments.append(f'comment={self.comment!r}')
        if SHOW_LINES:
            arguments.append(
                f'({self.start_mark.line}:{self.start_mark.column}/'
                f'{self.end_mark.line}:{self.end_mark.column})',
            )
        joined = ', '.join(arguments)
        return f'{self.__class__.__name__!s}({joined!s})'

    def compact_repr(self) -> str:
        return f'{self.crepr}'
class DocumentStartEvent(Event):
    """Start-of-document event carrying the explicit flag, version and tags."""

    __slots__ = 'explicit', 'version', 'tags'
    crepr = '+DOC'

    def __init__(
        self,
        start_mark: Any = None,
        end_mark: Any = None,
        explicit: Any = None,
        version: Any = None,
        tags: Any = None,
        comment: Any = None,
    ) -> None:
        Event.__init__(self, start_mark, end_mark, comment)
        self.explicit, self.version, self.tags = explicit, version, tags

    def compact_repr(self) -> str:
        """Return the short form, with ' ---' appended for an explicit start."""
        if self.explicit:
            return f'{self.crepr} ---'
        return f'{self.crepr}'
class AliasEvent(NodeEvent):
    """Event for an alias (``*anchor``) node; adds a ``style`` slot."""

    # A 1-tuple, like the other single-slot event classes. A bare string
    # declares the same slot but is easy to misread.
    __slots__ = ('style',)
    crepr = '=ALI'

    def __init__(
        self,
        anchor: Any,
        start_mark: Any = None,
        end_mark: Any = None,
        style: Any = None,
        comment: Any = None,
    ) -> None:
        NodeEvent.__init__(self, anchor, start_mark, end_mark, comment)
        self.style = style

    def compact_repr(self) -> str:
        """Return the short form: the class marker followed by ``*anchor``."""
        return f'{self.crepr} *{self.anchor}'
class BaseLoader(Reader, Scanner, Parser, Composer, BaseConstructor, VersionedResolver):
    """Loader combining Reader, Scanner, Parser, Composer, BaseConstructor
    and VersionedResolver in a single object."""

    def __init__(
        self,
        stream: StreamTextType,
        version: Optional[VersionType] = None,
        preserve_quotes: Optional[bool] = None,
    ) -> None:
        # NOTE: preserve_quotes is accepted but not used by this loader.
        # comment_handling is set before any component is initialised.
        self.comment_handling = None
        # Every component is initialised explicitly and receives this object
        # as its ``loader``.
        Reader.__init__(self, stream, loader=self)
        Scanner.__init__(self, loader=self)
        Parser.__init__(self, loader=self)
        Composer.__init__(self, loader=self)
        BaseConstructor.__init__(self, loader=self)
        VersionedResolver.__init__(self, version, loader=self)
class RoundTripLoader(
    Reader,
    RoundTripScanner,
    RoundTripParser,
    Composer,
    RoundTripConstructor,
    VersionedResolver,
):
    """Loader combining Reader, RoundTripScanner, RoundTripParser, Composer,
    RoundTripConstructor and VersionedResolver in a single object."""

    def __init__(
        self,
        stream: StreamTextType,
        version: Optional[VersionType] = None,
        preserve_quotes: Optional[bool] = None,
    ) -> None:
        # self.reader = Reader.__init__(self, stream)
        self.comment_handling = None  # issue 385
        # Every component is initialised explicitly and receives this object
        # as its ``loader``.
        Reader.__init__(self, stream, loader=self)
        RoundTripScanner.__init__(self, loader=self)
        RoundTripParser.__init__(self, loader=self)
        Composer.__init__(self, loader=self)
        # preserve_quotes is forwarded to the constructor only.
        RoundTripConstructor.__init__(self, preserve_quotes=preserve_quotes, loader=self)
        VersionedResolver.__init__(self, version, loader=self)
ruamel.yaml.resolver import VersionedResolver, Resolver # NOQA +from ruamel.yaml.representer import ( + BaseRepresenter, + SafeRepresenter, + Representer, + RoundTripRepresenter, +) +from ruamel.yaml.constructor import ( + BaseConstructor, + SafeConstructor, + Constructor, + RoundTripConstructor, +) +from ruamel.yaml.loader import Loader as UnsafeLoader # NOQA +from ruamel.yaml.comments import CommentedMap, CommentedSeq, C_PRE +from ruamel.yaml.docinfo import DocInfo, version, Version + +from typing import List, Set, Dict, Tuple, Union, Any, Callable, Optional, Text, Type # NOQA +from types import TracebackType +from ruamel.yaml.compat import StreamType, StreamTextType, VersionType # NOQA +from pathlib import Path # NOQA + +try: + from _ruamel_yaml import CParser, CEmitter # type: ignore +except: # NOQA + CParser = CEmitter = None + +# import io + + +# YAML is an acronym, i.e. spoken: rhymes with "camel". And thus a +# subset of abbreviations, which should be all caps according to PEP8 + + +class YAML: + def __init__( + self: Any, + *, + typ: Optional[Union[List[Text], Text]] = None, + pure: Any = False, + output: Any = None, + plug_ins: Any = None, + ) -> None: # input=None, + """ + typ: 'rt'/None -> RoundTripLoader/RoundTripDumper, (default) + 'safe' -> SafeLoader/SafeDumper, + 'unsafe' -> normal/unsafe Loader/Dumper (pending deprecation) + 'full' -> full Dumper only, including python built-ins that are + potentially unsafe to load + 'base' -> baseloader + pure: if True only use Python modules + input/output: needed to work as context manager + plug_ins: a list of plug-in files + """ + + self.typ = ['rt'] if typ is None else (typ if isinstance(typ, list) else [typ]) + self.pure = pure + + # self._input = input + self._output = output + self._context_manager: Any = None + + self.plug_ins: List[Any] = [] + for pu in ([] if plug_ins is None else plug_ins) + self.official_plug_ins(): + file_name = pu.replace(os.sep, '.') + 
self.plug_ins.append(import_module(file_name)) + self.Resolver: Any = ruamel.yaml.resolver.VersionedResolver + self.allow_unicode = True + self.Reader: Any = None + self.Representer: Any = None + self.Constructor: Any = None + self.Scanner: Any = None + self.Serializer: Any = None + self.default_flow_style: Any = None + self.comment_handling = None + typ_found = 1 + setup_rt = False + if 'rt' in self.typ: + setup_rt = True + elif 'safe' in self.typ: + self.Emitter = ( + ruamel.yaml.emitter.Emitter if pure or CEmitter is None else CEmitter + ) + self.Representer = ruamel.yaml.representer.SafeRepresenter + self.Parser = ruamel.yaml.parser.Parser if pure or CParser is None else CParser + self.Composer = ruamel.yaml.composer.Composer + self.Constructor = ruamel.yaml.constructor.SafeConstructor + elif 'base' in self.typ: + self.Emitter = ruamel.yaml.emitter.Emitter + self.Representer = ruamel.yaml.representer.BaseRepresenter + self.Parser = ruamel.yaml.parser.Parser if pure or CParser is None else CParser + self.Composer = ruamel.yaml.composer.Composer + self.Constructor = ruamel.yaml.constructor.BaseConstructor + elif 'unsafe' in self.typ: + warnings.warn( + "\nyou should no longer specify 'unsafe'.\nFor **dumping only** use yaml=YAML(typ='full')\n", # NOQA + PendingDeprecationWarning, + stacklevel=2, + ) + self.Emitter = ( + ruamel.yaml.emitter.Emitter if pure or CEmitter is None else CEmitter + ) + self.Representer = ruamel.yaml.representer.Representer + self.Parser = ruamel.yaml.parser.Parser if pure or CParser is None else CParser + self.Composer = ruamel.yaml.composer.Composer + self.Constructor = ruamel.yaml.constructor.Constructor + elif 'full' in self.typ: + self.Emitter = ( + ruamel.yaml.emitter.Emitter if pure or CEmitter is None else CEmitter + ) + self.Representer = ruamel.yaml.representer.Representer + self.Parser = ruamel.yaml.parser.Parser if pure or CParser is None else CParser + # self.Composer = ruamel.yaml.composer.Composer + # self.Constructor = 
ruamel.yaml.constructor.Constructor + elif 'rtsc' in self.typ: + self.default_flow_style = False + # no optimized rt-dumper yet + self.Emitter = ruamel.yaml.emitter.RoundTripEmitter + self.Serializer = ruamel.yaml.serializer.Serializer + self.Representer = ruamel.yaml.representer.RoundTripRepresenter + self.Scanner = ruamel.yaml.scanner.RoundTripScannerSC + # no optimized rt-parser yet + self.Parser = ruamel.yaml.parser.RoundTripParserSC + self.Composer = ruamel.yaml.composer.Composer + self.Constructor = ruamel.yaml.constructor.RoundTripConstructor + self.comment_handling = C_PRE + else: + setup_rt = True + typ_found = 0 + if setup_rt: + self.default_flow_style = False + # no optimized rt-dumper yet + self.Emitter = ruamel.yaml.emitter.RoundTripEmitter + self.Serializer = ruamel.yaml.serializer.Serializer + self.Representer = ruamel.yaml.representer.RoundTripRepresenter + self.Scanner = ruamel.yaml.scanner.RoundTripScanner + # no optimized rt-parser yet + self.Parser = ruamel.yaml.parser.RoundTripParser + self.Composer = ruamel.yaml.composer.Composer + self.Constructor = ruamel.yaml.constructor.RoundTripConstructor + del setup_rt + self.stream = None + self.canonical = None + self.old_indent = None + self.width: Union[int, None] = None + self.line_break = None + + self.map_indent: Union[int, None] = None + self.sequence_indent: Union[int, None] = None + self.sequence_dash_offset: int = 0 + self.compact_seq_seq = None + self.compact_seq_map = None + self.sort_base_mapping_type_on_output = None # default: sort + + self.top_level_colon_align = None + self.prefix_colon = None + self._version: Optional[Any] = None + self.preserve_quotes: Optional[bool] = None + self.allow_duplicate_keys = False # duplicate keys in map, set + self.encoding = 'utf-8' + self.explicit_start: Union[bool, None] = None + self.explicit_end: Union[bool, None] = None + self._tags = None + self.doc_infos: List[DocInfo] = [] + self.default_style = None + 
self.top_level_block_style_scalar_no_indent_error_1_1 = False + # directives end indicator with single scalar document + self.scalar_after_indicator: Optional[bool] = None + # [a, b: 1, c: {d: 2}] vs. [a, {b: 1}, {c: {d: 2}}] + self.brace_single_entry_mapping_in_flow_sequence = False + for module in self.plug_ins: + if getattr(module, 'typ', None) in self.typ: + typ_found += 1 + module.init_typ(self) + break + if typ_found == 0: + raise NotImplementedError( + f'typ "{self.typ}" not recognised (need to install plug-in?)', + ) + + @property + def reader(self) -> Any: + try: + return self._reader # type: ignore + except AttributeError: + self._reader = self.Reader(None, loader=self) + return self._reader + + @property + def scanner(self) -> Any: + try: + return self._scanner # type: ignore + except AttributeError: + self._scanner = self.Scanner(loader=self) + return self._scanner + + @property + def parser(self) -> Any: + attr = '_' + sys._getframe().f_code.co_name + if not hasattr(self, attr): + if self.Parser is not CParser: + setattr(self, attr, self.Parser(loader=self)) + else: + if getattr(self, '_stream', None) is None: + # wait for the stream + return None + else: + # if not hasattr(self._stream, 'read') and hasattr(self._stream, 'open'): + # # pathlib.Path() instance + # setattr(self, attr, CParser(self._stream)) + # else: + setattr(self, attr, CParser(self._stream)) + # self._parser = self._composer = self + # nprint('scanner', self.loader.scanner) + + return getattr(self, attr) + + @property + def composer(self) -> Any: + attr = '_' + sys._getframe().f_code.co_name + if not hasattr(self, attr): + setattr(self, attr, self.Composer(loader=self)) + return getattr(self, attr) + + @property + def constructor(self) -> Any: + attr = '_' + sys._getframe().f_code.co_name + if not hasattr(self, attr): + if self.Constructor is None: + if 'full' in self.typ: + raise YAMLError( + "\nyou can only use yaml=YAML(typ='full') for dumping\n", # NOQA + ) + cnst = 
@property
def emitter(self) -> Any:
    """
    Return the emitter used for dumping, creating the pure-Python one on first use.

    The instance is cached in ``self._emitter``. If that attribute already
    exists it is returned as is, whatever ``self.Emitter`` is.

    When ``self.Emitter`` is the C emitter nothing is created here and None is
    returned: get_serializer_representer_emitter() builds the C based dumper
    together with its stream and stores it in ``_emitter`` itself.
    """
    # literal attribute name instead of sys._getframe() introspection; the
    # value is the same ('_' + name of this property)
    attr = '_emitter'
    if hasattr(self, attr):
        return getattr(self, attr)
    if self.Emitter is CEmitter:
        # wait for the stream
        return None
    _emitter = self.Emitter(
        None,
        canonical=self.canonical,
        indent=self.old_indent,
        width=self.width,
        allow_unicode=self.allow_unicode,
        line_break=self.line_break,
        prefix_colon=self.prefix_colon,
        brace_single_entry_mapping_in_flow_sequence=self.brace_single_entry_mapping_in_flow_sequence,  # NOQA
        dumper=self,
    )
    setattr(self, attr, _emitter)
    # only override the emitter's own defaults for values that were set explicitly
    if self.map_indent is not None:
        _emitter.best_map_indent = self.map_indent
    if self.sequence_indent is not None:
        _emitter.best_sequence_indent = self.sequence_indent
    if self.sequence_dash_offset is not None:
        _emitter.sequence_dash_offset = self.sequence_dash_offset
    if self.compact_seq_seq is not None:
        _emitter.compact_seq_seq = self.compact_seq_seq
    if self.compact_seq_map is not None:
        _emitter.compact_seq_map = self.compact_seq_map
    return _emitter
def scan(self, stream: Union[Path, StreamTextType]) -> Any:
    """
    Scan a YAML stream and produce scanning tokens.

    This is a generator. An object without `read` but with `open` (e.g. a
    pathlib.Path) is opened in binary mode and its tokens are yielded; the
    file is closed once the tokens are exhausted.
    """
    if not hasattr(stream, 'read') and hasattr(stream, 'open'):
        # pathlib.Path() instance
        # `return self.scan(fp)` inside a generator would only set the
        # StopIteration value and yield nothing, so delegate explicitly
        with stream.open('rb') as fp:
            yield from self.scan(fp)
        return
    self.doc_infos.append(DocInfo(requested_version=version(self.version)))
    self.tags = {}
    _, parser = self.get_constructor_parser(stream)
    try:
        while self.scanner.check_token():
            yield self.scanner.get_token()
    finally:
        parser.dispose()
        for comp in ('reader', 'scanner'):
            # best effort: the component may not have been created, or may
            # not provide a reset method
            try:
                getattr(getattr(self, '_' + comp), f'reset_{comp}')()
            except AttributeError:
                pass


def parse(self, stream: Union[Path, StreamTextType]) -> Any:
    """
    Parse a YAML stream and produce parsing events.

    This is a generator. An object without `read` but with `open` (e.g. a
    pathlib.Path) is opened in binary mode and its events are yielded; the
    file is closed once the events are exhausted.
    """
    if not hasattr(stream, 'read') and hasattr(stream, 'open'):
        # pathlib.Path() instance
        # see scan(): a plain `return` of the nested generator yields nothing
        with stream.open('rb') as fp:
            yield from self.parse(fp)
        return
    self.doc_infos.append(DocInfo(requested_version=version(self.version)))
    self.tags = {}
    _, parser = self.get_constructor_parser(stream)
    try:
        while parser.check_event():
            yield parser.get_event()
    finally:
        parser.dispose()
        for comp in ('reader', 'scanner'):
            # best effort: the component may not have been created, or may
            # not provide a reset method
            try:
                getattr(getattr(self, '_' + comp), f'reset_{comp}')()
            except AttributeError:
                pass
def compose_all(self, stream: Union[Path, StreamTextType]) -> Any:
    """
    Parse all YAML documents in a stream
    and produce corresponding representation trees.

    This is a generator. An object without `read` but with `open` (e.g. a
    pathlib.Path) is opened in binary mode, the same way compose() and load()
    handle it, and closed once all nodes have been produced.
    """
    if not hasattr(stream, 'read') and hasattr(stream, 'open'):
        # pathlib.Path() instance
        with stream.open('rb') as fp:
            yield from self.compose_all(fp)
        return
    self.doc_infos.append(DocInfo(requested_version=version(self.version)))
    self.tags = {}
    constructor, parser = self.get_constructor_parser(stream)
    try:
        while constructor.composer.check_node():
            yield constructor.composer.get_node()
    finally:
        parser.dispose()
        for comp in ('reader', 'scanner'):
            # best effort: the component may not have been created, or may
            # not provide a reset method
            try:
                getattr(getattr(self, '_' + comp), f'reset_{comp}')()
            except AttributeError:
                pass
def load_all(self, stream: Union[Path, StreamTextType]) -> Any:  # *, skip=None):
    """
    Parse all YAML documents in a stream and produce the corresponding
    Python objects, one per document.

    This is a generator. An object without `read` but with `open` (e.g. a
    pathlib.Path) is opened in binary mode, like load() does, so the bytes
    reach the reader undecoded instead of being decoded with the platform's
    default text encoding. The file is closed once all documents are produced.

    A DocInfo entry is appended to `self.doc_infos` before the first document
    and again after every document that has been yielded.
    """
    if not hasattr(stream, 'read') and hasattr(stream, 'open'):
        # pathlib.Path() instance
        with stream.open('rb') as fp:
            yield from self.load_all(fp)
        return
    self.doc_infos.append(DocInfo(requested_version=version(self.version)))
    self.tags = {}
    constructor, parser = self.get_constructor_parser(stream)
    try:
        while constructor.check_data():
            yield constructor.get_data()
            self.doc_infos.append(DocInfo(requested_version=version(self.version)))
    finally:
        parser.dispose()
        for comp in ('reader', 'scanner'):
            # best effort: the component may not have been created, or may
            # not provide a reset method
            try:
                getattr(getattr(self, '_' + comp), f'reset_{comp}')()
            except AttributeError:
                pass
def emit(self, events: Any, stream: Any) -> None:
    """
    Emit YAML parsing events into a stream.

    Nothing is returned. The emitter obtained from
    get_serializer_representer_emitter() is disposed of afterwards, also when
    emitting one of the events raises.
    """
    _, _, emitter = self.get_serializer_representer_emitter(stream, None)
    try:
        for event in events:
            emitter.emit(event)
    finally:
        # an AttributeError from dispose() propagates unchanged, as before
        emitter.dispose()


def serialize(self, node: Any, stream: Optional[StreamType]) -> Any:
    """
    Serialize a representation tree into a YAML stream.

    Delegates to serialize_all() with a one-element list; nothing is returned.
    """
    self.serialize_all([node], stream)
def dump(
    self: Any, data: Any, stream: Union[Path, StreamType, None] = None, *, transform: Any = None,
) -> Any:
    """
    Dump a single document.

    Inside a `with` block (a context manager is active) the data goes to the
    output given at construction time; `transform` is not allowed there and a
    TypeError is raised if no output was set. Otherwise `stream` is required
    and the call is forwarded to dump_all() with a one-element list.
    """
    if self._context_manager:
        if not self._output:
            raise TypeError('Missing output stream while dumping from context manager')
        if transform is not None:
            x = self.__class__.__name__
            raise TypeError(
                f'{x}.dump() in the context manager cannot have transform keyword',
            )
        self._context_manager.dump(data)
    else:  # old style
        if stream is None:
            raise TypeError('Need a stream argument when not dumping from context manager')
        return self.dump_all([data], stream, transform=transform)


def dump_all(
    self, documents: Any, stream: Union[Path, StreamType], *, transform: Any = None,
) -> Any:
    """
    Dump every item of `documents` to `stream` through a temporary
    YAMLContextManager.

    Raises NotImplementedError when a context manager is already active.
    `_output` and `_context_manager` are reset even if dumping raises;
    otherwise a later dump() would still take the context manager branch and
    write to this call's stream.
    """
    if self._context_manager:
        raise NotImplementedError
    self._output = stream
    try:
        self._context_manager = YAMLContextManager(self, transform=transform)
        for data in documents:
            self._context_manager.dump(data)
        self._context_manager.teardown_output()
    finally:
        # NOTE(review): on failure teardown_output() has not run, so cached
        # serializer/emitter objects are left untouched here
        self._output = None
        self._context_manager = None
+ if self.top_level_colon_align is True: + tlca: Any = max([len(str(x)) for x in documents[0]]) + else: + tlca = self.top_level_colon_align + if transform is not None: + fstream = stream + if self.encoding is None: + stream = StringIO() + else: + stream = BytesIO() + serializer, representer, emitter = self.get_serializer_representer_emitter( + stream, tlca, + ) + try: + self.serializer.open() + for data in documents: + try: + self.representer.represent(data) + except AttributeError: + # nprint(dir(dumper._representer)) + raise + self.serializer.close() + finally: + try: + self.emitter.dispose() + except AttributeError: + raise + # self.dumper.dispose() # cyaml + delattr(self, '_serializer') + delattr(self, '_emitter') + if transform: + val = stream.getvalue() + if self.encoding: + val = val.decode(self.encoding) + if fstream is None: + transform(val) + else: + fstream.write(transform(val)) + return None + + def get_serializer_representer_emitter(self, stream: StreamType, tlca: Any) -> Any: + # we have only .Serializer to deal with (vs .Reader & .Scanner), much simpler + if self.Emitter is not CEmitter: + if self.Serializer is None: + self.Serializer = ruamel.yaml.serializer.Serializer + self.emitter.stream = stream + self.emitter.top_level_colon_align = tlca + if self.scalar_after_indicator is not None: + self.emitter.scalar_after_indicator = self.scalar_after_indicator + return self.serializer, self.representer, self.emitter + if self.Serializer is not None: + # cannot set serializer with CEmitter + self.Emitter = ruamel.yaml.emitter.Emitter + self.emitter.stream = stream + self.emitter.top_level_colon_align = tlca + if self.scalar_after_indicator is not None: + self.emitter.scalar_after_indicator = self.scalar_after_indicator + return self.serializer, self.representer, self.emitter + # C routines + + rslvr = ( + ruamel.yaml.resolver.BaseResolver + if 'base' in self.typ + else ruamel.yaml.resolver.Resolver + ) + + class XDumper(CEmitter, self.Representer, rslvr): 
# type: ignore + def __init__( + selfx: StreamType, + stream: Any, + default_style: Any = None, + default_flow_style: Any = None, + canonical: Optional[bool] = None, + indent: Optional[int] = None, + width: Optional[int] = None, + allow_unicode: Optional[bool] = None, + line_break: Any = None, + encoding: Any = None, + explicit_start: Optional[bool] = None, + explicit_end: Optional[bool] = None, + version: Any = None, + tags: Any = None, + block_seq_indent: Any = None, + top_level_colon_align: Any = None, + prefix_colon: Any = None, + ) -> None: + # NOQA + CEmitter.__init__( + selfx, + stream, + canonical=canonical, + indent=indent, + width=width, + encoding=encoding, + allow_unicode=allow_unicode, + line_break=line_break, + explicit_start=explicit_start, + explicit_end=explicit_end, + version=version, + tags=tags, + ) + selfx._emitter = selfx._serializer = selfx._representer = selfx + self.Representer.__init__( + selfx, default_style=default_style, default_flow_style=default_flow_style, + ) + rslvr.__init__(selfx) + + self._stream = stream + dumper = XDumper( + stream, + default_style=self.default_style, + default_flow_style=self.default_flow_style, + canonical=self.canonical, + indent=self.old_indent, + width=self.width, + allow_unicode=self.allow_unicode, + line_break=self.line_break, + encoding=self.encoding, + explicit_start=self.explicit_start, + explicit_end=self.explicit_end, + version=self.version, + tags=self.tags, + ) + self._emitter = self._serializer = dumper + return dumper, dumper, dumper + + # basic types + def map(self, **kw: Any) -> Any: + if 'rt' in self.typ: + return CommentedMap(**kw) + else: + return dict(**kw) + + def seq(self, *args: Any) -> Any: + if 'rt' in self.typ: + return CommentedSeq(*args) + else: + return list(*args) + + # helpers + def official_plug_ins(self) -> Any: + """search for list of subdirs that are plug-ins, if __file__ is not available, e.g. 
def register_class(self, cls: Any) -> Any:
    """
    register a class for dumping/loading
    - if it has attribute yaml_tag use that to register, else use '!' followed
      by the class name
    - if it has methods to_yaml/from_yaml use those to dump/load else dump attributes
      as mapping

    Returns `cls`, so this can also be used as a decorator.
    """
    tag = getattr(cls, 'yaml_tag', '!' + cls.__name__)

    # Decide on the callables first. Wrapping the registration call itself in
    # `except AttributeError` would also swallow an AttributeError raised
    # inside add_representer()/add_constructor() and silently replace the
    # class' own methods by the generic ones.
    if hasattr(cls, 'to_yaml'):
        to_yaml = cls.to_yaml
    else:

        def to_yaml(representer: Any, data: Any) -> Any:
            return representer.represent_yaml_object(
                tag, data, cls, flow_style=representer.default_flow_style,
            )

    self.representer.add_representer(cls, to_yaml)

    if hasattr(cls, 'from_yaml'):
        from_yaml = cls.from_yaml
    else:

        def from_yaml(constructor: Any, node: Any) -> Any:
            return constructor.construct_yaml_object(node, cls)

    self.constructor.add_constructor(tag, from_yaml)
    return cls
class YAMLContextManager:
    """
    Output side of dumping one or more documents through a YAML() instance.

    The target is taken from `yaml._output`. An object without `write` but
    with `open` (e.g. a pathlib.Path) is opened for writing here and closed
    in teardown_output(). If `transform` is given, output is collected in an
    in-memory buffer and the transformed text is written to the real target
    in teardown_output().
    """

    def __init__(self, yaml: Any, transform: Any = None) -> None:
        # used to be: (Any, Optional[Callable]) -> None
        self._yaml = yaml
        self._output_inited = False
        self._output_path = None
        self._output = self._yaml._output
        self._transform = transform
        # real target while output is buffered for `transform`; always defined
        # so that teardown_output() can test it
        self._fstream = None

        if not hasattr(self._output, 'write') and hasattr(self._output, 'open'):
            # pathlib.Path() instance, open with the same mode
            self._output_path = self._output
            self._output = self._output_path.open('w')

        if self._transform is not None:
            self._fstream = self._output
            if self._yaml.encoding is None:
                self._output = StringIO()
            else:
                self._output = BytesIO()

    def teardown_output(self) -> None:
        """
        Finish the output: close the serializer, dispose of the emitter, drop
        both cached objects from the YAML() instance, apply `transform` if
        any, and close a file that was opened here for a path.

        When nothing was dumped only that file is closed.
        """
        if not self._output_inited:
            # no serializer was opened, but __init__ may have opened a file
            # for a path target; do not leave it open
            if self._output_path is not None:
                opened = self._output if self._fstream is None else self._fstream
                opened.close()
            return
        self._yaml.serializer.close()
        self._yaml.emitter.dispose()
        # self.dumper.dispose()  # cyaml
        delattr(self._yaml, '_serializer')
        delattr(self._yaml, '_emitter')
        if self._transform:
            val = self._output.getvalue()
            if self._yaml.encoding:
                val = val.decode(self._yaml.encoding)
            if self._fstream is None:
                self._transform(val)
            else:
                self._fstream.write(self._transform(val))
                self._fstream.flush()
                self._output = self._fstream  # maybe not necessary
        if self._output_path is not None:
            self._output.close()

    def init_output(self, first_data: Any) -> None:
        """
        Set up serializer/representer/emitter on the YAML() instance for the
        output stream and open the serializer.

        If `top_level_colon_align` is True the alignment width is the length
        of the longest str() of the items of `first_data` (0 if it is empty).
        """
        if self._yaml.top_level_colon_align is True:
            # default=0: an empty first document has no keys to align
            tlca: Any = max((len(str(x)) for x in first_data), default=0)
        else:
            tlca = self._yaml.top_level_colon_align
        self._yaml.get_serializer_representer_emitter(self._output, tlca)
        self._yaml.serializer.open()
        self._output_inited = True

    def dump(self, data: Any) -> None:
        """Represent `data`, initialising the output on the first call."""
        if not self._output_inited:
            self.init_output(data)
        self._yaml.representer.represent(data)
def yaml_object(yml: Any) -> Any:
    """ decorator for classes that needs to dump/load objects
    The tag for such objects is taken from the class attribute yaml_tag (or
    '!' followed by the class name, as is, in case unavailable)
    If methods to_yaml and/or from_yaml are available, these are called for dumping resp.
    loading, default routines (dumping a mapping of the attributes) used otherwise.

    `yml` is the object whose `representer` and `constructor` the class is
    registered on. The decorated class is returned unchanged.
    """

    def yo_deco(cls: Any) -> Any:
        tag = getattr(cls, 'yaml_tag', '!' + cls.__name__)

        # Decide on the callables first. Wrapping the registration call itself
        # in `except AttributeError` would also swallow an AttributeError
        # raised inside add_representer()/add_constructor() and silently
        # replace the class' own methods by the generic ones.
        if hasattr(cls, 'to_yaml'):
            to_yaml = cls.to_yaml
        else:

            def to_yaml(representer: Any, data: Any) -> Any:
                return representer.represent_yaml_object(
                    tag, data, cls, flow_style=representer.default_flow_style,
                )

        yml.representer.add_representer(cls, to_yaml)

        if hasattr(cls, 'from_yaml'):
            from_yaml = cls.from_yaml
        else:

            def from_yaml(constructor: Any, node: Any) -> Any:
                return constructor.construct_yaml_object(node, cls)

        yml.constructor.add_constructor(tag, from_yaml)
        return cls

    return yo_deco
def compose_all(stream: StreamTextType, Loader: Any = Loader) -> Any:
    """
    Parse all YAML documents in a stream
    and produce corresponding representation trees.

    Removed function: the arguments are ignored and error_deprecation() is
    called, which raises an AttributeError pointing to the compose_all()
    method of a YAML() instance.
    """
    # report this function's own name; it used to announce itself as 'compose'
    error_deprecation(
        'compose_all', 'compose_all', arg=_error_dep_arg, comment=_error_dep_comment,
    )
def round_trip_load(
    stream: StreamTextType,
    version: Optional[VersionType] = None,
    preserve_quotes: Optional[bool] = None,
) -> Any:
    """
    Parse the first YAML document in a stream
    and produce the corresponding Python object.

    Removed function: the arguments are ignored and error_deprecation() is
    called with the default arguments, i.e. the message points to
    YAML().load().
    """
    # report this function's own name; it used to announce itself as
    # 'round_trip_load_all'
    error_deprecation('round_trip_load', 'load')
+ If stream is None, return the produced string instead. + """ + error_deprecation('emit', 'emit', arg="typ='safe', pure=True") + + +enc = None + + +def serialize_all( + nodes: Any, + stream: Optional[StreamType] = None, + Dumper: Any = Dumper, + canonical: Any = None, + indent: Optional[int] = None, + width: Optional[int] = None, + allow_unicode: Optional[bool] = None, + line_break: Any = None, + encoding: Any = enc, + explicit_start: Optional[bool] = None, + explicit_end: Optional[bool] = None, + version: Optional[VersionType] = None, + tags: Any = None, +) -> Any: + # NOQA + """ + Serialize a sequence of representation trees into a YAML stream. + If stream is None, return the produced string instead. + """ + error_deprecation('serialize_all', 'serialize_all', arg="typ='safe', pure=True") + + +def serialize( + node: Any, stream: Optional[StreamType] = None, Dumper: Any = Dumper, **kwds: Any, +) -> Any: + """ + Serialize a representation tree into a YAML stream. + If stream is None, return the produced string instead. + """ + error_deprecation('serialize', 'serialize', arg="typ='safe', pure=True") + + +def dump_all( + documents: Any, + stream: Optional[StreamType] = None, + Dumper: Any = Dumper, + default_style: Any = None, + default_flow_style: Any = None, + canonical: Optional[bool] = None, + indent: Optional[int] = None, + width: Optional[int] = None, + allow_unicode: Optional[bool] = None, + line_break: Any = None, + encoding: Any = enc, + explicit_start: Optional[bool] = None, + explicit_end: Optional[bool] = None, + version: Any = None, + tags: Any = None, + block_seq_indent: Any = None, + top_level_colon_align: Any = None, + prefix_colon: Any = None, +) -> Any: + # NOQA + """ + Serialize a sequence of Python objects into a YAML stream. + If stream is None, return the produced string instead. 
def dump(
    data: Any,
    stream: Optional[StreamType] = None,
    Dumper: Any = Dumper,
    default_style: Any = None,
    default_flow_style: Any = None,
    canonical: Optional[bool] = None,
    indent: Optional[int] = None,
    width: Optional[int] = None,
    allow_unicode: Optional[bool] = None,
    line_break: Any = None,
    encoding: Any = enc,
    explicit_start: Optional[bool] = None,
    explicit_end: Optional[bool] = None,
    version: Optional[VersionType] = None,
    tags: Any = None,
    block_seq_indent: Any = None,
) -> Any:
    # NOQA
    """
    Stub for the module-level `dump` function.

    The signature is kept (default_style ∈ None, '', '"', "'", '|', '>'),
    but no argument is used and nothing is written: the body only calls
    error_deprecation() with 'dump' twice and the keyword
    arg "typ='unsafe', pure=True" (presumably the called function, the
    replacement method and the suggested constructor arguments --
    error_deprecation() is defined outside this section).
    """
    suggested_args = "typ='unsafe', pure=True"
    error_deprecation('dump', 'dump', arg=suggested_args)
def round_trip_dump(
    data: Any,
    stream: Optional[StreamType] = None,
    Dumper: Any = RoundTripDumper,
    default_style: Any = None,
    default_flow_style: Any = None,
    canonical: Optional[bool] = None,
    indent: Optional[int] = None,
    width: Optional[int] = None,
    allow_unicode: Optional[bool] = None,
    line_break: Any = None,
    encoding: Any = enc,
    explicit_start: Optional[bool] = None,
    explicit_end: Optional[bool] = None,
    version: Optional[VersionType] = None,
    tags: Any = None,
    block_seq_indent: Any = None,
    top_level_colon_align: Any = None,
    prefix_colon: Any = None,
) -> Any:
    """
    Stub for the module-level `round_trip_dump` function.

    The signature is kept, but no argument is used and nothing is dumped:
    the body only calls error_deprecation() with this function's name and
    'dump' (presumably the replacement method name -- error_deprecation()
    is defined outside this section).
    """
    # The former `allow_unicode = True if allow_unicode is None else ...`
    # line was a dead store: the value was never read afterwards.
    error_deprecation('round_trip_dump', 'dump')
def add_constructor(
    tag: Any, object_constructor: Any, Loader: Any = None, constructor: Any = Constructor,
) -> None:
    """
    Add an object constructor for the given tag.
    object_constructor is a function that accepts a Loader instance
    and a node object and produces the corresponding Python object.

    Without a Loader the registration goes to `constructor`. With a Loader
    that has its own `add_constructor` attribute, that is called. Otherwise
    the Loader class is matched, in order, against BaseLoader, SafeLoader,
    ruamel.yaml.loader.Loader and RoundTripLoader and the registration goes
    to the corresponding *Constructor class.

    Raises NotImplementedError when the Loader matches none of these.
    """
    if Loader is None:
        constructor.add_constructor(tag, object_constructor)
        return
    if hasattr(Loader, 'add_constructor'):
        Loader.add_constructor(tag, object_constructor)
        return
    if issubclass(Loader, BaseLoader):
        BaseConstructor.add_constructor(tag, object_constructor)
    elif issubclass(Loader, SafeLoader):
        SafeConstructor.add_constructor(tag, object_constructor)
    # The parameter `Loader` shadows the Loader class inside this function, so
    # the class has to be named via its module. `issubclass(Loader, Loader)`
    # compared the argument with itself, was always true, and made the
    # RoundTripLoader branch below unreachable.
    elif issubclass(Loader, ruamel.yaml.loader.Loader):
        Constructor.add_constructor(tag, object_constructor)
    elif issubclass(Loader, RoundTripLoader):
        RoundTripConstructor.add_constructor(tag, object_constructor)
    else:
        raise NotImplementedError
def add_representer(
    data_type: Any, object_representer: Any, Dumper: Any = None, representer: Any = Representer,  # NOQA
) -> None:
    """
    Add a representer for the given type.
    object_representer is a function accepting a Dumper instance
    and an instance of the given data type
    and producing the corresponding representation node.

    Without a Dumper the registration goes to `representer`. With a Dumper
    that has its own `add_representer` attribute, that is called. Otherwise
    the Dumper class is matched, in order, against BaseDumper, SafeDumper,
    ruamel.yaml.dumper.Dumper and RoundTripDumper and the registration goes
    to the corresponding *Representer class.

    Raises NotImplementedError when the Dumper matches none of these.
    """
    if Dumper is None:
        representer.add_representer(data_type, object_representer)
        return
    if hasattr(Dumper, 'add_representer'):
        Dumper.add_representer(data_type, object_representer)
        return
    if issubclass(Dumper, BaseDumper):
        BaseRepresenter.add_representer(data_type, object_representer)
    elif issubclass(Dumper, SafeDumper):
        SafeRepresenter.add_representer(data_type, object_representer)
    # The parameter `Dumper` shadows the Dumper class inside this function, so
    # the class has to be named via its module. `issubclass(Dumper, Dumper)`
    # compared the argument with itself, was always true, and made the
    # RoundTripDumper branch below unreachable.
    elif issubclass(Dumper, ruamel.yaml.dumper.Dumper):
        Representer.add_representer(data_type, object_representer)
    elif issubclass(Dumper, RoundTripDumper):
        RoundTripRepresenter.add_representer(data_type, object_representer)
    else:
        raise NotImplementedError
class YAMLObjectMetaclass(type):
    """
    Metaclass that registers a class with its constructor and representer
    when the class body itself assigns a non-None yaml_tag.
    """

    def __init__(cls, name: Any, bases: Any, kwds: Any) -> None:
        super().__init__(name, bases, kwds)
        # Only the class's own namespace counts: an inherited yaml_tag does
        # not trigger a registration, and neither does an explicit None.
        if kwds.get('yaml_tag') is None:
            return
        cls.yaml_constructor.add_constructor(cls.yaml_tag, cls.from_yaml)  # type: ignore
        cls.yaml_representer.add_representer(cls, cls.to_yaml)  # type: ignore
class Node:
    """
    Base class of the representation-tree nodes.

    The tag is stored as a Tag object in `ctag`; the `tag` property exposes
    it as a string and accepts either a string or a Tag on assignment.
    """

    __slots__ = 'ctag', 'value', 'start_mark', 'end_mark', 'comment', 'anchor'

    def __init__(
        self,
        tag: Any,
        value: Any,
        start_mark: Any,
        end_mark: Any,
        comment: Any = None,
        anchor: Any = None,
    ) -> None:
        # you can still get a string from the serializer
        self.ctag = tag if isinstance(tag, Tag) else Tag(suffix=tag)
        self.value = value
        self.start_mark = start_mark
        self.end_mark = end_mark
        self.comment = comment
        self.anchor = anchor

    @property
    def tag(self) -> Optional[str]:
        """The tag as a string, or None when no tag object is stored."""
        return None if self.ctag is None else str(self.ctag)

    @tag.setter
    def tag(self, val: Any) -> None:
        # strings are wrapped in a Tag, anything else is stored as given
        if isinstance(val, str):
            val = Tag(suffix=val)
        self.ctag = val

    def __repr__(self) -> str:
        return f'{self.__class__.__name__!s}(tag={self.tag!r}, value={self.value!r})'

    def dump(self, indent: int = 0) -> None:
        """
        Write this node, and recursively the nodes in its value, to sys.stdout.

        A node with a string value is written on one line including the value.
        Any other value is iterated: Node items and the members of tuple items
        are dumped one indent level deeper, other items are reported by type.
        A truthy comment is written on its own line after the node line.
        """
        xx = self.__class__.__name__
        xi = ' ' * indent
        if isinstance(self.value, str):
            sys.stdout.write(f'{xi}{xx}(tag={self.tag!r}, value={self.value!r})\n')
            if self.comment:
                # the comment line used to end in an unbalanced ')'
                sys.stdout.write(f' {xi}comment: {self.comment}\n')
            return
        sys.stdout.write(f'{xi}{xx}(tag={self.tag!r})\n')
        if self.comment:
            sys.stdout.write(f' {xi}comment: {self.comment}\n')
        for v in self.value:
            if isinstance(v, tuple):
                for v1 in v:
                    v1.dump(indent + 1)
            elif isinstance(v, Node):
                v.dump(indent + 1)
            else:
                sys.stdout.write(f'Node value type? {type(v)}\n')
CollectionNode.__init__( + self, tag, value, start_mark, end_mark, flow_style, comment, anchor, + ) + self.merge = None diff --git a/parser.py b/parser.py new file mode 100644 index 0000000..b031aa6 --- /dev/null +++ b/parser.py @@ -0,0 +1,860 @@ +# coding: utf-8 + +# The following YAML grammar is LL(1) and is parsed by a recursive descent +# parser. +# +# stream ::= STREAM-START implicit_document? explicit_document* +# STREAM-END +# implicit_document ::= block_node DOCUMENT-END* +# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* +# block_node_or_indentless_sequence ::= +# ALIAS +# | properties (block_content | +# indentless_block_sequence)? +# | block_content +# | indentless_block_sequence +# block_node ::= ALIAS +# | properties block_content? +# | block_content +# flow_node ::= ALIAS +# | properties flow_content? +# | flow_content +# properties ::= TAG ANCHOR? | ANCHOR TAG? +# block_content ::= block_collection | flow_collection | SCALAR +# flow_content ::= flow_collection | SCALAR +# block_collection ::= block_sequence | block_mapping +# flow_collection ::= flow_sequence | flow_mapping +# block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* +# BLOCK-END +# indentless_sequence ::= (BLOCK-ENTRY block_node?)+ +# block_mapping ::= BLOCK-MAPPING_START +# ((KEY block_node_or_indentless_sequence?)? +# (VALUE block_node_or_indentless_sequence?)?)* +# BLOCK-END +# flow_sequence ::= FLOW-SEQUENCE-START +# (flow_sequence_entry FLOW-ENTRY)* +# flow_sequence_entry? +# FLOW-SEQUENCE-END +# flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +# flow_mapping ::= FLOW-MAPPING-START +# (flow_mapping_entry FLOW-ENTRY)* +# flow_mapping_entry? +# FLOW-MAPPING-END +# flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? 
+# +# FIRST sets: +# +# stream: { STREAM-START <} +# explicit_document: { DIRECTIVE DOCUMENT-START } +# implicit_document: FIRST(block_node) +# block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START +# BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START } +# flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START } +# block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START +# FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } +# flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } +# block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START } +# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } +# block_sequence: { BLOCK-SEQUENCE-START } +# block_mapping: { BLOCK-MAPPING-START } +# block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR +# BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START +# FLOW-MAPPING-START BLOCK-ENTRY } +# indentless_sequence: { ENTRY } +# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } +# flow_sequence: { FLOW-SEQUENCE-START } +# flow_mapping: { FLOW-MAPPING-START } +# flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START +# FLOW-MAPPING-START KEY } +# flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START +# FLOW-MAPPING-START KEY } + +# need to have full path with import, as pkg_resources tries to load parser.py in __init__.py +# only to not do anything with the package afterwards +# and for Jython too + + +from ruamel.yaml.error import MarkedYAMLError +from ruamel.yaml.tokens import * # NOQA +from ruamel.yaml.events import * # NOQA +from ruamel.yaml.scanner import Scanner, RoundTripScanner, ScannerError # NOQA +from ruamel.yaml.scanner import BlankLineComment +from ruamel.yaml.comments import C_PRE, C_POST, C_SPLIT_ON_FIRST_BLANK +from ruamel.yaml.compat import nprint, nprintf # NOQA +from ruamel.yaml.tag import Tag + +from typing import Any, Dict, Optional, List, Optional # NOQA + +__all__ = ['Parser', 'RoundTripParser', 'ParserError'] 
def get_event(self) -> Any:
    """
    Return the pending event and clear it, so that a later call has to
    produce a new one.

    When no event is pending and a state function is set, that function is
    called first to produce the event. The returned value is also stored in
    self.last_event. Returns None when no event is pending and there is no
    state function to produce one.
    """
    if self.current_event is None:
        if self.state:
            self.current_event = self.state()
    # Trace call (xprintf forwards to nprintf). It reads the event's start
    # line, which used to raise AttributeError on None when no event was
    # available; in that case the trace is now skipped.
    described = repr(self.current_event)
    upcoming = self.peek_event()
    if upcoming is not None:
        xprintf('get_event', described, upcoming.start_mark.line)
    self.last_event = value = self.current_event
    self.current_event = None
    return value
def parse_document_end(self) -> Any:
    """
    Produce the DocumentEndEvent for the current document and select the
    state that parses what follows.

    The event is explicit when a DocumentEndToken is present; that token is
    consumed. Without one, the event is implicit and both of its marks are
    the start mark of the next token.

    Raises ParserError when the token after a document end marker is not
    the stream end and starts on the line where the marker ends.
    """
    token = self.scanner.peek_token()
    start_mark = end_mark = token.start_mark
    explicit = False
    if self.scanner.check_token(DocumentEndToken):
        token = self.scanner.get_token()
        pt = self.scanner.peek_token()
        if not isinstance(pt, StreamEndToken) and (
            token.end_mark.line == pt.start_mark.line
        ):
            # `{...id,!r}` formatted a one-element tuple, so the message
            # showed "('<id>',)" instead of the token id.
            raise ParserError(
                None,
                None,
                'found non-comment content after document end marker, '
                f'{self.scanner.peek_token().id!r}',
                self.scanner.peek_token().start_mark,
            )
        end_mark = token.end_mark
        explicit = True
    event = DocumentEndEvent(start_mark, end_mark, explicit=explicit)

    # Prepare the next state.
    if self.resolver.processing_version == (1, 1):
        self.state = self.parse_document_start
    elif explicit:
        # found a document end marker, can be followed by implicit document
        self.state = self.parse_implicit_document_start
    else:
        self.state = self.parse_document_start

    return event
def select_tag_transform(self, tag: Tag) -> None:
    """
    Call tag.select_transform(False) on the given tag.

    A tag of None is accepted and ignored.
    """
    if tag is not None:
        tag.select_transform(False)
self.scanner.get_token() + try: + self.move_token_comment(token) + except NotImplementedError: + pass + start_mark = tag_mark = token.start_mark + end_mark = token.end_mark + # tag = token.value + tag = Tag(handle=token.value[0], suffix=token.value[1], handles=self.tag_handles) + if self.scanner.check_token(AnchorToken): + token = self.scanner.get_token() + start_mark = tag_mark = token.start_mark + end_mark = token.end_mark + anchor = token.value + if tag is not None: + self.select_tag_transform(tag) + if tag.check_handle(): + raise ParserError( + 'while parsing a node', + start_mark, + f'found undefined tag handle {tag.handle!r}', + tag_mark, + ) + if start_mark is None: + start_mark = end_mark = self.scanner.peek_token().start_mark + event = None + implicit = tag is None or str(tag) == '!' + if indentless_sequence and self.scanner.check_token(BlockEntryToken): + comment = None + pt = self.scanner.peek_token() + if self.loader and self.loader.comment_handling is None: + if pt.comment and pt.comment[0]: + comment = [pt.comment[0], []] + pt.comment[0] = None + elif pt.comment and pt.comment[0] is None and pt.comment[1]: + comment = [None, pt.comment[1]] + pt.comment[1] = None + elif self.loader: + if pt.comment: + comment = pt.comment + end_mark = self.scanner.peek_token().end_mark + event = SequenceStartEvent( + anchor, tag, implicit, start_mark, end_mark, flow_style=False, comment=comment, + ) + self.state = self.parse_indentless_sequence_entry + return event + + if self.scanner.check_token(ScalarToken): + token = self.scanner.get_token() + # self.scanner.peek_token_same_line_comment(token) + end_mark = token.end_mark + if (token.plain and tag is None) or str(tag) == '!': + dimplicit = (True, False) + elif tag is None: + dimplicit = (False, True) + else: + dimplicit = (False, False) + event = ScalarEvent( + anchor, + tag, + dimplicit, + token.value, + start_mark, + end_mark, + style=token.style, + comment=token.comment, + ) + self.state = self.states.pop() + elif 
self.scanner.check_token(FlowSequenceStartToken): + pt = self.scanner.peek_token() + end_mark = pt.end_mark + event = SequenceStartEvent( + anchor, + tag, + implicit, + start_mark, + end_mark, + flow_style=True, + comment=pt.comment, + ) + self.state = self.parse_flow_sequence_first_entry + elif self.scanner.check_token(FlowMappingStartToken): + pt = self.scanner.peek_token() + end_mark = pt.end_mark + event = MappingStartEvent( + anchor, + tag, + implicit, + start_mark, + end_mark, + flow_style=True, + comment=pt.comment, + ) + self.state = self.parse_flow_mapping_first_key + elif block and self.scanner.check_token(BlockSequenceStartToken): + end_mark = self.scanner.peek_token().start_mark + # should inserting the comment be dependent on the + # indentation? + pt = self.scanner.peek_token() + comment = pt.comment + # nprint('pt0', type(pt)) + if comment is None or comment[1] is None: + comment = pt.split_old_comment() + # nprint('pt1', comment) + event = SequenceStartEvent( + anchor, tag, implicit, start_mark, end_mark, flow_style=False, comment=comment, + ) + self.state = self.parse_block_sequence_first_entry + elif block and self.scanner.check_token(BlockMappingStartToken): + end_mark = self.scanner.peek_token().start_mark + comment = self.scanner.peek_token().comment + event = MappingStartEvent( + anchor, tag, implicit, start_mark, end_mark, flow_style=False, comment=comment, + ) + self.state = self.parse_block_mapping_first_key + elif anchor is not None or tag is not None: + # Empty scalars are allowed even if a tag or an anchor is + # specified. 
def parse_block_sequence_entry(self) -> Any:
    """
    Parse one step of a block sequence:
    block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END

    Returns the event for the next entry (an empty scalar when the entry
    marker is directly followed by another entry marker or the block end),
    or the SequenceEndEvent when the block end is reached.

    Raises ParserError when the next token is neither an entry marker nor
    the block end.
    """
    if self.scanner.check_token(BlockEntryToken):
        entry = self.scanner.get_token()
        self.move_token_comment(entry)
        if self.scanner.check_token(BlockEntryToken, BlockEndToken):
            # entry marker without content
            self.state = self.parse_block_sequence_entry
            return self.process_empty_scalar(entry.end_mark)
        self.states.append(self.parse_block_sequence_entry)
        return self.parse_block_node()
    if self.scanner.check_token(BlockEndToken):
        closing = self.scanner.get_token()  # BlockEndToken
        end_event = SequenceEndEvent(
            closing.start_mark, closing.end_mark, comment=closing.comment,
        )
        self.state = self.states.pop()
        self.marks.pop()
        return end_event
    unexpected = self.scanner.peek_token()
    raise ParserError(
        'while parsing a block collection',
        self.marks[-1],
        f'expected <block end>, but found {unexpected.id!r}',
        unexpected.start_mark,
    )
def parse_indentless_sequence_entry(self) -> Any:
    """
    Parse one step of an indentless sequence:
    indentless_sequence ::= (BLOCK-ENTRY block_node?)+

    Returns the event for the next entry (an empty scalar when the entry
    marker is directly followed by an entry marker, key, value or block
    end). When the next token is not an entry marker, the sequence is over:
    a zero-width SequenceEndEvent is returned without consuming that token.
    """
    if self.scanner.check_token(BlockEntryToken):
        entry = self.scanner.get_token()
        self.move_token_comment(entry)
        if self.scanner.check_token(BlockEntryToken, KeyToken, ValueToken, BlockEndToken):
            # entry marker without content
            self.state = self.parse_indentless_sequence_entry
            return self.process_empty_scalar(entry.end_mark)
        self.states.append(self.parse_indentless_sequence_entry)
        return self.parse_block_node()

    following = self.scanner.peek_token()
    if self.loader and self.loader.comment_handling is None:
        # the following token's comment is attached as-is, at that token's start
        end_comment = following.comment
        where = following.start_mark
    else:
        # the end is placed at the end of the last event returned, and the
        # comment goes through distribute_comment()
        where = self.last_event.end_mark  # type: ignore
        end_comment = self.distribute_comment(following.comment, where.line)  # type: ignore
    end_event = SequenceEndEvent(where, where, comment=end_comment)
    self.state = self.states.pop()
    return end_event
def parse_block_mapping_key(self) -> Any:
    """Return the event for the next key of a block mapping, or its end.

    Handles, in this order:
      * a KEY token: parse the key node, or emit an empty scalar when the
        key has no content;
      * for processing versions above (1, 1), a VALUE token without a
        preceding KEY: emit an empty scalar as the key;
      * BLOCK-END: emit the MappingEndEvent and restore the previous state.

    Raises:
        ParserError: if none of the above tokens comes next.
    """
    if self.scanner.check_token(KeyToken):
        key_token = self.scanner.get_token()
        self.move_token_comment(key_token)
        if self.scanner.check_token(KeyToken, ValueToken, BlockEndToken):
            self.state = self.parse_block_mapping_value
            return self.process_empty_scalar(key_token.end_mark)
        self.states.append(self.parse_block_mapping_value)
        return self.parse_block_node_or_indentless_sequence()

    if self.resolver.processing_version > (1, 1) and self.scanner.check_token(ValueToken):
        self.state = self.parse_block_mapping_value
        return self.process_empty_scalar(self.scanner.peek_token().start_mark)

    if self.scanner.check_token(BlockEndToken):
        closing = self.scanner.get_token()
        self.move_token_comment(closing)
        end_event = MappingEndEvent(
            closing.start_mark, closing.end_mark, comment=closing.comment,
        )
        self.state = self.states.pop()
        self.marks.pop()
        return end_event

    offender = self.scanner.peek_token()
    raise ParserError(
        'while parsing a block mapping',
        self.marks[-1],
        f'expected <block end>, but found {offender.id!r}',
        offender.start_mark,
    )
block + if self.scanner.check_token(ValueToken): + self.move_token_comment(token) + else: + if not self.scanner.check_token(KeyToken): + self.move_token_comment(token, empty=True) + # else: empty value for this key cannot move token.comment + if not self.scanner.check_token(KeyToken, ValueToken, BlockEndToken): + self.states.append(self.parse_block_mapping_key) + return self.parse_block_node_or_indentless_sequence() + else: + self.state = self.parse_block_mapping_key + comment = token.comment + if comment is None: + token = self.scanner.peek_token() + comment = token.comment + if comment: + token._comment = [None, comment[1]] + comment = [comment[0], None] + return self.process_empty_scalar(token.end_mark, comment=comment) + else: + self.state = self.parse_block_mapping_key + token = self.scanner.peek_token() + return self.process_empty_scalar(token.start_mark) + + # flow_sequence ::= FLOW-SEQUENCE-START + # (flow_sequence_entry FLOW-ENTRY)* + # flow_sequence_entry? + # FLOW-SEQUENCE-END + # flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? + # + # Note that while production rules for both flow_sequence_entry and + # flow_mapping_entry are equal, their interpretations are different. + # For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?` + # generate an inline mapping (set syntax). 
def parse_flow_sequence_entry(self, first: bool = False) -> Any:
    """Return the next event while inside a flow sequence (``[ ... ]``).

    Args:
        first: True only for the entry right after the opening token; every
            later entry must be preceded by a FLOW-ENTRY (',') token.

    A KEY token inside the sequence opens an inline single-pair mapping.
    The KEY token is only peeked at here and is consumed by
    ``parse_flow_sequence_entry_mapping_key``.

    Raises:
        ParserError: if a later entry is not preceded by ',' and the
            sequence is not closed.
    """
    if not self.scanner.check_token(FlowSequenceEndToken):
        if not first:
            if not self.scanner.check_token(FlowEntryToken):
                offender = self.scanner.peek_token()
                raise ParserError(
                    'while parsing a flow sequence',
                    self.marks[-1],
                    f"expected ',' or ']', but got {offender.id!r}",
                    offender.start_mark,
                )
            self.scanner.get_token()  # drop the ','

        if self.scanner.check_token(KeyToken):
            key_token = self.scanner.peek_token()
            mapping_start: Any = MappingStartEvent(
                None, None, True, key_token.start_mark, key_token.end_mark, flow_style=True,
            )
            self.state = self.parse_flow_sequence_entry_mapping_key
            return mapping_start
        if not self.scanner.check_token(FlowSequenceEndToken):
            self.states.append(self.parse_flow_sequence_entry)
            return self.parse_flow_node()

    # only the closing token is left
    closing = self.scanner.get_token()
    end_event = SequenceEndEvent(closing.start_mark, closing.end_mark, comment=closing.comment)
    self.state = self.states.pop()
    self.marks.pop()
    return end_event
def parse_flow_mapping_first_key(self) -> Any:
    """Consume the token that opens a flow mapping and parse its first key.

    The start mark of the opening token is pushed on ``self.marks`` so that
    later error messages can point at the beginning of the mapping.
    """
    opening = self.scanner.get_token()
    self.marks.append(opening.start_mark)
    first_event = self.parse_flow_mapping_key(first=True)
    return first_event
def distribute_comment(self, comment: Any, line: Any) -> Any:
    """Split off the part of a pending comment that belongs to the previous node.

    ``comment[0]`` is treated as a list of indices into
    ``self.scanner.comments``.  The two low bits of
    ``self.loader.comment_handling`` select the strategy:

      * ``C_POST``: nothing is split off, ``None`` is returned;
      * ``C_PRE``: everything is split off and ``comment[0]`` is cleared;
      * ``C_SPLIT_ON_FIRST_BLANK``: the lines before the first blank-line
        comment are split off and the remainder stays in ``comment[0]``.

    Returns ``None`` when there is nothing to hand over, otherwise a
    three-element list ``[None, None, <indices>]``.  ``comment`` is mutated
    in place when something is split off.

    ToDo: look at the indentation of the comment to determine attachment.
    """
    if comment is None:
        return None
    pending = comment[0]
    if not pending:
        return None
    # the pending comment is expected to start on the line right after `line`
    assert pending[0] == line + 1
    mode = self.loader.comment_handling & 0b11
    if mode == C_POST:
        return None
    if mode == C_PRE:
        comment[0] = None
        return [None, None, pending]
    # position of the first blank-line comment, if any
    blank_at = next(
        (
            pos
            for pos, cmntidx in enumerate(pending)
            if isinstance(self.scanner.comments[cmntidx], BlankLineComment)
        ),
        None,
    )
    if not blank_at:
        # None: no blank line found; 0: the very first line was blank
        return None
    if mode == C_SPLIT_ON_FIRST_BLANK:
        head = pending[:blank_at]
        comment[0] = pending[blank_at:]
        return [None, None, head]
    raise NotImplementedError  # reserved
+# Reader provides the following methods and attributes: +# reader.peek(index=0) - return the character `index` positions ahead +# reader.prefix(length=1) - return the next `length` characters +# reader.forward(length=1) - move the current position forward by `length` +# characters. +# reader.index - the number of the current character. +# reader.line, reader.column - the line and the column of the current +# character. + +import codecs + +from ruamel.yaml.error import YAMLError, FileMark, StringMark, YAMLStreamError +from ruamel.yaml.util import RegExp + +from typing import Any, Dict, Optional, List, Union, Text, Tuple, Optional # NOQA +# from ruamel.yaml.compat import StreamTextType # NOQA + +__all__ = ['Reader', 'ReaderError'] + + +class ReaderError(YAMLError): + def __init__( + self, name: Any, position: Any, character: Any, encoding: Any, reason: Any, + ) -> None: + self.name = name + self.character = character + self.position = position + self.encoding = encoding + self.reason = reason + + def __str__(self) -> Any: + if isinstance(self.character, bytes): + return ( + f"'{self.encoding!s}' codec can't decode byte #x{ord(self.character):02x}: " + f'{self.reason!s}\n' + f' in "{self.name!s}", position {self.position:d}' + ) + else: + return ( + f'unacceptable character #x{self.character:04x}: {self.reason!s}\n' + f' in "{self.name!s}", position {self.position:d}' + ) + + +class Reader: + # Reader: + # - determines the data encoding and converts it to a unicode string, + # - checks if characters are in allowed range, + # - adds '\0' to the end. + + # Reader accepts + # - a `bytes` object, + # - a `str` object, + # - a file-like object with its `read` method returning `bytes`, + # - a file-like object with its `read` method returning `str`. + + # Yeah, it's ugly and slow. 
def peek(self, index: int = 0) -> Text:
    """Return the character ``index`` positions ahead without consuming it.

    When the decoded buffer is too short, ``self.update(index + 1)`` is
    called once to pull in more input and the lookup is repeated; a second
    ``IndexError`` is not caught.
    """
    try:
        ch = self.buffer[self.pointer + index]
    except IndexError:
        # not enough decoded data yet: refill, then look again
        self.update(index + 1)
        ch = self.buffer[self.pointer + index]
    return ch
def forward(self, length: int = 1) -> None:
    """Advance the read position by ``length`` characters.

    ``pointer`` and ``index`` grow by one per character.  A '\\n', or a
    '\\r' that is not followed by '\\n', starts a new line (``line`` + 1,
    ``column`` reset to 0).  Any other character except U+FEFF advances
    ``column``.  One extra character of look-ahead is requested from
    ``self.update`` so the character after a trailing '\\r' can be inspected.
    """
    if self.pointer + length + 1 >= len(self.buffer):
        self.update(length + 1)
    remaining = length
    while remaining != 0:
        ch = self.buffer[self.pointer]
        self.pointer += 1
        self.index += 1
        # self.pointer now addresses the character following `ch`
        lone_cr = ch == '\r' and self.buffer[self.pointer] != '\n'
        if ch == '\n' or lone_cr:
            self.line += 1
            self.column = 0
        elif ch != '\uFEFF':
            self.column += 1
        remaining -= 1
def update(self, length: int) -> None:
    """Make sure at least ``length`` decoded characters are buffered.

    The already consumed part of ``self.buffer`` is dropped (``pointer``
    becomes 0), then raw input is read, decoded and appended until the
    buffer is long enough or the input is exhausted.  At end of input a
    terminating '\\0' is appended and ``raw_buffer`` is set to ``None``,
    which turns every later call into a no-op.

    Raises:
        ReaderError: if the raw data cannot be decoded, or (via
            ``check_printable``) if it contains a non-printable character.
    """
    if self.raw_buffer is None:
        return
    self.buffer = self.buffer[self.pointer :]
    self.pointer = 0
    while len(self.buffer) < length:
        if not self.eof:
            self.update_raw()
        if self.raw_decode is not None:
            try:
                data, converted = self.raw_decode(self.raw_buffer, 'strict', self.eof)
            except UnicodeDecodeError as exc:
                # Slice instead of indexing: indexing `bytes` yields an int on
                # Python 3, and ReaderError.__str__ only reports a decoding
                # problem when `character` is a `bytes` object.
                character = self.raw_buffer[exc.start : exc.start + 1]
                if self.stream is not None:
                    position = self.stream_pointer - len(self.raw_buffer) + exc.start
                else:
                    position = exc.start
                raise ReaderError(
                    self.name, position, character, exc.encoding, exc.reason,
                ) from exc
        else:
            # the stream already delivers text, nothing to decode
            data = self.raw_buffer
            converted = len(data)
        self.check_printable(data)
        self.buffer += data
        self.raw_buffer = self.raw_buffer[converted:]
        if self.eof:
            self.buffer += '\0'
            self.raw_buffer = None
            break
ImportError: +# pass diff --git a/representer.py b/representer.py new file mode 100644 index 0000000..0d1ca12 --- /dev/null +++ b/representer.py @@ -0,0 +1,1127 @@ +# coding: utf-8 + +from ruamel.yaml.error import * # NOQA +from ruamel.yaml.nodes import * # NOQA +from ruamel.yaml.compat import ordereddict +from ruamel.yaml.compat import nprint, nprintf # NOQA +from ruamel.yaml.scalarstring import ( + LiteralScalarString, + FoldedScalarString, + SingleQuotedScalarString, + DoubleQuotedScalarString, + PlainScalarString, +) +from ruamel.yaml.comments import ( + CommentedMap, + CommentedOrderedMap, + CommentedSeq, + CommentedKeySeq, + CommentedKeyMap, + CommentedSet, + comment_attrib, + merge_attrib, + TaggedScalar, +) +from ruamel.yaml.scalarint import ScalarInt, BinaryInt, OctalInt, HexInt, HexCapsInt +from ruamel.yaml.scalarfloat import ScalarFloat +from ruamel.yaml.scalarbool import ScalarBoolean +from ruamel.yaml.timestamp import TimeStamp +from ruamel.yaml.anchor import Anchor + +import collections +import datetime +import types + +import copyreg +import base64 + +from typing import Dict, List, Any, Union, Text, Optional # NOQA + +# fmt: off +__all__ = ['BaseRepresenter', 'SafeRepresenter', 'Representer', + 'RepresenterError', 'RoundTripRepresenter'] +# fmt: on + + +class RepresenterError(YAMLError): + pass + + +class BaseRepresenter: + + yaml_representers: Dict[Any, Any] = {} + yaml_multi_representers: Dict[Any, Any] = {} + + def __init__( + self: Any, + default_style: Any = None, + default_flow_style: Any = None, + dumper: Any = None, + ) -> None: + self.dumper = dumper + if self.dumper is not None: + self.dumper._representer = self + self.default_style = default_style + self.default_flow_style = default_flow_style + self.represented_objects: Dict[Any, Any] = {} + self.object_keeper: List[Any] = [] + self.alias_key: Optional[int] = None + self.sort_base_mapping_type_on_output = True + + @property + def serializer(self) -> Any: + try: + if hasattr(self.dumper, 
def represent_data(self, data: Any) -> Any:
    """Build (or reuse) the node that represents ``data``.

    ``self.alias_key`` is set to ``id(data)`` unless ``ignore_aliases``
    rejects the object.  An object whose id is already in
    ``self.represented_objects`` gets its existing node back; otherwise the
    object is kept alive in ``self.object_keeper``.

    Representer lookup order:
      1. ``yaml_representers`` for the exact type,
      2. ``yaml_multi_representers`` for each class in the MRO,
      3. the ``None`` entry of ``yaml_multi_representers``,
      4. the ``None`` entry of ``yaml_representers``,
      5. a plain ``ScalarNode`` holding ``str(data)``.
    """
    self.alias_key = None if self.ignore_aliases(data) else id(data)
    if self.alias_key is not None:
        if self.alias_key in self.represented_objects:
            return self.represented_objects[self.alias_key]
        self.object_keeper.append(data)

    mro = type(data).__mro__
    exact_type = mro[0]
    if exact_type in self.yaml_representers:
        return self.yaml_representers[exact_type](self, data)
    for klass in mro:
        if klass in self.yaml_multi_representers:
            return self.yaml_multi_representers[klass](self, data)
    if None in self.yaml_multi_representers:
        return self.yaml_multi_representers[None](self, data)
    if None in self.yaml_representers:
        return self.yaml_representers[None](self, data)
    return ScalarNode(None, str(data))
def represent_scalar(
    self, tag: Any, value: Any, style: Any = None, anchor: Any = None,
) -> ScalarNode:
    """Create a ScalarNode for ``value``.

    ``style`` falls back to ``self.default_style``.  For block styles
    (``|`` and ``>``) a truthy ``comment`` attribute of ``value`` is wrapped
    as ``[None, [comment]]`` and passed on to the node.  A string ``tag`` is
    wrapped in ``Tag(suffix=...)``.  When an alias key is active the new
    node is recorded in ``self.represented_objects``.
    """
    if style is None:
        style = self.default_style
    is_block_style = bool(style) and style[0] in '|>'
    node_comment = getattr(value, 'comment', None) if is_block_style else None
    if node_comment:
        node_comment = [None, [node_comment]]
    node_tag = Tag(suffix=tag) if isinstance(tag, str) else tag
    node = ScalarNode(node_tag, value, style=style, comment=node_comment, anchor=anchor)
    if self.alias_key is not None:
        self.represented_objects[self.alias_key] = node
    return node
def represent_mapping(self, tag: Any, mapping: Any, flow_style: Any = None) -> MappingNode:
    """Create a MappingNode from a mapping or an iterable of key/value pairs.

    Objects with an ``items`` attribute are turned into a list of pairs and,
    when ``self.sort_base_mapping_type_on_output`` is set, sorted on a
    best-effort basis (pairs that cannot be ordered keep their order).
    Keys go through ``represent_key``, values through ``represent_data``.

    When ``flow_style`` is not given, the node uses
    ``self.default_flow_style`` if that is set; otherwise flow style is used
    only if every key and value is a ScalarNode without an explicit style.
    """
    pairs: List[Any] = []
    if isinstance(tag, str):
        tag = Tag(suffix=tag)
    node = MappingNode(tag, pairs, flow_style=flow_style)
    if self.alias_key is not None:
        # register before descending so nested references resolve to this node
        self.represented_objects[self.alias_key] = node
    if hasattr(mapping, 'items'):
        mapping = list(mapping.items())
        if self.sort_base_mapping_type_on_output:
            try:
                mapping = sorted(mapping)
            except TypeError:
                # unorderable keys: keep the original order
                pass
    all_plain = True
    for item_key, item_value in mapping:
        key_node = self.represent_key(item_key)
        value_node = self.represent_data(item_value)
        for part in (key_node, value_node):
            if not (isinstance(part, ScalarNode) and not part.style):
                all_plain = False
        pairs.append((key_node, value_node))
    if flow_style is None:
        node.flow_style = (
            self.default_flow_style if self.default_flow_style is not None else all_plain
        )
    return node
def represent_binary(self, data: Any) -> ScalarNode:
    """Represent ``bytes`` as a base64 encoded ``!!binary`` literal block scalar.

    ``base64.encodebytes`` exists on every Python 3 version, so the former
    fallback to ``base64.encodestring`` (removed in Python 3.9) could never
    run and has been dropped.
    """
    encoded = base64.encodebytes(data).decode('ascii')
    return self.represent_scalar('tag:yaml.org,2002:binary', encoded, style='|')
def represent_yaml_object(
    self, tag: Any, data: Any, cls: Any, flow_style: Any = None,
) -> MappingNode:
    """Represent an object as a mapping of its state under ``tag``.

    The state comes from ``data.__getstate__()`` when that method exists,
    otherwise from a copy of ``data.__dict__``.  ``cls`` is accepted for
    interface compatibility and is not used.

    Since Python 3.11 every object has a default ``__getstate__`` that
    returns ``None`` for an instance without attributes.  A ``None`` state
    is therefore represented as an empty mapping instead of being passed on
    to ``represent_mapping``, which cannot iterate over it.
    """
    if hasattr(data, '__getstate__'):
        state = data.__getstate__()
    else:
        state = data.__dict__.copy()
    if state is None:
        state = {}
    return self.represent_mapping(tag, state, flow_style=flow_style)
+SafeRepresenter.add_representer(bytes, SafeRepresenter.represent_binary) + +SafeRepresenter.add_representer(bool, SafeRepresenter.represent_bool) + +SafeRepresenter.add_representer(int, SafeRepresenter.represent_int) + +SafeRepresenter.add_representer(float, SafeRepresenter.represent_float) + +SafeRepresenter.add_representer(list, SafeRepresenter.represent_list) + +SafeRepresenter.add_representer(tuple, SafeRepresenter.represent_list) + +SafeRepresenter.add_representer(dict, SafeRepresenter.represent_dict) + +SafeRepresenter.add_representer(set, SafeRepresenter.represent_set) + +SafeRepresenter.add_representer(ordereddict, SafeRepresenter.represent_ordereddict) + +SafeRepresenter.add_representer( + collections.OrderedDict, SafeRepresenter.represent_ordereddict, +) + +SafeRepresenter.add_representer(datetime.date, SafeRepresenter.represent_date) + +SafeRepresenter.add_representer(datetime.datetime, SafeRepresenter.represent_datetime) + +SafeRepresenter.add_representer(None, SafeRepresenter.represent_undefined) + + +class Representer(SafeRepresenter): + def represent_complex(self, data: Any) -> Any: + if data.imag == 0.0: + data = repr(data.real) + elif data.real == 0.0: + data = f'{data.imag!r}j' + elif data.imag > 0: + data = f'{data.real!r}+{data.imag!r}j' + else: + data = f'{data.real!r}{data.imag!r}j' + return self.represent_scalar('tag:yaml.org,2002:python/complex', data) + + def represent_tuple(self, data: Any) -> SequenceNode: + return self.represent_sequence('tag:yaml.org,2002:python/tuple', data) + + def represent_name(self, data: Any) -> ScalarNode: + try: + name = f'{data.__module__!s}.{data.__qualname__!s}' + except AttributeError: + # ToDo: check if this can be reached in Py3 + name = f'{data.__module__!s}.{data.__name__!s}' + return self.represent_scalar('tag:yaml.org,2002:python/name:' + name, "") + + def represent_module(self, data: Any) -> ScalarNode: + return self.represent_scalar('tag:yaml.org,2002:python/module:' + data.__name__, "") + + def 
represent_object(self, data: Any) -> Union[SequenceNode, MappingNode]: + # We use __reduce__ API to save the data. data.__reduce__ returns + # a tuple of length 2-5: + # (function, args, state, listitems, dictitems) + + # For reconstructing, we calls function(*args), then set its state, + # listitems, and dictitems if they are not None. + + # A special case is when function.__name__ == '__newobj__'. In this + # case we create the object with args[0].__new__(*args). + + # Another special case is when __reduce__ returns a string - we don't + # support it. + + # We produce a !!python/object, !!python/object/new or + # !!python/object/apply node. + + cls = type(data) + if cls in copyreg.dispatch_table: + reduce: Any = copyreg.dispatch_table[cls](data) + elif hasattr(data, '__reduce_ex__'): + reduce = data.__reduce_ex__(2) + elif hasattr(data, '__reduce__'): + reduce = data.__reduce__() + else: + raise RepresenterError(f'cannot represent object: {data!r}') + reduce = (list(reduce) + [None] * 5)[:5] + function, args, state, listitems, dictitems = reduce + args = list(args) + if state is None: + state = {} + if listitems is not None: + listitems = list(listitems) + if dictitems is not None: + dictitems = dict(dictitems) + if function.__name__ == '__newobj__': + function = args[0] + args = args[1:] + tag = 'tag:yaml.org,2002:python/object/new:' + newobj = True + else: + tag = 'tag:yaml.org,2002:python/object/apply:' + newobj = False + try: + function_name = f'{function.__module__!s}.{function.__qualname__!s}' + except AttributeError: + # ToDo: check if this can be reached in Py3 + function_name = f'{function.__module__!s}.{function.__name__!s}' + if not args and not listitems and not dictitems and isinstance(state, dict) and newobj: + return self.represent_mapping( + 'tag:yaml.org,2002:python/object:' + function_name, state, + ) + if not listitems and not dictitems and isinstance(state, dict) and not state: + return self.represent_sequence(tag + function_name, args) + 
value = {} + if args: + value['args'] = args + if state or not isinstance(state, dict): + value['state'] = state + if listitems: + value['listitems'] = listitems + if dictitems: + value['dictitems'] = dictitems + return self.represent_mapping(tag + function_name, value) + + +Representer.add_representer(complex, Representer.represent_complex) + +Representer.add_representer(tuple, Representer.represent_tuple) + +Representer.add_representer(type, Representer.represent_name) + +Representer.add_representer(types.FunctionType, Representer.represent_name) + +Representer.add_representer(types.BuiltinFunctionType, Representer.represent_name) + +Representer.add_representer(types.ModuleType, Representer.represent_module) + +Representer.add_multi_representer(object, Representer.represent_object) + +Representer.add_multi_representer(type, Representer.represent_name) + + +class RoundTripRepresenter(SafeRepresenter): + # need to add type here and write out the .comment + # in serializer and emitter + + def __init__( + self, default_style: Any = None, default_flow_style: Any = None, dumper: Any = None, + ) -> None: + if not hasattr(dumper, 'typ') and default_flow_style is None: + default_flow_style = False + SafeRepresenter.__init__( + self, + default_style=default_style, + default_flow_style=default_flow_style, + dumper=dumper, + ) + + def ignore_aliases(self, data: Any) -> bool: + try: + if data.anchor is not None and data.anchor.value is not None: + return False + except AttributeError: + pass + return SafeRepresenter.ignore_aliases(self, data) + + def represent_none(self, data: Any) -> ScalarNode: + if len(self.represented_objects) == 0 and not self.serializer.use_explicit_start: + # this will be open ended (although it is not yet) + return self.represent_scalar('tag:yaml.org,2002:null', 'null') + return self.represent_scalar('tag:yaml.org,2002:null', "") + + def represent_literal_scalarstring(self, data: Any) -> ScalarNode: + tag = None + style = '|' + anchor = 
def represent_folded_scalarstring(self, data: Any) -> ScalarNode:
    """Represent a folded (``>``) scalar string as a ``tag:yaml.org,2002:str`` node.

    For every index listed in ``data.fold_pos`` (when that attribute exists)
    a ``'\\a'`` marker is inserted in front of a space that has a
    non-whitespace character directly before and after it. Indices are
    walked from last to first, so an insertion never shifts an index that
    still has to be examined.

    An index that cannot have a character on both sides (first or last
    position, or outside the string) is skipped. The previous guard
    ``fold_pos < len(data)`` was off by one and let ``data[fold_pos + 1]``
    raise IndexError for a fold position on a trailing space.

    The anchor is taken from ``data.yaml_anchor(any=True)`` before ``data``
    is rebuilt, because slicing and concatenating produces a new string.
    """
    style = '>'
    anchor = data.yaml_anchor(any=True)
    for fold_pos in reversed(getattr(data, 'fold_pos', [])):
        # a neighbour is needed on both sides, so both ends are excluded
        if not 0 < fold_pos < len(data) - 1:
            continue
        if (
            data[fold_pos] == ' '
            and not data[fold_pos - 1].isspace()
            and not data[fold_pos + 1].isspace()
        ):
            data = data[:fold_pos] + '\a' + data[fold_pos:]
    tag = 'tag:yaml.org,2002:str'
    return self.represent_scalar(tag, data, style=style, anchor=anchor)
ScalarNode: + if data._width is not None: + s = f'{data:0{data._width}d}' + else: + s = format(data, 'd') + anchor = data.yaml_anchor(any=True) + return self.insert_underscore("", s, data._underscore, anchor=anchor) + + def represent_binary_int(self, data: Any) -> ScalarNode: + if data._width is not None: + # cannot use '{:#0{}b}', that strips the zeros + s = f'{data:0{data._width}b}' + else: + s = format(data, 'b') + anchor = data.yaml_anchor(any=True) + return self.insert_underscore('0b', s, data._underscore, anchor=anchor) + + def represent_octal_int(self, data: Any) -> ScalarNode: + if data._width is not None: + # cannot use '{:#0{}o}', that strips the zeros + s = f'{data:0{data._width}o}' + else: + s = format(data, 'o') + anchor = data.yaml_anchor(any=True) + prefix = '0o' + if getattr(self.serializer, 'use_version', None) == (1, 1): + prefix = '0' + return self.insert_underscore(prefix, s, data._underscore, anchor=anchor) + + def represent_hex_int(self, data: Any) -> ScalarNode: + if data._width is not None: + # cannot use '{:#0{}x}', that strips the zeros + s = f'{data:0{data._width}x}' + else: + s = format(data, 'x') + anchor = data.yaml_anchor(any=True) + return self.insert_underscore('0x', s, data._underscore, anchor=anchor) + + def represent_hex_caps_int(self, data: Any) -> ScalarNode: + if data._width is not None: + # cannot use '{:#0{}X}', that strips the zeros + s = f'{data:0{data._width}X}' + else: + s = format(data, 'X') + anchor = data.yaml_anchor(any=True) + return self.insert_underscore('0x', s, data._underscore, anchor=anchor) + + def represent_scalar_float(self, data: Any) -> ScalarNode: + """ this is way more complicated """ + value = None + anchor = data.yaml_anchor(any=True) + if data != data or (data == 0.0 and data == 1.0): + value = '.nan' + elif data == self.inf_value: + value = '.inf' + elif data == -self.inf_value: + value = '-.inf' + if value: + return self.represent_scalar('tag:yaml.org,2002:float', value, anchor=anchor) + if 
data._exp is None and data._prec > 0 and data._prec == data._width - 1: + # no exponent, but trailing dot + value = f'{data._m_sign if data._m_sign else ""}{abs(int(data)):d}.' + elif data._exp is None: + # no exponent, "normal" dot + prec = data._prec + ms = data._m_sign if data._m_sign else "" + if prec < 0: + value = f'{ms}{abs(int(data)):0{data._width - len(ms)}d}' + else: + # -1 for the dot + value = f'{ms}{abs(data):0{data._width - len(ms)}.{data._width - prec - 1}f}' + if prec == 0 or (prec == 1 and ms != ""): + value = value.replace('0.', '.') + while len(value) < data._width: + value += '0' + else: + # exponent + ( + m, + es, + ) = f'{data:{data._width}.{data._width + (1 if data._m_sign else 0)}e}'.split('e') + w = data._width if data._prec > 0 else (data._width + 1) + if data < 0: + w += 1 + m = m[:w] + e = int(es) + m1, m2 = m.split('.') # always second? + while len(m1) + len(m2) < data._width - (1 if data._prec >= 0 else 0): + m2 += '0' + if data._m_sign and data > 0: + m1 = '+' + m1 + esgn = '+' if data._e_sign else "" + if data._prec < 0: # mantissa without dot + if m2 != '0': + e -= len(m2) + else: + m2 = "" + while (len(m1) + len(m2) - (1 if data._m_sign else 0)) < data._width: + m2 += '0' + e -= 1 + value = m1 + m2 + data._exp + f'{e:{esgn}0{data._e_width}d}' + elif data._prec == 0: # mantissa with trailing dot + e -= len(m2) + value = m1 + m2 + '.' + data._exp + f'{e:{esgn}0{data._e_width}d}' + else: + if data._m_lead0 > 0: + m2 = '0' * (data._m_lead0 - 1) + m1 + m2 + m1 = '0' + m2 = m2[: -data._m_lead0] # these should be zeros + e += data._m_lead0 + while len(m1) < data._prec: + m1 += m2[0] + m2 = m2[1:] + e -= 1 + value = m1 + '.' 
+ m2 + data._exp + f'{e:{esgn}0{data._e_width}d}' + + if value is None: + value = repr(data).lower() + return self.represent_scalar('tag:yaml.org,2002:float', value, anchor=anchor) + + def represent_sequence( + self, tag: Any, sequence: Any, flow_style: Any = None, + ) -> SequenceNode: + value: List[Any] = [] + # if the flow_style is None, the flow style tacked on to the object + # explicitly will be taken. If that is None as well the default flow + # style rules + try: + flow_style = sequence.fa.flow_style(flow_style) + except AttributeError: + flow_style = flow_style + try: + anchor = sequence.yaml_anchor() + except AttributeError: + anchor = None + if isinstance(tag, str): + tag = Tag(suffix=tag) + node = SequenceNode(tag, value, flow_style=flow_style, anchor=anchor) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + best_style = True + try: + comment = getattr(sequence, comment_attrib) + node.comment = comment.comment + # reset any comment already printed information + if node.comment and node.comment[1]: + for ct in node.comment[1]: + ct.reset() + item_comments = comment.items + for v in item_comments.values(): + if v and v[1]: + for ct in v[1]: + ct.reset() + item_comments = comment.items + if node.comment is None: + node.comment = comment.comment + else: + # as we are potentially going to extend this, make a new list + node.comment = comment.comment[:] + try: + node.comment.append(comment.end) + except AttributeError: + pass + except AttributeError: + item_comments = {} + for idx, item in enumerate(sequence): + node_item = self.represent_data(item) + self.merge_comments(node_item, item_comments.get(idx)) + if not (isinstance(node_item, ScalarNode) and not node_item.style): + best_style = False + value.append(node_item) + if flow_style is None: + if len(sequence) != 0 and self.default_flow_style is not None: + node.flow_style = self.default_flow_style + else: + node.flow_style = best_style + return node + + def 
merge_comments(self, node: Any, comments: Any) -> Any: + if comments is None: + assert hasattr(node, 'comment') + return node + if getattr(node, 'comment', None) is not None: + for idx, val in enumerate(comments): + if idx >= len(node.comment): + continue + nc = node.comment[idx] + if nc is not None: + assert val is None or val == nc + comments[idx] = nc + node.comment = comments + return node + + def represent_key(self, data: Any) -> Any: + if isinstance(data, CommentedKeySeq): + self.alias_key = None + return self.represent_sequence('tag:yaml.org,2002:seq', data, flow_style=True) + if isinstance(data, CommentedKeyMap): + self.alias_key = None + return self.represent_mapping('tag:yaml.org,2002:map', data, flow_style=True) + return SafeRepresenter.represent_key(self, data) + + def represent_mapping(self, tag: Any, mapping: Any, flow_style: Any = None) -> MappingNode: + value: List[Any] = [] + try: + flow_style = mapping.fa.flow_style(flow_style) + except AttributeError: + flow_style = flow_style + try: + anchor = mapping.yaml_anchor() + except AttributeError: + anchor = None + if isinstance(tag, str): + tag = Tag(suffix=tag) + node = MappingNode(tag, value, flow_style=flow_style, anchor=anchor) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + best_style = True + # no sorting! !! 
+ try: + comment = getattr(mapping, comment_attrib) + if node.comment is None: + node.comment = comment.comment + else: + # as we are potentially going to extend this, make a new list + node.comment = comment.comment[:] + if node.comment and node.comment[1]: + for ct in node.comment[1]: + ct.reset() + item_comments = comment.items + if self.dumper.comment_handling is None: + for v in item_comments.values(): + if v and v[1]: + for ct in v[1]: + ct.reset() + try: + node.comment.append(comment.end) + except AttributeError: + pass + else: + # NEWCMNT + pass + except AttributeError: + item_comments = {} + merge_list = [m[1] for m in getattr(mapping, merge_attrib, [])] + try: + merge_pos = getattr(mapping, merge_attrib, [[0]])[0][0] + except IndexError: + merge_pos = 0 + item_count = 0 + if bool(merge_list): + items = mapping.non_merged_items() + else: + items = mapping.items() + for item_key, item_value in items: + item_count += 1 + node_key = self.represent_key(item_key) + node_value = self.represent_data(item_value) + item_comment = item_comments.get(item_key) + if item_comment: + # assert getattr(node_key, 'comment', None) is None + # issue 351 did throw this because the comment from the list item was + # moved to the dict + node_key.comment = item_comment[:2] + nvc = getattr(node_value, 'comment', None) + if nvc is not None: # end comment already there + nvc[0] = item_comment[2] + nvc[1] = item_comment[3] + else: + node_value.comment = item_comment[2:] + if not (isinstance(node_key, ScalarNode) and not node_key.style): + best_style = False + if not (isinstance(node_value, ScalarNode) and not node_value.style): + best_style = False + value.append((node_key, node_value)) + if flow_style is None: + if ((item_count != 0) or bool(merge_list)) and self.default_flow_style is not None: + node.flow_style = self.default_flow_style + else: + node.flow_style = best_style + if bool(merge_list): + # because of the call to represent_data here, the anchors + # are marked as being 
used and thereby created + if len(merge_list) == 1: + arg = self.represent_data(merge_list[0]) + else: + arg = self.represent_data(merge_list) + arg.flow_style = True + value.insert( + merge_pos, (ScalarNode(Tag(suffix='tag:yaml.org,2002:merge'), '<<'), arg), + ) + return node + + def represent_omap(self, tag: Any, omap: Any, flow_style: Any = None) -> SequenceNode: + value: List[Any] = [] + try: + flow_style = omap.fa.flow_style(flow_style) + except AttributeError: + flow_style = flow_style + try: + anchor = omap.yaml_anchor() + except AttributeError: + anchor = None + if isinstance(tag, str): + tag = Tag(suffix=tag) + node = SequenceNode(tag, value, flow_style=flow_style, anchor=anchor) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + best_style = True + try: + comment = getattr(omap, comment_attrib) + if node.comment is None: + node.comment = comment.comment + else: + # as we are potentially going to extend this, make a new list + node.comment = comment.comment[:] + if node.comment and node.comment[1]: + for ct in node.comment[1]: + ct.reset() + item_comments = comment.items + for v in item_comments.values(): + if v and v[1]: + for ct in v[1]: + ct.reset() + try: + node.comment.append(comment.end) + except AttributeError: + pass + except AttributeError: + item_comments = {} + for item_key in omap: + item_val = omap[item_key] + node_item = self.represent_data({item_key: item_val}) + # node_item.flow_style = False + # node item has two scalars in value: node_key and node_value + item_comment = item_comments.get(item_key) + if item_comment: + if item_comment[1]: + node_item.comment = [None, item_comment[1]] + assert getattr(node_item.value[0][0], 'comment', None) is None + node_item.value[0][0].comment = [item_comment[0], None] + nvc = getattr(node_item.value[0][1], 'comment', None) + if nvc is not None: # end comment already there + nvc[0] = item_comment[2] + nvc[1] = item_comment[3] + else: + node_item.value[0][1].comment = 
item_comment[2:] + # if not (isinstance(node_item, ScalarNode) \ + # and not node_item.style): + # best_style = False + value.append(node_item) + if flow_style is None: + if self.default_flow_style is not None: + node.flow_style = self.default_flow_style + else: + node.flow_style = best_style + return node + + def represent_set(self, setting: Any) -> MappingNode: + flow_style = False + tag = Tag(suffix='tag:yaml.org,2002:set') + # return self.represent_mapping(tag, value) + value: List[Any] = [] + flow_style = setting.fa.flow_style(flow_style) + try: + anchor = setting.yaml_anchor() + except AttributeError: + anchor = None + node = MappingNode(tag, value, flow_style=flow_style, anchor=anchor) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + best_style = True + # no sorting! !! + try: + comment = getattr(setting, comment_attrib) + if node.comment is None: + node.comment = comment.comment + else: + # as we are potentially going to extend this, make a new list + node.comment = comment.comment[:] + if node.comment and node.comment[1]: + for ct in node.comment[1]: + ct.reset() + item_comments = comment.items + for v in item_comments.values(): + if v and v[1]: + for ct in v[1]: + ct.reset() + try: + node.comment.append(comment.end) + except AttributeError: + pass + except AttributeError: + item_comments = {} + for item_key in setting.odict: + node_key = self.represent_key(item_key) + node_value = self.represent_data(None) + item_comment = item_comments.get(item_key) + if item_comment: + assert getattr(node_key, 'comment', None) is None + node_key.comment = item_comment[:2] + node_key.style = '?' + node_value.style = '-' if flow_style else '?' 
+ if not (isinstance(node_key, ScalarNode) and not node_key.style): + best_style = False + if not (isinstance(node_value, ScalarNode) and not node_value.style): + best_style = False + value.append((node_key, node_value)) + best_style = best_style + return node + + def represent_dict(self, data: Any) -> MappingNode: + """write out tag if saved on loading""" + try: + _ = data.tag + except AttributeError: + tag = Tag(suffix='tag:yaml.org,2002:map') + else: + if data.tag.trval: + if data.tag.startswith('!!'): + tag = Tag(suffix='tag:yaml.org,2002:' + data.tag.trval[2:]) + else: + tag = data.tag + else: + tag = Tag(suffix='tag:yaml.org,2002:map') + return self.represent_mapping(tag, data) + + def represent_list(self, data: Any) -> SequenceNode: + try: + _ = data.tag + except AttributeError: + tag = Tag(suffix='tag:yaml.org,2002:seq') + else: + if data.tag.trval: + if data.tag.startswith('!!'): + tag = Tag(suffix='tag:yaml.org,2002:' + data.tag.trval[2:]) + else: + tag = data.tag + else: + tag = Tag(suffix='tag:yaml.org,2002:seq') + return self.represent_sequence(tag, data) + + def represent_datetime(self, data: Any) -> ScalarNode: + inter = 'T' if data._yaml['t'] else ' ' + _yaml = data._yaml + if _yaml['delta']: + data += _yaml['delta'] + value = data.isoformat(inter) + else: + value = data.isoformat(inter) + if _yaml['tz']: + value += _yaml['tz'] + return self.represent_scalar('tag:yaml.org,2002:timestamp', value) + + def represent_tagged_scalar(self, data: Any) -> ScalarNode: + try: + if data.tag.handle == '!!': + tag = f'{data.tag.handle} {data.tag.suffix}' + else: + tag = data.tag + except AttributeError: + tag = None + try: + anchor = data.yaml_anchor() + except AttributeError: + anchor = None + return self.represent_scalar(tag, data.value, style=data.style, anchor=anchor) + + def represent_scalar_bool(self, data: Any) -> ScalarNode: + try: + anchor = data.yaml_anchor() + except AttributeError: + anchor = None + return SafeRepresenter.represent_bool(self, data, 
anchor=anchor) + + def represent_yaml_object( + self, tag: Any, data: Any, cls: Any, flow_style: Optional[Any] = None, + ) -> MappingNode: + if hasattr(data, '__getstate__'): + state = data.__getstate__() + else: + state = data.__dict__.copy() + anchor = state.pop(Anchor.attrib, None) + res = self.represent_mapping(tag, state, flow_style=flow_style) + if anchor is not None: + res.anchor = anchor + return res + + +RoundTripRepresenter.add_representer(type(None), RoundTripRepresenter.represent_none) + +RoundTripRepresenter.add_representer( + LiteralScalarString, RoundTripRepresenter.represent_literal_scalarstring, +) + +RoundTripRepresenter.add_representer( + FoldedScalarString, RoundTripRepresenter.represent_folded_scalarstring, +) + +RoundTripRepresenter.add_representer( + SingleQuotedScalarString, RoundTripRepresenter.represent_single_quoted_scalarstring, +) + +RoundTripRepresenter.add_representer( + DoubleQuotedScalarString, RoundTripRepresenter.represent_double_quoted_scalarstring, +) + +RoundTripRepresenter.add_representer( + PlainScalarString, RoundTripRepresenter.represent_plain_scalarstring, +) + +# RoundTripRepresenter.add_representer(tuple, Representer.represent_tuple) + +RoundTripRepresenter.add_representer(ScalarInt, RoundTripRepresenter.represent_scalar_int) + +RoundTripRepresenter.add_representer(BinaryInt, RoundTripRepresenter.represent_binary_int) + +RoundTripRepresenter.add_representer(OctalInt, RoundTripRepresenter.represent_octal_int) + +RoundTripRepresenter.add_representer(HexInt, RoundTripRepresenter.represent_hex_int) + +RoundTripRepresenter.add_representer(HexCapsInt, RoundTripRepresenter.represent_hex_caps_int) + +RoundTripRepresenter.add_representer(ScalarFloat, RoundTripRepresenter.represent_scalar_float) + +RoundTripRepresenter.add_representer(ScalarBoolean, RoundTripRepresenter.represent_scalar_bool) + +RoundTripRepresenter.add_representer(CommentedSeq, RoundTripRepresenter.represent_list) + 
+RoundTripRepresenter.add_representer(CommentedMap, RoundTripRepresenter.represent_dict) + +RoundTripRepresenter.add_representer( + CommentedOrderedMap, RoundTripRepresenter.represent_ordereddict, +) + +RoundTripRepresenter.add_representer( + collections.OrderedDict, RoundTripRepresenter.represent_ordereddict, +) + +RoundTripRepresenter.add_representer(CommentedSet, RoundTripRepresenter.represent_set) + +RoundTripRepresenter.add_representer( + TaggedScalar, RoundTripRepresenter.represent_tagged_scalar, +) + +RoundTripRepresenter.add_representer(TimeStamp, RoundTripRepresenter.represent_datetime) diff --git a/resolver.py b/resolver.py new file mode 100644 index 0000000..aa3ca11 --- /dev/null +++ b/resolver.py @@ -0,0 +1,390 @@ +# coding: utf-8 + +import re + +from typing import Any, Dict, List, Union, Text, Optional # NOQA +from ruamel.yaml.compat import VersionType # NOQA + +from ruamel.yaml.tag import Tag +from ruamel.yaml.compat import _DEFAULT_YAML_VERSION # NOQA +from ruamel.yaml.error import * # NOQA +from ruamel.yaml.nodes import MappingNode, ScalarNode, SequenceNode # NOQA +from ruamel.yaml.util import RegExp # NOQA + +__all__ = ['BaseResolver', 'Resolver', 'VersionedResolver'] + + +# fmt: off +# resolvers consist of +# - a list of applicable version +# - a tag +# - a regexp +# - a list of first characters to match +implicit_resolvers = [ + ([(1, 2)], + 'tag:yaml.org,2002:bool', + RegExp('''^(?:true|True|TRUE|false|False|FALSE)$''', re.X), + list('tTfF')), + ([(1, 1)], + 'tag:yaml.org,2002:bool', + RegExp('''^(?:y|Y|yes|Yes|YES|n|N|no|No|NO + |true|True|TRUE|false|False|FALSE + |on|On|ON|off|Off|OFF)$''', re.X), + list('yYnNtTfFoO')), + ([(1, 2)], + 'tag:yaml.org,2002:float', + RegExp('''^(?: + [-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+]?[0-9]+)? + |[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+) + |[-+]?\\.[0-9_]+(?:[eE][-+][0-9]+)? 
+ |[-+]?\\.(?:inf|Inf|INF) + |\\.(?:nan|NaN|NAN))$''', re.X), + list('-+0123456789.')), + ([(1, 1)], + 'tag:yaml.org,2002:float', + RegExp('''^(?: + [-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+]?[0-9]+)? + |[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+) + |\\.[0-9_]+(?:[eE][-+][0-9]+)? + |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]* # sexagesimal float + |[-+]?\\.(?:inf|Inf|INF) + |\\.(?:nan|NaN|NAN))$''', re.X), + list('-+0123456789.')), + ([(1, 2)], + 'tag:yaml.org,2002:int', + RegExp('''^(?:[-+]?0b[0-1_]+ + |[-+]?0o?[0-7_]+ + |[-+]?[0-9_]+ + |[-+]?0x[0-9a-fA-F_]+)$''', re.X), + list('-+0123456789')), + ([(1, 1)], + 'tag:yaml.org,2002:int', + RegExp('''^(?:[-+]?0b[0-1_]+ + |[-+]?0?[0-7_]+ + |[-+]?(?:0|[1-9][0-9_]*) + |[-+]?0x[0-9a-fA-F_]+ + |[-+]?[1-9][0-9_]*(?::[0-5]?[0-9])+)$''', re.X), # sexagesimal int + list('-+0123456789')), + ([(1, 2), (1, 1)], + 'tag:yaml.org,2002:merge', + RegExp('^(?:<<)$'), + ['<']), + ([(1, 2), (1, 1)], + 'tag:yaml.org,2002:null', + RegExp('''^(?: ~ + |null|Null|NULL + | )$''', re.X), + ['~', 'n', 'N', '']), + ([(1, 2), (1, 1)], + 'tag:yaml.org,2002:timestamp', + RegExp('''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9] + |[0-9][0-9][0-9][0-9] -[0-9][0-9]? -[0-9][0-9]? + (?:[Tt]|[ \\t]+)[0-9][0-9]? + :[0-9][0-9] :[0-9][0-9] (?:\\.[0-9]*)? + (?:[ \\t]*(?:Z|[-+][0-9][0-9]?(?::[0-9][0-9])?))?)$''', re.X), + list('0123456789')), + ([(1, 2), (1, 1)], + 'tag:yaml.org,2002:value', + RegExp('^(?:=)$'), + ['=']), + # The following resolver is only for documentation purposes. It cannot work + # because plain scalars cannot start with '!', '&', or '*'. 
+ ([(1, 2), (1, 1)], + 'tag:yaml.org,2002:yaml', + RegExp('^(?:!|&|\\*)$'), + list('!&*')), +] +# fmt: on + + +class ResolverError(YAMLError): + pass + + +class BaseResolver: + + DEFAULT_SCALAR_TAG = Tag(suffix='tag:yaml.org,2002:str') + DEFAULT_SEQUENCE_TAG = Tag(suffix='tag:yaml.org,2002:seq') + DEFAULT_MAPPING_TAG = Tag(suffix='tag:yaml.org,2002:map') + + yaml_implicit_resolvers: Dict[Any, Any] = {} + yaml_path_resolvers: Dict[Any, Any] = {} + + def __init__(self: Any, loadumper: Any = None) -> None: + self.loadumper = loadumper + if self.loadumper is not None and getattr(self.loadumper, '_resolver', None) is None: + self.loadumper._resolver = self.loadumper + self._loader_version: Any = None + self.resolver_exact_paths: List[Any] = [] + self.resolver_prefix_paths: List[Any] = [] + + @property + def parser(self) -> Any: + if self.loadumper is not None: + if hasattr(self.loadumper, 'typ'): + return self.loadumper.parser + return self.loadumper._parser + return None + + @classmethod + def add_implicit_resolver_base(cls, tag: Any, regexp: Any, first: Any) -> None: + if 'yaml_implicit_resolvers' not in cls.__dict__: + # deepcopy doesn't work here + cls.yaml_implicit_resolvers = { + k: cls.yaml_implicit_resolvers[k][:] for k in cls.yaml_implicit_resolvers + } + if first is None: + first = [None] + for ch in first: + cls.yaml_implicit_resolvers.setdefault(ch, []).append((tag, regexp)) + + @classmethod + def add_implicit_resolver(cls, tag: Any, regexp: Any, first: Any) -> None: + if 'yaml_implicit_resolvers' not in cls.__dict__: + # deepcopy doesn't work here + cls.yaml_implicit_resolvers = { + k: cls.yaml_implicit_resolvers[k][:] for k in cls.yaml_implicit_resolvers + } + if first is None: + first = [None] + for ch in first: + cls.yaml_implicit_resolvers.setdefault(ch, []).append((tag, regexp)) + implicit_resolvers.append(([(1, 2), (1, 1)], tag, regexp, first)) + + # @classmethod + # def add_implicit_resolver(cls, tag, regexp, first): + + @classmethod + def 
add_path_resolver(cls, tag: Any, path: Any, kind: Any = None) -> None: + # Note: `add_path_resolver` is experimental. The API could be changed. + # `new_path` is a pattern that is matched against the path from the + # root to the node that is being considered. `node_path` elements are + # tuples `(node_check, index_check)`. `node_check` is a node class: + # `ScalarNode`, `SequenceNode`, `MappingNode` or `None`. `None` + # matches any kind of a node. `index_check` could be `None`, a boolean + # value, a string value, or a number. `None` and `False` match against + # any _value_ of sequence and mapping nodes. `True` matches against + # any _key_ of a mapping node. A string `index_check` matches against + # a mapping value that corresponds to a scalar key which content is + # equal to the `index_check` value. An integer `index_check` matches + # against a sequence value with the index equal to `index_check`. + if 'yaml_path_resolvers' not in cls.__dict__: + cls.yaml_path_resolvers = cls.yaml_path_resolvers.copy() + new_path: List[Any] = [] + for element in path: + if isinstance(element, (list, tuple)): + if len(element) == 2: + node_check, index_check = element + elif len(element) == 1: + node_check = element[0] + index_check = True + else: + raise ResolverError(f'Invalid path element: {element!s}') + else: + node_check = None + index_check = element + if node_check is str: + node_check = ScalarNode + elif node_check is list: + node_check = SequenceNode + elif node_check is dict: + node_check = MappingNode + elif ( + node_check not in [ScalarNode, SequenceNode, MappingNode] + and not isinstance(node_check, str) + and node_check is not None + ): + raise ResolverError(f'Invalid node checker: {node_check!s}') + if not isinstance(index_check, (str, int)) and index_check is not None: + raise ResolverError(f'Invalid index checker: {index_check!s}') + new_path.append((node_check, index_check)) + if kind is str: + kind = ScalarNode + elif kind is list: + kind = SequenceNode + 
def resolve(self, kind: Any, value: Any, implicit: Any) -> Any:
    """Return the tag for a node of class `kind` that holds `value`.

    Lookup order:

    1. For a ScalarNode with ``implicit[0]`` set, the implicit resolvers
       registered under the first character of `value` (or under ``""``
       for an empty value) are tried, followed by the wildcard resolvers
       registered under ``None``. The first regexp that matches wins.
    2. If any path resolvers are registered, the innermost entry of
       ``resolver_exact_paths`` is consulted, first for `kind`, then for
       the ``None`` (any kind) entry.
    3. Otherwise the default scalar/sequence/mapping tag for `kind`.

    Returns None when `kind` is none of the three node classes.
    """
    if kind is ScalarNode and implicit[0]:
        if value == "":
            resolvers = self.yaml_implicit_resolvers.get("", [])
        else:
            resolvers = self.yaml_implicit_resolvers.get(value[0], [])
        wildcard_resolvers = self.yaml_implicit_resolvers.get(None, [])
        # Concatenate into a fresh list: `resolvers += ...` would extend the
        # list stored in the class-level registry in place, so the wildcard
        # entries would pile up in the per-character lists on every call.
        for tag, regexp in resolvers + wildcard_resolvers:
            if regexp.match(value):
                return Tag(suffix=tag)
        implicit = implicit[1]
    if bool(self.yaml_path_resolvers):
        exact_paths = self.resolver_exact_paths[-1]
        if kind in exact_paths:
            return Tag(suffix=exact_paths[kind])
        if None in exact_paths:
            return Tag(suffix=exact_paths[None])
    if kind is ScalarNode:
        return self.DEFAULT_SCALAR_TAG
    elif kind is SequenceNode:
        return self.DEFAULT_SEQUENCE_TAG
    elif kind is MappingNode:
        return self.DEFAULT_MAPPING_TAG
    return None
def resolve(self, kind: Any, value: Any, implicit: Any) -> Any:
    """Return the tag for a node of class `kind` that holds `value`.

    Works like the base-class lookup, except that the implicit resolvers
    come from ``self.versioned_resolver``, i.e. the table selected for the
    YAML version currently being processed:

    1. For a ScalarNode with ``implicit[0]`` set, the resolvers registered
       under the first character of `value` (or under ``""`` for an empty
       value) are tried, followed by those registered under ``None``. The
       first regexp that matches wins.
    2. If any path resolvers are registered, the innermost entry of
       ``resolver_exact_paths`` is consulted, first for `kind`, then for
       the ``None`` (any kind) entry.
    3. Otherwise the default scalar/sequence/mapping tag for `kind`.

    Returns None when `kind` is none of the three node classes.
    """
    if kind is ScalarNode and implicit[0]:
        # the property does a version lookup on each access; fetch it once
        version_resolvers = self.versioned_resolver
        if value == "":
            resolvers = version_resolvers.get("", [])
        else:
            resolvers = version_resolvers.get(value[0], [])
        wildcard_resolvers = version_resolvers.get(None, [])
        # Concatenate into a fresh list: `resolvers += ...` would extend the
        # list stored in the per-version table in place, so the wildcard
        # entries would pile up in the per-character lists on every call.
        for tag, regexp in resolvers + wildcard_resolvers:
            if regexp.match(value):
                return Tag(suffix=tag)
        implicit = implicit[1]
    if bool(self.yaml_path_resolvers):
        exact_paths = self.resolver_exact_paths[-1]
        if kind in exact_paths:
            return Tag(suffix=exact_paths[kind])
        if None in exact_paths:
            return Tag(suffix=exact_paths[None])
    if kind is ScalarNode:
        return self.DEFAULT_SCALAR_TAG
    elif kind is SequenceNode:
        return self.DEFAULT_SEQUENCE_TAG
    elif kind is MappingNode:
        return self.DEFAULT_MAPPING_TAG
    return None
Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Topic :: Software Development :: Libraries :: Python Modules +Classifier: Topic :: Text Processing :: Markup +Classifier: Typing :: Typed +Requires-Python: >=3.7 +Description-Content-Type: text/markdown; charset=UTF-8; variant=CommonMark +Provides-Extra: jinja2 +Provides-Extra: docs +License-File: LICENSE + +# ruamel.yaml + +`ruamel.yaml` is a YAML 1.2 loader/dumper package for Python. +<table class="docutils"> + <tr> <td>version</td> + <td>0.18.5</td> + </tr> + <tr> <td>updated</td> + <td>2023-11-03</td> + </tr> + <tr> <td>documentation</td> + <td><a href="https://yaml.readthedocs.io">https://yaml.readthedocs.io</a></td> + </tr> + <tr> <td>repository</td> + <td><a href="https://sourceforge.net/projects/ruamel-yaml">https://sourceforge.net/projects/ruamel-yaml</a></td> + </tr> + <tr> <td>pypi</td> + <td><a href="https://pypi.org/project/ruamel.yaml">https://pypi.org/project/ruamel.yaml</a></td> + </tr> +</table> + +As announced, in 0.18.0, the old PyYAML functions have been deprecated. +(`scan`, `parse`, `compose`, `load`, `emit`, `serialize`, `dump` and their variants +(`_all`, `safe_`, `round_trip_`, etc)). If you only read this after your program has +stopped working: I am sorry to hear that, but that also means you, or the person +developing your program, has not tested with warnings on (which is the recommendation +in PEP 565, and e.g. the default when using `pytest`). If you have troubles, explicitly use +``` +pip install "ruamel.yaml<0.18.0" +``` +or put something to that effect in your requirements, to give yourself +some time to solve the issue. + +There will be at least one more potentially breaking change in the 0.18 series: `YAML(typ='unsafe')` +now has a pending deprecation warning and is going to be deprecated, probably before the end of 2023. 
+If you only use it to dump, please use the new `YAML(typ='full')`, the result of that can be *safely* +loaded with a default instance `YAML()`, as that will get you inspectable, tagged, scalars, instead of +executed Python functions/classes. (You should probably add constructors for what you actually need, +but I do consider adding a `ruamel.yaml.unsafe` package that will re-add the `typ='unsafe'` option.) +*Please adjust/pin your dependencies accordingly if necessary.* + + +There seems to be a CVE on `ruamel.yaml`, stating that the `load()` function could be abused +because of unchecked input. `load()` was never the default function (that was `round_trip_load()` +before the new API came into existence). So the creator of that CVE was ill informed and +probably lazily assumed that since `ruamel.yaml` is a derivative of PyYAML (for which +a similar CVE exists), the same problem would still exist, without checking. +So the CVE was always inappropriate, now just more so, as the call +to the function `load()` with any input will terminate your program with an error message. If you +(have to) care about such things as this CVE, my recommendation is to stop using Python +completely, as `pickle.load()` can be abused in the same way as `load()` (and, unlike `load()`, +is only documented to be unsafe, without development-time warning). + +Version 0.17.21 was the last one tested to be working on Python 3.5 and 3.6<BR> +The 0.16.13 release was the last that was tested to be working on Python 2.7. + + +There are two extra plug-in packages +(`ruamel.yaml.bytes` and `ruamel.yaml.string`) +for those not wanting to do the streaming to a +`io.BytesIO/StringIO` buffer themselves. + +If your package uses `ruamel.yaml` and is not listed on PyPI, drop me an +email, preferably with some information on how you use the package (or a +link to the repository) and I'll keep you informed when the status of +the API is stable enough to make the transition. 
+ +<pre> + <a href="overview/#overview">Overview</a> + + <a href="install/#installing">Installing</a> + <a href="install/#optional-requirements">Optional requirements</a> + + <a href="basicuse/#basic-usage">Basic Usage</a> + <a href="basicuse/#load-and-dump">Load and dump </a> + <a href="basicuse/#more-examples">More examples</a> + + <a href="dumpcls/#working-with-python-classes">Working with Python classes</a> + <a href="dumpcls/#dumping-python-classes">Dumping Python classes</a> + <a href="dumpcls/#dataclass">Dataclass</a> + + <a href="detail/#details">Details</a> + <a href="detail/#indentation-of-block-sequences">Indentation of block sequences</a> + <a href="detail/#inconsistently-indented-yaml">Inconsistently indented YAML</a> + <a href="detail/#indenting-using-typsafe">Indenting using `typ="safe"`</a> + <a href="detail/#positioning-in-top-level-mappings-prefixing">Positioning ':' in top level mappings, prefixing ':'</a> + <a href="detail/#document-version-support">Document version support</a> + <a href="detail/#round-trip-including-comments">Round trip including comments</a> + <a href="detail/#config-file-formats">Config file formats</a> + <a href="detail/#extending">Extending</a> + <a href="detail/#smartening">Smartening</a> + + <a href="example/#examples">Examples</a> + <a href="example/#output-of-dump-as-a-string">Output of `dump()` as a string</a> + + <a href="api/#departure-from-previous-api">Departure from previous API</a> + <a href="api/#loading">Loading</a> + <a href="api/#duplicate-keys">Duplicate keys</a> + <a href="api/#dumping-a-multi-document-yaml-stream">Dumping a multi-document YAML stream</a> + <a href="api/#dumping">Dumping</a> + <a href="api/#controls">Controls</a> + <a href="api/#transparent-usage-of-new-and-old-api">Transparent usage of new and old API</a> + <a href="api/#reason-for-api-change">Reason for API change</a> + + <a href="pyyaml/#differences-with-pyyaml">Differences with PyYAML</a> + <a 
href="pyyaml/#defaulting-to-yaml-12-support">Defaulting to YAML 1.2 support</a> + <a href="pyyaml/#py2py3-reintegration">PY2/PY3 reintegration</a> + <a href="pyyaml/#fixes">Fixes</a> + <a href="pyyaml/#testing">Testing</a> + <a href="pyyaml/#api">API</a> + + <a href="contributing/#contributing">Contributing</a> + <a href="contributing/#documentation">Documentation</a> + <a href="contributing/#code">Code</a> + <a href="contributing/#flake">Flake</a> + <a href="contributing/#toxpytest">Tox/pytest</a> + <a href="contributing/#typingmypy">Typing/mypy</a> + <a href="contributing/#generated-files">Generated files</a> + <a href="contributing/#vulnerabilities">Vulnerabilities</a> +</pre> + + +[![image](https://readthedocs.org/projects/yaml/badge/?version=latest)](https://yaml.readthedocs.org/en/latest?badge=latest)[![image](https://bestpractices.coreinfrastructure.org/projects/1128/badge)](https://bestpractices.coreinfrastructure.org/projects/1128) +[![image](https://sourceforge.net/p/ruamel-yaml/code/ci/default/tree/_doc/_static/license.svg?format=raw)](https://opensource.org/licenses/MIT) +[![image](https://sourceforge.net/p/ruamel-yaml/code/ci/default/tree/_doc/_static/pypi.svg?format=raw)](https://pypi.org/project/ruamel.yaml/) +[![image](https://sourceforge.net/p/oitnb/code/ci/default/tree/_doc/_static/oitnb.svg?format=raw)](https://pypi.org/project/oitnb/) +[![image](http://www.mypy-lang.org/static/mypy_badge.svg)](http://mypy-lang.org/) + +# ChangeLog + +0.18.5 (2023-11-03): + +- there is some indication that dependent packages have been pinned to use specific (tested) and just install the latest even in Python versions that have end-of-life + +0.18.4 (2023-11-01): + +- YAML() instance has a `doc_infos` attribute which is a cumulative list of DocInfo instances (one for `load()`, one per document for `load_all()`). 
DocInfo instances contain version information (requested, directive) and tag directive information +- fix issue that the YAML instance tags attribute was not reset between documents, resulting in mixing of tag directives of multiple documents. Now only provides tag directive information on latest document after loading. This means tags for dumping must be set **again** after a document is loaded with the same instance. (because of this tags will be removed in a favour of a different mechanism in the future) +- fix issue with multiple document intermixing YAML 1.2 and YAML 1.1, the VersionedResolver now resets +- fix issue with disappearing comment when next token was Tag (still can't have both a comment before a tag and after a tag, before node) + +0.18.3 (2023-10-29): + +- fix issue with spurious newline on first item after comment + nested block sequence +- additional links in the metadata on PyPI (Reported, with pointers how to fix, by [Sorin](https://sourceforge.net/u/ssbarnea/profile/)). + +0.18.2 (2023-10-24): + +- calling the deprecated functions now raises an `AttributeError` with the, somewhat more informative, orginal warning message. Instead of calling `sys.exit(1)` + +0.18.1 (2023-10-24): + +- calling the deprecated functions now always displays the warning message. (reported by [Trend Lloyd](https://sourceforge.net/u/lathiat2/profile/)) + +0.18.0 (2023-10-23): + +- the **functions** `scan`, `parse`, `compose`, `load`, `emit`, `serialize`, `dump` and their variants (`_all`, `safe_`, `round_trip_`, etc) have been deprecated (the same named **methods** on `YAML()` instances are, of course, still there. +- `YAML(typ='unsafe')` now issues a `PendingDeprecationWarning`. This will become deprecated in the 0.18 series +(probably before the end of 2023). +You can use `YAML(typ='full')` to dump unregistered Python classes/functions. +For loading you'll have to register your classes/functions +if you want the old, unsafe, functionality. 
You can still load any tag, like `!!python/name:posix.system', **safely** +with the (default) round-trip parser. +- fix for `bytes-like object is required not 'str' while dumping binary streams`. This was reported, analysed and a fix provided by [Vit Zikmund](https://sourceforge.net/u/tlwhitec/profile/) + +0.17.40 (2023-10-20): + +- flow style sets are now preserved ( `!!set {a, b, c} )`. Any values specified when loading are dropped, including `!!null ""`. +- potential workaround for issue 484: the long_description_content_type including the variant specification `CommonMark` +can result in problems on Azure. If you can install from `.tar.gz` using +`RUAMEL_NO_LONG_DESCRIPTION=1 pip install ruamel.yaml --no-binary :all:` then the long description, and its +offending type, are nog included (in the METADATA). +(Reported by [Coury Ditch](https://sourceforge.net/u/cmditch/profile/)) +- links in documentation update (reported by [David Hoese](https://sourceforge.net/u/daveydave400/profile/)) +- Added some `__repr__` for internally used classes + +0.17.39 (2023-10-19): + +- update README generation, no code changes + +0.17.36 (2023-10-19): + +- fixed issue 480, dumping of a loaded empty flow-style mapping with comment failed (Reported by [Stéphane Brunner](https://sourceforge.net/u/stbrunner/profile/)) +- fixed issue 482, caused by DEFAULT_MAPPING_TAG having changes to being a `Tag()` instance, not a string (reported by [yan12125](https://sourceforge.net/u/yan12125/profile/)) +- updated documentation to use mkdocs + +0.17.35 (2023-10-04): + +- support for loading dataclasses with `InitVar` variables (some special coding was necessary to get the, unexecpected, default value in the corresponding instance attribute ( example of usage in [this question](https://stackoverflow.com/q/77228378/1307905)) + +0.17.34 (2023-10-03): + +- Python 3.12 also loads C version when using `typ='safe'` +- initial support for loading invoking +`__post_init__()` on dataclasses that have that 
+method after loading a registered dataclass. +(Originally +[asked](https://stackoverflow.com/q/51529458/1307905) on +Stackoverflow by +[nyanpasu64](https://stackoverflow.com/users/2683842/nyanpasu64) +and as +[ticket](https://sourceforge.net/p/ruamel-yaml/tickets/355/) by +[Patrick Lehmann](https://sourceforge.net/u/paebbels/profile/)) + +``` +@yaml.register_class +@dataclass +class ... +``` + +0.17.33 (2023-09-28): + +- added `flow_seq_start`, `flow_seq_end`, `flow_seq_separator`, `flow_map_start`, `flow_map_end`, `flow_map_separator` **class** attributes to the `Emitter` class so flow style output can more easily be influenced (based on [this answer](https://stackoverflow.com/a/76547814/1307905) on a StackOverflow question by [Huw Walters](https://stackoverflow.com/users/291033/huw-walters)). + +0.17.32 (2023-06-17): + +- fix issue with scanner getting stuck in infinite loop + +0.17.31 (2023-05-31): + +- added tag.setter on `ScalarEvent` and on `Node`, that takes either a `Tag` instance, or a str (reported by [Sorin Sbarnea](https://sourceforge.net/u/ssbarnea/profile/)) + +0.17.30 (2023-05-30): + +- fix issue 467, caused by Tag instances not being hashable (reported by [Douglas Raillard](https://bitbucket.org/%7Bcf052d92-a278-4339-9aa8-de41923bb556%7D/)) + +0.17.29 (2023-05-30): + +- changed the internals of the tag property from a string to a class which allows for preservation of the original handle and suffix. This should result in better results using documents with %TAG directives, as well as preserving URI escapes in tag suffixes. 
+ +0.17.28 (2023-05-26): + +- fix for issue 464: documents ending with document end marker +without final newline fail to load (reported by [Mariusz +Rusiniak](https://sourceforge.net/u/r2dan/profile/)) + +0.17.27 (2023-05-25): + +- fix issue with inline mappings as value for merge keys (reported by Sirish on [StackOverflow](https://stackoverflow.com/q/76331049/1307905)) +- fix for 468, error inserting after accessing merge attribute on `CommentedMap` (reported by [Bastien gerard](https://sourceforge.net/u/bagerard/)) +- fix for issue 461 pop + insert on same `CommentedMap` key throwing error (reported by [John Thorvald Wodder II](https://sourceforge.net/u/jwodder/profile/)) + +0.17.26 (2023-05-09): + +- fix for error on edge cage for issue 459 + +0.17.25 (2023-05-09): + +- fix for regression while dumping wrapped strings with too many backslashes removed (issue 459, reported by [Lele Gaifax](https://sourceforge.net/u/lele/profile/)) + +0.17.24 (2023-05-06): + +- rewrite of `CommentedMap.insert()`. If you have a merge key in the YAML document for the mapping you insert to, the position value should be the one as you look at the YAML input. This fixes issue 453 where other keys of a merged in mapping would show up after an insert (reported by [Alex Miller](https://sourceforge.net/u/millerdevel/profile/)). It also fixes a call to `.insert()` resulting into the merge key to move to be the first key if it wasn't already and it is also now possible to insert a key before a merge key (even if the fist key in the mapping). +- fix (in the pure Python implementation including default) for issue 447. (reported by [Jack Cherng](https://sourceforge.net/u/jfcherng/profile/), also brought up by brent on [StackOverflow](https://stackoverflow.com/q/40072485/1307905)) + +0.17.23 (2023-05-05): + +- fix 458, error on plain scalars starting with word longer than width. 
(reported by [Kyle Larose](https://sourceforge.net/u/klarose/profile/)) +- fix for `.update()` no longer correctly handling keyword arguments (reported by John Lin on [StackOverflow]( https://stackoverflow.com/q/76089100/1307905)) +- fix issue 454: high Unicode (emojis) in quoted strings always +escaped (reported by [Michal +Čihař](https://sourceforge.net/u/nijel/profile/) based on a +question on StackOverflow). +- fix issue with emitter conservatively inserting extra backslashes in wrapped quoted strings (reported by thebenman on [StackOverflow](https://stackoverflow.com/q/75631454/1307905)) + +0.17.22 (2023-05-02): + +- fix issue 449 where the second exclamation marks got URL encoded (reported and fixing PR provided by [John Stark](https://sourceforge.net/u/jods/profile/)) +- fix issue with indent != 2 and literal scalars with empty first line (reported by wrdis on [StackOverflow](https://stackoverflow.com/q/75584262/1307905)) +- updated `__repr__` of CommentedMap, now that Python's dict is ordered -> no more `ordereddict(list-of-tuples)` +- merge MR 4, handling OctalInt in YAML 1.1 (provided by [Jacob Floyd](https://sourceforge.net/u/cognifloyd/profile/)) +- fix loading of `!!float 42` (reported by Eric on [Stack overflow](https://stackoverflow.com/a/71555107/1307905)) +- line numbers are now set on `CommentedKeySeq` and `CommentedKeyMap` (which are created if you have a sequence resp. mapping as the key in a mapping) +- plain scalars: put single words longer than width on a line of +their own, instead of after the previous line (issue 427, reported +by [Antoine +Cotten](https://sourceforge.net/u/antoineco/profile/)). Caveat: +this currently results in a space ending the previous line. +- fix for folded scalar part of 421: comments after ">" on first +line of folded scalars are now preserved (as were those in the +same position on literal scalars). Issue reported by Jacob Floyd. 
+- added stacklevel to warnings +- typing changed from Py2 compatible comments to Py3, removed various Py2-isms + +0.17.21 (2022-02-12): + +- fix bug in calling `.compose()` method with `pathlib.Path` instance. + +0.17.20 (2022-01-03): + +- fix error in microseconds while rounding datetime fractions >= 9999995 (reported by [Luis Ferreira](https://sourceforge.net/u/ljmf00/)) + +0.17.19 (2021-12-26): + +- fix mypy problems (reported by [Arun](https://sourceforge.net/u/arunppsg/profile/)) + +0.17.18 (2021-12-24): + +- copy-paste error in folded scalar comment attachment (reported by [Stephan Geulette](https://sourceforge.net/u/sgeulette/profile/)) +- fix 411, indent error comment between key empty seq value (reported by [Guillermo Julián](https://sourceforge.net/u/gjulianm/profile/)) + +0.17.17 (2021-10-31): + +- extract timestamp matching/creation to util + +0.17.16 (2021-08-28): + +- 398 also handle issue 397 when comment is newline + +0.17.15 (2021-08-28): + +- fix issue 397, insert comment before key when a comment between key and value exists (reported by [Bastien gerard](https://sourceforge.net/u/bagerard/)) + +0.17.14 (2021-08-25): + +- fix issue 396, inserting key/val in merged-in dictionary (reported by [Bastien gerard](https://sourceforge.net/u/bagerard/)) + +0.17.13 (2021-08-21): + +- minor fix in attr handling + +0.17.12 (2021-08-21): + +- fix issue with anchor on registered class not preserved and those classes using package attrs with `@attr.s()` (both reported by [ssph](https://sourceforge.net/u/sph/)) + +0.17.11 (2021-08-19): + +- fix error baseclass for `DuplicateKeyError` (reported by [Łukasz Rogalski](https://sourceforge.net/u/lrogalski/)) +- fix typo in reader error message, causing `KeyError` during reader error (reported by [MTU](https://sourceforge.net/u/mtu/)) + +0.17.10 (2021-06-24): + +- fix issue 388, token with old comment structure != two elements (reported by [Dimitrios Bariamis](https://sourceforge.net/u/dbdbc/)) + +0.17.9 (2021-06-10): 
+ +- fix issue with updating CommentedMap (reported by sri on [StackOverflow](https://stackoverflow.com/q/67911659/1307905)) + +0.17.8 (2021-06-09): + +- fix for issue 387 where templated anchors on tagged object did get set resulting in potential id reuse. (reported by [Artem Ploujnikov](https://sourceforge.net/u/flexthink/)) + +0.17.7 (2021-05-31): + +- issue 385 also affected other deprecated loaders (reported via email by Oren Watson) + +0.17.6 (2021-05-31): + +- merged type annotations update provided by [Jochen Sprickerhof](https://sourceforge.net/u/jspricke/) +- fix for issue 385: deprecated round_trip_loader function not +working (reported by [Mike +Gouline](https://sourceforge.net/u/gouline/)) +- wasted a few hours getting rid of mypy warnings/errors + +0.17.5 (2021-05-30): + +- fix for issue 384 `!!set` with aliased entry resulting in broken YAML on rt reported by [William Kimball](https://sourceforge.net/u/william303/)) + +0.17.4 (2021-04-07): + +- prevent (empty) comments from throwing assertion error (issue 351 reported by [William Kimball](https://sourceforge.net/u/william303/)) comments (or empty line) will be dropped + +0.17.3 (2021-04-07): + +- fix for issue 382 caused by an error in a format string (reported by [William Kimball](https://sourceforge.net/u/william303/)) +- allow expansion of aliases by setting `yaml.composer.return_alias = lambda s: copy.deepcopy(s)` +(as per [Stackoverflow answer](https://stackoverflow.com/a/66983530/1307905)) + +0.17.2 (2021-03-29): + +- change -py2.py3-none-any.whl to -py3-none-any.whl, and remove 0.17.1 + +0.17.1 (2021-03-29): + +- added 'Programming Language :: Python :: 3 :: Only', and +removing 0.17.0 from PyPI (reported by [Alasdair +Nicol](https://sourceforge.net/u/alasdairnicol/)) + +0.17.0 (2021-03-26): + +- removed because of incomplete classifiers +- this release no longer supports Python 2.7, most if not all Python 2 specific code is removed. 
The 0.17.x series is the last to support Python 3.5 (this also allowed for removal of the dependency on `ruamel.std.pathlib`) +- remove Python2 specific code branches and adaptations (u-strings) +- prepare % code for f-strings using `_F` +- allow PyOxidisation ([issue 324](https://sourceforge.net/p/ruamel-yaml/tickets/324/) resp. [issue 171](https://github.com/indygreg/PyOxidizer/issues/171)) +- replaced Python 2 compatible enforcement of keyword arguments with '*' +- the old top level *functions* `load`, `safe_load`, `round_trip_load`, `dump`, `safe_dump`, `round_trip_dump`, `scan`, `parse`, `compose`, `emit`, `serialize` as well as their `_all` variants for multi-document streams, now issue a `PendingDeprecationning` (e.g. when run from pytest, but also Python is started with `-Wd`). Use the methods on `YAML()`, which have been extended. +- fix for issue 376: indentation changes could put literal/folded +scalar to start before the `#` column of a following comment. +Effectively making the comment part of the scalar in the output. 
+(reported by [Bence Nagy](https://sourceforge.net/u/underyx/)) + +------------------------------------------------------------------------ + +For older changes see the file +[CHANGES](https://sourceforge.net/p/ruamel-yaml/code/ci/default/tree/CHANGES) diff --git a/ruamel.yaml.egg-info/SOURCES.txt b/ruamel.yaml.egg-info/SOURCES.txt new file mode 100644 index 0000000..1b1dbe7 --- /dev/null +++ b/ruamel.yaml.egg-info/SOURCES.txt @@ -0,0 +1,44 @@ +CHANGES +LICENSE +MANIFEST.in +README.md +pyproject.toml +setup.py +./LICENSE +./__init__.py +./anchor.py +./comments.py +./compat.py +./composer.py +./configobjwalker.py +./constructor.py +./cyaml.py +./docinfo.py +./dumper.py +./emitter.py +./error.py +./events.py +./loader.py +./main.py +./nodes.py +./parser.py +./py.typed +./reader.py +./representer.py +./resolver.py +./scalarbool.py +./scalarfloat.py +./scalarint.py +./scalarstring.py +./scanner.py +./serializer.py +./tag.py +./timestamp.py +./tokens.py +./util.py +ruamel.yaml.egg-info/PKG-INFO +ruamel.yaml.egg-info/SOURCES.txt +ruamel.yaml.egg-info/dependency_links.txt +ruamel.yaml.egg-info/not-zip-safe +ruamel.yaml.egg-info/requires.txt +ruamel.yaml.egg-info/top_level.txt
\ No newline at end of file diff --git a/ruamel.yaml.egg-info/dependency_links.txt b/ruamel.yaml.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/ruamel.yaml.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/ruamel.yaml.egg-info/not-zip-safe b/ruamel.yaml.egg-info/not-zip-safe new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/ruamel.yaml.egg-info/not-zip-safe @@ -0,0 +1 @@ + diff --git a/ruamel.yaml.egg-info/requires.txt b/ruamel.yaml.egg-info/requires.txt new file mode 100644 index 0000000..c8c22f0 --- /dev/null +++ b/ruamel.yaml.egg-info/requires.txt @@ -0,0 +1,10 @@ + +[:platform_python_implementation=="CPython" and python_version<"3.13"] +ruamel.yaml.clib>=0.2.7 + +[docs] +ryd +mercurial>5.7 + +[jinja2] +ruamel.yaml.jinja2>=0.2 diff --git a/ruamel.yaml.egg-info/top_level.txt b/ruamel.yaml.egg-info/top_level.txt new file mode 100644 index 0000000..282b116 --- /dev/null +++ b/ruamel.yaml.egg-info/top_level.txt @@ -0,0 +1 @@ +ruamel diff --git a/scalarbool.py b/scalarbool.py new file mode 100644 index 0000000..083d3cb --- /dev/null +++ b/scalarbool.py @@ -0,0 +1,42 @@ +# coding: utf-8 + +""" +You cannot subclass bool, and this is necessary for round-tripping anchored +bool values (and also if you want to preserve the original way of writing) + +bool.__bases__ is type 'int', so that is what is used as the basis for ScalarBoolean as well. 
class ScalarFloat(float):
    """
    float subclass that carries extra attributes next to its numeric value:
    `_width`, `_prec`, `_m_sign`, `_m_lead0`, `_exp`, `_e_width`, `_e_sign`,
    `_underscore` and (optionally) an anchor.

    NOTE(review): the attributes presumably record how the float was written
    in the YAML source so it can be dumped the same way -- confirm against the
    constructor/representer.

    The in-place operators (`+=`, `//=`, `*=`, `**=`, `-=`) return a plain
    `float`, so none of the extra attributes survive such an operation.
    """

    def __new__(cls: Any, *args: Any, **kw: Any) -> Any:
        # strip the keywords float.__new__ does not know before delegating;
        # every one defaults to None except `m_lead0`, which defaults to 0
        width = kw.pop('width', None)
        prec = kw.pop('prec', None)
        m_sign = kw.pop('m_sign', None)
        m_lead0 = kw.pop('m_lead0', 0)
        exp = kw.pop('exp', None)
        e_width = kw.pop('e_width', None)
        e_sign = kw.pop('e_sign', None)
        underscore = kw.pop('underscore', None)
        anchor = kw.pop('anchor', None)
        v = float.__new__(cls, *args, **kw)
        v._width = width
        v._prec = prec
        v._m_sign = m_sign
        v._m_lead0 = m_lead0
        v._exp = exp
        v._e_width = e_width
        v._e_sign = e_sign
        v._underscore = underscore
        if anchor is not None:
            # an anchor passed at construction time is always dumped
            v.yaml_set_anchor(anchor, always_dump=True)
        return v

    def __iadd__(self, a: Any) -> Any:  # type: ignore
        """return `self + a` as a plain float"""
        return float(self) + a

    def __ifloordiv__(self, a: Any) -> Any:  # type: ignore
        """return `self // a` as a plain float"""
        return float(self) // a

    def __imul__(self, a: Any) -> Any:  # type: ignore
        """return `self * a` as a plain float"""
        return float(self) * a

    def __ipow__(self, a: Any) -> Any:  # type: ignore
        """return `self ** a`, computed on the plain float value"""
        return float(self) ** a

    def __isub__(self, a: Any) -> Any:  # type: ignore
        """return `self - a` as a plain float"""
        return float(self) - a

    @property
    def anchor(self) -> Any:
        """the Anchor instance of this value, created on first access"""
        if not hasattr(self, Anchor.attrib):
            setattr(self, Anchor.attrib, Anchor())
        return getattr(self, Anchor.attrib)

    def yaml_anchor(self, any: bool = False) -> Any:
        """
        return the anchor if one was ever created and either `any` is true or
        the anchor is marked `always_dump`; otherwise return None
        """
        if not hasattr(self, Anchor.attrib):
            return None
        if any or self.anchor.always_dump:
            return self.anchor
        return None

    def yaml_set_anchor(self, value: Any, always_dump: bool = False) -> None:
        """set the anchor value and its `always_dump` flag"""
        self.anchor.value = value
        self.anchor.always_dump = always_dump

    def dump(self, out: Any = sys.stdout) -> None:
        """write a one-line debug representation of value and attributes to `out`"""
        out.write(
            f'ScalarFloat({self}| w:{self._width}, p:{self._prec}, '  # type: ignore
            f's:{self._m_sign}, lz:{self._m_lead0}, _:{self._underscore}|{self._exp}'
            f', w:{self._e_width}, s:{self._e_sign})\n',
        )
NOQA + return x + + def __isub__(self, a: Any) -> Any: # type: ignore + x = type(self)(self - a) + x._width = self._width # type: ignore + x._underscore = ( # type: ignore + self._underscore[:] if self._underscore is not None else None # type: ignore + ) # NOQA + return x + + @property + def anchor(self) -> Any: + if not hasattr(self, Anchor.attrib): + setattr(self, Anchor.attrib, Anchor()) + return getattr(self, Anchor.attrib) + + def yaml_anchor(self, any: bool = False) -> Any: + if not hasattr(self, Anchor.attrib): + return None + if any or self.anchor.always_dump: + return self.anchor + return None + + def yaml_set_anchor(self, value: Any, always_dump: bool = False) -> None: + self.anchor.value = value + self.anchor.always_dump = always_dump + + +class BinaryInt(ScalarInt): + def __new__( + cls, value: Any, width: Any = None, underscore: Any = None, anchor: Any = None, + ) -> Any: + return ScalarInt.__new__(cls, value, width=width, underscore=underscore, anchor=anchor) + + +class OctalInt(ScalarInt): + def __new__( + cls, value: Any, width: Any = None, underscore: Any = None, anchor: Any = None, + ) -> Any: + return ScalarInt.__new__(cls, value, width=width, underscore=underscore, anchor=anchor) + + +# mixed casing of A-F is not supported, when loading the first non digit +# determines the case + + +class HexInt(ScalarInt): + """uses lower case (a-f)""" + + def __new__( + cls, value: Any, width: Any = None, underscore: Any = None, anchor: Any = None, + ) -> Any: + return ScalarInt.__new__(cls, value, width=width, underscore=underscore, anchor=anchor) + + +class HexCapsInt(ScalarInt): + """uses upper case (A-F)""" + + def __new__( + cls, value: Any, width: Any = None, underscore: Any = None, anchor: Any = None, + ) -> Any: + return ScalarInt.__new__(cls, value, width=width, underscore=underscore, anchor=anchor) + + +class DecimalInt(ScalarInt): + """needed if anchor""" + + def __new__( + cls, value: Any, width: Any = None, underscore: Any = None, anchor: Any = 
None, + ) -> Any: + return ScalarInt.__new__(cls, value, width=width, underscore=underscore, anchor=anchor) diff --git a/scalarstring.py b/scalarstring.py new file mode 100644 index 0000000..30f4fde --- /dev/null +++ b/scalarstring.py @@ -0,0 +1,140 @@ +# coding: utf-8 + +from ruamel.yaml.anchor import Anchor + +from typing import Text, Any, Dict, List # NOQA +from ruamel.yaml.compat import SupportsIndex + +__all__ = [ + 'ScalarString', + 'LiteralScalarString', + 'FoldedScalarString', + 'SingleQuotedScalarString', + 'DoubleQuotedScalarString', + 'PlainScalarString', + # PreservedScalarString is the old name, as it was the first to be preserved on rt, + # use LiteralScalarString instead + 'PreservedScalarString', +] + + +class ScalarString(str): + __slots__ = Anchor.attrib + + def __new__(cls, *args: Any, **kw: Any) -> Any: + anchor = kw.pop('anchor', None) + ret_val = str.__new__(cls, *args, **kw) + if anchor is not None: + ret_val.yaml_set_anchor(anchor, always_dump=True) + return ret_val + + def replace(self, old: Any, new: Any, maxreplace: SupportsIndex = -1) -> Any: + return type(self)((str.replace(self, old, new, maxreplace))) + + @property + def anchor(self) -> Any: + if not hasattr(self, Anchor.attrib): + setattr(self, Anchor.attrib, Anchor()) + return getattr(self, Anchor.attrib) + + def yaml_anchor(self, any: bool = False) -> Any: + if not hasattr(self, Anchor.attrib): + return None + if any or self.anchor.always_dump: + return self.anchor + return None + + def yaml_set_anchor(self, value: Any, always_dump: bool = False) -> None: + self.anchor.value = value + self.anchor.always_dump = always_dump + + +class LiteralScalarString(ScalarString): + __slots__ = 'comment' # the comment after the | on the first line + + style = '|' + + def __new__(cls, value: Text, anchor: Any = None) -> Any: + return ScalarString.__new__(cls, value, anchor=anchor) + + +PreservedScalarString = LiteralScalarString + + +class FoldedScalarString(ScalarString): + __slots__ = 
('fold_pos', 'comment') # the comment after the > on the first line + + style = '>' + + def __new__(cls, value: Text, anchor: Any = None) -> Any: + return ScalarString.__new__(cls, value, anchor=anchor) + + +class SingleQuotedScalarString(ScalarString): + __slots__ = () + + style = "'" + + def __new__(cls, value: Text, anchor: Any = None) -> Any: + return ScalarString.__new__(cls, value, anchor=anchor) + + +class DoubleQuotedScalarString(ScalarString): + __slots__ = () + + style = '"' + + def __new__(cls, value: Text, anchor: Any = None) -> Any: + return ScalarString.__new__(cls, value, anchor=anchor) + + +class PlainScalarString(ScalarString): + __slots__ = () + + style = '' + + def __new__(cls, value: Text, anchor: Any = None) -> Any: + return ScalarString.__new__(cls, value, anchor=anchor) + + +def preserve_literal(s: Text) -> Text: + return LiteralScalarString(s.replace('\r\n', '\n').replace('\r', '\n')) + + +def walk_tree(base: Any, map: Any = None) -> None: + """ + the routine here walks over a simple yaml tree (recursing in + dict values and list items) and converts strings that + have multiple lines to literal scalars + + You can also provide an explicit (ordered) mapping for multiple transforms + (first of which is executed): + map = ruamel.yaml.compat.ordereddict + map['\n'] = preserve_literal + map[':'] = SingleQuotedScalarString + walk_tree(data, map=map) + """ + from collections.abc import MutableMapping, MutableSequence + + if map is None: + map = {'\n': preserve_literal} + + if isinstance(base, MutableMapping): + for k in base: + v: Text = base[k] + if isinstance(v, str): + for ch in map: + if ch in v: + base[k] = map[ch](v) + break + else: + walk_tree(v, map=map) + elif isinstance(base, MutableSequence): + for idx, elem in enumerate(base): + if isinstance(elem, str): + for ch in map: + if ch in elem: + base[idx] = map[ch](elem) + break + else: + walk_tree(elem, map=map) diff --git a/scanner.py b/scanner.py new file mode 100644 index 0000000..65d9a77 
--- /dev/null +++ b/scanner.py @@ -0,0 +1,2363 @@ +# coding: utf-8 + +# Scanner produces tokens of the following types: +# STREAM-START +# STREAM-END +# DIRECTIVE(name, value) +# DOCUMENT-START +# DOCUMENT-END +# BLOCK-SEQUENCE-START +# BLOCK-MAPPING-START +# BLOCK-END +# FLOW-SEQUENCE-START +# FLOW-MAPPING-START +# FLOW-SEQUENCE-END +# FLOW-MAPPING-END +# BLOCK-ENTRY +# FLOW-ENTRY +# KEY +# VALUE +# ALIAS(value) +# ANCHOR(value) +# TAG(value) +# SCALAR(value, plain, style) +# +# RoundTripScanner +# COMMENT(value) +# +# Read comments in the Scanner code for more details. +# + +import inspect +from ruamel.yaml.error import MarkedYAMLError, CommentMark # NOQA +from ruamel.yaml.tokens import * # NOQA +from ruamel.yaml.docinfo import Version, Tag # NOQA +from ruamel.yaml.compat import check_anchorname_char, nprint, nprintf # NOQA + +from typing import Any, Dict, Optional, List, Union, Text, Tuple # NOQA + +__all__ = ['Scanner', 'RoundTripScanner', 'ScannerError'] + + +_THE_END = '\n\0\r\x85\u2028\u2029' +_THE_END_SPACE_TAB = ' \n\0\t\r\x85\u2028\u2029' +_SPACE_TAB = ' \t' + + +def xprintf(*args: Any, **kw: Any) -> Any: + return nprintf(*args, **kw) + pass + + +class ScannerError(MarkedYAMLError): + pass + + +class SimpleKey: + # See below simple keys treatment. + + def __init__( + self, token_number: Any, required: Any, index: int, line: int, column: int, mark: Any, + ) -> None: + self.token_number = token_number + self.required = required + self.index = index + self.line = line + self.column = column + self.mark = mark + + +class Scanner: + def __init__(self, loader: Any = None) -> None: + """Initialize the scanner.""" + # It is assumed that Scanner and Reader will have a common descendant. + # Reader do the dirty work of checking for BOM and converting the + # input data to Unicode. It also adds NUL to the end. 
+ # + # Reader supports the following methods + # self.peek(i=0) # peek the next i-th character + # self.prefix(l=1) # peek the next l characters + # self.forward(l=1) # read the next l characters and move the pointer + + self.loader = loader + if self.loader is not None and getattr(self.loader, '_scanner', None) is None: + self.loader._scanner = self + self.reset_scanner() + self.first_time = False + + @property + def flow_level(self) -> int: + return len(self.flow_context) + + def reset_scanner(self) -> None: + # Had we reached the end of the stream? + self.done = False + + # flow_context is an expanding/shrinking list consisting of '{' and '[' + # for each unclosed flow context. If empty list that means block context + self.flow_context: List[Text] = [] + + # List of processed tokens that are not yet emitted. + self.tokens: List[Any] = [] + + # Add the STREAM-START token. + self.fetch_stream_start() + + # Number of tokens that were emitted through the `get_token` method. + self.tokens_taken = 0 + + # The current indentation level. + self.indent = -1 + + # Past indentation levels. + self.indents: List[int] = [] + + # Variables related to simple keys treatment. + + # A simple key is a key that is not denoted by the '?' indicator. + # Example of simple keys: + # --- + # block simple key: value + # ? not a simple key: + # : { flow simple key: value } + # We emit the KEY token before all keys, so when we find a potential + # simple key, we try to locate the corresponding ':' indicator. + # Simple keys should be limited to a single line and 1024 characters. + + # Can a simple key start at the current position? A simple key may + # start: + # - at the beginning of the line, not counting indentation spaces + # (in block context), + # - after '{', '[', ',' (in the flow context), + # - after '?', ':', '-' (in the block context). + # In the block context, this flag also signifies if a block collection + # may start at the current position. 
+ self.allow_simple_key = True + + # Keep track of possible simple keys. This is a dictionary. The key + # is `flow_level`; there can be no more than one possible simple key + # for each level. The value is a SimpleKey record: + # (token_number, required, index, line, column, mark) + # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow), + # '[', or '{' tokens. + self.possible_simple_keys: Dict[Any, Any] = {} + self.yaml_version: Any = None + self.tag_directives: List[Tuple[Any, Any]] = [] + + @property + def reader(self) -> Any: + try: + return self._scanner_reader # type: ignore + except AttributeError: + if hasattr(self.loader, 'typ'): + self._scanner_reader = self.loader.reader + else: + self._scanner_reader = self.loader._reader + return self._scanner_reader + + @property + def scanner_processing_version(self) -> Any: # prefix until un-composited + if hasattr(self.loader, 'typ'): + return self.loader.resolver.processing_version + return self.loader.processing_version + + # Public methods. + + def check_token(self, *choices: Any) -> bool: + # Check if the next token is one of the given types. + while self.need_more_tokens(): + self.fetch_more_tokens() + if len(self.tokens) > 0: + if not choices: + return True + for choice in choices: + if isinstance(self.tokens[0], choice): + return True + return False + + def peek_token(self) -> Any: + # Return the next token, but do not delete it from the queue. + while self.need_more_tokens(): + self.fetch_more_tokens() + if len(self.tokens) > 0: + return self.tokens[0] + + def get_token(self) -> Any: + # Return the next token. + while self.need_more_tokens(): + self.fetch_more_tokens() + if len(self.tokens) > 0: + self.tokens_taken += 1 + return self.tokens.pop(0) + + # Private methods. + + def need_more_tokens(self) -> bool: + if self.done: + return False + if len(self.tokens) == 0: + return True + # The current token may be a potential simple key, so we + # need to look further. 
+ self.stale_possible_simple_keys() + if self.next_possible_simple_key() == self.tokens_taken: + return True + return False + + def fetch_comment(self, comment: Any) -> None: + raise NotImplementedError + + def fetch_more_tokens(self) -> Any: + # Eat whitespaces and comments until we reach the next token. + comment = self.scan_to_next_token() + if comment is not None: # never happens for base scanner + return self.fetch_comment(comment) + # Remove obsolete possible simple keys. + self.stale_possible_simple_keys() + + # Compare the current indentation and column. It may add some tokens + # and decrease the current indentation level. + self.unwind_indent(self.reader.column) + + # Peek the next character. + ch = self.reader.peek() + + # Is it the end of stream? + if ch == '\0': + return self.fetch_stream_end() + + # Is it a directive? + if ch == '%' and self.check_directive(): + return self.fetch_directive() + + # Is it the document start? + if ch == '-' and self.check_document_start(): + return self.fetch_document_start() + + # Is it the document end? + if ch == '.' and self.check_document_end(): + return self.fetch_document_end() + + # TODO: support for BOM within a stream. + # if ch == '\uFEFF': + # return self.fetch_bom() <-- issue BOMToken + + # Note: the order of the following checks is NOT significant. + + # Is it the flow sequence start indicator? + if ch == '[': + return self.fetch_flow_sequence_start() + + # Is it the flow mapping start indicator? + if ch == '{': + return self.fetch_flow_mapping_start() + + # Is it the flow sequence end indicator? + if ch == ']': + return self.fetch_flow_sequence_end() + + # Is it the flow mapping end indicator? + if ch == '}': + return self.fetch_flow_mapping_end() + + # Is it the flow entry indicator? + if ch == ',': + return self.fetch_flow_entry() + + # Is it the block entry indicator? + if ch == '-' and self.check_block_entry(): + return self.fetch_block_entry() + + # Is it the key indicator? + if ch == '?' 
and self.check_key(): + return self.fetch_key() + + # Is it the value indicator? + if ch == ':' and self.check_value(): + return self.fetch_value() + + # Is it an alias? + if ch == '*': + return self.fetch_alias() + + # Is it an anchor? + if ch == '&': + return self.fetch_anchor() + + # Is it a tag? + if ch == '!': + return self.fetch_tag() + + # Is it a literal scalar? + if ch == '|' and not self.flow_level: + return self.fetch_literal() + + # Is it a folded scalar? + if ch == '>' and not self.flow_level: + return self.fetch_folded() + + # Is it a single quoted scalar? + if ch == "'": + return self.fetch_single() + + # Is it a double quoted scalar? + if ch == '"': + return self.fetch_double() + + # It must be a plain scalar then. + if self.check_plain(): + return self.fetch_plain() + + # No? It's an error. Let's produce a nice error message. + raise ScannerError( + 'while scanning for the next token', + None, + f'found character {ch!r} that cannot start any token', + self.reader.get_mark(), + ) + + # Simple keys treatment. + + def next_possible_simple_key(self) -> Any: + # Return the number of the nearest possible simple key. Actually we + # don't need to loop through the whole dictionary. We may replace it + # with the following code: + # if not self.possible_simple_keys: + # return None + # return self.possible_simple_keys[ + # min(self.possible_simple_keys.keys())].token_number + min_token_number = None + for level in self.possible_simple_keys: + key = self.possible_simple_keys[level] + if min_token_number is None or key.token_number < min_token_number: + min_token_number = key.token_number + return min_token_number + + def stale_possible_simple_keys(self) -> None: + # Remove entries that are no longer possible simple keys. According to + # the YAML specification, simple keys + # - should be limited to a single line, + # - should be no longer than 1024 characters. 
+ # Disabling this procedure will allow simple keys of any length and + # height (may cause problems if indentation is broken though). + for level in list(self.possible_simple_keys): + key = self.possible_simple_keys[level] + if key.line != self.reader.line or self.reader.index - key.index > 1024: + if key.required: + raise ScannerError( + 'while scanning a simple key', + key.mark, + "could not find expected ':'", + self.reader.get_mark(), + ) + del self.possible_simple_keys[level] + + def save_possible_simple_key(self) -> None: + # The next token may start a simple key. We check if it's possible + # and save its position. This function is called for + # ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'. + + # Check if a simple key is required at the current position. + required = not self.flow_level and self.indent == self.reader.column + + # The next token might be a simple key. Let's save it's number and + # position. + if self.allow_simple_key: + self.remove_possible_simple_key() + token_number = self.tokens_taken + len(self.tokens) + key = SimpleKey( + token_number, + required, + self.reader.index, + self.reader.line, + self.reader.column, + self.reader.get_mark(), + ) + self.possible_simple_keys[self.flow_level] = key + + def remove_possible_simple_key(self) -> None: + # Remove the saved possible key position at the current flow level. + if self.flow_level in self.possible_simple_keys: + key = self.possible_simple_keys[self.flow_level] + + if key.required: + raise ScannerError( + 'while scanning a simple key', + key.mark, + "could not find expected ':'", + self.reader.get_mark(), + ) + + del self.possible_simple_keys[self.flow_level] + + # Indentation functions. + + def unwind_indent(self, column: Any) -> None: + # In flow context, tokens should respect indentation. + # Actually the condition should be `self.indent >= column` according to + # the spec. 
But this condition will prohibit intuitively correct + # constructions such as + # key : { + # } + # #### + # if self.flow_level and self.indent > column: + # raise ScannerError(None, None, + # "invalid intendation or unclosed '[' or '{'", + # self.reader.get_mark()) + + # In the flow context, indentation is ignored. We make the scanner less + # restrictive than the specification requires. + if bool(self.flow_level): + return + + # In block context, we may need to issue the BLOCK-END tokens. + while self.indent > column: + mark = self.reader.get_mark() + self.indent = self.indents.pop() + self.tokens.append(BlockEndToken(mark, mark)) + + def add_indent(self, column: int) -> bool: + # Check if we need to increase indentation. + if self.indent < column: + self.indents.append(self.indent) + self.indent = column + return True + return False + + # Fetchers. + + def fetch_stream_start(self) -> None: + # We always add STREAM-START as the first token and STREAM-END as the + # last token. + # Read the token. + mark = self.reader.get_mark() + # Add STREAM-START. + self.tokens.append(StreamStartToken(mark, mark, encoding=self.reader.encoding)) + + def fetch_stream_end(self) -> None: + # Set the current indentation to -1. + self.unwind_indent(-1) + # Reset simple keys. + self.remove_possible_simple_key() + self.allow_simple_key = False + self.possible_simple_keys = {} + # Read the token. + mark = self.reader.get_mark() + # Add STREAM-END. + self.tokens.append(StreamEndToken(mark, mark)) + # The stream is finished. + self.done = True + + def fetch_directive(self) -> None: + # Set the current indentation to -1. + self.unwind_indent(-1) + + # Reset simple keys. + self.remove_possible_simple_key() + self.allow_simple_key = False + + # Scan and add DIRECTIVE. 
+ self.tokens.append(self.scan_directive()) + + def fetch_document_start(self) -> None: + self.fetch_document_indicator(DocumentStartToken) + + def fetch_document_end(self) -> None: + self.fetch_document_indicator(DocumentEndToken) + + def fetch_document_indicator(self, TokenClass: Any) -> None: + # Set the current indentation to -1. + self.unwind_indent(-1) + + # Reset simple keys. Note that there could not be a block collection + # after '---'. + self.remove_possible_simple_key() + self.allow_simple_key = False + + # Add DOCUMENT-START or DOCUMENT-END. + start_mark = self.reader.get_mark() + self.reader.forward(3) + end_mark = self.reader.get_mark() + self.tokens.append(TokenClass(start_mark, end_mark)) + + def fetch_flow_sequence_start(self) -> None: + self.fetch_flow_collection_start(FlowSequenceStartToken, to_push='[') + + def fetch_flow_mapping_start(self) -> None: + self.fetch_flow_collection_start(FlowMappingStartToken, to_push='{') + + def fetch_flow_collection_start(self, TokenClass: Any, to_push: Text) -> None: + # '[' and '{' may start a simple key. + self.save_possible_simple_key() + # Increase the flow level. + self.flow_context.append(to_push) + # Simple keys are allowed after '[' and '{'. + self.allow_simple_key = True + # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START. + start_mark = self.reader.get_mark() + self.reader.forward() + end_mark = self.reader.get_mark() + self.tokens.append(TokenClass(start_mark, end_mark)) + + def fetch_flow_sequence_end(self) -> None: + self.fetch_flow_collection_end(FlowSequenceEndToken) + + def fetch_flow_mapping_end(self) -> None: + self.fetch_flow_collection_end(FlowMappingEndToken) + + def fetch_flow_collection_end(self, TokenClass: Any) -> None: + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + # Decrease the flow level. + try: + popped = self.flow_context.pop() # NOQA + except IndexError: + # We must not be in a list or object. + # Defer error handling to the parser. 
+ pass + # No simple keys after ']' or '}'. + self.allow_simple_key = False + # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END. + start_mark = self.reader.get_mark() + self.reader.forward() + end_mark = self.reader.get_mark() + self.tokens.append(TokenClass(start_mark, end_mark)) + + def fetch_flow_entry(self) -> None: + # Simple keys are allowed after ','. + self.allow_simple_key = True + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + # Add FLOW-ENTRY. + start_mark = self.reader.get_mark() + self.reader.forward() + end_mark = self.reader.get_mark() + self.tokens.append(FlowEntryToken(start_mark, end_mark)) + + def fetch_block_entry(self) -> None: + # Block context needs additional checks. + if not self.flow_level: + # Are we allowed to start a new entry? + if not self.allow_simple_key: + raise ScannerError( + None, + None, + 'sequence entries are not allowed here', + self.reader.get_mark(), + ) + # We may need to add BLOCK-SEQUENCE-START. + if self.add_indent(self.reader.column): + mark = self.reader.get_mark() + self.tokens.append(BlockSequenceStartToken(mark, mark)) + # It's an error for the block entry to occur in the flow context, + # but we let the parser detect this. + else: + pass + # Simple keys are allowed after '-'. + self.allow_simple_key = True + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add BLOCK-ENTRY. + start_mark = self.reader.get_mark() + self.reader.forward() + end_mark = self.reader.get_mark() + self.tokens.append(BlockEntryToken(start_mark, end_mark)) + + def fetch_key(self) -> None: + # Block context needs additional checks. + if not self.flow_level: + + # Are we allowed to start a key (not necessarily a simple one)? + if not self.allow_simple_key: + raise ScannerError( + None, None, 'mapping keys are not allowed here', self.reader.get_mark(), + ) + + # We may need to add BLOCK-MAPPING-START. 
+ if self.add_indent(self.reader.column): + mark = self.reader.get_mark() + self.tokens.append(BlockMappingStartToken(mark, mark)) + + # Simple keys are allowed after '?' in the block context. + self.allow_simple_key = not self.flow_level + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add KEY. + start_mark = self.reader.get_mark() + self.reader.forward() + end_mark = self.reader.get_mark() + self.tokens.append(KeyToken(start_mark, end_mark)) + + def fetch_value(self) -> None: + # Do we determine a simple key? + if self.flow_level in self.possible_simple_keys: + # Add KEY. + key = self.possible_simple_keys[self.flow_level] + del self.possible_simple_keys[self.flow_level] + self.tokens.insert( + key.token_number - self.tokens_taken, KeyToken(key.mark, key.mark), + ) + + # If this key starts a new block mapping, we need to add + # BLOCK-MAPPING-START. + if not self.flow_level: + if self.add_indent(key.column): + self.tokens.insert( + key.token_number - self.tokens_taken, + BlockMappingStartToken(key.mark, key.mark), + ) + + # There cannot be two simple keys one after another. + self.allow_simple_key = False + + # It must be a part of a complex key. + else: + + # Block context needs additional checks. + # (Do we really need them? They will be caught by the parser + # anyway.) + if not self.flow_level: + + # We are allowed to start a complex value if and only if + # we can start a simple key. + if not self.allow_simple_key: + raise ScannerError( + None, + None, + 'mapping values are not allowed here', + self.reader.get_mark(), + ) + + # If this value starts a new block mapping, we need to add + # BLOCK-MAPPING-START. It will be detected as an error later by + # the parser. + if not self.flow_level: + if self.add_indent(self.reader.column): + mark = self.reader.get_mark() + self.tokens.append(BlockMappingStartToken(mark, mark)) + + # Simple keys are allowed after ':' in the block context. 
+ self.allow_simple_key = not self.flow_level + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add VALUE. + start_mark = self.reader.get_mark() + self.reader.forward() + end_mark = self.reader.get_mark() + self.tokens.append(ValueToken(start_mark, end_mark)) + + def fetch_alias(self) -> None: + # ALIAS could be a simple key. + self.save_possible_simple_key() + # No simple keys after ALIAS. + self.allow_simple_key = False + # Scan and add ALIAS. + self.tokens.append(self.scan_anchor(AliasToken)) + + def fetch_anchor(self) -> None: + # ANCHOR could start a simple key. + self.save_possible_simple_key() + # No simple keys after ANCHOR. + self.allow_simple_key = False + # Scan and add ANCHOR. + self.tokens.append(self.scan_anchor(AnchorToken)) + + def fetch_tag(self) -> None: + # TAG could start a simple key. + self.save_possible_simple_key() + # No simple keys after TAG. + self.allow_simple_key = False + # Scan and add TAG. + self.tokens.append(self.scan_tag()) + + def fetch_literal(self) -> None: + self.fetch_block_scalar(style='|') + + def fetch_folded(self) -> None: + self.fetch_block_scalar(style='>') + + def fetch_block_scalar(self, style: Any) -> None: + # A simple key may follow a block scalar. + self.allow_simple_key = True + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + # Scan and add SCALAR. + self.tokens.append(self.scan_block_scalar(style)) + + def fetch_single(self) -> None: + self.fetch_flow_scalar(style="'") + + def fetch_double(self) -> None: + self.fetch_flow_scalar(style='"') + + def fetch_flow_scalar(self, style: Any) -> None: + # A flow scalar could be a simple key. + self.save_possible_simple_key() + # No simple keys after flow scalars. + self.allow_simple_key = False + # Scan and add SCALAR. + self.tokens.append(self.scan_flow_scalar(style)) + + def fetch_plain(self) -> None: + # A plain scalar could be a simple key. 
+ self.save_possible_simple_key() + # No simple keys after plain scalars. But note that `scan_plain` will + # change this flag if the scan is finished at the beginning of the + # line. + self.allow_simple_key = False + # Scan and add SCALAR. May change `allow_simple_key`. + self.tokens.append(self.scan_plain()) + + # Checkers. + + def check_directive(self) -> Any: + # DIRECTIVE: ^ '%' ... + # The '%' indicator is already checked. + if self.reader.column == 0: + return True + return None + + def check_document_start(self) -> Any: + # DOCUMENT-START: ^ '---' (' '|'\n') + if self.reader.column == 0: + if self.reader.prefix(3) == '---' and self.reader.peek(3) in _THE_END_SPACE_TAB: + return True + return None + + def check_document_end(self) -> Any: + # DOCUMENT-END: ^ '...' (' '|'\n') + if self.reader.column == 0: + if self.reader.prefix(3) == '...' and self.reader.peek(3) in _THE_END_SPACE_TAB: + return True + return None + + def check_block_entry(self) -> Any: + # BLOCK-ENTRY: '-' (' '|'\n') + return self.reader.peek(1) in _THE_END_SPACE_TAB + + def check_key(self) -> Any: + # KEY(flow context): '?' + if bool(self.flow_level): + return True + # KEY(block context): '?' 
(' '|'\n') + return self.reader.peek(1) in _THE_END_SPACE_TAB + + def check_value(self) -> Any: + # VALUE(flow context): ':' + if self.scanner_processing_version == (1, 1): + if bool(self.flow_level): + return True + else: + if bool(self.flow_level): + if self.flow_context[-1] == '[': + if self.reader.peek(1) not in _THE_END_SPACE_TAB: + return False + elif self.tokens and isinstance(self.tokens[-1], ValueToken): + # mapping flow context scanning a value token + if self.reader.peek(1) not in _THE_END_SPACE_TAB: + return False + return True + # VALUE(block context): ':' (' '|'\n') + return self.reader.peek(1) in _THE_END_SPACE_TAB + + def check_plain(self) -> Any: + # A plain scalar may start with any non-space character except: + # '-', '?', ':', ',', '[', ']', '{', '}', + # '#', '&', '*', '!', '|', '>', '\'', '\"', + # '%', '@', '`'. + # + # It may also start with + # '-', '?', ':' + # if it is followed by a non-space character. + # + # Note that we limit the last rule to the block context (except the + # '-' character) because we want the flow context to be space + # independent. + srp = self.reader.peek + ch = srp() + if self.scanner_processing_version == (1, 1): + return ch not in '\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'"%@`' or ( + srp(1) not in _THE_END_SPACE_TAB + and (ch == '-' or (not self.flow_level and ch in '?:')) + ) + # YAML 1.2 + if ch not in '\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'"%@`': + # ################### ^ ??? + return True + ch1 = srp(1) + if ch == '-' and ch1 not in _THE_END_SPACE_TAB: + return True + if ch == ':' and bool(self.flow_level) and ch1 not in _SPACE_TAB: + return True + + return srp(1) not in _THE_END_SPACE_TAB and ( + ch == '-' or (not self.flow_level and ch in '?:') + ) + + # Scanners. + + def scan_to_next_token(self) -> Any: + # We ignore spaces, line breaks and comments. + # If we find a line break in the block context, we set the flag + # `allow_simple_key` on. 
+ # The byte order mark is stripped if it's the first character in the + # stream. We do not yet support BOM inside the stream as the + # specification requires. Any such mark will be considered as a part + # of the document. + # + # TODO: We need to make tab handling rules more sane. A good rule is + # Tabs cannot precede tokens + # BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END, + # KEY(block), VALUE(block), BLOCK-ENTRY + # So the checking code is + # if <TAB>: + # self.allow_simple_keys = False + # We also need to add the check for `allow_simple_keys == True` to + # `unwind_indent` before issuing BLOCK-END. + # Scanners for block, flow, and plain scalars need to be modified. + srp = self.reader.peek + srf = self.reader.forward + if self.reader.index == 0 and srp() == '\uFEFF': + srf() + found = False + _the_end = _THE_END + white_space = ' \t' if self.flow_level > 0 else ' ' + while not found: + while srp() in white_space: + srf() + if srp() == '#': + while srp() not in _the_end: + srf() + if self.scan_line_break(): + if not self.flow_level: + self.allow_simple_key = True + else: + found = True + return None + + def scan_directive(self) -> Any: + # See the specification for details. + srp = self.reader.peek + srf = self.reader.forward + start_mark = self.reader.get_mark() + srf() + name = self.scan_directive_name(start_mark) + value = None + if name == 'YAML': + value = self.scan_yaml_directive_value(start_mark) + end_mark = self.reader.get_mark() + elif name == 'TAG': + value = self.scan_tag_directive_value(start_mark) + end_mark = self.reader.get_mark() + else: + end_mark = self.reader.get_mark() + while srp() not in _THE_END: + srf() + self.scan_directive_ignored_line(start_mark) + return DirectiveToken(name, value, start_mark, end_mark) + + def scan_directive_name(self, start_mark: Any) -> Any: + # See the specification for details. 
+ length = 0 + srp = self.reader.peek + ch = srp(length) + while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' or ch in '-_:.': + length += 1 + ch = srp(length) + if not length: + raise ScannerError( + 'while scanning a directive', + start_mark, + f'expected alphabetic or numeric character, but found {ch!r}', + self.reader.get_mark(), + ) + value = self.reader.prefix(length) + self.reader.forward(length) + ch = srp() + if ch not in '\0 \r\n\x85\u2028\u2029': + raise ScannerError( + 'while scanning a directive', + start_mark, + f'expected alphabetic or numeric character, but found {ch!r}', + self.reader.get_mark(), + ) + return value + + def scan_yaml_directive_value(self, start_mark: Any) -> Any: + # See the specification for details. + srp = self.reader.peek + srf = self.reader.forward + while srp() == ' ': + srf() + major = self.scan_yaml_directive_number(start_mark) + if srp() != '.': + raise ScannerError( + 'while scanning a directive', + start_mark, + f"expected a digit or '.', but found {srp()!r}", + self.reader.get_mark(), + ) + srf() + minor = self.scan_yaml_directive_number(start_mark) + if srp() not in '\0 \r\n\x85\u2028\u2029': + raise ScannerError( + 'while scanning a directive', + start_mark, + f"expected a digit or '.', but found {srp()!r}", + self.reader.get_mark(), + ) + self.yaml_version = (major, minor) + self.loader.doc_infos[-1].doc_version = Version(major, minor) + return self.yaml_version + + def scan_yaml_directive_number(self, start_mark: Any) -> Any: + # See the specification for details. 
+ srp = self.reader.peek + srf = self.reader.forward + ch = srp() + if not ('0' <= ch <= '9'): + raise ScannerError( + 'while scanning a directive', + start_mark, + f'expected a digit, but found {ch!r}', + self.reader.get_mark(), + ) + length = 0 + while '0' <= srp(length) <= '9': + length += 1 + value = int(self.reader.prefix(length)) + srf(length) + return value + + def scan_tag_directive_value(self, start_mark: Any) -> Any: + # See the specification for details. + srp = self.reader.peek + srf = self.reader.forward + while srp() == ' ': + srf() + handle = self.scan_tag_directive_handle(start_mark) + while srp() == ' ': + srf() + prefix = self.scan_tag_directive_prefix(start_mark) + ret_val = (handle, prefix) + self.tag_directives.append(ret_val) + return ret_val + + def scan_tag_directive_handle(self, start_mark: Any) -> Any: + # See the specification for details. + value = self.scan_tag_handle('directive', start_mark) + ch = self.reader.peek() + if ch != ' ': + raise ScannerError( + 'while scanning a directive', + start_mark, + f"expected ' ', but found {ch!r}", + self.reader.get_mark(), + ) + return value + + def scan_tag_directive_prefix(self, start_mark: Any) -> Any: + # See the specification for details. + value = self.scan_tag_uri('directive', start_mark) + ch = self.reader.peek() + if ch not in '\0 \r\n\x85\u2028\u2029': + raise ScannerError( + 'while scanning a directive', + start_mark, + f"expected ' ', but found {ch!r}", + self.reader.get_mark(), + ) + return value + + def scan_directive_ignored_line(self, start_mark: Any) -> None: + # See the specification for details. 
+ srp = self.reader.peek + srf = self.reader.forward + while srp() == ' ': + srf() + if srp() == '#': + while srp() not in _THE_END: + srf() + ch = srp() + if ch not in _THE_END: + raise ScannerError( + 'while scanning a directive', + start_mark, + f'expected a comment or a line break, but found {ch!r}', + self.reader.get_mark(), + ) + self.scan_line_break() + + def scan_anchor(self, TokenClass: Any) -> Any: + # The specification does not restrict characters for anchors and + # aliases. This may lead to problems, for instance, the document: + # [ *alias, value ] + # can be interpteted in two ways, as + # [ "value" ] + # and + # [ *alias , "value" ] + # Therefore we restrict aliases to numbers and ASCII letters. + srp = self.reader.peek + start_mark = self.reader.get_mark() + indicator = srp() + if indicator == '*': + name = 'alias' + else: + name = 'anchor' + self.reader.forward() + length = 0 + ch = srp(length) + # while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + # or ch in '-_': + while check_anchorname_char(ch): + length += 1 + ch = srp(length) + if not length: + raise ScannerError( + f'while scanning an {name!s}', + start_mark, + f'expected alphabetic or numeric character, but found {ch!r}', + self.reader.get_mark(), + ) + value = self.reader.prefix(length) + self.reader.forward(length) + # ch1 = ch + # ch = srp() # no need to peek, ch is already set + # assert ch1 == ch + if ch not in '\0 \t\r\n\x85\u2028\u2029?:,[]{}%@`': + raise ScannerError( + f'while scanning an {name!s}', + start_mark, + f'expected alphabetic or numeric character, but found {ch!r}', + self.reader.get_mark(), + ) + end_mark = self.reader.get_mark() + return TokenClass(value, start_mark, end_mark) + + def scan_tag(self) -> Any: + # See the specification for details. + srp = self.reader.peek + start_mark = self.reader.get_mark() + ch = srp(1) + short_handle = '!' + if ch == '!': + short_handle = '!!' 
+ self.reader.forward() + srp = self.reader.peek + ch = srp(1) + + if ch == '<': + handle = None + self.reader.forward(2) + suffix = self.scan_tag_uri('tag', start_mark) + if srp() != '>': + raise ScannerError( + 'while parsing a tag', + start_mark, + f"expected '>' but found {srp()!r}", + self.reader.get_mark(), + ) + self.reader.forward() + elif ch in _THE_END_SPACE_TAB: + handle = None + suffix = short_handle + self.reader.forward() + else: + length = 1 + use_handle = False + while ch not in '\0 \r\n\x85\u2028\u2029': + if ch == '!': + use_handle = True + break + length += 1 + ch = srp(length) + handle = short_handle + if use_handle: + handle = self.scan_tag_handle('tag', start_mark) + else: + handle = short_handle + self.reader.forward() + suffix = self.scan_tag_uri('tag', start_mark) + ch = srp() + if ch not in '\0 \r\n\x85\u2028\u2029': + raise ScannerError( + 'while scanning a tag', + start_mark, + f"expected ' ', but found {ch!r}", + self.reader.get_mark(), + ) + value = (handle, suffix) + end_mark = self.reader.get_mark() + return TagToken(value, start_mark, end_mark) + + def scan_block_scalar(self, style: Any, rt: Optional[bool] = False) -> Any: + # See the specification for details. + srp = self.reader.peek + if style == '>': + folded = True + else: + folded = False + + chunks: List[Any] = [] + start_mark = self.reader.get_mark() + + # Scan the header. + self.reader.forward() + chomping, increment = self.scan_block_scalar_indicators(start_mark) + # block scalar comment e.g. : |+ # comment text + block_scalar_comment = self.scan_block_scalar_ignored_line(start_mark) + + # Determine the indentation level and go to the first non-empty line. 
+ min_indent = self.indent + 1 + if increment is None: + # no increment and top level, min_indent could be 0 + if min_indent < 1 and ( + style not in '|>' + or (self.scanner_processing_version == (1, 1)) + and getattr( + self.loader, 'top_level_block_style_scalar_no_indent_error_1_1', False, + ) + ): + min_indent = 1 + breaks, max_indent, end_mark = self.scan_block_scalar_indentation() + indent = max(min_indent, max_indent) + else: + if min_indent < 1: + min_indent = 1 + indent = min_indent + increment - 1 + breaks, end_mark = self.scan_block_scalar_breaks(indent) + line_break = "" + + # Scan the inner part of the block scalar. + while self.reader.column == indent and srp() != '\0': + chunks.extend(breaks) + leading_non_space = srp() not in ' \t' + length = 0 + while srp(length) not in _THE_END: + length += 1 + chunks.append(self.reader.prefix(length)) + self.reader.forward(length) + line_break = self.scan_line_break() + breaks, end_mark = self.scan_block_scalar_breaks(indent) + if style in '|>' and min_indent == 0: + # at the beginning of a line, if in block style see if + # end of document/start_new_document + if self.check_document_start() or self.check_document_end(): + break + if self.reader.column == indent and srp() != '\0': + + # Unfortunately, folding rules are ambiguous. + # + # This is the folding according to the specification: + + if rt and folded and line_break == '\n': + chunks.append('\a') + if folded and line_break == '\n' and leading_non_space and srp() not in ' \t': + if not breaks: + chunks.append(' ') + else: + chunks.append(line_break) + + # This is Clark Evans's interpretation (also in the spec + # examples): + # + # if folded and line_break == '\n': + # if not breaks: + # if srp() not in ' \t': + # chunks.append(' ') + # else: + # chunks.append(line_break) + # else: + # chunks.append(line_break) + else: + break + + # Process trailing line breaks. The 'chomping' setting determines + # whether they are included in the value. 
+ trailing: List[Any] = [] + if chomping in [None, True]: + chunks.append(line_break) + if chomping is True: + chunks.extend(breaks) + elif chomping in [None, False]: + trailing.extend(breaks) + + # We are done. + token = ScalarToken("".join(chunks), False, start_mark, end_mark, style) + if self.loader is not None: + comment_handler = getattr(self.loader, 'comment_handling', False) + if comment_handler is None: + if block_scalar_comment is not None: + token.add_pre_comments([block_scalar_comment]) + if len(trailing) > 0: + # Eat whitespaces and comments until we reach the next token. + if self.loader is not None: + comment_handler = getattr(self.loader, 'comment_handling', None) + if comment_handler is not None: + line = end_mark.line - len(trailing) + for x in trailing: + assert x[-1] == '\n' + self.comments.add_blank_line(x, 0, line) # type: ignore + line += 1 + comment = self.scan_to_next_token() + while comment: + trailing.append(' ' * comment[1].column + comment[0]) + comment = self.scan_to_next_token() + if self.loader is not None: + comment_handler = getattr(self.loader, 'comment_handling', False) + if comment_handler is None: + # Keep track of the trailing whitespace and following comments + # as a comment token, if isn't all included in the actual value. + comment_end_mark = self.reader.get_mark() + comment = CommentToken("".join(trailing), end_mark, comment_end_mark) + token.add_post_comment(comment) + return token + + def scan_block_scalar_indicators(self, start_mark: Any) -> Any: + # See the specification for details. 
+ srp = self.reader.peek + chomping = None + increment = None + ch = srp() + if ch in '+-': + if ch == '+': + chomping = True + else: + chomping = False + self.reader.forward() + ch = srp() + if ch in '0123456789': + increment = int(ch) + if increment == 0: + raise ScannerError( + 'while scanning a block scalar', + start_mark, + 'expected indentation indicator in the range 1-9, ' 'but found 0', + self.reader.get_mark(), + ) + self.reader.forward() + elif ch in '0123456789': + increment = int(ch) + if increment == 0: + raise ScannerError( + 'while scanning a block scalar', + start_mark, + 'expected indentation indicator in the range 1-9, ' 'but found 0', + self.reader.get_mark(), + ) + self.reader.forward() + ch = srp() + if ch in '+-': + if ch == '+': + chomping = True + else: + chomping = False + self.reader.forward() + ch = srp() + if ch not in '\0 \r\n\x85\u2028\u2029': + raise ScannerError( + 'while scanning a block scalar', + start_mark, + f'expected chomping or indentation indicators, but found {ch!r}', + self.reader.get_mark(), + ) + return chomping, increment + + def scan_block_scalar_ignored_line(self, start_mark: Any) -> Any: + # See the specification for details. + srp = self.reader.peek + srf = self.reader.forward + prefix = '' + comment = None + while srp() == ' ': + prefix += srp() + srf() + if srp() == '#': + comment = prefix + while srp() not in _THE_END: + comment += srp() + srf() + ch = srp() + if ch not in _THE_END: + raise ScannerError( + 'while scanning a block scalar', + start_mark, + f'expected a comment or a line break, but found {ch!r}', + self.reader.get_mark(), + ) + self.scan_line_break() + return comment + + def scan_block_scalar_indentation(self) -> Any: + # See the specification for details. 
+ srp = self.reader.peek + srf = self.reader.forward + chunks = [] + first_indent = -1 + max_indent = 0 + end_mark = self.reader.get_mark() + while srp() in ' \r\n\x85\u2028\u2029': + if srp() != ' ': + if first_indent < 0: + first_indent = self.reader.column + chunks.append(self.scan_line_break()) + end_mark = self.reader.get_mark() + else: + srf() + if self.reader.column > max_indent: + max_indent = self.reader.column + if first_indent > 0 and max_indent > first_indent: + start_mark = self.reader.get_mark() + raise ScannerError( + 'more indented follow up line than first in a block scalar', start_mark, + ) + return chunks, max_indent, end_mark + + def scan_block_scalar_breaks(self, indent: int) -> Any: + # See the specification for details. + chunks = [] + srp = self.reader.peek + srf = self.reader.forward + end_mark = self.reader.get_mark() + while self.reader.column < indent and srp() == ' ': + srf() + while srp() in '\r\n\x85\u2028\u2029': + chunks.append(self.scan_line_break()) + end_mark = self.reader.get_mark() + while self.reader.column < indent and srp() == ' ': + srf() + return chunks, end_mark + + def scan_flow_scalar(self, style: Any) -> Any: + # See the specification for details. + # Note that we loose indentation rules for quoted scalars. Quoted + # scalars don't need to adhere indentation because " and ' clearly + # mark the beginning and the end of them. Therefore we are less + # restrictive then the specification requires. We only need to check + # that document separators are not included in scalars. 
+ if style == '"': + double = True + else: + double = False + srp = self.reader.peek + chunks: List[Any] = [] + start_mark = self.reader.get_mark() + quote = srp() + self.reader.forward() + chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) + while srp() != quote: + chunks.extend(self.scan_flow_scalar_spaces(double, start_mark)) + chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) + self.reader.forward() + end_mark = self.reader.get_mark() + return ScalarToken("".join(chunks), False, start_mark, end_mark, style) + + ESCAPE_REPLACEMENTS = { + '0': '\0', + 'a': '\x07', + 'b': '\x08', + 't': '\x09', + '\t': '\x09', + 'n': '\x0A', + 'v': '\x0B', + 'f': '\x0C', + 'r': '\x0D', + 'e': '\x1B', + ' ': '\x20', + '"': '"', + '/': '/', # as per http://www.json.org/ + '\\': '\\', + 'N': '\x85', + '_': '\xA0', + 'L': '\u2028', + 'P': '\u2029', + } + + ESCAPE_CODES = {'x': 2, 'u': 4, 'U': 8} + + def scan_flow_scalar_non_spaces(self, double: Any, start_mark: Any) -> Any: + # See the specification for details. 
+ chunks: List[Any] = [] + srp = self.reader.peek + srf = self.reader.forward + while True: + length = 0 + while srp(length) not in ' \n\'"\\\0\t\r\x85\u2028\u2029': + length += 1 + if length != 0: + chunks.append(self.reader.prefix(length)) + srf(length) + ch = srp() + if not double and ch == "'" and srp(1) == "'": + chunks.append("'") + srf(2) + elif (double and ch == "'") or (not double and ch in '"\\'): + chunks.append(ch) + srf() + elif double and ch == '\\': + srf() + ch = srp() + if ch in self.ESCAPE_REPLACEMENTS: + chunks.append(self.ESCAPE_REPLACEMENTS[ch]) + srf() + elif ch in self.ESCAPE_CODES: + length = self.ESCAPE_CODES[ch] + srf() + for k in range(length): + if srp(k) not in '0123456789ABCDEFabcdef': + raise ScannerError( + 'while scanning a double-quoted scalar', + start_mark, + f'expected escape sequence of {length:d} ' + f'hexdecimal numbers, but found {srp(k)!r}', + self.reader.get_mark(), + ) + code = int(self.reader.prefix(length), 16) + chunks.append(chr(code)) + srf(length) + elif ch in '\n\r\x85\u2028\u2029': + self.scan_line_break() + chunks.extend(self.scan_flow_scalar_breaks(double, start_mark)) + else: + raise ScannerError( + 'while scanning a double-quoted scalar', + start_mark, + f'found unknown escape character {ch!r}', + self.reader.get_mark(), + ) + else: + return chunks + + def scan_flow_scalar_spaces(self, double: Any, start_mark: Any) -> Any: + # See the specification for details. 
+ srp = self.reader.peek + chunks = [] + length = 0 + while srp(length) in ' \t': + length += 1 + whitespaces = self.reader.prefix(length) + self.reader.forward(length) + ch = srp() + if ch == '\0': + raise ScannerError( + 'while scanning a quoted scalar', + start_mark, + 'found unexpected end of stream', + self.reader.get_mark(), + ) + elif ch in '\r\n\x85\u2028\u2029': + line_break = self.scan_line_break() + breaks = self.scan_flow_scalar_breaks(double, start_mark) + if line_break != '\n': + chunks.append(line_break) + elif not breaks: + chunks.append(' ') + chunks.extend(breaks) + else: + chunks.append(whitespaces) + return chunks + + def scan_flow_scalar_breaks(self, double: Any, start_mark: Any) -> Any: + # See the specification for details. + chunks: List[Any] = [] + srp = self.reader.peek + srf = self.reader.forward + while True: + # Instead of checking indentation, we check for document + # separators. + prefix = self.reader.prefix(3) + if (prefix == '---' or prefix == '...') and srp(3) in _THE_END_SPACE_TAB: + raise ScannerError( + 'while scanning a quoted scalar', + start_mark, + 'found unexpected document separator', + self.reader.get_mark(), + ) + while srp() in ' \t': + srf() + if srp() in '\r\n\x85\u2028\u2029': + chunks.append(self.scan_line_break()) + else: + return chunks + + def scan_plain(self) -> Any: + # See the specification for details. + # We add an additional restriction for the flow context: + # plain scalars in the flow context cannot contain ',', ': ' and '?'. + # We also keep track of the `allow_simple_key` flag here. + # Indentation rules are loosed for the flow context. + srp = self.reader.peek + srf = self.reader.forward + chunks: List[Any] = [] + start_mark = self.reader.get_mark() + end_mark = start_mark + indent = self.indent + 1 + # We allow zero indentation for scalars, but then we need to check for + # document separators at the beginning of the line. 
+ # if indent == 0: + # indent = 1 + spaces: List[Any] = [] + while True: + length = 0 + if srp() == '#': + break + while True: + ch = srp(length) + if False and ch == ':' and srp(length + 1) == ',': + break + elif ch == ':' and srp(length + 1) not in _THE_END_SPACE_TAB: + pass + elif ch == '?' and self.scanner_processing_version != (1, 1): + pass + elif ( + ch in _THE_END_SPACE_TAB + or ( + not self.flow_level + and ch == ':' + and srp(length + 1) in _THE_END_SPACE_TAB + ) + or (self.flow_level and ch in ',:?[]{}') + ): + break + length += 1 + # It's not clear what we should do with ':' in the flow context. + if ( + self.flow_level + and ch == ':' + and srp(length + 1) not in '\0 \t\r\n\x85\u2028\u2029,[]{}' + ): + srf(length) + raise ScannerError( + 'while scanning a plain scalar', + start_mark, + "found unexpected ':'", + self.reader.get_mark(), + 'Please check ' + 'http://pyyaml.org/wiki/YAMLColonInFlowContext ' + 'for details.', + ) + if length == 0: + break + self.allow_simple_key = False + chunks.extend(spaces) + chunks.append(self.reader.prefix(length)) + srf(length) + end_mark = self.reader.get_mark() + spaces = self.scan_plain_spaces(indent, start_mark) + if ( + not spaces + or srp() == '#' + or (not self.flow_level and self.reader.column < indent) + ): + break + + token = ScalarToken("".join(chunks), True, start_mark, end_mark) + # getattr provides True so C type loader, which cannot handle comment, + # will not make CommentToken + if self.loader is not None: + comment_handler = getattr(self.loader, 'comment_handling', False) + if comment_handler is None: + if spaces and spaces[0] == '\n': + # Create a comment token to preserve the trailing line breaks. 
+ comment = CommentToken("".join(spaces) + '\n', start_mark, end_mark) + token.add_post_comment(comment) + elif comment_handler is not False: + line = start_mark.line + 1 + for ch in spaces: + if ch == '\n': + self.comments.add_blank_line('\n', 0, line) # type: ignore + line += 1 + + return token + + def scan_plain_spaces(self, indent: Any, start_mark: Any) -> Any: + # See the specification for details. + # The specification is really confusing about tabs in plain scalars. + # We just forbid them completely. Do not use tabs in YAML! + srp = self.reader.peek + srf = self.reader.forward + chunks = [] + length = 0 + while srp(length) in ' ': + length += 1 + whitespaces = self.reader.prefix(length) + self.reader.forward(length) + ch = srp() + if ch in '\r\n\x85\u2028\u2029': + line_break = self.scan_line_break() + self.allow_simple_key = True + prefix = self.reader.prefix(3) + if (prefix == '---' or prefix == '...') and srp(3) in _THE_END_SPACE_TAB: + return + breaks = [] + while srp() in ' \r\n\x85\u2028\u2029': + if srp() == ' ': + srf() + else: + breaks.append(self.scan_line_break()) + prefix = self.reader.prefix(3) + if (prefix == '---' or prefix == '...') and srp(3) in _THE_END_SPACE_TAB: + return + if line_break != '\n': + chunks.append(line_break) + elif not breaks: + chunks.append(' ') + chunks.extend(breaks) + elif whitespaces: + chunks.append(whitespaces) + return chunks + + def scan_tag_handle(self, name: Any, start_mark: Any) -> Any: + # See the specification for details. + # For some strange reasons, the specification does not allow '_' in + # tag handles. I have allowed it anyway. 
+ srp = self.reader.peek + ch = srp() + if ch != '!': + raise ScannerError( + f'while scanning an {name!s}', + start_mark, + f"expected '!', but found {ch!r}", + self.reader.get_mark(), + ) + length = 1 + ch = srp(length) + if ch != ' ': + while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' or ch in '-_': + length += 1 + ch = srp(length) + if ch != '!': + self.reader.forward(length) + raise ScannerError( + f'while scanning an {name!s}', + start_mark, + f"expected '!' but found {ch!r}", + self.reader.get_mark(), + ) + length += 1 + value = self.reader.prefix(length) + self.reader.forward(length) + return value + + def scan_tag_uri(self, name: Any, start_mark: Any) -> Any: + # See the specification for details. + # Note: we do not check if URI is well-formed. + srp = self.reader.peek + chunks = [] + length = 0 + ch = srp(length) + while ( + '0' <= ch <= '9' + or 'A' <= ch <= 'Z' + or 'a' <= ch <= 'z' + or ch in "-;/?:@&=+$,_.!~*'()[]%" + or ((self.scanner_processing_version > (1, 1)) and ch == '#') + ): + if ch == '%': + chunks.append(self.reader.prefix(length)) + self.reader.forward(length) + length = 0 + chunks.append(self.scan_uri_escapes(name, start_mark)) + else: + length += 1 + ch = srp(length) + if length != 0: + chunks.append(self.reader.prefix(length)) + self.reader.forward(length) + length = 0 + if not chunks: + raise ScannerError( + f'while parsing an {name!s}', + start_mark, + f'expected URI, but found {ch!r}', + self.reader.get_mark(), + ) + return "".join(chunks) + + def scan_uri_escapes(self, name: Any, start_mark: Any) -> Any: + # See the specification for details. 
+ srp = self.reader.peek + srf = self.reader.forward + code_bytes: List[Any] = [] + mark = self.reader.get_mark() + while srp() == '%': + srf() + for k in range(2): + if srp(k) not in '0123456789ABCDEFabcdef': + raise ScannerError( + f'while scanning an {name!s}', + start_mark, + f'expected URI escape sequence of 2 hexdecimal numbers, ' + f'but found {srp(k)!r}', + self.reader.get_mark(), + ) + code_bytes.append(int(self.reader.prefix(2), 16)) + srf(2) + try: + value = bytes(code_bytes).decode('utf-8') + except UnicodeDecodeError as exc: + raise ScannerError(f'while scanning an {name!s}', start_mark, str(exc), mark) + return value + + def scan_line_break(self) -> Any: + # Transforms: + # '\r\n' : '\n' + # '\r' : '\n' + # '\n' : '\n' + # '\x85' : '\n' + # '\u2028' : '\u2028' + # '\u2029 : '\u2029' + # default : '' + ch = self.reader.peek() + if ch in '\r\n\x85': + if self.reader.prefix(2) == '\r\n': + self.reader.forward(2) + else: + self.reader.forward() + return '\n' + elif ch in '\u2028\u2029': + self.reader.forward() + return ch + return "" + + +class RoundTripScanner(Scanner): + def check_token(self, *choices: Any) -> bool: + # Check if the next token is one of the given types. + while self.need_more_tokens(): + self.fetch_more_tokens() + self._gather_comments() + if len(self.tokens) > 0: + if not choices: + return True + for choice in choices: + if isinstance(self.tokens[0], choice): + return True + return False + + def peek_token(self) -> Any: + # Return the next token, but do not delete if from the queue. 
+ while self.need_more_tokens(): + self.fetch_more_tokens() + self._gather_comments() + if len(self.tokens) > 0: + return self.tokens[0] + return None + + def _gather_comments(self) -> Any: + """combine multiple comment lines and assign to next non-comment-token""" + comments: List[Any] = [] + if not self.tokens: + return comments + if isinstance(self.tokens[0], CommentToken): + comment = self.tokens.pop(0) + self.tokens_taken += 1 + comments.append(comment) + while self.need_more_tokens(): + self.fetch_more_tokens() + if not self.tokens: + return comments + if isinstance(self.tokens[0], CommentToken): + self.tokens_taken += 1 + comment = self.tokens.pop(0) + # nprint('dropping2', comment) + comments.append(comment) + if len(comments) >= 1: + self.tokens[0].add_pre_comments(comments) + # pull in post comment on e.g. ':' + if not self.done and len(self.tokens) < 2: + self.fetch_more_tokens() + + def get_token(self) -> Any: + # Return the next token. + while self.need_more_tokens(): + self.fetch_more_tokens() + self._gather_comments() + if len(self.tokens) > 0: + # nprint('tk', self.tokens) + # only add post comment to single line tokens: + # scalar, value token. 
FlowXEndToken, otherwise + # hidden streamtokens could get them (leave them and they will be + # pre comments for the next map/seq + if ( + len(self.tokens) > 1 + and isinstance( + self.tokens[0], + (ScalarToken, ValueToken, FlowSequenceEndToken, FlowMappingEndToken), + ) + and isinstance(self.tokens[1], CommentToken) + and self.tokens[0].end_mark.line == self.tokens[1].start_mark.line + ): + self.tokens_taken += 1 + c = self.tokens.pop(1) + self.fetch_more_tokens() + while len(self.tokens) > 1 and isinstance(self.tokens[1], CommentToken): + self.tokens_taken += 1 + c1 = self.tokens.pop(1) + c.value = c.value + (' ' * c1.start_mark.column) + c1.value + self.fetch_more_tokens() + self.tokens[0].add_post_comment(c) + elif ( + len(self.tokens) > 1 + and isinstance(self.tokens[0], ScalarToken) + and isinstance(self.tokens[1], CommentToken) + and self.tokens[0].end_mark.line != self.tokens[1].start_mark.line + ): + self.tokens_taken += 1 + c = self.tokens.pop(1) + c.value = ( + '\n' * (c.start_mark.line - self.tokens[0].end_mark.line) + + (' ' * c.start_mark.column) + + c.value + ) + self.tokens[0].add_post_comment(c) + self.fetch_more_tokens() + while len(self.tokens) > 1 and isinstance(self.tokens[1], CommentToken): + self.tokens_taken += 1 + c1 = self.tokens.pop(1) + c.value = c.value + (' ' * c1.start_mark.column) + c1.value + self.fetch_more_tokens() + self.tokens_taken += 1 + return self.tokens.pop(0) + return None + + def fetch_comment(self, comment: Any) -> None: + value, start_mark, end_mark = comment + while value and value[-1] == ' ': + # empty line within indented key context + # no need to update end-mark, that is not used + value = value[:-1] + self.tokens.append(CommentToken(value, start_mark, end_mark)) + + # scanner + + def scan_to_next_token(self) -> Any: + # We ignore spaces, line breaks and comments. + # If we find a line break in the block context, we set the flag + # `allow_simple_key` on. 
+ # The byte order mark is stripped if it's the first character in the + # stream. We do not yet support BOM inside the stream as the + # specification requires. Any such mark will be considered as a part + # of the document. + # + # TODO: We need to make tab handling rules more sane. A good rule is + # Tabs cannot precede tokens + # BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END, + # KEY(block), VALUE(block), BLOCK-ENTRY + # So the checking code is + # if <TAB>: + # self.allow_simple_keys = False + # We also need to add the check for `allow_simple_keys == True` to + # `unwind_indent` before issuing BLOCK-END. + # Scanners for block, flow, and plain scalars need to be modified. + srp = self.reader.peek + srf = self.reader.forward + if self.reader.index == 0 and srp() == '\uFEFF': + srf() + found = False + white_space = ' \t' if self.flow_level > 0 else ' ' + while not found: + while srp() in white_space: + srf() + ch = srp() + if ch == '#': + start_mark = self.reader.get_mark() + comment = ch + srf() + while ch not in _THE_END: + ch = srp() + if ch == '\0': # don't gobble the end-of-stream character + # but add an explicit newline as "YAML processors should terminate + # the stream with an explicit line break + # https://yaml.org/spec/1.2/spec.html#id2780069 + comment += '\n' + break + comment += ch + srf() + # gather any blank lines following the comment + ch = self.scan_line_break() + while len(ch) > 0: + comment += ch + ch = self.scan_line_break() + end_mark = self.reader.get_mark() + if not self.flow_level: + self.allow_simple_key = True + return comment, start_mark, end_mark + if self.scan_line_break() != '': + start_mark = self.reader.get_mark() + if not self.flow_level: + self.allow_simple_key = True + ch = srp() + if ch == '\n': # empty toplevel lines + start_mark = self.reader.get_mark() + comment = "" + while ch: + ch = self.scan_line_break(empty_line=True) + comment += ch + if srp() == '#': + # empty line followed by indented real comment + comment 
= comment.rsplit('\n', 1)[0] + '\n' + end_mark = self.reader.get_mark() + return comment, start_mark, end_mark + else: + found = True + return None + + def scan_line_break(self, empty_line: bool = False) -> Text: + # Transforms: + # '\r\n' : '\n' + # '\r' : '\n' + # '\n' : '\n' + # '\x85' : '\n' + # '\u2028' : '\u2028' + # '\u2029 : '\u2029' + # default : '' + ch: Text = self.reader.peek() + if ch in '\r\n\x85': + if self.reader.prefix(2) == '\r\n': + self.reader.forward(2) + else: + self.reader.forward() + return '\n' + elif ch in '\u2028\u2029': + self.reader.forward() + return ch + elif empty_line and ch in '\t ': + self.reader.forward() + return ch + return "" + + def scan_block_scalar(self, style: Any, rt: Optional[bool] = True) -> Any: + return Scanner.scan_block_scalar(self, style, rt=rt) + + def scan_uri_escapes(self, name: Any, start_mark: Any) -> Any: + """ + The roundtripscanner doesn't do URI escaping + """ + # See the specification for details. + srp = self.reader.peek + srf = self.reader.forward + code_bytes: List[Any] = [] + chunk = '' + mark = self.reader.get_mark() + while srp() == '%': + chunk += '%' + srf() + for k in range(2): + if srp(k) not in '0123456789ABCDEFabcdef': + raise ScannerError( + f'while scanning an {name!s}', + start_mark, + f'expected URI escape sequence of 2 hexdecimal numbers, ' + f'but found {srp(k)!r}', + self.reader.get_mark(), + ) + code_bytes.append(int(self.reader.prefix(2), 16)) + chunk += self.reader.prefix(2) + srf(2) + try: + _ = bytes(code_bytes).decode('utf-8') + except UnicodeDecodeError as exc: + raise ScannerError(f'while scanning an {name!s}', start_mark, str(exc), mark) + return chunk + + +# commenthandling 2021, differentiatiation not needed + +VALUECMNT = 0 +KEYCMNT = 0 # 1 +# TAGCMNT = 2 +# ANCHORCMNT = 3 + + +class CommentBase: + __slots__ = ('value', 'line', 'column', 'used', 'function', 'fline', 'ufun', 'uline') + + def __init__(self, value: Any, line: Any, column: Any) -> None: + self.value = value + 
self.line = line + self.column = column + self.used = ' ' + info = inspect.getframeinfo(inspect.stack()[3][0]) + self.function = info.function + self.fline = info.lineno + self.ufun = None + self.uline = None + + def set_used(self, v: Any = '+') -> None: + self.used = v + info = inspect.getframeinfo(inspect.stack()[1][0]) + self.ufun = info.function # type: ignore + self.uline = info.lineno # type: ignore + + def set_assigned(self) -> None: + self.used = '|' + + def __str__(self) -> str: + return f'{self.value}' + + def __repr__(self) -> str: + return f'{self.value!r}' + + def info(self) -> str: + xv = self.value + '"' + name = self.name # type: ignore + return ( + f'{name}{self.used} {self.line:2}:{self.column:<2} "{xv:40s} ' + f'{self.function}:{self.fline} {self.ufun}:{self.uline}' + ) + + +class EOLComment(CommentBase): + name = 'EOLC' + + def __init__(self, value: Any, line: Any, column: Any) -> None: + super().__init__(value, line, column) + + +class FullLineComment(CommentBase): + name = 'FULL' + + def __init__(self, value: Any, line: Any, column: Any) -> None: + super().__init__(value, line, column) + + +class BlankLineComment(CommentBase): + name = 'BLNK' + + def __init__(self, value: Any, line: Any, column: Any) -> None: + super().__init__(value, line, column) + + +class ScannedComments: + def __init__(self: Any) -> None: + self.comments = {} # type: ignore + self.unused = [] # type: ignore + + def add_eol_comment(self, comment: Any, column: Any, line: Any) -> Any: + # info = inspect.getframeinfo(inspect.stack()[1][0]) + if comment.count('\n') == 1: + assert comment[-1] == '\n' + else: + assert '\n' not in comment + self.comments[line] = retval = EOLComment(comment[:-1], line, column) + self.unused.append(line) + return retval + + def add_blank_line(self, comment: Any, column: Any, line: Any) -> Any: + # info = inspect.getframeinfo(inspect.stack()[1][0]) + assert comment.count('\n') == 1 and comment[-1] == '\n' + assert line not in self.comments + 
def assign_eol(self, tokens: Any) -> Any:
    """Attach the oldest unused comment to a scalar token as its end-of-line comment.

    Nothing happens when there is no unused comment, or when the oldest one
    is not an EOLComment.  Otherwise ``tokens`` is scanned backwards from its
    end, skipping tokens that start on a line after the comment and skipping
    ValueToken instances.  The token found that way gets the comment when it
    is a ScalarToken preceded by:

    - a KeyToken (registered with KEYCMNT), or
    - a ValueToken / BlockEntryToken (registered with VALUECMNT).

    Two consecutive ScalarTokens leave the comment unassigned.

    Raises:
        NotImplementedError: for every other token pattern.  (This used to
            call ``sys.exit(0)``, silently ending the host process with a
            success status from inside library code.)

    NOTE(review): the backwards scan itself is not guarded, so an IndexError
    propagates when no token qualifies -- confirm callers never hit that.
    """
    try:
        comment_line = self.unused[0]
    except IndexError:
        return
    if not isinstance(self.comments[comment_line], EOLComment):
        return
    idx = 1
    while tokens[-idx].start_mark.line > comment_line or isinstance(
        tokens[-idx], ValueToken,
    ):
        idx += 1
    xprintf('idx1', idx)
    if (
        len(tokens) > idx
        and isinstance(tokens[-idx], ScalarToken)
        and isinstance(tokens[-(idx + 1)], ScalarToken)
    ):
        return
    # set_used() records the calling function, so the two assignments below
    # are deliberately kept inline instead of being factored into a helper.
    try:
        if isinstance(tokens[-idx], ScalarToken) and isinstance(
            tokens[-(idx + 1)], KeyToken,
        ):
            try:
                eol_idx = self.unused.pop(0)
                self.comments[eol_idx].set_used()
                xprintf('>>>>>a', idx, eol_idx, KEYCMNT)
                tokens[-idx].add_comment_eol(eol_idx, KEYCMNT)
            except IndexError:
                raise NotImplementedError
            return
    except IndexError:
        # no token in front of the scalar: fall through to the next pattern
        xprintf('IndexError1')
    try:
        if isinstance(tokens[-idx], ScalarToken) and isinstance(
            tokens[-(idx + 1)], (ValueToken, BlockEntryToken),
        ):
            try:
                eol_idx = self.unused.pop(0)
                self.comments[eol_idx].set_used()
                tokens[-idx].add_comment_eol(eol_idx, VALUECMNT)
            except IndexError:
                raise NotImplementedError
            return
    except IndexError:
        xprintf('IndexError2')
    for t in tokens:
        xprintf('tt-', t)
    token_type = type(tokens[-idx])
    xprintf('not implemented EOL', token_type)
    # Report the unsupported pattern to the caller instead of exiting the
    # interpreter with status 0.
    raise NotImplementedError(f'not implemented EOL {token_type}')
def need_more_tokens(self) -> bool:
    """Tell the caller whether more tokens must be fetched before one can be handed out.

    On first use the ScannedComments container is created and shared with
    the loader as ``parsed_comments``.  More tokens are requested while the
    queue is empty, while the next possible simple key is the token about to
    be taken, while fewer than two tokens are queued, or while the first and
    last queued token start on the same line.  Once none of that holds, the
    pending comments are assigned (pre and end-of-line) and False is returned.
    """
    if self.comments is None:
        scanned = ScannedComments()
        self.loader.parsed_comments = scanned  # type: ignore
        self.comments = scanned
    if self.done:
        return False
    if len(self.tokens) == 0:
        return True
    # The head of the queue might still turn out to be a simple key,
    # so look further before deciding.
    self.stale_possible_simple_keys()
    if (
        self.next_possible_simple_key() == self.tokens_taken
        or len(self.tokens) < 2
        or self.tokens[0].start_mark.line == self.tokens[-1].start_mark.line
    ):
        return True
    # debug trace of the queued tokens and the still unprocessed comments
    xprintf('-x--', len(self.tokens))
    for queued in self.tokens:
        xprintf(queued)
    xprintf(self.comments.str_unprocessed())  # type: ignore
    self.comments.assign_pre(self.tokens[0])  # type: ignore
    self.comments.assign_eol(self.tokens)  # type: ignore
    return False
line break + # https://yaml.org/spec/1.2/spec.html#id2780069 + comment += '\n' + break + comment += ch + srf() + # we have a comment + if start_mark.column == 0: + self.comments.add_full_line_comment( # type: ignore + comment, comment_start_mark.column, comment_start_mark.line, + ) + else: + self.comments.add_eol_comment( # type: ignore + comment, comment_start_mark.column, comment_start_mark.line, + ) + comment = "" + # gather any blank lines or full line comments following the comment as well + self.scan_empty_or_full_line_comments() + if not self.flow_level: + self.allow_simple_key = True + return + if bool(self.scan_line_break()): + # start_mark = self.reader.get_mark() + if not self.flow_level: + self.allow_simple_key = True + self.scan_empty_or_full_line_comments() + return None + ch = srp() + if ch == '\n': # empty toplevel lines + start_mark = self.reader.get_mark() + comment = "" + while ch: + ch = self.scan_line_break(empty_line=True) + comment += ch + if srp() == '#': + # empty line followed by indented real comment + comment = comment.rsplit('\n', 1)[0] + '\n' + _ = self.reader.get_mark() # gobble end_mark + return None + else: + found = True + return None + + def scan_empty_or_full_line_comments(self) -> None: + blmark = self.reader.get_mark() + assert blmark.column == 0 + blanks = "" + comment = None + mark = None + ch = self.reader.peek() + while True: + # nprint('ch', repr(ch), self.reader.get_mark().column) + if ch in '\r\n\x85\u2028\u2029': + if self.reader.prefix(2) == '\r\n': + self.reader.forward(2) + else: + self.reader.forward() + if comment is not None: + comment += '\n' + self.comments.add_full_line_comment(comment, mark.column, mark.line) + comment = None + else: + blanks += '\n' + self.comments.add_blank_line(blanks, blmark.column, blmark.line) # type: ignore # NOQA + blanks = "" + blmark = self.reader.get_mark() + ch = self.reader.peek() + continue + if comment is None: + if ch in ' \t': + blanks += ch + elif ch == '#': + mark = 
def scan_block_scalar_ignored_line(self, start_mark: Any) -> Any:
    """Consume the rest of a block scalar header line.

    Skips spaces, then an optional '#' comment, and finally the line break.
    A comment that is found is registered through
    ``self.comments.add_eol_comment`` with a '\\n' appended, at the position
    of its '#'.

    Args:
        start_mark: mark where the block scalar started; only used in the
            error report.

    Raises:
        ScannerError: when something other than a comment or a character
            from _THE_END follows the spaces.
    """
    # See the specification for details.
    srp = self.reader.peek
    srf = self.reader.forward
    comment = None
    mark = None
    # leading spaces carry no information here, so they are skipped, not collected
    while srp() == ' ':
        srf()
    if srp() == '#':
        comment = ''
        mark = self.reader.get_mark()
        while srp() not in _THE_END:
            comment += srp()
            srf()
        comment += '\n'
    ch = srp()
    if ch not in _THE_END:
        raise ScannerError(
            'while scanning a block scalar',
            start_mark,
            f'expected a comment or a line break, but found {ch!r}',
            self.reader.get_mark(),
        )
    if comment is not None:
        self.comments.add_eol_comment(comment, mark.column, mark.line)  # type: ignore
    self.scan_line_break()
    return None
@property
def resolver(self) -> Any:
    """Return the resolver belonging to the attached dumper.

    A dumper that has a ``typ`` attribute exposes its resolver as the public
    ``resolver`` attribute; any other dumper keeps it in ``_resolver``.
    This mirrors the ``emitter`` property of this class.
    """
    if hasattr(self.dumper, 'typ'):
        # the ``return`` was missing here, so the value was looked up and
        # dropped, and ``_resolver`` was always used
        return self.dumper.resolver
    return self.dumper._resolver
def generate_anchor(self, node: Any) -> Any:
    """Return the anchor name to emit for ``node``.

    The node's own ``anchor.value`` is used when it is available and not
    None.  Otherwise ``last_anchor_id`` is incremented and formatted with
    ANCHOR_TEMPLATE to produce a fresh name.

    A missing ``anchor`` or ``value`` attribute counts as "no anchor".  Any
    other error raised while reading them now propagates; it used to be
    swallowed by a bare ``except``.
    """
    anchor = getattr(getattr(node, 'anchor', None), 'value', None)
    if anchor is None:
        self.last_anchor_id += 1
        return self.ANCHOR_TEMPLATE.format(self.last_anchor_id)
    return anchor
comment[0] + # comment[0] = None + if comment and len(comment) > 2: + end_comment = comment[2] + else: + end_comment = None + self.emitter.emit( + SequenceStartEvent( + alias, + node.ctag, + implicit, + flow_style=node.flow_style, + comment=node.comment, + ), + ) + index = 0 + for item in node.value: + self.serialize_node(item, node, index) + index += 1 + self.emitter.emit(SequenceEndEvent(comment=[seq_comment, end_comment])) + elif isinstance(node, MappingNode): + implicit = node.ctag == self.resolver.resolve(MappingNode, node.value, True) + comment = node.comment + end_comment = None + map_comment = None + if node.flow_style is True: + if comment: # eol comment on flow style sequence + map_comment = comment[0] + # comment[0] = None + if comment and len(comment) > 2: + end_comment = comment[2] + self.emitter.emit( + MappingStartEvent( + alias, + node.ctag, + implicit, + flow_style=node.flow_style, + comment=node.comment, + nr_items=len(node.value), + ), + ) + for key, value in node.value: + self.serialize_node(key, node, None) + self.serialize_node(value, node, key) + self.emitter.emit(MappingEndEvent(comment=[map_comment, end_comment])) + self.resolver.ascend_resolver() + + +def templated_id(s: Text) -> Any: + return Serializer.ANCHOR_RE.match(s) diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..8bfd5a1 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,4 @@ +[egg_info] +tag_build = +tag_date = 0 + diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..3154575 --- /dev/null +++ b/setup.py @@ -0,0 +1,941 @@ +# # header +# coding: utf-8 +# dd: 20230418 + +# # __init__.py parser + +import sys +import os +import datetime +from textwrap import dedent + +sys.path = [path for path in sys.path if path not in [os.getcwd(), ""]] +import platform # NOQA +from _ast import * # NOQA +from ast import parse # NOQA + +from setuptools import setup, Extension, Distribution # NOQA +from setuptools.command import install_lib # NOQA +from 
if os.environ.get('DVDEBUG', "") == "":

    def debug(*args, **kw):
        """Do nothing: debug logging is off because DVDEBUG is unset or empty."""
        pass


else:

    def debug(*args, **kw):
        """Append a timestamped line to the file named by the DVDEBUG variable.

        ``args`` and ``kw`` are passed on to ``print``; a ``file`` keyword
        given by the caller is overridden by the log file.
        """
        with open(os.environ['DVDEBUG'], 'a') as fp:
            kw1 = kw.copy()
            kw1['file'] = fp
            # ISO 8601 order is year-month-day; day and month were swapped before
            print('{:%Y-%m-%dT%H:%M:%S}'.format(datetime.datetime.now()), file=fp, end=' ')
            print(*args, **kw1)
+ """ + _safe_names = {'None': None, 'True': True, 'False': False} + if isinstance(node_or_string, string_type): + node_or_string = parse(node_or_string, mode='eval') + if isinstance(node_or_string, Expression): + node_or_string = node_or_string.body + else: + raise TypeError('only string or AST nodes supported') + + def _convert(node): + if isinstance(node, Str): + if sys.version_info < (3,) and not isinstance(node.s, unicode): + return node.s.decode('utf-8') + return node.s + elif isinstance(node, Bytes): + return node.s + elif isinstance(node, Num): + return node.n + elif isinstance(node, Tuple): + return tuple(map(_convert, node.elts)) + elif isinstance(node, List): + return list(map(_convert, node.elts)) + elif isinstance(node, Set): + return set(map(_convert, node.elts)) + elif isinstance(node, Dict): + return {_convert(k): _convert(v) for k, v in zip(node.keys, node.values)} + elif isinstance(node, NameConstant): + return node.value + elif sys.version_info < (3, 4) and isinstance(node, Name): + if node.id in _safe_names: + return _safe_names[node.id] + elif ( + isinstance(node, UnaryOp) + and isinstance(node.op, (UAdd, USub)) + and isinstance(node.operand, (Num, UnaryOp, BinOp)) + ): # NOQA + operand = _convert(node.operand) + if isinstance(node.op, UAdd): + return +operand + else: + return -operand + elif ( + isinstance(node, BinOp) + and isinstance(node.op, (Add, Sub)) + and isinstance(node.right, (Num, UnaryOp, BinOp)) + and isinstance(node.left, (Num, UnaryOp, BinOp)) + ): # NOQA + left = _convert(node.left) + right = _convert(node.right) + if isinstance(node.op, Add): + return left + right + else: + return left - right + elif isinstance(node, Call): + func_id = getattr(node.func, 'id', None) + if func_id == 'dict': + return {k.arg: _convert(k.value) for k in node.keywords} + elif func_id == 'set': + return set(_convert(node.args[0])) + elif func_id == 'date': + return datetime.date(*[_convert(k) for k in node.args]) + elif func_id == 'datetime': + 
return datetime.datetime(*[_convert(k) for k in node.args]) + err = SyntaxError('malformed node or string: ' + repr(node)) + err.filename = '<string>' + err.lineno = node.lineno + err.offset = node.col_offset + err.text = repr(node) + err.node = node + raise err + + return _convert(node_or_string) + + +# parses python ( "= dict( )" ) or ( "= {" ) +def _package_data(fn): + data = {} + with open(fn, **open_kw) as fp: + parsing = False + lines = [] + for line in fp.readlines(): + if sys.version_info < (3,): + line = line.decode('utf-8') + if line.startswith('_package_data'): + if 'dict(' in line: + parsing = 'python' + lines.append('dict(\n') + elif line.endswith('= {\n'): + parsing = 'python' + lines.append('{\n') + else: + raise NotImplementedError + continue + if not parsing: + continue + if parsing == 'python': + if line.startswith(')') or line.startswith('}'): + lines.append(line) + try: + data = literal_eval("".join(lines)) + except SyntaxError as e: + context = 2 + from_line = e.lineno - (context + 1) + to_line = e.lineno + (context - 1) + w = len(str(to_line)) + for index, line in enumerate(lines): + if from_line <= index <= to_line: + print( + '{0:{1}}: {2}'.format(index, w, line).encode('utf-8'), + end="", + ) + if index == e.lineno - 1: + print( + '{0:{1}} {2}^--- {3}'.format( + ' ', w, ' ' * e.offset, e.node, + ), + ) + raise + break + lines.append(line) + else: + raise NotImplementedError + return data + + +# make sure you can run "python ../some/dir/setup.py install" +pkg_data = _package_data(__file__.replace('setup.py', '__init__.py')) + +exclude_files = ['setup.py'] + + +# # helper +def _check_convert_version(tup): + """Create a PEP 386 pseudo-format conformant string from tuple tup.""" + ret_val = str(tup[0]) # first is always digit + next_sep = '.' # separator for next extension, can be "" or "." 
+ nr_digits = 0 # nr of adjacent digits in rest, to verify + post_dev = False # are we processig post/dev + for x in tup[1:]: + if isinstance(x, int): + nr_digits += 1 + if nr_digits > 2: + raise ValueError('too many consecutive digits after ' + ret_val) + ret_val += next_sep + str(x) + next_sep = '.' + continue + first_letter = x[0].lower() + next_sep = "" + if first_letter in 'abcr': + if post_dev: + raise ValueError('release level specified after ' 'post/dev: ' + x) + nr_digits = 0 + ret_val += 'rc' if first_letter == 'r' else first_letter + elif first_letter in 'pd': + nr_digits = 1 # only one can follow + post_dev = True + ret_val += '.post' if first_letter == 'p' else '.dev' + else: + raise ValueError('First letter of "' + x + '" not recognised') + # .dev and .post need a number otherwise setuptools normalizes and complains + if nr_digits == 1 and post_dev: + ret_val += '0' + return ret_val + + +version_info = pkg_data['version_info'] +version_str = _check_convert_version(version_info) + + +class MyInstallLib(install_lib.install_lib): + def install(self): + fpp = pkg_data['full_package_name'].split('.') # full package path + full_exclude_files = [os.path.join(*(fpp + [x])) for x in exclude_files] + alt_files = [] + outfiles = install_lib.install_lib.install(self) + for x in outfiles: + for full_exclude_file in full_exclude_files: + if full_exclude_file in x: + os.remove(x) + break + else: + alt_files.append(x) + return alt_files + + +class MySdist(_sdist): + def initialize_options(self): + _sdist.initialize_options(self) + # failed expiriment, see pep 527, new uploads should be tar.gz or .zip + # because of unicode_literals + # self.formats = fmt if fmt else [b'bztar'] if sys.version_info < (3, ) else ['bztar'] + dist_base = os.environ.get('PYDISTBASE') + fpn = getattr(getattr(self, 'nsp', self), 'full_package_name', None) + if fpn and dist_base: + print('setting distdir {}/{}'.format(dist_base, fpn)) + self.dist_dir = os.path.join(dist_base, fpn) + + +# try 
def __init__(self, pkg_data):
    """Set up the packager from the ``_package_data`` dict of the package.

    Besides storing the configuration this:

    - runs ``python_version()``, which may exit the process;
    - refuses a direct ``setup.py install`` (exit status 1) unless
      ``--single-version-externally-managed`` or ``-h`` is given or
      RUAMEL_NO_PIP_INSTALL_CHECK is set; with READTHEDOCS == 'True' it runs
      ``pip install .`` and exits with status 0 instead;
    - overrides ``Distribution.is_pure`` according to the 'universal' entry;
    - stores the first command line argument that is neither an option nor
      'setup.py' in ``self.command``.
    """
    assert isinstance(pkg_data, dict)
    self._pkg_data = pkg_data
    self.full_package_name = self.pn(self._pkg_data['full_package_name'])
    self._split = None
    self.depth = self.full_package_name.count('.')
    self.nested = self._pkg_data.get('nested', False)
    # if self.nested:
    #     NameSpaceInstaller.install_namespaces = lambda x: None
    self.command = None
    self.python_version()
    self._pkg = [None, None]  # required and pre-installable packages
    # compare a slice: a bare "python setup.py" has no argv[1] to index
    if sys.argv[:2] == ['setup.py', 'install']:
        debug('calling setup.py', sys.argv)
        if '-h' in sys.argv:
            pass
        elif '--single-version-externally-managed' not in sys.argv:
            if os.environ.get('READTHEDOCS', None) == 'True':
                os.system('pip install .')
                sys.exit(0)
            if not os.environ.get('RUAMEL_NO_PIP_INSTALL_CHECK', False):
                print('error: you have to install with "pip install ."')
                sys.exit(1)
    # If you only support an extension module on Linux, Windows thinks it
    # is pure. That way you would get pure python .whl files that take
    # precedence for downloading on Linux over source with compilable C code
    if self._pkg_data.get('universal'):
        Distribution.is_pure = lambda *args: True
    else:
        Distribution.is_pure = lambda *args: False
    for x in sys.argv:
        # skip options, the script name, and empty arguments (which used to
        # raise IndexError on x[0])
        if not x or x.startswith('-') or x == 'setup.py':
            continue
        self.command = x
        break
+ d) + return res + + @property + def package_dir(self): + d = { + # don't specify empty dir, clashes with package_data spec + self.full_package_name: '.', + } + if 'extra_packages' in self._pkg_data: + return d + # if len(self.split) > 1: # only if package namespace + # d[self.split[0]] = self.namespace_directories(1)[0] + # print('d', d, os.getcwd()) + return d + + def python_version(self): + supported = self._pkg_data.get('supported') + if supported is None: + return + if len(supported) == 1: + minimum = supported[0] + else: + for x in supported: + if x[0] == sys.version_info[0]: + minimum = x + break + else: + return + if sys.version_info < minimum: + print('minimum python version(s): ' + str(supported)) + sys.exit(1) + + def check(self): + # https://github.com/pypa/setuptools/issues/2355#issuecomment-685159580 + InstallationError = Exception + # arg is either develop (pip install -e) or install + if self.command not in ['install', 'develop']: + return + + # if hgi and hgi.base are both in namespace_packages matching + # against the top (hgi.) it suffices to find minus-e and non-minus-e + # installed packages. 
def entry_points(self, script_name=None, package_name=None):
    """Return the entry_points mapping for setup(), or None.

    Driven by the 'entry_points' value of the package data (default True):

    - a dict is returned unchanged, None yields None;
    - a string containing "=" is a complete console_scripts specification;
    - any other string is taken as the script name;
    - True or 1 derives the script name from the last component of the
      package name.

    The generated script calls main() from ``<package>.__main__`` when a
    __main__.py exists in the current directory, else from ``<package>``.
    ``script_name`` and ``package_name`` are normally left at their defaults.
    """
    ep = self._pkg_data.get('entry_points', True)
    if ep is None or isinstance(ep, dict):
        return ep
    if ep not in (True, 1):
        if '=' in ep:
            # full specification of the entry point like
            # entry_points=['yaml = ruamel.yaml.cmd:main'],
            return {'console_scripts': [ep]}
        # otherwise it is just the script name
        script_name = ep
    if package_name is None:
        package_name = self.full_package_name
    if not script_name:
        script_name = package_name.rsplit('.', 1)[-1]
    module_suffix = '.__main__' if os.path.exists('__main__.py') else ""
    target = '{0}{1}:main'.format(package_name, module_suffix)
    return {'console_scripts': ['{0} = {1}'.format(script_name, target)]}
os.path.exists(lic_fn) + return 'MIT license' + return lic + + def has_mit_lic(self): + return 'MIT' in self.license + + @property + def description(self): + return self._pkg_data['description'] # no get needs to be there + + @property + def status(self): + # αβ + status = self._pkg_data.get('status', 'β').lower() + if status in ['α', 'alpha']: + return (3, 'Alpha') + elif status in ['β', 'beta']: + return (4, 'Beta') + elif 'stable' in status.lower(): + return (5, 'Production/Stable') + raise NotImplementedError + + @property + def classifiers(self): + """this needs more intelligence, probably splitting the classifiers from _pkg_data + and only adding defaults when no explicit entries were provided. + Add explicit Python versions in sync with tox.env generation based on python_requires? + """ + attr = '_' + sys._getframe().f_code.co_name + if not hasattr(self, attr): + setattr(self, attr, self._setup_classifiers()) + return getattr(self, attr) + + def _setup_classifiers(self): + return sorted( + set( + [ + 'Development Status :: {0} - {1}'.format(*self.status), + 'Intended Audience :: Developers', + 'License :: ' + + ('OSI Approved :: MIT' if self.has_mit_lic() else 'Other/Proprietary') + + ' License', + 'Operating System :: OS Independent', + 'Programming Language :: Python', + ] + + [self.pn(x) for x in self._pkg_data.get('classifiers', [])], + ), + ) + + @property + def keywords(self): + return self.pn(self._pkg_data.get('keywords', [])) + + @property + def install_requires(self): + """list of packages required for installation""" + return self._analyse_packages[0] + + @property + def install_pre(self): + """list of packages required for installation""" + return self._analyse_packages[1] + + @property + def _analyse_packages(self): + """gather from configuration, names starting with * need + to be installed explicitly as they are not on PyPI + install_requires should be dict, with keys 'any', 'py27' etc + or a list (which is as if only 'any' was defined + + 
ToDo: update with: pep508 conditional dependencies + """ + if self._pkg[0] is None: + self._pkg[0] = [] + self._pkg[1] = [] + + ir = self._pkg_data.get('install_requires') + if ir is None: + return self._pkg # these will be both empty at this point + if isinstance(ir, list): + self._pkg[0] = ir + return self._pkg + # 'any' for all builds, 'py27' etc for specifics versions + packages = ir.get('any', []) + if isinstance(packages, string_type): + packages = packages.split() # assume white space separated string + if self.nested: + # parent dir is also a package, make sure it is installed (need its .pth file) + parent_pkg = self.full_package_name.rsplit('.', 1)[0] + if parent_pkg not in packages: + packages.append(parent_pkg) + implementation = platform.python_implementation() + if implementation == 'CPython': + pyver = 'py{0}{1}'.format(*sys.version_info) + elif implementation == 'PyPy': + pyver = 'pypy' if sys.version_info < (3,) else 'pypy3' + elif implementation == 'Jython': + pyver = 'jython' + packages.extend(ir.get(pyver, [])) + for p in packages: + # package name starting with * means use local source tree, non-published + # to PyPi or maybe not latest version on PyPI -> pre-install + if p[0] == '*': + p = p[1:] + self._pkg[1].append(p) + self._pkg[0].append(p) + return self._pkg + + @property + def extras_require(self): + """dict of conditions -> extra packages informaton required for installation + as of setuptools 33 doing `package ; python_version<=2.7' in install_requires + still doesn't work + + https://www.python.org/dev/peps/pep-0508/ + https://wheel.readthedocs.io/en/latest/index.html#defining-conditional-dependencies + https://hynek.me/articles/conditional-python-dependencies/ + """ + ep = self._pkg_data.get('extras_require') + return ep + + # @property + # def data_files(self): + # df = self._pkg_data.get('data_files', []) + # if self.has_mit_lic(): + # df.append('LICENSE') + # if not df: + # return None + # return [('.', df)] + + @property + def 
package_data(self): + df = self._pkg_data.get('data_files', []) + if self.has_mit_lic(): + # include the file + df.append('LICENSE') + # but don't install it + exclude_files.append('LICENSE') + if self._pkg_data.get('binary_only', False): + exclude_files.append('__init__.py') + debug('testing<<<<<') + if 'Typing :: Typed' in self.classifiers: + debug('appending') + df.append('py.typed') + pd = self._pkg_data.get('package_data', {}) + if df: + pd[self.full_package_name] = df + if sys.version_info < (3,): + # python2 doesn't seem to like unicode package names as keys + # maybe only when the packages themselves are non-unicode + for k in pd: + if isinstance(k, unicode): + pd[str(k)] = pd.pop(k) + # for k in pd: + # pd[k] = [e.encode('utf-8') for e in pd[k]] # de-unicode + return pd + + @property + def packages(self): + # s = self.split + s = [self._pkg_data['full_package_name']] + # fixed this in package_data, the keys there must be non-unicode for py27 + # if sys.version_info < (3, 0): + # s = [x.encode('utf-8') for x in self.split] + return s + self._pkg_data.get('extra_packages', []) + + @property + def python_requires(self): + return self._pkg_data.get('python_requires', None) + + @property + def ext_modules(self): + """ + Check if all modules specified in the value for 'ext_modules' can be build. 
+ That value (if not None) is a list of dicts with 'name', 'src', 'lib' + Optional 'test' can be used to make sure trying to compile will work on the host + + creates and return the external modules as Extensions, unless that + is not necessary at all for the action (like --version) + + test existence of compiler by using export CC=nonexistent; export CXX=nonexistent + """ + + if hasattr(self, '_ext_modules'): + return self._ext_modules + if '--version' in sys.argv: + return None + if platform.python_implementation() == 'Jython': + return None + try: + plat = sys.argv.index('--plat-name') + if 'win' in sys.argv[plat + 1]: + return None + except ValueError: + pass + self._ext_modules = [] + no_test_compile = True + if '--restructuredtext' in sys.argv: + no_test_compile = True + elif 'sdist' in sys.argv: + no_test_compile = True + if no_test_compile: + for target in self._pkg_data.get('ext_modules', []): + ext = Extension( + self.pn(target['name']), + sources=[self.pn(x) for x in target['src']], + libraries=[self.pn(x) for x in target.get('lib')], + ) + self._ext_modules.append(ext) + return self._ext_modules + # this used to use distutils + + @property + def test_suite(self): + return self._pkg_data.get('test_suite') + + def wheel(self, kw, setup): + """temporary add setup.cfg if creating a wheel to include LICENSE file + https://bitbucket.org/pypa/wheel/issues/47 + """ + if 'bdist_wheel' not in sys.argv: + return False + file_name = 'setup.cfg' + if os.path.exists(file_name): # add it if not in there? 
+ return False + with open(file_name, 'w') as fp: + if self._pkg_data.get('universal'): + fp.write('[bdist_wheel]\nuniversal = 1\n') + try: + setup(**kw) + except Exception: + raise + finally: + os.remove(file_name) + return True + + +class TmpFiles: + def __init__(self, pkg_data, py_project=True, keep=False): + self._rm_after = [] + self._pkg_data = pkg_data + self._py_project = py_project + self._bdist_wheel = 'bdist_wheel' in sys.argv + self._keep = keep + + def __enter__(self): + self.bdist_wheel() + self.py_project() + + def bdist_wheel(self): + """pyproject doesn't allow for universal, so use setup.cfg if necessary + """ + file_name = 'setup.cfg' + if not self._bdist_wheel or os.path.exists(file_name): + return + if self._pkg_data.get('universal'): + self._rm_after.append(file_name) + with open(file_name, 'w') as fp: + fp.write('[bdist_wheel]\nuniversal = 1\n') + + def py_project(self): + """ + to prevent pip from complaining, or is it too late to create it from setup.py + """ + file_name = 'pyproject.toml' + if not self._py_project or os.path.exists(file_name): + return + self._rm_after.append(file_name) + with open(file_name, 'w') as fp: + fp.write(dedent("""\ + [build-system] + requires = ["setuptools", "wheel"] + # test + build-backend = "setuptools.build_meta" + """)) + + def __exit__(self, typ, value, traceback): + if self._keep: + return + for p in self._rm_after: + if not os.path.exists(p): + print('file {} already removed'.format(p)) + else: + os.unlink(p) + + +# call setup +def main(): + dump_kw = '--dump-kw' + if dump_kw in sys.argv: + import wheel + import setuptools + import pip + + print('python: ', sys.version) + print('pip: ', pip.__version__) + print('setuptools:', setuptools.__version__) + print('wheel: ', wheel.__version__) + nsp = NameSpacePackager(pkg_data) + nsp.check() + # nsp.create_dirs() + MySdist.nsp = nsp + cmdclass = dict(install_lib=MyInstallLib, sdist=MySdist) # NOQA: C408 + if _bdist_wheel_available: + MyBdistWheel.nsp = nsp + 
cmdclass['bdist_wheel'] = MyBdistWheel + + kw = dict( # NOQA: C408 + name=nsp.full_package_name, + version=version_str, + packages=nsp.packages, + python_requires=nsp.python_requires, + # url=nsp.url, + project_urls=nsp.project_urls, + author=nsp.author, + author_email=nsp.author_email, + cmdclass=cmdclass, + package_dir=nsp.package_dir, + entry_points=nsp.entry_points(), + description=nsp.description, + install_requires=nsp.install_requires, + extras_require=nsp.extras_require, # available since setuptools 18.0 / 2015-06 + license=nsp.license, + classifiers=nsp.classifiers, + keywords=nsp.keywords, + package_data=nsp.package_data, + ext_modules=nsp.ext_modules, + test_suite=nsp.test_suite, + zip_safe=False, + ) + + if '--version' not in sys.argv and ('--verbose' in sys.argv or dump_kw in sys.argv): + for k in sorted(kw): + v = kw[k] + print(' "{0}": {1},'.format(k, repr(v))) + # if '--record' in sys.argv: + # return + if dump_kw in sys.argv: + sys.argv.remove(dump_kw) + if not os.environ.get('RUAMEL_NO_LONG_DESCRIPTION', False): + for readme_file_name, readme_markup_type in [ + ('README.md', 'text/markdown; charset=UTF-8; variant=CommonMark'), + ('README.rst', 'text/x-rst'), + ]: + try: + kw['long_description'] = open(readme_file_name).read() + kw['long_description_content_type'] = readme_markup_type + break + except FileNotFoundError: + pass + + # if nsp.wheel(kw, setup): + # return + with TmpFiles(pkg_data, keep=True): + for x in ['-c', 'egg_info', '--egg-base', 'pip-egg-info']: + if x not in sys.argv: + break + else: + # we're doing a tox setup install any starred package by searching up the + # source tree until you match your/package/name for your.package.name + for p in nsp.install_pre: + import subprocess + + # search other source + setup_path = os.path.join(*p.split('.') + ['setup.py']) + try_dir = os.path.dirname(sys.executable) + while len(try_dir) > 1: + full_path_setup_py = os.path.join(try_dir, setup_path) + if os.path.exists(full_path_setup_py): + 
pip = sys.executable.replace('python', 'pip') + cmd = [pip, 'install', os.path.dirname(full_path_setup_py)] + # with open('/var/tmp/notice', 'a') as fp: + # print('installing', cmd, file=fp) + subprocess.check_output(cmd) + break + try_dir = os.path.dirname(try_dir) + setup(**kw) + + +main() @@ -0,0 +1,124 @@ +# coding: utf-8 + +""" +In round-trip mode the original tag needs to be preserved, but the tag +transformed based on the directives needs to be available as well. + +A Tag that is created during loading has a handle and a suffix. +Not all objects loaded currently have a Tag, that .tag attribute can be None +A Tag that is created for dumping only (on an object loaded without a tag) has a suffix +only. +""" + +from typing import Any, Dict, Optional, List, Union, Optional, Iterator # NOQA + +tag_attrib = '_yaml_tag' + + +class Tag: + """store original tag information for roundtripping""" + + attrib = tag_attrib + + def __init__(self, handle: Any = None, suffix: Any = None, handles: Any = None) -> None: + self.handle = handle + self.suffix = suffix + self.handles = handles + self._transform_type: Optional[bool] = None + + def __repr__(self) -> str: + return f'{self.__class__.__name__}({self.trval!r})' + + def __str__(self) -> str: + return f'{self.trval}' + + def __hash__(self) -> int: + try: + return self._hash_id # type: ignore + except AttributeError: + self._hash_id = res = hash((self.handle, self.suffix)) + return res + + def __eq__(self, other: Any) -> bool: + # other should not be a string, but the serializer sometimes provides these + if isinstance(other, str): + return self.trval == other + return bool(self.trval == other.trval) + + def startswith(self, x: str) -> bool: + if self.trval is not None: + return self.trval.startswith(x) + return False + + @property + def trval(self) -> Optional[str]: + try: + return self._trval + except AttributeError: + pass + if self.handle is None: + self._trval: Optional[str] = self.uri_decoded_suffix + return self._trval 
+ assert self._transform_type is not None + if not self._transform_type: + # the non-round-trip case + self._trval = self.handles[self.handle] + self.uri_decoded_suffix + return self._trval + # round-trip case + if self.handle == '!!' and self.suffix in ( + 'null', + 'bool', + 'int', + 'float', + 'binary', + 'timestamp', + 'omap', + 'pairs', + 'set', + 'str', + 'seq', + 'map', + ): + self._trval = self.handles[self.handle] + self.uri_decoded_suffix + else: + # self._trval = self.handle + self.suffix + self._trval = self.handles[self.handle] + self.uri_decoded_suffix + return self._trval + + value = trval + + @property + def uri_decoded_suffix(self) -> Optional[str]: + try: + return self._uri_decoded_suffix + except AttributeError: + pass + if self.suffix is None: + self._uri_decoded_suffix: Optional[str] = None + return None + res = '' + # don't have to check for scanner errors here + idx = 0 + while idx < len(self.suffix): + ch = self.suffix[idx] + idx += 1 + if ch != '%': + res += ch + else: + res += chr(int(self.suffix[idx : idx + 2], 16)) + idx += 2 + self._uri_decoded_suffix = res + return res + + def select_transform(self, val: bool) -> None: + """ + val: False -> non-round-trip + True -> round-trip + """ + assert self._transform_type is None + self._transform_type = val + + def check_handle(self) -> bool: + if self.handle is None: + return False + return self.handle not in self.handles diff --git a/timestamp.py b/timestamp.py new file mode 100644 index 0000000..753dfc1 --- /dev/null +++ b/timestamp.py @@ -0,0 +1,58 @@ +# coding: utf-8 + +import datetime +import copy + +# ToDo: at least on PY3 you could probably attach the tzinfo correctly to the object +# a more complete datetime might be used by safe loading as well +# +# add type information (iso8601, spaced) + +from typing import Any, Dict, Optional, List # NOQA + + +class TimeStamp(datetime.datetime): + def __init__(self, *args: Any, **kw: Any) -> None: + self._yaml: Dict[Any, Any] = dict(t=False, 
tz=None, delta=0) + + def __new__(cls, *args: Any, **kw: Any) -> Any: # datetime is immutable + return datetime.datetime.__new__(cls, *args, **kw) + + def __deepcopy__(self, memo: Any) -> Any: + ts = TimeStamp(self.year, self.month, self.day, self.hour, self.minute, self.second) + ts._yaml = copy.deepcopy(self._yaml) + return ts + + def replace( + self, + year: Any = None, + month: Any = None, + day: Any = None, + hour: Any = None, + minute: Any = None, + second: Any = None, + microsecond: Any = None, + tzinfo: Any = True, + fold: Any = None, + ) -> Any: + if year is None: + year = self.year + if month is None: + month = self.month + if day is None: + day = self.day + if hour is None: + hour = self.hour + if minute is None: + minute = self.minute + if second is None: + second = self.second + if microsecond is None: + microsecond = self.microsecond + if tzinfo is True: + tzinfo = self.tzinfo + if fold is None: + fold = self.fold + ts = type(self)(year, month, day, hour, minute, second, microsecond, tzinfo, fold=fold) + ts._yaml = copy.deepcopy(self._yaml) + return ts diff --git a/tokens.py b/tokens.py new file mode 100644 index 0000000..0c73dcf --- /dev/null +++ b/tokens.py @@ -0,0 +1,379 @@ +# coding: utf-8 + +from ruamel.yaml.compat import nprintf # NOQA + +from typing import Text, Any, Dict, Optional, List # NOQA +from .error import StreamMark # NOQA + +SHOW_LINES = True + + +class Token: + __slots__ = 'start_mark', 'end_mark', '_comment' + + def __init__(self, start_mark: StreamMark, end_mark: StreamMark) -> None: + self.start_mark = start_mark + self.end_mark = end_mark + + def __repr__(self) -> Any: + # attributes = [key for key in self.__slots__ if not key.endswith('_mark') and + # hasattr('self', key)] + attributes = [key for key in self.__slots__ if not key.endswith('_mark')] + attributes.sort() + # arguments = ', '.join( + # [f'{key!s}={getattr(self, key)!r})' for key in attributes] + # ) + arguments = [f'{key!s}={getattr(self, key)!r}' for key in 
attributes] + if SHOW_LINES: + try: + arguments.append('line: ' + str(self.start_mark.line)) + except: # NOQA + pass + try: + arguments.append('comment: ' + str(self._comment)) + except: # NOQA + pass + return f'{self.__class__.__name__}({", ".join(arguments)})' + + @property + def column(self) -> int: + return self.start_mark.column + + @column.setter + def column(self, pos: Any) -> None: + self.start_mark.column = pos + + # old style ( <= 0.17) is a TWO element list with first being the EOL + # comment concatenated with following FLC/BLNK; and second being a list of FLC/BLNK + # preceding the token + # new style ( >= 0.17 ) is a THREE element list with the first being a list of + # preceding FLC/BLNK, the second EOL and the third following FLC/BLNK + # note that new style has differing order, and does not consist of CommentToken(s) + # but of CommentInfo instances + # any non-assigned values in new style are None, but first and last can be empty list + # new style routines add one comment at a time + + # going to be deprecated in favour of add_comment_eol/post + def add_post_comment(self, comment: Any) -> None: + if not hasattr(self, '_comment'): + self._comment = [None, None] + else: + assert len(self._comment) in [2, 5] # make sure it is version 0 + # if isinstance(comment, CommentToken): + # if comment.value.startswith('# C09'): + # raise + self._comment[0] = comment + + # going to be deprecated in favour of add_comment_pre + def add_pre_comments(self, comments: Any) -> None: + if not hasattr(self, '_comment'): + self._comment = [None, None] + else: + assert len(self._comment) == 2 # make sure it is version 0 + assert self._comment[1] is None + self._comment[1] = comments + return + + # new style + def add_comment_pre(self, comment: Any) -> None: + if not hasattr(self, '_comment'): + self._comment = [[], None, None] # type: ignore + else: + assert len(self._comment) == 3 + if self._comment[0] is None: + self._comment[0] = [] # type: ignore + 
self._comment[0].append(comment) # type: ignore + + def add_comment_eol(self, comment: Any, comment_type: Any) -> None: + if not hasattr(self, '_comment'): + self._comment = [None, None, None] + else: + assert len(self._comment) == 3 + assert self._comment[1] is None + if self.comment[1] is None: + self._comment[1] = [] # type: ignore + self._comment[1].extend([None] * (comment_type + 1 - len(self.comment[1]))) # type: ignore # NOQA + # nprintf('commy', self.comment, comment_type) + self._comment[1][comment_type] = comment # type: ignore + + def add_comment_post(self, comment: Any) -> None: + if not hasattr(self, '_comment'): + self._comment = [None, None, []] # type: ignore + else: + assert len(self._comment) == 3 + if self._comment[2] is None: + self._comment[2] = [] # type: ignore + self._comment[2].append(comment) # type: ignore + + # def get_comment(self) -> Any: + # return getattr(self, '_comment', None) + + @property + def comment(self) -> Any: + return getattr(self, '_comment', None) + + def move_old_comment(self, target: Any, empty: bool = False) -> Any: + """move a comment from this token to target (normally next token) + used to combine e.g. 
comments before a BlockEntryToken to the + ScalarToken that follows it + empty is a special for empty values -> comment after key + """ + c = self.comment + if c is None: + return + # don't push beyond last element + if isinstance(target, (StreamEndToken, DocumentStartToken)): + return + delattr(self, '_comment') + tc = target.comment + if not tc: # target comment, just insert + # special for empty value in key: value issue 25 + if empty: + c = [c[0], c[1], None, None, c[0]] + target._comment = c + # nprint('mco2:', self, target, target.comment, empty) + return self + if c[0] and tc[0] or c[1] and tc[1]: + raise NotImplementedError(f'overlap in comment {c!r} {tc!r}') + if c[0]: + tc[0] = c[0] + if c[1]: + tc[1] = c[1] + return self + + def split_old_comment(self) -> Any: + """ split the post part of a comment, and return it + as comment to be added. Delete second part if [None, None] + abc: # this goes to sequence + # this goes to first element + - first element + """ + comment = self.comment + if comment is None or comment[0] is None: + return None # nothing to do + ret_val = [comment[0], None] + if comment[1] is None: + delattr(self, '_comment') + return ret_val + + def move_new_comment(self, target: Any, empty: bool = False) -> Any: + """move a comment from this token to target (normally next token) + used to combine e.g. 
comments before a BlockEntryToken to the + ScalarToken that follows it + empty is a special for empty values -> comment after key + """ + c = self.comment + if c is None: + return + # don't push beyond last element + if isinstance(target, (StreamEndToken, DocumentStartToken)): + return + delattr(self, '_comment') + tc = target.comment + if not tc: # target comment, just insert + # special for empty value in key: value issue 25 + if empty: + c = [c[0], c[1], c[2]] + target._comment = c + # nprint('mco2:', self, target, target.comment, empty) + return self + # if self and target have both pre, eol or post comments, something seems wrong + for idx in range(3): + if c[idx] is not None and tc[idx] is not None: + raise NotImplementedError(f'overlap in comment {c!r} {tc!r}') + # move the comment parts + for idx in range(3): + if c[idx]: + tc[idx] = c[idx] + return self + + +# class BOMToken(Token): +# id = '<byte order mark>' + + +class DirectiveToken(Token): + __slots__ = 'name', 'value' + id = '<directive>' + + def __init__(self, name: Any, value: Any, start_mark: Any, end_mark: Any) -> None: + Token.__init__(self, start_mark, end_mark) + self.name = name + self.value = value + + +class DocumentStartToken(Token): + __slots__ = () + id = '<document start>' + + +class DocumentEndToken(Token): + __slots__ = () + id = '<document end>' + + +class StreamStartToken(Token): + __slots__ = ('encoding',) + id = '<stream start>' + + def __init__( + self, start_mark: Any = None, end_mark: Any = None, encoding: Any = None, + ) -> None: + Token.__init__(self, start_mark, end_mark) + self.encoding = encoding + + +class StreamEndToken(Token): + __slots__ = () + id = '<stream end>' + + +class BlockSequenceStartToken(Token): + __slots__ = () + id = '<block sequence start>' + + +class BlockMappingStartToken(Token): + __slots__ = () + id = '<block mapping start>' + + +class BlockEndToken(Token): + __slots__ = () + id = '<block end>' + + +class FlowSequenceStartToken(Token): + __slots__ = () 
+ id = '[' + + +class FlowMappingStartToken(Token): + __slots__ = () + id = '{' + + +class FlowSequenceEndToken(Token): + __slots__ = () + id = ']' + + +class FlowMappingEndToken(Token): + __slots__ = () + id = '}' + + +class KeyToken(Token): + __slots__ = () + id = '?' + +# def x__repr__(self): +# return f'KeyToken({self.start_mark.buffer[self.start_mark.index:].split(None, 1)[0]})' + + +class ValueToken(Token): + __slots__ = () + id = ':' + + +class BlockEntryToken(Token): + __slots__ = () + id = '-' + + +class FlowEntryToken(Token): + __slots__ = () + id = ',' + + +class AliasToken(Token): + __slots__ = ('value',) + id = '<alias>' + + def __init__(self, value: Any, start_mark: Any, end_mark: Any) -> None: + Token.__init__(self, start_mark, end_mark) + self.value = value + + +class AnchorToken(Token): + __slots__ = ('value',) + id = '<anchor>' + + def __init__(self, value: Any, start_mark: Any, end_mark: Any) -> None: + Token.__init__(self, start_mark, end_mark) + self.value = value + + +class TagToken(Token): + __slots__ = ('value',) + id = '<tag>' + + def __init__(self, value: Any, start_mark: Any, end_mark: Any) -> None: + Token.__init__(self, start_mark, end_mark) + self.value = value + + +class ScalarToken(Token): + __slots__ = 'value', 'plain', 'style' + id = '<scalar>' + + def __init__( + self, value: Any, plain: Any, start_mark: Any, end_mark: Any, style: Any = None, + ) -> None: + Token.__init__(self, start_mark, end_mark) + self.value = value + self.plain = plain + self.style = style + + +class CommentToken(Token): + __slots__ = '_value', '_column', 'pre_done' + id = '<comment>' + + def __init__( + self, value: Any, start_mark: Any = None, end_mark: Any = None, column: Any = None, + ) -> None: + if start_mark is None: + assert column is not None + self._column = column + Token.__init__(self, start_mark, None) # type: ignore + self._value = value + + @property + def value(self) -> str: + if isinstance(self._value, str): + return self._value + return 
"".join(self._value) + + @value.setter + def value(self, val: Any) -> None: + self._value = val + + def reset(self) -> None: + if hasattr(self, 'pre_done'): + delattr(self, 'pre_done') + + def __repr__(self) -> Any: + v = f'{self.value!r}' + if SHOW_LINES: + try: + v += ', line: ' + str(self.start_mark.line) + except: # NOQA + pass + try: + v += ', col: ' + str(self.start_mark.column) + except: # NOQA + pass + return f'CommentToken({v})' + + def __eq__(self, other: Any) -> bool: + if self.start_mark != other.start_mark: + return False + if self.end_mark != other.end_mark: + return False + if self.value != other.value: + return False + return True + + def __ne__(self, other: Any) -> bool: + return not self.__eq__(other) @@ -0,0 +1,257 @@ +# coding: utf-8 + +""" +some helper functions that might be generally useful +""" + +import datetime +from functools import partial +import re + + +from typing import Any, Dict, Optional, List, Text, Callable, Union # NOQA +from .compat import StreamTextType # NOQA + + +class LazyEval: + """ + Lightweight wrapper around lazily evaluated func(*args, **kwargs). + + func is only evaluated when any attribute of its return value is accessed. + Every attribute access is passed through to the wrapped value. + (This only excludes special cases like method-wrappers, e.g., __hash__.) + The sole additional attribute is the lazy_self function which holds the + return value (or, prior to evaluation, func and arguments), in its closure. 
+ """ + + def __init__(self, func: Callable[..., Any], *args: Any, **kwargs: Any) -> None: + def lazy_self() -> Any: + return_value = func(*args, **kwargs) + object.__setattr__(self, 'lazy_self', lambda: return_value) + return return_value + + object.__setattr__(self, 'lazy_self', lazy_self) + + def __getattribute__(self, name: str) -> Any: + lazy_self = object.__getattribute__(self, 'lazy_self') + if name == 'lazy_self': + return lazy_self + return getattr(lazy_self(), name) + + def __setattr__(self, name: str, value: Any) -> None: + setattr(self.lazy_self(), name, value) + + +RegExp = partial(LazyEval, re.compile) + +timestamp_regexp = RegExp( + """^(?P<year>[0-9][0-9][0-9][0-9]) + -(?P<month>[0-9][0-9]?) + -(?P<day>[0-9][0-9]?) + (?:((?P<t>[Tt])|[ \\t]+) # explictly not retaining extra spaces + (?P<hour>[0-9][0-9]?) + :(?P<minute>[0-9][0-9]) + :(?P<second>[0-9][0-9]) + (?:\\.(?P<fraction>[0-9]*))? + (?:[ \\t]*(?P<tz>Z|(?P<tz_sign>[-+])(?P<tz_hour>[0-9][0-9]?) + (?::(?P<tz_minute>[0-9][0-9]))?))?)?$""", + re.X, +) + + +def create_timestamp( + year: Any, + month: Any, + day: Any, + t: Any, + hour: Any, + minute: Any, + second: Any, + fraction: Any, + tz: Any, + tz_sign: Any, + tz_hour: Any, + tz_minute: Any, +) -> Union[datetime.datetime, datetime.date]: + # create a timestamp from match against timestamp_regexp + MAX_FRAC = 999999 + year = int(year) + month = int(month) + day = int(day) + if not hour: + return datetime.date(year, month, day) + hour = int(hour) + minute = int(minute) + second = int(second) + frac = 0 + if fraction: + frac_s = fraction[:6] + while len(frac_s) < 6: + frac_s += '0' + frac = int(frac_s) + if len(fraction) > 6 and int(fraction[6]) > 4: + frac += 1 + if frac > MAX_FRAC: + fraction = 0 + else: + fraction = frac + else: + fraction = 0 + delta = None + if tz_sign: + tz_hour = int(tz_hour) + tz_minute = int(tz_minute) if tz_minute else 0 + delta = datetime.timedelta( + hours=tz_hour, minutes=tz_minute, seconds=1 if frac > MAX_FRAC else 0, + 
) + if tz_sign == '-': + delta = -delta + elif frac > MAX_FRAC: + delta = -datetime.timedelta(seconds=1) + # should do something else instead (or hook this up to the preceding if statement + # in reverse + # if delta is None: + # return datetime.datetime(year, month, day, hour, minute, second, fraction) + # return datetime.datetime(year, month, day, hour, minute, second, fraction, + # datetime.timezone.utc) + # the above is not good enough though, should provide tzinfo. In Python3 that is easily + # doable drop that kind of support for Python2 as it has not native tzinfo + data = datetime.datetime(year, month, day, hour, minute, second, fraction) + if delta: + data -= delta + return data + + +# originally as comment +# https://github.com/pre-commit/pre-commit/pull/211#issuecomment-186466605 +# if you use this in your code, I suggest adding a test in your test suite +# that check this routines output against a known piece of your YAML +# before upgrades to this code break your round-tripped YAML +def load_yaml_guess_indent(stream: StreamTextType, **kw: Any) -> Any: + """guess the indent and block sequence indent of yaml stream/string + + returns round_trip_loaded stream, indent level, block sequence indent + - block sequence indent is the number of spaces before a dash relative to previous indent + - if there are no block sequences, indent is taken from nested mappings, block sequence + indent is unset (None) in that case + """ + from .main import YAML + + # load a YAML document, guess the indentation, if you use TABs you are on your own + def leading_spaces(line: Any) -> int: + idx = 0 + while idx < len(line) and line[idx] == ' ': + idx += 1 + return idx + + if isinstance(stream, str): + yaml_str: Any = stream + elif isinstance(stream, bytes): + # most likely, but the Reader checks BOM for this + yaml_str = stream.decode('utf-8') + else: + yaml_str = stream.read() + map_indent = None + indent = None # default if not found for some reason + block_seq_indent = None + 
prev_line_key_only = None + key_indent = 0 + for line in yaml_str.splitlines(): + rline = line.rstrip() + lline = rline.lstrip() + if lline.startswith('- '): + l_s = leading_spaces(line) + block_seq_indent = l_s - key_indent + idx = l_s + 1 + while line[idx] == ' ': # this will end as we rstripped + idx += 1 + if line[idx] == '#': # comment after - + continue + indent = idx - key_indent + break + if map_indent is None and prev_line_key_only is not None and rline: + idx = 0 + while line[idx] in ' -': + idx += 1 + if idx > prev_line_key_only: + map_indent = idx - prev_line_key_only + if rline.endswith(':'): + key_indent = leading_spaces(line) + idx = 0 + while line[idx] == ' ': # this will end on ':' + idx += 1 + prev_line_key_only = idx + continue + prev_line_key_only = None + if indent is None and map_indent is not None: + indent = map_indent + yaml = YAML() + return yaml.load(yaml_str, **kw), indent, block_seq_indent + + +def configobj_walker(cfg: Any) -> Any: + """ + walks over a ConfigObj (INI file with comments) generating + corresponding YAML output (including comments + """ + from configobj import ConfigObj # type: ignore + + assert isinstance(cfg, ConfigObj) + for c in cfg.initial_comment: + if c.strip(): + yield c + for s in _walk_section(cfg): + if s.strip(): + yield s + for c in cfg.final_comment: + if c.strip(): + yield c + + +def _walk_section(s: Any, level: int = 0) -> Any: + from configobj import Section + + assert isinstance(s, Section) + indent = ' ' * level + for name in s.scalars: + for c in s.comments[name]: + yield indent + c.strip() + x = s[name] + if '\n' in x: + i = indent + ' ' + x = '|\n' + i + x.strip().replace('\n', '\n' + i) + elif ':' in x: + x = "'" + x.replace("'", "''") + "'" + line = f'{indent}{name}: {x}' + c = s.inline_comments[name] + if c: + line += ' ' + c + yield line + for name in s.sections: + for c in s.comments[name]: + yield indent + c.strip() + line = f'{indent}{name}:' + c = s.inline_comments[name] + if c: + line += ' ' 
+ c + yield line + for val in _walk_section(s[name], level=level + 1): + yield val + + +# def config_obj_2_rt_yaml(cfg): +# from .comments import CommentedMap, CommentedSeq +# from configobj import ConfigObj +# assert isinstance(cfg, ConfigObj) +# #for c in cfg.initial_comment: +# # if c.strip(): +# # pass +# cm = CommentedMap() +# for name in s.sections: +# cm[name] = d = CommentedMap() +# +# +# #for c in cfg.final_comment: +# # if c.strip(): +# # yield c +# return cm |