From e7ee850d46d54789979bf0c5244bae1825fb7149 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 14 Apr 2024 22:19:53 +0200 Subject: Adding upstream version 0.91.0. Signed-off-by: Daniel Baumann --- _doc/example.rst | 332 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 332 insertions(+) create mode 100644 _doc/example.rst (limited to '_doc/example.rst') diff --git a/_doc/example.rst b/_doc/example.rst new file mode 100644 index 0000000..e9bdeaa --- /dev/null +++ b/_doc/example.rst @@ -0,0 +1,332 @@ +******** +Examples +******** + +Basic round trip of parsing YAML to Python objects, modifying +and generating YAML:: + + import sys + from ruyaml import YAML + + inp = """\ + # example + name: + # details + family: Smith # very common + given: Alice # one of the siblings + """ + + yaml = YAML() + code = yaml.load(inp) + code['name']['given'] = 'Bob' + + yaml.dump(code, sys.stdout) + +Resulting in:: + + # example + name: + # details + family: Smith # very common + given: Bob # one of the siblings + +with the old API:: + + from __future__ import print_function + + import sys + import ruyaml + + inp = """\ + # example + name: + # details + family: Smith # very common + given: Alice # one of the siblings + """ + + code = ruyaml.load(inp, ruyaml.RoundTripLoader) + code['name']['given'] = 'Bob' + + ruyaml.dump(code, sys.stdout, Dumper=ruyaml.RoundTripDumper) + + # the last statement can be done less efficient in time and memory with + # leaving out the end='' would cause a double newline at the end + # print(ruyaml.dump(code, Dumper=ruyaml.RoundTripDumper), end='') + +Resulting in :: + + # example + name: + # details + family: Smith # very common + given: Bob # one of the siblings + +---- + +YAML handcrafted anchors and references as well as key merging +are preserved. The merged keys can transparently be accessed +using ``[]`` and ``.get()``:: + + from ruyaml import YAML + + inp = """\ + - &CENTER {x: 1, y: 2} + - &LEFT {x: 0, y: 2} + - &BIG {r: 10} + - &SMALL {r: 1} + # All the following maps are equal: + # Explicit keys + - x: 1 + y: 2 + r: 10 + label: center/big + # Merge one map + - <<: *CENTER + r: 10 + label: center/big + # Merge multiple maps + - <<: [*CENTER, *BIG] + label: center/big + # Override + - <<: [*BIG, *LEFT, *SMALL] + x: 1 + label: center/big + """ + + yaml = YAML() + data = yaml.load(inp) + assert data[7]['y'] == 2 + + +The ``CommentedMap``, which is the ``dict`` like construct one gets when round-trip loading, +supports insertion of a key into a particular position, while optionally adding a comment:: + + import sys + from ruyaml import YAML + + yaml_str = """\ + first_name: Art + occupation: Architect # This is an occupation comment + about: Art Vandelay is a fictional character that George invents... + """ + + yaml = YAML() + data = yaml.load(yaml_str) + data.insert(1, 'last name', 'Vandelay', comment="new key") + yaml.dump(data, sys.stdout) + +gives:: + + first_name: Art + last name: Vandelay # new key + occupation: Architect # This is an occupation comment + about: Art Vandelay is a fictional character that George invents... + +Please note that the comment is aligned with that of its neighbour (if available). + +The above was inspired by a `question `_ +posted by *demux* on StackOverflow. + +---- + +By default ``ruyaml`` indents with two positions in block style, for +both mappings and sequences. For sequences the indent is counted to the +beginning of the scalar, with the dash taking the first position of the +indented "space". + +You can change this default indentation by e.g. using ``yaml.indent()``:: + + import sys + from ruyaml import YAML + + d = dict(a=dict(b=2),c=[3, 4]) + yaml = YAML() + yaml.dump(d, sys.stdout) + print('0123456789') + yaml = YAML() + yaml.indent(mapping=4, sequence=6, offset=3) + yaml.dump(d, sys.stdout) + print('0123456789') + + +giving:: + + a: + b: 2 + c: + - 3 + - 4 + 0123456789 + a: + b: 2 + c: + - 3 + - 4 + 0123456789 + + +If a block sequence or block mapping is the element of a sequence, the +are, by default, displayed `compact +`__ notation. This means +that the dash of the "parent" sequence is on the same line as the +first element resp. first key/value pair of the child collection. + +If you want either or both of these (sequence within sequence, mapping +within sequence) to begin on the next line use ``yaml.compact()``:: + + import sys + from ruyaml import YAML + + d = [dict(b=2), [3, 4]] + yaml = YAML() + yaml.dump(d, sys.stdout) + print('='*15) + yaml = YAML() + yaml.compact(seq_seq=False, seq_map=False) + yaml.dump(d, sys.stdout) + + +giving:: + + - b: 2 + - - 3 + - 4 + =============== + - + b: 2 + - + - 3 + - 4 + + +------ + +The following program uses three dumps on the same data, resulting in a stream with +three documents:: + + import sys + from ruyaml import YAML + + data = {1: {1: [{1: 1, 2: 2}, {1: 1, 2: 2}], 2: 2}, 2: 42} + + yaml = YAML() + yaml.explicit_start = True + yaml.dump(data, sys.stdout) + yaml.indent(sequence=4, offset=2) + yaml.dump(data, sys.stdout) + + + def sequence_indent_four(s): + # this will fail on direclty nested lists: {1; [[2, 3], 4]} + levels = [] + ret_val = '' + for line in s.splitlines(True): + ls = line.lstrip() + indent = len(line) - len(ls) + if ls.startswith('- '): + if not levels or indent > levels[-1]: + levels.append(indent) + elif levels: + if indent < levels[-1]: + levels = levels[:-1] + # same -> do nothing + else: + if levels: + if indent <= levels[-1]: + while levels and indent <= levels[-1]: + levels = levels[:-1] + ret_val += ' ' * len(levels) + line + return ret_val + + yaml = YAML() + yaml.explicit_start = True + yaml.dump(data, sys.stdout, transform=sequence_indent_four) + +gives as output:: + + --- + 1: + 1: + - 1: 1 + 2: 2 + - 1: 1 + 2: 2 + 2: 2 + 2: 42 + --- + 1: + 1: + - 1: 1 + 2: 2 + - 1: 1 + 2: 2 + 2: 2 + 2: 42 + --- + 1: + 1: + - 1: 1 + 2: 2 + - 1: 1 + 2: 2 + 2: 2 + 2: 42 + + +The transform example, in the last document, was inspired by a +`question posted by *nowox* +`_ on StackOverflow. + +----- + +Output of ``dump()`` as a string +++++++++++++++++++++++++++++++++ + +The single most abused "feature" of the old API is not providing the (second) +stream parameter to one of the ``dump()`` variants, in order to get a monolithic string +representation of the stream back. + +Apart from being memory inefficient and slow, quite often people using this did not +realise that ``print(round_trip_dump(dict(a=1, b=2)))`` gets you an extra, +empty, line after ``b: 2``. + +The real question is why this functionality, which is seldom really +necessary, is available in the old API (and in PyYAML) in the first place. One +explanation you get by looking at what someone would need to do to make this +available if it weren't there already. Apart from subclassing the ``Serializer`` +and providing a new ``dump`` method, which would ten or so lines, another +**hundred** lines, essentially the whole ``dumper.py`` file, would need to be +copied and to make use of this serializer. + +The fact is that one should normally be doing ``round_trip_dump(dict(a=1, b=2)), +sys.stdout)`` and do away with 90% of the cases for returning the string, and +that all post-processing YAML, before writing to stream, can be handled by using +the ``transform=`` parameter of dump, being able to handle most of the rest. But +it is also much easier in the new API to provide that YAML output as a string if +you really need to have it (or think you do):: + + import sys + from ruyaml import YAML + from io import StringIO + + class MyYAML(YAML): + def dump(self, data, stream=None, **kw): + inefficient = False + if stream is None: + inefficient = True + stream = StringIO() + YAML.dump(self, data, stream, **kw) + if inefficient: + return stream.getvalue() + + yaml = MyYAML() # or typ='safe'/'unsafe' etc + +with about one tenth of the lines needed for the old interface, you can once more do:: + + print(yaml.dump(dict(a=1, b=2))) + +instead of:: + + yaml.dump((dict(a=1, b=2)), sys.stdout) + print() # or sys.stdout.write('\n') -- cgit v1.2.3