# Source: pre_commit_hooks/sort_simple_yaml.py
# (blob 39f683e4844720f536c092a6913829b63be96aa6)
"""Sort a simple YAML file, keeping blocks of comments and definitions
together.

We assume a strict subset of YAML that looks like:

    # block of header comments
    # here that should always
    # be at the top of the file

    # optional comments
    # can go here
    key: value
    key: value

    key: value

In other words, we don't sort deeper than the top layer, and might corrupt
complicated YAML files.
"""
import argparse
from typing import List
from typing import Optional
from typing import Sequence


# Quote characters that may open a top-level key; `first_key` drops one
# leading occurrence of either when computing a block's sort key.
QUOTES = ["'", '"']


def sort(lines: List[str]) -> List[str]:
    """Return the lines of a simple YAML file with its blocks sorted.

    The leading header comment block (if any) stays at the top. Every other
    block is ordered by `first_key`, and blocks are separated from each other
    (and from the header) by exactly one empty line.

    :param lines: array of strings (without newlines); it is not modified
    :return: sorted array of strings
    """
    # The parsing helpers consume their input, so work on a private copy.
    remaining = list(lines)
    result = parse_block(remaining, header=True)

    ordered = parse_blocks(remaining)
    ordered.sort(key=first_key)

    for block in ordered:
        # Only emit a separator once something precedes this block.
        separator = [''] if result else []
        result += separator + block

    return result


def parse_block(lines: List[str], header: bool = False) -> List[str]:
    """Parse and return a single block, popping off the start of `lines`.

    If parsing a header block, we stop at the first line that is empty or is
    not a comment. Otherwise, we stop at the first empty line. The line that
    ends the block is left in `lines`.

    :param lines: list of lines; the returned lines are removed from its front
    :param header: whether we are parsing a header block
    :return: list of lines that form the single block (possibly empty)
    """
    end = 0
    for line in lines:
        if not line or (header and not line.startswith('#')):
            break
        end += 1

    # Remove the block with one slice deletion instead of a pop(0) per line,
    # which would shift the rest of the list on every iteration.
    block_lines = lines[:end]
    del lines[:end]
    return block_lines


def parse_blocks(lines: List[str]) -> List[List[str]]:
    """Parse and return all possible blocks, popping off the start of `lines`.

    A block is a maximal run of consecutive non-empty lines; empty lines only
    act as separators and are discarded. All of `lines` is consumed, so the
    list is empty when this function returns.

    :param lines: list of lines; emptied as a side effect
    :return: list of blocks, where each block is a list of lines
    """
    blocks: List[List[str]] = []
    current: List[str] = []

    # Single forward pass: avoids the repeated pop(0) calls that made the
    # previous implementation quadratic in the number of lines.
    for line in lines:
        if line:
            current.append(line)
        elif current:
            blocks.append(current)
            current = []
    if current:
        blocks.append(current)

    # Keep the documented contract that the input list is consumed.
    del lines[:]
    return blocks


def first_key(lines: List[str]) -> str:
    """Return the string used to order a block when sorting.

    The sort key is the first line of the block that is not a comment, with
    one leading quote character (any entry of `QUOTES`) removed. The rest of
    the line is kept as-is, so for the block

        # some comment
        'foo': true

    the key is the text after the opening quote, i.e. ``foo': true``.

    :param lines: the lines of a single block
    :return: the sort key, or '' when the block has no non-comment line
        (an empty block, or one made up only of comments)
    """
    non_comments = (entry for entry in lines if not entry.startswith('#'))
    head = next(non_comments, None)

    if head is None:
        return ''
    if head.startswith(tuple(QUOTES)):
        return head[1:]
    return head


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Sort each given simple YAML file in place.

    Every file is read, passed through `sort`, and rewritten only when the
    sorted result differs from the right-stripped lines that were read.

    :param argv: command-line arguments; when None, argparse reads them from
        the process's command line
    :return: 1 if at least one file was rewritten, otherwise 0
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('filenames', nargs='*', help='Filenames to fix')
    args = parser.parse_args(argv)

    retval = 0

    for filename in args.filenames:
        # YAML streams are UTF-8 unless marked otherwise, so decode and encode
        # explicitly instead of relying on the locale's default encoding,
        # which can fail or corrupt non-ASCII text on non-UTF-8 systems.
        with open(filename, 'r+', encoding='utf-8') as f:
            lines = [line.rstrip() for line in f]
            new_lines = sort(lines)

            if lines != new_lines:
                print(f'Fixing file `{filename}`')
                # Overwrite from the start, then drop any leftover tail in
                # case the new content is shorter than the old.
                f.seek(0)
                f.write('\n'.join(new_lines) + '\n')
                f.truncate()
                retval = 1

    return retval


# When run as a script, use main()'s return value as the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())