"""Sort a simple YAML file, keeping blocks of comments and definitions together. We assume a strict subset of YAML that looks like: # block of header comments # here that should always # be at the top of the file # optional comments # can go here key: value key: value key: value In other words, we don't sort deeper than the top layer, and might corrupt complicated YAML files. """ import argparse from typing import List from typing import Optional from typing import Sequence QUOTES = ["'", '"'] def sort(lines: List[str]) -> List[str]: """Sort a YAML file in alphabetical order, keeping blocks together. :param lines: array of strings (without newlines) :return: sorted array of strings """ # make a copy of lines since we will clobber it lines = list(lines) new_lines = parse_block(lines, header=True) for block in sorted(parse_blocks(lines), key=first_key): if new_lines: new_lines.append('') new_lines.extend(block) return new_lines def parse_block(lines: List[str], header: bool = False) -> List[str]: """Parse and return a single block, popping off the start of `lines`. If parsing a header block, we stop after we reach a line that is not a comment. Otherwise, we stop after reaching an empty line. :param lines: list of lines :param header: whether we are parsing a header block :return: list of lines that form the single block """ block_lines = [] while lines and lines[0] and (not header or lines[0].startswith('#')): block_lines.append(lines.pop(0)) return block_lines def parse_blocks(lines: List[str]) -> List[List[str]]: """Parse and return all possible blocks, popping off the start of `lines`. :param lines: list of lines :return: list of blocks, where each block is a list of lines """ blocks = [] while lines: if lines[0] == '': lines.pop(0) else: blocks.append(parse_block(lines)) return blocks def first_key(lines: List[str]) -> str: """Returns a string representing the sort key of a block. The sort key is the first YAML key we encounter, ignoring comments, and stripping leading quotes. >>> print(test) # some comment 'foo': true >>> first_key(test) 'foo' """ for line in lines: if line.startswith('#'): continue if any(line.startswith(quote) for quote in QUOTES): return line[1:] return line else: return '' # not actually reached in reality def main(argv: Optional[Sequence[str]] = None) -> int: parser = argparse.ArgumentParser() parser.add_argument('filenames', nargs='*', help='Filenames to fix') args = parser.parse_args(argv) retval = 0 for filename in args.filenames: with open(filename, 'r+') as f: lines = [line.rstrip() for line in f.readlines()] new_lines = sort(lines) if lines != new_lines: print(f'Fixing file `{filename}`') f.seek(0) f.write('\n'.join(new_lines) + '\n') f.truncate() retval = 1 return retval if __name__ == '__main__': raise SystemExit(main())