summaryrefslogtreecommitdiffstats
path: root/pre_commit_hooks/sort_simple_yaml.py
diff options
context:
space:
mode:
Diffstat (limited to 'pre_commit_hooks/sort_simple_yaml.py')
-rw-r--r--pre_commit_hooks/sort_simple_yaml.py125
1 files changed, 125 insertions, 0 deletions
diff --git a/pre_commit_hooks/sort_simple_yaml.py b/pre_commit_hooks/sort_simple_yaml.py
new file mode 100644
index 0000000..39f683e
--- /dev/null
+++ b/pre_commit_hooks/sort_simple_yaml.py
@@ -0,0 +1,125 @@
+"""Sort a simple YAML file, keeping blocks of comments and definitions
+together.
+
+We assume a strict subset of YAML that looks like:
+
+ # block of header comments
+ # here that should always
+ # be at the top of the file
+
+ # optional comments
+ # can go here
+ key: value
+ key: value
+
+ key: value
+
+In other words, we don't sort deeper than the top layer, and might corrupt
+complicated YAML files.
+"""
+import argparse
+from typing import List
+from typing import Optional
+from typing import Sequence
+
+
# Characters that may open a quoted top-level YAML key. `first_key` drops one
# such leading character when it computes a block's sort key.
QUOTES = [
    "'",
    '"',
]
+
+
def sort(lines: List[str]) -> List[str]:
    """Return the lines of a simple YAML file with its blocks sorted.

    The leading run of comment lines (the header) stays at the top. Every
    following block is ordered by its `first_key`, and consecutive blocks are
    separated by exactly one empty line.

    :param lines: array of strings (without newlines)
    :return: sorted array of strings
    """
    # The parsing helpers consume their input, so work on a private copy.
    remaining = list(lines)
    result = parse_block(remaining, header=True)

    blocks = parse_blocks(remaining)
    blocks.sort(key=first_key)

    for block in blocks:
        # Only emit a separator when something already precedes this block.
        if result:
            result.append('')
        result += block

    return result
+
+
def parse_block(lines: List[str], header: bool = False) -> List[str]:
    """Parse and return a single block, removing it from the start of `lines`.

    A block ends at the first empty line. When parsing a header block it
    also ends at the first line that is not a comment. The terminating line
    itself is left in `lines`.

    :param lines: list of lines; mutated in place (the block is removed)
    :param header: whether we are parsing a header block
    :return: list of lines that form the single block (possibly empty)
    """
    # Find where the block ends first, then cut it off with one slice
    # deletion. Popping index 0 line by line shifts the whole list each time.
    end = 0
    for line in lines:
        if not line or (header and not line.startswith('#')):
            break
        end += 1

    block_lines = lines[:end]
    del lines[:end]
    return block_lines
+
+
def parse_blocks(lines: List[str]) -> List[List[str]]:
    """Parse and return all blocks, consuming every entry of `lines`.

    A block is a maximal run of consecutive non-empty lines. Empty lines only
    act as separators and are discarded.

    :param lines: list of lines; mutated in place (left empty on return)
    :return: list of blocks, where each block is a list of lines
    """
    blocks: List[List[str]] = []
    current: List[str] = []

    # Group in a single pass rather than popping from the front of the list,
    # which shifts every remaining element on each call.
    for line in lines:
        if line:
            current.append(line)
        elif current:
            blocks.append(current)
            current = []
    if current:
        blocks.append(current)

    # Callers rely on the input being fully consumed.
    del lines[:]
    return blocks
+
+
def first_key(lines: List[str]) -> str:
    """Return the string used to order a block when sorting.

    The sort key is the first line of the block that is not a comment. If
    that line begins with one of the characters in `QUOTES`, that single
    leading character is dropped. The rest of the line, including any
    closing quote and the value, is kept as is.

    >>> print(test)
    # some comment
    'foo': true
    >>> first_key(test)
    "foo': true"

    :param lines: the lines of one block
    :return: the sort key, or '' when the block has no non-comment line
    """
    quote_chars = tuple(QUOTES)
    for candidate in lines:
        if candidate.startswith('#'):
            continue
        if candidate.startswith(quote_chars):
            return candidate[1:]
        return candidate
    # Only reached for a block made up entirely of comments (or no lines).
    return ''
+
+
def main(argv: Optional[Sequence[str]] = None) -> int:
    """Sort each file named on the command line, rewriting it if needed.

    Each file is read with trailing whitespace stripped from every line and
    passed through `sort`. If the result differs from the stripped lines, the
    file is overwritten in place with the sorted lines joined by newlines and
    a final newline.

    :param argv: argument list to parse; None lets argparse use sys.argv
    :return: 1 if at least one file was rewritten, otherwise 0
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('filenames', nargs='*', help='Filenames to fix')
    args = parser.parse_args(argv)

    changed = False

    for filename in args.filenames:
        with open(filename, 'r+') as handle:
            original = [raw.rstrip() for raw in handle]
            ordered = sort(original)

            if original == ordered:
                continue

            print(f'Fixing file `{filename}`')
            # Rewind, overwrite, then cut off whatever remains of the old
            # content past the new end of file.
            handle.seek(0)
            handle.write('\n'.join(ordered) + '\n')
            handle.truncate()
            changed = True

    return int(changed)


if __name__ == '__main__':
    raise SystemExit(main())