diff options
Diffstat (limited to 'pre_commit_hooks/check_docstring_first.py')
-rw-r--r-- | pre_commit_hooks/check_docstring_first.py | 61 |
1 files changed, 61 insertions, 0 deletions
# pre_commit_hooks/check_docstring_first.py
#
# Pre-commit hook that flags Python files whose module docstring is not the
# first statement, or that contain more than one docstring-like string at
# module level.
from __future__ import annotations

import argparse
import io
import tokenize
from tokenize import tokenize as tokenize_tokenize
from typing import Sequence

# Tokens that never count as "code" when deciding whether a docstring was
# preceded by code.
NON_CODE_TOKENS = frozenset((
    tokenize.COMMENT, tokenize.ENDMARKER, tokenize.NEWLINE, tokenize.NL,
    tokenize.ENCODING,
))

# Tokens that may sit between the end of one logical line (NEWLINE) and the
# first real token of the next one without starting a statement themselves.
_LAYOUT_TOKENS = frozenset((
    tokenize.COMMENT, tokenize.ENDMARKER, tokenize.NL, tokenize.ENCODING,
    tokenize.INDENT, tokenize.DEDENT,
))


def check_docstring_first(src: bytes, filename: str = '<unknown>') -> int:
    """Return nonzero if the source has what looks like a misplaced docstring.

    A string is considered a docstring if it is a STRING token with a col
    offset of 0 *and* it is the first token of a logical line.  Strings at
    column 0 that merely continue an expression (inside brackets, or after a
    backslash continuation) are ignored.

    Args:
        src: Raw bytes of the Python source to inspect.
        filename: Name used as the prefix of the printed diagnostics.

    Returns:
        1 if a docstring-like string appears after code or after another
        docstring-like string (a message is printed to stdout), else 0.

    Raises:
        Whatever ``tokenize.tokenize`` raises for source it cannot tokenize;
        such errors are deliberately not swallowed here.
    """
    found_docstring_line = None
    found_code_line = None
    # True while no significant token has been seen on the current logical
    # line.  Only NEWLINE ends a logical line; NL inside brackets does not.
    at_statement_start = True

    tok_gen = tokenize_tokenize(io.BytesIO(src).readline)
    for tok_type, _, (sline, scol), _, _ in tok_gen:
        begins_statement = at_statement_start
        if tok_type == tokenize.NEWLINE:
            at_statement_start = True
        elif tok_type not in _LAYOUT_TOKENS:
            at_statement_start = False

        if tok_type == tokenize.STRING and scol == 0:
            if not begins_statement:
                # Continuation of an enclosing expression (e.g. an element
                # of a multi-line list, or an implicitly concatenated
                # string) -- not a docstring candidate.
                continue
            # Looks like a docstring!
            if found_docstring_line is not None:
                print(
                    f'{filename}:{sline}: Multiple module docstrings '
                    f'(first docstring on line {found_docstring_line}).',
                )
                return 1
            elif found_code_line is not None:
                print(
                    f'{filename}:{sline}: Module docstring appears after code '
                    f'(code seen on line {found_code_line}).',
                )
                return 1
            else:
                found_docstring_line = sline
        elif tok_type not in NON_CODE_TOKENS and found_code_line is None:
            # Remember only the first line of code; it is what the
            # diagnostic reports.
            found_code_line = sline

    return 0


def main(argv: Sequence[str] | None = None) -> int:
    """Run the docstring-first check over every file named in ``argv``.

    Args:
        argv: Command-line arguments (file names).  ``None`` lets argparse
            fall back to ``sys.argv[1:]``.

    Returns:
        0 if every file passes, 1 if at least one file fails.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('filenames', nargs='*')
    args = parser.parse_args(argv)

    retv = 0

    for filename in args.filenames:
        # Read bytes so tokenize can detect the source encoding itself.
        with open(filename, 'rb') as f:
            contents = f.read()
        retv |= check_docstring_first(contents, filename=filename)

    return retv


if __name__ == '__main__':
    raise SystemExit(main())