summaryrefslogtreecommitdiffstats
path: root/pre_commit_hooks/check_docstring_first.py
diff options
context:
space:
mode:
Diffstat (limited to 'pre_commit_hooks/check_docstring_first.py')
-rw-r--r--pre_commit_hooks/check_docstring_first.py60
1 files changed, 60 insertions, 0 deletions
diff --git a/pre_commit_hooks/check_docstring_first.py b/pre_commit_hooks/check_docstring_first.py
new file mode 100644
index 0000000..875c0fb
--- /dev/null
+++ b/pre_commit_hooks/check_docstring_first.py
@@ -0,0 +1,60 @@
+import argparse
+import io
+import tokenize
+from tokenize import tokenize as tokenize_tokenize
+from typing import Optional
+from typing import Sequence
+
+NON_CODE_TOKENS = frozenset((
+ tokenize.COMMENT, tokenize.ENDMARKER, tokenize.NEWLINE, tokenize.NL,
+ tokenize.ENCODING,
+))
+
+
+def check_docstring_first(src: bytes, filename: str = '<unknown>') -> int:
+ """Returns nonzero if the source has what looks like a docstring that is
+ not at the beginning of the source.
+
+ A string will be considered a docstring if it is a STRING token with a
+ col offset of 0.
+ """
+ found_docstring_line = None
+ found_code_line = None
+
+ tok_gen = tokenize_tokenize(io.BytesIO(src).readline)
+ for tok_type, _, (sline, scol), _, _ in tok_gen:
+ # Looks like a docstring!
+ if tok_type == tokenize.STRING and scol == 0:
+ if found_docstring_line is not None:
+ print(
+ f'{filename}:{sline} Multiple module docstrings '
+ f'(first docstring on line {found_docstring_line}).',
+ )
+ return 1
+ elif found_code_line is not None:
+ print(
+ f'{filename}:{sline} Module docstring appears after code '
+ f'(code seen on line {found_code_line}).',
+ )
+ return 1
+ else:
+ found_docstring_line = sline
+ elif tok_type not in NON_CODE_TOKENS and found_code_line is None:
+ found_code_line = sline
+
+ return 0
+
+
+def main(argv: Optional[Sequence[str]] = None) -> int:
+ parser = argparse.ArgumentParser()
+ parser.add_argument('filenames', nargs='*')
+ args = parser.parse_args(argv)
+
+ retv = 0
+
+ for filename in args.filenames:
+ with open(filename, 'rb') as f:
+ contents = f.read()
+ retv |= check_docstring_first(contents, filename=filename)
+
+ return retv