From ccc606e17ceea4cdc9666131506c19b74a528d1a Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 4 Oct 2020 16:50:35 +0200 Subject: Merging upstream version 1.5.5. Signed-off-by: Daniel Baumann --- README.md | 1 + identify/extensions.py | 32 ++++++++++++++++---------- identify/identify.py | 23 +++++++++++++++++++ identify/interpreters.py | 5 ++++ setup.cfg | 2 +- tests/identify_test.py | 60 ++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 110 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 7a58a1f..17aabd2 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,7 @@ If you have an actual file on disk, you can get the most information possible (a superset of all other methods): ```python +>>> from identify import identify >>> identify.tags_from_path('/path/to/file.py') {'file', 'text', 'python', 'non-executable'} >>> identify.tags_from_path('/path/to/file-with-shebang') diff --git a/identify/extensions.py b/identify/extensions.py index 4475a41..1110ccb 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -10,11 +10,11 @@ EXTENSIONS = { 'asar': {'binary', 'asar'}, 'bash': {'text', 'shell', 'bash'}, 'bat': {'text', 'batch'}, + 'bib': {'text', 'bib'}, 'bmp': {'binary', 'image', 'bitmap'}, 'bz2': {'binary', 'bzip2'}, 'c': {'text', 'c'}, 'cc': {'text', 'c++'}, - 'cu': {'text', 'cuda'}, 'cfg': {'text'}, 'chs': {'text', 'c2hs'}, 'clj': {'text', 'clojure'}, @@ -31,6 +31,7 @@ EXTENSIONS = { 'cson': {'text', 'cson'}, 'css': {'text', 'css'}, 'csv': {'text', 'csv'}, + 'cu': {'text', 'cuda'}, 'cxx': {'text', 'c++'}, 'dart': {'text', 'dart'}, 'def': {'text', 'def'}, @@ -89,6 +90,7 @@ EXTENSIONS = { 'key': {'text', 'pem'}, 'kml': {'text', 'kml', 'xml'}, 'kt': {'text', 'kotlin'}, + 'lean': {'text', 'lean'}, 'less': {'text', 'less'}, 'lhs': {'text', 'literate-haskell'}, 'libsonnet': {'text', 'jsonnet'}, @@ -130,31 +132,32 @@ EXTENSIONS = { 'proto': {'text', 'proto'}, 'puml': {'text', 'plantuml'}, 'purs': {'text', 'purescript'}, + 'pxd': {'text', 'cython'}, + 'pxi': {'text', 'cython'}, 'py': {'text', 'python'}, 'pyi': {'text', 'pyi'}, 'pyx': {'text', 'cython'}, 'pyz': {'binary', 'pyz'}, 'pyzw': {'binary', 'pyz'}, - 'pxd': {'text', 'cython'}, - 'pxi': {'text', 'cython'}, 'r': {'text', 'r'}, 'rb': {'text', 'ruby'}, 'rs': {'text', 'rust'}, 'rst': {'text', 'rst'}, 's': {'text', 'asm'}, + 'sass': {'text', 'sass'}, 'sbt': {'text', 'sbt', 'scala'}, 'sc': {'text', 'scala'}, 'scala': {'text', 'scala'}, - 'scss': {'text', 'scss'}, 'scm': {'text', 'scheme'}, + 'scss': {'text', 'scss'}, 'sh': {'text', 'shell'}, 'sls': {'text', 'salt'}, 'so': {'binary'}, 'sol': {'text', 'solidity'}, 'spec': {'text', 'spec'}, + 'sql': {'text', 'sql'}, 'ss': {'text', 'scheme'}, 'styl': {'text', 'stylus'}, - 'sql': {'text', 'sql'}, 'sv': {'text', 'system-verilog'}, 'svg': {'text', 'image', 'svg', 'xml'}, 'svh': {'text', 'system-verilog'}, @@ -163,15 +166,17 @@ EXTENSIONS = { 'swiftdeps': {'text', 'swiftdeps'}, 'tac': {'text', 'twisted', 'python'}, 'tar': {'binary', 'tar'}, + 'tex': {'text', 'tex'}, + 'tf': {'text', 'terraform'}, + 'tfvars': {'text', 'terraform'}, 'tgz': {'binary', 'gzip'}, 'thrift': {'text', 'thrift'}, 'tiff': {'binary', 'image', 'tiff'}, 'toml': {'text', 'toml'}, - 'tf': {'text', 'terraform'}, - 'tfvars': {'text', 'terraform'}, 'ts': {'text', 'ts'}, 'tsx': {'text', 'tsx'}, 'ttf': {'binary', 'ttf'}, + 'txsprofile': {'text', 'ini', 'txsprofile'}, 'txt': {'text', 'plain-text'}, 'v': {'text', 'verilog'}, 'vdx': {'text', 'vdx'}, @@ -181,11 +186,12 @@ EXTENSIONS = { 'vue': {'text', 'vue'}, 'war': {'binary', 'zip', 'jar'}, 'wav': {'binary', 'audio', 'wav'}, - 'wkt': {'text', 'wkt'}, 'whl': {'binary', 'wheel', 'zip'}, + 'wkt': {'text', 'wkt'}, 'woff': {'binary', 'woff'}, 'woff2': {'binary', 'woff2'}, 'wsgi': {'text', 'wsgi', 'python'}, + 'xhtml': {'text', 'xml', 'html', 'xhtml'}, 'xml': {'text', 'xml'}, 'xq': {'text', 'xquery'}, 'xql': {'text', 'xquery'}, @@ -209,30 +215,32 @@ EXTENSIONS_NEED_BINARY_CHECK = { NAMES = { '.babelrc': EXTENSIONS['json'] | {'babelrc'}, - '.bashrc': EXTENSIONS['bash'], '.bash_aliases': EXTENSIONS['bash'], '.bash_profile': EXTENSIONS['bash'], + '.bashrc': EXTENSIONS['bash'], '.bowerrc': EXTENSIONS['json'] | {'bowerrc'}, '.coveragerc': EXTENSIONS['ini'] | {'coveragerc'}, '.cshrc': EXTENSIONS['csh'], '.dockerignore': {'text', 'dockerignore'}, '.editorconfig': {'text', 'editorconfig'}, - '.gitconfig': EXTENSIONS['ini'] | {'gitconfig'}, - '.hgrc': EXTENSIONS['ini'] | {'hgrc'}, + '.flake8': EXTENSIONS['ini'] | {'flake8'}, '.gitattributes': {'text', 'gitattributes'}, + '.gitconfig': EXTENSIONS['ini'] | {'gitconfig'}, '.gitignore': {'text', 'gitignore'}, '.gitmodules': {'text', 'gitmodules'}, + '.hgrc': EXTENSIONS['ini'] | {'hgrc'}, '.jshintrc': EXTENSIONS['json'] | {'jshintrc'}, '.mailmap': {'text', 'mailmap'}, '.mention-bot': EXTENSIONS['json'] | {'mention-bot'}, '.npmignore': {'text', 'npmignore'}, '.pdbrc': EXTENSIONS['py'] | {'pdbrc'}, '.pypirc': EXTENSIONS['ini'] | {'pypirc'}, + '.rstcheck.cfg': EXTENSIONS['ini'], '.yamllint': EXTENSIONS['yaml'] | {'yamllint'}, '.zshrc': EXTENSIONS['zsh'], 'AUTHORS': EXTENSIONS['txt'], - 'BUILD.bazel': {'text', 'bazel'}, 'BUILD': {'text', 'bazel'}, + 'BUILD.bazel': {'text', 'bazel'}, 'CMakeLists.txt': EXTENSIONS['cmake'], 'COPYING': EXTENSIONS['txt'], 'Dockerfile': {'text', 'dockerfile'}, diff --git a/identify/identify.py b/identify/identify.py index 8a21d8b..1c0e677 100644 --- a/identify/identify.py +++ b/identify/identify.py @@ -141,6 +141,27 @@ def _shebang_split(line): return line.split() +def _parse_nix_shebang(bytesio, cmd): + while bytesio.read(2) == b'#!': + next_line = bytesio.readline() + try: + next_line = next_line.decode('UTF-8') + except UnicodeDecodeError: + return cmd + + for c in next_line: + if c not in printable: + return cmd + + line_tokens = tuple(_shebang_split(next_line.strip())) + for i, token in enumerate(line_tokens[:-1]): + if token != '-i': + continue + # the argument to -i flag + cmd = (line_tokens[i + 1],) + return cmd + + def parse_shebang(bytesio): """Parse the shebang from a file opened for reading binary.""" if bytesio.read(2) != b'#!': @@ -159,6 +180,8 @@ def parse_shebang(bytesio): cmd = tuple(_shebang_split(first_line.strip())) if cmd and cmd[0] == '/usr/bin/env': cmd = cmd[1:] + if cmd == ('nix-shell',): + return _parse_nix_shebang(bytesio, cmd) return cmd diff --git a/identify/interpreters.py b/identify/interpreters.py index bcffb5a..7feb4b1 100644 --- a/identify/interpreters.py +++ b/identify/interpreters.py @@ -3,9 +3,14 @@ from __future__ import absolute_import from __future__ import unicode_literals INTERPRETERS = { + 'ash': {'shell', 'ash'}, + 'awk': {'awk'}, 'bash': {'shell', 'bash'}, + 'bats': {'shell', 'bash', 'bats'}, 'csh': {'shell', 'csh'}, 'dash': {'shell', 'dash'}, + 'expect': {'expect'}, + 'ksh': {'shell', 'ksh'}, 'node': {'javascript'}, 'nodejs': {'javascript'}, 'perl': {'perl'}, diff --git a/setup.cfg b/setup.cfg index ea0b137..c742f5a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 1.4.29 +version = 1.5.5 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown diff --git a/tests/identify_test.py b/tests/identify_test.py index 44406a1..a80c401 100644 --- a/tests/identify_test.py +++ b/tests/identify_test.py @@ -217,6 +217,66 @@ def test_file_is_text_does_not_exist(tmpdir): (b"#!/path'with/quotes y", ("/path'with/quotes", 'y')), # Don't regress on leading/trailing ws (b"#! /path'with/quotes y ", ("/path'with/quotes", 'y')), + # Test nix-shell specialites with shebang on second line + ( + b'#! /usr/bin/env nix-shell\n' + b'#! nix-shell -i bash -p python', + ('bash',), + ), + ( + b'#! /usr/bin/env nix-shell\n' + b'#! nix-shell -i python -p coreutils', + ('python',), + ), + ( + b'#! /usr/bin/env nix-shell\n' + b'#! nix-shell -p coreutils -i python', + ('python',), + ), + # multi-line and no whitespace variation + ( + b'#! /usr/bin/env nix-shell\n' + b'#! nix-shell -p coreutils\n' + b'#! nix-shell -i python', + ('python',), + ), + ( + b'#! /usr/bin/env nix-shell\n' + b'#!nix-shell -p coreutils\n' + b'#!nix-shell -i python', + ('python',), + ), + ( + b'#! /usr/bin/env nix-shell\n' + b'#!\xf9\x93\x01\x42\xcd', + ('nix-shell',), + ), + ( + b'#! /usr/bin/env nix-shell\n' + b'#!\x00\x00\x00\x00', + ('nix-shell',), + ), + # non-proper nix-shell + (b'#! /usr/bin/nix-shell', ('/usr/bin/nix-shell',)), + (b'#! /usr/bin/env nix-shell', ('nix-shell',)), + ( + b'#! /usr/bin/env nix-shell non-portable-argument', + ('nix-shell', 'non-portable-argument'), + ), + ( + b'#! /usr/bin/env nix-shell\n' + b'#! nix-shell -i', + ('nix-shell',), # guard against index error + ), + # interpret quotes correctly + ( + b'#!/usr/bin/env nix-shell\n' + b'#!nix-shell --argstr x "a -i python3 p"\n' + b'#!nix-shell -p hello\n' + b'#!nix-shell -i bash\n' + b'#!nix-shell --argstr y "b -i runhaskell q"', + ('bash',), + ), (b'\xf9\x93\x01\x42\xcd', ()), (b'#!\xf9\x93\x01\x42\xcd', ()), (b'#!\x00\x00\x00\x00', ()), -- cgit v1.2.3