summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2020-10-04 14:50:35 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2020-10-04 14:50:35 +0000
commit: ccc606e17ceea4cdc9666131506c19b74a528d1a (patch)
tree: a00063fa71c7561768e1bae568e323b3030b2839
parent: Releasing debian version 1.4.29-1. (diff)
download: identify-ccc606e17ceea4cdc9666131506c19b74a528d1a.tar.xz
download: identify-ccc606e17ceea4cdc9666131506c19b74a528d1a.zip
Merging upstream version 1.5.5.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
-rw-r--r-- README.md 1
-rw-r--r-- identify/extensions.py 32
-rw-r--r-- identify/identify.py 23
-rw-r--r-- identify/interpreters.py 5
-rw-r--r-- setup.cfg 2
-rw-r--r-- tests/identify_test.py 60
6 files changed, 110 insertions, 13 deletions
diff --git a/README.md b/README.md
index 7a58a1f..17aabd2 100644
--- a/README.md
+++ b/README.md
@@ -21,6 +21,7 @@ If you have an actual file on disk, you can get the most information possible
(a superset of all other methods):
```python
+>>> from identify import identify
>>> identify.tags_from_path('/path/to/file.py')
{'file', 'text', 'python', 'non-executable'}
>>> identify.tags_from_path('/path/to/file-with-shebang')
diff --git a/identify/extensions.py b/identify/extensions.py
index 4475a41..1110ccb 100644
--- a/identify/extensions.py
+++ b/identify/extensions.py
@@ -10,11 +10,11 @@ EXTENSIONS = {
'asar': {'binary', 'asar'},
'bash': {'text', 'shell', 'bash'},
'bat': {'text', 'batch'},
+ 'bib': {'text', 'bib'},
'bmp': {'binary', 'image', 'bitmap'},
'bz2': {'binary', 'bzip2'},
'c': {'text', 'c'},
'cc': {'text', 'c++'},
- 'cu': {'text', 'cuda'},
'cfg': {'text'},
'chs': {'text', 'c2hs'},
'clj': {'text', 'clojure'},
@@ -31,6 +31,7 @@ EXTENSIONS = {
'cson': {'text', 'cson'},
'css': {'text', 'css'},
'csv': {'text', 'csv'},
+ 'cu': {'text', 'cuda'},
'cxx': {'text', 'c++'},
'dart': {'text', 'dart'},
'def': {'text', 'def'},
@@ -89,6 +90,7 @@ EXTENSIONS = {
'key': {'text', 'pem'},
'kml': {'text', 'kml', 'xml'},
'kt': {'text', 'kotlin'},
+ 'lean': {'text', 'lean'},
'less': {'text', 'less'},
'lhs': {'text', 'literate-haskell'},
'libsonnet': {'text', 'jsonnet'},
@@ -130,31 +132,32 @@ EXTENSIONS = {
'proto': {'text', 'proto'},
'puml': {'text', 'plantuml'},
'purs': {'text', 'purescript'},
+ 'pxd': {'text', 'cython'},
+ 'pxi': {'text', 'cython'},
'py': {'text', 'python'},
'pyi': {'text', 'pyi'},
'pyx': {'text', 'cython'},
'pyz': {'binary', 'pyz'},
'pyzw': {'binary', 'pyz'},
- 'pxd': {'text', 'cython'},
- 'pxi': {'text', 'cython'},
'r': {'text', 'r'},
'rb': {'text', 'ruby'},
'rs': {'text', 'rust'},
'rst': {'text', 'rst'},
's': {'text', 'asm'},
+ 'sass': {'text', 'sass'},
'sbt': {'text', 'sbt', 'scala'},
'sc': {'text', 'scala'},
'scala': {'text', 'scala'},
- 'scss': {'text', 'scss'},
'scm': {'text', 'scheme'},
+ 'scss': {'text', 'scss'},
'sh': {'text', 'shell'},
'sls': {'text', 'salt'},
'so': {'binary'},
'sol': {'text', 'solidity'},
'spec': {'text', 'spec'},
+ 'sql': {'text', 'sql'},
'ss': {'text', 'scheme'},
'styl': {'text', 'stylus'},
- 'sql': {'text', 'sql'},
'sv': {'text', 'system-verilog'},
'svg': {'text', 'image', 'svg', 'xml'},
'svh': {'text', 'system-verilog'},
@@ -163,15 +166,17 @@ EXTENSIONS = {
'swiftdeps': {'text', 'swiftdeps'},
'tac': {'text', 'twisted', 'python'},
'tar': {'binary', 'tar'},
+ 'tex': {'text', 'tex'},
+ 'tf': {'text', 'terraform'},
+ 'tfvars': {'text', 'terraform'},
'tgz': {'binary', 'gzip'},
'thrift': {'text', 'thrift'},
'tiff': {'binary', 'image', 'tiff'},
'toml': {'text', 'toml'},
- 'tf': {'text', 'terraform'},
- 'tfvars': {'text', 'terraform'},
'ts': {'text', 'ts'},
'tsx': {'text', 'tsx'},
'ttf': {'binary', 'ttf'},
+ 'txsprofile': {'text', 'ini', 'txsprofile'},
'txt': {'text', 'plain-text'},
'v': {'text', 'verilog'},
'vdx': {'text', 'vdx'},
@@ -181,11 +186,12 @@ EXTENSIONS = {
'vue': {'text', 'vue'},
'war': {'binary', 'zip', 'jar'},
'wav': {'binary', 'audio', 'wav'},
- 'wkt': {'text', 'wkt'},
'whl': {'binary', 'wheel', 'zip'},
+ 'wkt': {'text', 'wkt'},
'woff': {'binary', 'woff'},
'woff2': {'binary', 'woff2'},
'wsgi': {'text', 'wsgi', 'python'},
+ 'xhtml': {'text', 'xml', 'html', 'xhtml'},
'xml': {'text', 'xml'},
'xq': {'text', 'xquery'},
'xql': {'text', 'xquery'},
@@ -209,30 +215,32 @@ EXTENSIONS_NEED_BINARY_CHECK = {
NAMES = {
'.babelrc': EXTENSIONS['json'] | {'babelrc'},
- '.bashrc': EXTENSIONS['bash'],
'.bash_aliases': EXTENSIONS['bash'],
'.bash_profile': EXTENSIONS['bash'],
+ '.bashrc': EXTENSIONS['bash'],
'.bowerrc': EXTENSIONS['json'] | {'bowerrc'},
'.coveragerc': EXTENSIONS['ini'] | {'coveragerc'},
'.cshrc': EXTENSIONS['csh'],
'.dockerignore': {'text', 'dockerignore'},
'.editorconfig': {'text', 'editorconfig'},
- '.gitconfig': EXTENSIONS['ini'] | {'gitconfig'},
- '.hgrc': EXTENSIONS['ini'] | {'hgrc'},
+ '.flake8': EXTENSIONS['ini'] | {'flake8'},
'.gitattributes': {'text', 'gitattributes'},
+ '.gitconfig': EXTENSIONS['ini'] | {'gitconfig'},
'.gitignore': {'text', 'gitignore'},
'.gitmodules': {'text', 'gitmodules'},
+ '.hgrc': EXTENSIONS['ini'] | {'hgrc'},
'.jshintrc': EXTENSIONS['json'] | {'jshintrc'},
'.mailmap': {'text', 'mailmap'},
'.mention-bot': EXTENSIONS['json'] | {'mention-bot'},
'.npmignore': {'text', 'npmignore'},
'.pdbrc': EXTENSIONS['py'] | {'pdbrc'},
'.pypirc': EXTENSIONS['ini'] | {'pypirc'},
+ '.rstcheck.cfg': EXTENSIONS['ini'],
'.yamllint': EXTENSIONS['yaml'] | {'yamllint'},
'.zshrc': EXTENSIONS['zsh'],
'AUTHORS': EXTENSIONS['txt'],
- 'BUILD.bazel': {'text', 'bazel'},
'BUILD': {'text', 'bazel'},
+ 'BUILD.bazel': {'text', 'bazel'},
'CMakeLists.txt': EXTENSIONS['cmake'],
'COPYING': EXTENSIONS['txt'],
'Dockerfile': {'text', 'dockerfile'},
diff --git a/identify/identify.py b/identify/identify.py
index 8a21d8b..1c0e677 100644
--- a/identify/identify.py
+++ b/identify/identify.py
@@ -141,6 +141,27 @@ def _shebang_split(line):
return line.split()
+def _parse_nix_shebang(bytesio, cmd):
+ while bytesio.read(2) == b'#!':
+ next_line = bytesio.readline()
+ try:
+ next_line = next_line.decode('UTF-8')
+ except UnicodeDecodeError:
+ return cmd
+
+ for c in next_line:
+ if c not in printable:
+ return cmd
+
+ line_tokens = tuple(_shebang_split(next_line.strip()))
+ for i, token in enumerate(line_tokens[:-1]):
+ if token != '-i':
+ continue
+ # the argument to -i flag
+ cmd = (line_tokens[i + 1],)
+ return cmd
+
+
def parse_shebang(bytesio):
"""Parse the shebang from a file opened for reading binary."""
if bytesio.read(2) != b'#!':
@@ -159,6 +180,8 @@ def parse_shebang(bytesio):
cmd = tuple(_shebang_split(first_line.strip()))
if cmd and cmd[0] == '/usr/bin/env':
cmd = cmd[1:]
+ if cmd == ('nix-shell',):
+ return _parse_nix_shebang(bytesio, cmd)
return cmd
diff --git a/identify/interpreters.py b/identify/interpreters.py
index bcffb5a..7feb4b1 100644
--- a/identify/interpreters.py
+++ b/identify/interpreters.py
@@ -3,9 +3,14 @@ from __future__ import absolute_import
from __future__ import unicode_literals
INTERPRETERS = {
+ 'ash': {'shell', 'ash'},
+ 'awk': {'awk'},
'bash': {'shell', 'bash'},
+ 'bats': {'shell', 'bash', 'bats'},
'csh': {'shell', 'csh'},
'dash': {'shell', 'dash'},
+ 'expect': {'expect'},
+ 'ksh': {'shell', 'ksh'},
'node': {'javascript'},
'nodejs': {'javascript'},
'perl': {'perl'},
diff --git a/setup.cfg b/setup.cfg
index ea0b137..c742f5a 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
[metadata]
name = identify
-version = 1.4.29
+version = 1.5.5
description = File identification library for Python
long_description = file: README.md
long_description_content_type = text/markdown
diff --git a/tests/identify_test.py b/tests/identify_test.py
index 44406a1..a80c401 100644
--- a/tests/identify_test.py
+++ b/tests/identify_test.py
@@ -217,6 +217,66 @@ def test_file_is_text_does_not_exist(tmpdir):
(b"#!/path'with/quotes y", ("/path'with/quotes", 'y')),
# Don't regress on leading/trailing ws
(b"#! /path'with/quotes y ", ("/path'with/quotes", 'y')),
+ # Test nix-shell specialties with shebang on second line
+ (
+ b'#! /usr/bin/env nix-shell\n'
+ b'#! nix-shell -i bash -p python',
+ ('bash',),
+ ),
+ (
+ b'#! /usr/bin/env nix-shell\n'
+ b'#! nix-shell -i python -p coreutils',
+ ('python',),
+ ),
+ (
+ b'#! /usr/bin/env nix-shell\n'
+ b'#! nix-shell -p coreutils -i python',
+ ('python',),
+ ),
+ # multi-line and no whitespace variation
+ (
+ b'#! /usr/bin/env nix-shell\n'
+ b'#! nix-shell -p coreutils\n'
+ b'#! nix-shell -i python',
+ ('python',),
+ ),
+ (
+ b'#! /usr/bin/env nix-shell\n'
+ b'#!nix-shell -p coreutils\n'
+ b'#!nix-shell -i python',
+ ('python',),
+ ),
+ (
+ b'#! /usr/bin/env nix-shell\n'
+ b'#!\xf9\x93\x01\x42\xcd',
+ ('nix-shell',),
+ ),
+ (
+ b'#! /usr/bin/env nix-shell\n'
+ b'#!\x00\x00\x00\x00',
+ ('nix-shell',),
+ ),
+ # non-proper nix-shell
+ (b'#! /usr/bin/nix-shell', ('/usr/bin/nix-shell',)),
+ (b'#! /usr/bin/env nix-shell', ('nix-shell',)),
+ (
+ b'#! /usr/bin/env nix-shell non-portable-argument',
+ ('nix-shell', 'non-portable-argument'),
+ ),
+ (
+ b'#! /usr/bin/env nix-shell\n'
+ b'#! nix-shell -i',
+ ('nix-shell',), # guard against index error
+ ),
+ # interpret quotes correctly
+ (
+ b'#!/usr/bin/env nix-shell\n'
+ b'#!nix-shell --argstr x "a -i python3 p"\n'
+ b'#!nix-shell -p hello\n'
+ b'#!nix-shell -i bash\n'
+ b'#!nix-shell --argstr y "b -i runhaskell q"',
+ ('bash',),
+ ),
(b'\xf9\x93\x01\x42\xcd', ()),
(b'#!\xf9\x93\x01\x42\xcd', ()),
(b'#!\x00\x00\x00\x00', ()),