Merging upstream version 2.1.0.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2021-03-04 15:04:39 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2021-03-04 15:04:39 +0000
commit: 9143c0beda96a82579dc6ca87cba5ba74b907dea (patch)
tree: 79e9ef1de7f0919c55bcaad00f0d3b110b1ce371 /identify
parent: Releasing debian version 1.5.14-1. (diff)
download: identify-9143c0beda96a82579dc6ca87cba5ba74b907dea.tar.xz
identify-9143c0beda96a82579dc6ca87cba5ba74b907dea.zip
5 files changed, 55 insertions, 55 deletions
diff --git a/identify/cli.py b/identify/cli.py
index 511caf6..28e6155 100644
--- a/identify/cli.py
+++ b/identify/cli.py
@@ -1,14 +1,12 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import
-from __future__ import unicode_literals
-
 import argparse
 import json
+from typing import Optional
+from typing import Sequence
 
 from identify import identify
 
 
-def main(argv=None):
+def main(argv: Optional[Sequence[str]] = None) -> int:
     parser = argparse.ArgumentParser()
     parser.add_argument('--filename-only', action='store_true')
     parser.add_argument('path')
diff --git a/identify/extensions.py b/identify/extensions.py
index 62a7f5b..778b695 100644
--- a/identify/extensions.py
+++ b/identify/extensions.py
@@ -1,13 +1,9 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import
-from __future__ import unicode_literals
-
-
 EXTENSIONS = {
     'adoc': {'text', 'asciidoc'},
     'asciidoc': {'text', 'asciidoc'},
     'apinotes': {'text', 'apinotes'},
     'asar': {'binary', 'asar'},
+    'avif': {'binary', 'image', 'avif'},
     'bash': {'text', 'shell', 'bash'},
     'bat': {'text', 'batch'},
     'bib': {'text', 'bib'},
diff --git a/identify/identify.py b/identify/identify.py
index 1c0e677..51c1288 100644
--- a/identify/identify.py
+++ b/identify/identify.py
@@ -1,14 +1,14 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import unicode_literals
-
-import io
 import os.path
 import re
 import shlex
+import stat
 import string
 import sys
+from typing import IO
+from typing import List
+from typing import Optional
+from typing import Set
+from typing import Tuple
 
 from identify import extensions
 from identify import interpreters
@@ -19,27 +19,37 @@ printable = frozenset(string.printable)
 
 DIRECTORY = 'directory'
 SYMLINK = 'symlink'
+SOCKET = 'socket'
 FILE = 'file'
 EXECUTABLE = 'executable'
 NON_EXECUTABLE = 'non-executable'
 TEXT = 'text'
 BINARY = 'binary'
 
-ALL_TAGS = {DIRECTORY, SYMLINK, FILE, EXECUTABLE, NON_EXECUTABLE, TEXT, BINARY}
-ALL_TAGS.update(*extensions.EXTENSIONS.values())
-ALL_TAGS.update(*extensions.EXTENSIONS_NEED_BINARY_CHECK.values())
-ALL_TAGS.update(*extensions.NAMES.values())
-ALL_TAGS.update(*interpreters.INTERPRETERS.values())
-ALL_TAGS = frozenset(ALL_TAGS)
+TYPE_TAGS = frozenset((DIRECTORY, FILE, SYMLINK, SOCKET))
+MODE_TAGS = frozenset((EXECUTABLE, NON_EXECUTABLE))
+ENCODING_TAGS = frozenset((BINARY, TEXT))
+_ALL_TAGS = {*TYPE_TAGS, *MODE_TAGS, *ENCODING_TAGS}
+_ALL_TAGS.update(*extensions.EXTENSIONS.values())
+_ALL_TAGS.update(*extensions.EXTENSIONS_NEED_BINARY_CHECK.values())
+_ALL_TAGS.update(*extensions.NAMES.values())
+_ALL_TAGS.update(*interpreters.INTERPRETERS.values())
+ALL_TAGS = frozenset(_ALL_TAGS)
 
 
-def tags_from_path(path):
-    if not os.path.lexists(path):
-        raise ValueError('{} does not exist.'.format(path))
-    if os.path.isdir(path):
+def tags_from_path(path: str) -> Set[str]:
+    try:
+        sr = os.lstat(path)
+    except (OSError, ValueError):  # same error-handling as `os.lexists()`
+        raise ValueError(f'{path} does not exist.')
+
+    mode = sr.st_mode
+    if stat.S_ISDIR(mode):
         return {DIRECTORY}
-    if os.path.islink(path):
+    if stat.S_ISLNK(mode):
         return {SYMLINK}
+    if stat.S_ISSOCK(mode):
+        return {SOCKET}
 
     tags = {FILE}
 
@@ -62,19 +72,19 @@ def tags_from_path(path):
 
     # some extensions can be both binary and text
     # see EXTENSIONS_NEED_BINARY_CHECK
-    if not {TEXT, BINARY} & tags:
+    if not ENCODING_TAGS & tags:
         if file_is_text(path):
             tags.add(TEXT)
         else:
             tags.add(BINARY)
 
-    assert {TEXT, BINARY} & tags, tags
-    assert {EXECUTABLE, NON_EXECUTABLE} & tags, tags
+    assert ENCODING_TAGS & tags, tags
+    assert MODE_TAGS & tags, tags
     return tags
 
 
-def tags_from_filename(filename):
-    _, filename = os.path.split(filename)
+def tags_from_filename(path: str) -> Set[str]:
+    _, filename = os.path.split(path)
     _, ext = os.path.splitext(filename)
 
     ret = set()
@@ -95,7 +105,7 @@ def tags_from_filename(filename):
     return ret
 
 
-def tags_from_interpreter(interpreter):
+def tags_from_interpreter(interpreter: str) -> Set[str]:
     _, _, interpreter = interpreter.rpartition('/')
 
     # Try "python3.5.2" => "python3.5" => "python3" until one matches.
@@ -108,7 +118,7 @@ def tags_from_interpreter(interpreter):
     return set()
 
 
-def is_text(bytesio):
+def is_text(bytesio: IO[bytes]) -> bool:
     """Return whether the first KB of contents seems to be binary.
 
     This is roughly based on libmagic's binary/text detection:
@@ -122,14 +132,14 @@ def is_text(bytesio):
     return not bool(bytesio.read(1024).translate(None, text_chars))
 
 
-def file_is_text(path):
+def file_is_text(path: str) -> bool:
     if not os.path.lexists(path):
-        raise ValueError('{} does not exist.'.format(path))
+        raise ValueError(f'{path} does not exist.')
     with open(path, 'rb') as f:
         return is_text(f)
 
 
-def _shebang_split(line):
+def _shebang_split(line: str) -> List[str]:
     try:
         # shebangs aren't supposed to be quoted, though some tools such as
         # setuptools will write them with quotes so we'll best-guess parse
@@ -141,11 +151,14 @@ def _shebang_split(line):
         return line.split()
 
 
-def _parse_nix_shebang(bytesio, cmd):
+def _parse_nix_shebang(
+        bytesio: IO[bytes],
+        cmd: Tuple[str, ...],
+) -> Tuple[str, ...]:
     while bytesio.read(2) == b'#!':
-        next_line = bytesio.readline()
+        next_line_b = bytesio.readline()
         try:
-            next_line = next_line.decode('UTF-8')
+            next_line = next_line_b.decode('UTF-8')
         except UnicodeDecodeError:
             return cmd
 
@@ -162,13 +175,13 @@ def _parse_nix_shebang(bytesio, cmd):
     return cmd
 
 
-def parse_shebang(bytesio):
+def parse_shebang(bytesio: IO[bytes]) -> Tuple[str, ...]:
     """Parse the shebang from a file opened for reading binary."""
     if bytesio.read(2) != b'#!':
         return ()
-    first_line = bytesio.readline()
+    first_line_b = bytesio.readline()
     try:
-        first_line = first_line.decode('UTF-8')
+        first_line = first_line_b.decode('UTF-8')
     except UnicodeDecodeError:
         return ()
 
@@ -185,10 +198,10 @@ def parse_shebang(bytesio):
     return cmd
 
 
-def parse_shebang_from_file(path):
+def parse_shebang_from_file(path: str) -> Tuple[str, ...]:
     """Parse the shebang given a file path."""
     if not os.path.lexists(path):
-        raise ValueError('{} does not exist.'.format(path))
+        raise ValueError(f'{path} does not exist.')
     if not os.access(path, os.X_OK):
         return ()
 
@@ -200,13 +213,13 @@ COPYRIGHT_RE = re.compile(r'^\s*(Copyright|\(C\)) .*$', re.I | re.MULTILINE)
 WS_RE = re.compile(r'\s+')
 
 
-def _norm_license(s):
+def _norm_license(s: str) -> str:
     s = COPYRIGHT_RE.sub('', s)
     s = WS_RE.sub(' ', s)
     return s.strip()
 
 
-def license_id(filename):
+def license_id(filename: str) -> Optional[str]:
     """Return the spdx id for the license contained in `filename`.  If no
     license is detected, returns `None`.
 
@@ -222,7 +235,7 @@ def license_id(filename):
     """
     import editdistance  # `pip install identify[license]`
 
-    with io.open(filename, encoding='UTF-8') as f:
+    with open(filename, encoding='UTF-8') as f:
         contents = f.read()
 
     norm = _norm_license(contents)
diff --git a/identify/interpreters.py b/identify/interpreters.py
index 7feb4b1..dabf36c 100644
--- a/identify/interpreters.py
+++ b/identify/interpreters.py
@@ -1,7 +1,3 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import
-from __future__ import unicode_literals
-
 INTERPRETERS = {
     'ash': {'shell', 'ash'},
     'awk': {'awk'},
diff --git a/identify/vendor/licenses.py b/identify/vendor/licenses.py
index 912b5c8..3478d0f 100644
--- a/identify/vendor/licenses.py
+++ b/identify/vendor/licenses.py
@@ -1,6 +1,3 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import
-from __future__ import unicode_literals
 LICENSES = (
     (
         '0BSD',
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2021-03-04 15:04:39 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2021-03-04 15:04:39 +0000
commit	9143c0beda96a82579dc6ca87cba5ba74b907dea (patch)
tree	79e9ef1de7f0919c55bcaad00f0d3b110b1ce371 /identify
parent	Releasing debian version 1.5.14-1. (diff)
download	identify-9143c0beda96a82579dc6ca87cba5ba74b907dea.tar.xz identify-9143c0beda96a82579dc6ca87cba5ba74b907dea.zip