diff options
Diffstat (limited to 'tests')
-rw-r--r-- | tests/__init__.py | 0 | ||||
-rw-r--r-- | tests/cli_test.py | 33 | ||||
-rw-r--r-- | tests/extensions_test.py | 26 | ||||
-rw-r--r-- | tests/identify_test.py | 281 |
4 files changed, 340 insertions, 0 deletions
diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/tests/__init__.py diff --git a/tests/cli_test.py b/tests/cli_test.py new file mode 100644 index 0000000..9369a5e --- /dev/null +++ b/tests/cli_test.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import +from __future__ import unicode_literals + +from identify import cli + + +def test_identify_cli(capsys): + ret = cli.main(('setup.py',)) + out, _ = capsys.readouterr() + assert ret == 0 + assert out == '["file", "non-executable", "python", "text"]\n' + + +def test_identify_cli_filename_only(capsys): + ret = cli.main(('setup.py', '--filename-only')) + out, _ = capsys.readouterr() + assert ret == 0 + assert out == '["python", "text"]\n' + + +def test_identify_cli_filename_only_unidentified(capsys): + ret = cli.main(('x.unknown', '--filename-only')) + out, _ = capsys.readouterr() + assert ret == 1 + assert out == '' + + +def test_file_not_found(capsys): + ret = cli.main(('x.unknown',)) + out, _ = capsys.readouterr() + assert ret == 1 + assert out == 'x.unknown does not exist.\n' diff --git a/tests/extensions_test.py b/tests/extensions_test.py new file mode 100644 index 0000000..44f16a8 --- /dev/null +++ b/tests/extensions_test.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import +from __future__ import unicode_literals + +import pytest + +from identify import extensions + + +@pytest.mark.parametrize('extension', extensions.EXTENSIONS) +def test_extensions_have_binary_or_text(extension): + tags = extensions.EXTENSIONS[extension] + assert len({'text', 'binary'} & tags) == 1, tags + + +@pytest.mark.parametrize('extension', extensions.EXTENSIONS_NEED_BINARY_CHECK) +def test_need_binary_check_do_not_specify_text_binary(extension): + tags = extensions.EXTENSIONS_NEED_BINARY_CHECK[extension] + assert len({'text', 'binary'} & tags) == 0, tags + + +def test_mutually_exclusive_check_types(): + assert not ( + set(extensions.EXTENSIONS) & + set(extensions.EXTENSIONS_NEED_BINARY_CHECK) + ) diff --git a/tests/identify_test.py b/tests/identify_test.py new file mode 100644 index 0000000..44406a1 --- /dev/null +++ b/tests/identify_test.py @@ -0,0 +1,281 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import +from __future__ import unicode_literals + +import io +import os +import stat + +import pytest + +from identify import identify + + +def test_all_tags_includes_basic_ones(): + assert 'file' in identify.ALL_TAGS + assert 'directory' in identify.ALL_TAGS + + +def test_all_tags_contains_each_type(): + assert 'xml' in identify.ALL_TAGS # extension + assert 'plist' in identify.ALL_TAGS # extension, needs binary check + assert 'dockerfile' in identify.ALL_TAGS # by file convention + assert 'python3' in identify.ALL_TAGS # by shebang + + +def test_tags_from_path_does_not_exist(tmpdir): + x = tmpdir.join('foo') + with pytest.raises(ValueError): + identify.tags_from_path(x.strpath) + + +def test_tags_from_path_directory(tmpdir): + x = tmpdir.join('foo') + x.mkdir() + assert identify.tags_from_path(x.strpath) == {'directory'} + + +def test_tags_from_path_symlink(tmpdir): + x = tmpdir.join('foo') + x.mksymlinkto(tmpdir.join('lol').ensure()) + assert identify.tags_from_path(x.strpath) == {'symlink'} + + +def test_tags_from_path_broken_symlink(tmpdir): + x = tmpdir.join('foo') + x.mksymlinkto(tmpdir.join('lol')) + assert identify.tags_from_path(x.strpath) == {'symlink'} + + +def test_tags_from_path_simple_file(tmpdir): + x = tmpdir.join('test.py').ensure() + assert identify.tags_from_path(x.strpath) == { + 'file', 'text', 'non-executable', 'python', + } + + +def test_tags_from_path_file_with_incomplete_shebang(tmpdir): + x = tmpdir.join('test') + x.write_text('#! \n', encoding='UTF-8') + make_executable(x.strpath) + assert identify.tags_from_path(x.strpath) == { + 'file', 'text', 'executable', + } + + +def test_tags_from_path_file_with_shebang_non_executable(tmpdir): + x = tmpdir.join('test') + x.write_text('#!/usr/bin/env python\nimport sys\n', encoding='UTF-8') + assert identify.tags_from_path(x.strpath) == { + 'file', 'text', 'non-executable', + } + + +def test_tags_from_path_file_with_shebang_executable(tmpdir): + x = tmpdir.join('test') + x.write_text('#!/usr/bin/env python\nimport sys\n', encoding='UTF-8') + make_executable(x.strpath) + assert identify.tags_from_path(x.strpath) == { + 'file', 'text', 'executable', 'python', + } + + +def test_tags_from_path_binary(tmpdir): + x = tmpdir.join('test') + x.write(b'\x7f\x45\x4c\x46\x02\x01\x01') + make_executable(x.strpath) + assert identify.tags_from_path(x.strpath) == { + 'file', 'binary', 'executable', + } + + +def test_tags_from_path_plist_binary(tmpdir): + x = tmpdir.join('t.plist') + x.write_binary( + b'bplist00\xd1\x01\x02_\x10\x0fLast Login NameWDefault\x08\x0b\x1d\x00' + b'\x00\x00\x00\x00\x00\x01\x01\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00%', + ) + assert identify.tags_from_path(x.strpath) == { + 'file', 'plist', 'binary', 'non-executable', + } + + +def test_tags_from_path_plist_text(tmpdir): + x = tmpdir.join('t.plist') + x.write( + '<?xml version="1.0" encoding="UTF-8"?>\n' + '<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">\n' + '<plist version="1.0">\n' + '<dict>\n' + '\t<key>Last Login Name</key>\n' + '\t<string>Default</string>\n' + '</dict>\n' + '</plist>\n', + ) + assert identify.tags_from_path(x.strpath) == { + 'file', 'plist', 'text', 'non-executable', + } + + +@pytest.mark.parametrize( + ('filename', 'expected'), + ( + ('test.py', {'text', 'python'}), + ('test.mk', {'text', 'makefile'}), + ('Makefile', {'text', 'makefile'}), + ('Dockerfile', {'text', 'dockerfile'}), + ('Dockerfile.xenial', {'text', 'dockerfile'}), + ('xenial.Dockerfile', {'text', 'dockerfile'}), + ('Pipfile', {'text', 'toml'}), + ('Pipfile.lock', {'text', 'json'}), + ('mod/test.py', {'text', 'python'}), + ('mod/Dockerfile', {'text', 'dockerfile'}), + + # does not set binary / text + ('f.plist', {'plist'}), + + # case of extension should be ignored + ('f.JPG', {'binary', 'image', 'jpeg'}), + # but case of name checks should still be honored + ('dockerfile.py', {'text', 'python'}), + + # full filename tests should take precedence over extension tests + ('test.cfg', {'text'}), + ('setup.cfg', {'text', 'ini'}), + + # Filename matches should still include extensions if applicable + ('README.md', {'text', 'markdown', 'plain-text'}), + + ('test.weird-unrecognized-extension', set()), + ('test', set()), + ('', set()), + ), +) +def test_tags_from_filename(filename, expected): + assert identify.tags_from_filename(filename) == expected + + +@pytest.mark.parametrize( + ('interpreter', 'expected'), + ( + ('python', {'python'}), + ('python3', {'python3', 'python'}), + ('python3.5.2', {'python3', 'python'}), + ('/usr/bin/python3.5.2', {'python3', 'python'}), + ('/usr/bin/herpderpderpderpderp', set()), + ('something-random', set()), + ('', set()), + ), +) +def test_tags_from_interpreter(interpreter, expected): + assert identify.tags_from_interpreter(interpreter) == expected + + +@pytest.mark.parametrize( + ('data', 'expected'), + ( + (b'hello world', True), + (b'', True), + ('éóñəå ⊂(◉‿◉)つ(ノ≥∇≤)ノ'.encode('utf8'), True), + (r'¯\_(ツ)_/¯'.encode('utf8'), True), + ('♪┏(・o・)┛♪┗ ( ・o・) ┓♪┏ ( ) ┛♪┗ (・o・ ) ┓♪┏(・o・)┛♪'.encode('utf8'), True), + ('éóñå'.encode('latin1'), True), + + (b'hello world\x00', False), + (b'\x7f\x45\x4c\x46\x02\x01\x01', False), # first few bytes of /bin/bash + (b'\x43\x92\xd9\x0f\xaf\x32\x2c', False), # some /dev/urandom output + ), +) +def test_is_text(data, expected): + assert identify.is_text(io.BytesIO(data)) is expected + + +def test_file_is_text_simple(tmpdir): + x = tmpdir.join('f') + x.write_text('hello there\n', encoding='UTF-8') + assert identify.file_is_text(x.strpath) is True + + +def test_file_is_text_does_not_exist(tmpdir): + x = tmpdir.join('f') + with pytest.raises(ValueError): + identify.file_is_text(x.strpath) + + +@pytest.mark.parametrize( + ('s', 'expected'), + ( + (b'', ()), + (b'#!/usr/bin/python', ('/usr/bin/python',)), + (b'#!/usr/bin/env python', ('python',)), + (b'#! /usr/bin/python', ('/usr/bin/python',)), + (b'#!/usr/bin/foo python', ('/usr/bin/foo', 'python')), + # despite this being invalid, setuptools will write shebangs like this + (b'#!"/path/with spaces/x" y', ('/path/with spaces/x', 'y')), + # this is apparently completely ok to embed quotes + (b"#!/path'with/quotes y", ("/path'with/quotes", 'y')), + # Don't regress on leading/trailing ws + (b"#! /path'with/quotes y ", ("/path'with/quotes", 'y')), + (b'\xf9\x93\x01\x42\xcd', ()), + (b'#!\xf9\x93\x01\x42\xcd', ()), + (b'#!\x00\x00\x00\x00', ()), + ), +) +def test_parse_shebang(s, expected): + assert identify.parse_shebang(io.BytesIO(s)) == expected + + +def test_parse_shebang_from_file_does_not_exist(): + with pytest.raises(ValueError): + identify.parse_shebang_from_file('herp derp derp') + + +def test_parse_shebang_from_file_nonexecutable(tmpdir): + x = tmpdir.join('f') + x.write_text('#!/usr/bin/env python', encoding='UTF-8') + assert identify.parse_shebang_from_file(x.strpath) == () + + +def test_parse_shebang_from_file_simple(tmpdir): + x = tmpdir.join('f') + x.write_text('#!/usr/bin/env python', encoding='UTF-8') + make_executable(x.strpath) + assert identify.parse_shebang_from_file(x.strpath) == ('python',) + + +def make_executable(filename): + original_mode = os.stat(filename).st_mode + os.chmod( + filename, + original_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH, + ) + + +def test_license_identification(): + assert identify.license_id('LICENSE') == 'MIT' + + +def test_license_exact_identification(tmpdir): + wtfpl = '''\ +DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + Version 2, December 2004 + + Copyright (C) 2004 Sam Hocevar <sam@hocevar.net> + + Everyone is permitted to copy and distribute verbatim or modified + copies of this license document, and changing it is allowed as long + as the name is changed. + + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. You just DO WHAT THE FUCK YOU WANT TO. +''' + f = tmpdir.join('LICENSE') + f.write(wtfpl) + assert identify.license_id(f.strpath) == 'WTFPL' + + +def test_license_not_identified(): + assert identify.license_id(os.devnull) is None |