diff options
Diffstat (limited to '')
-rw-r--r-- | python/magic.py | 361 |
1 files changed, 361 insertions, 0 deletions
diff --git a/python/magic.py b/python/magic.py new file mode 100644 index 0000000..4300ee0 --- /dev/null +++ b/python/magic.py @@ -0,0 +1,361 @@ +# coding: utf-8 + +''' +Python bindings for libmagic +''' + +import ctypes +import threading + +from collections import namedtuple + +from ctypes import * +from ctypes.util import find_library + + +def _init(): + """ + Loads the shared library through ctypes and returns a library + L{ctypes.CDLL} instance + """ + return ctypes.cdll.LoadLibrary(find_library('magic')) + +_libraries = {} +_libraries['magic'] = _init() + +# Flag constants for open and setflags +MAGIC_NONE = NONE = 0 +MAGIC_DEBUG = DEBUG = 1 +MAGIC_SYMLINK = SYMLINK = 2 +MAGIC_COMPRESS = COMPRESS = 4 +MAGIC_DEVICES = DEVICES = 8 +MAGIC_MIME_TYPE = MIME_TYPE = 16 +MAGIC_CONTINUE = CONTINUE = 32 +MAGIC_CHECK = CHECK = 64 +MAGIC_PRESERVE_ATIME = PRESERVE_ATIME = 128 +MAGIC_RAW = RAW = 256 +MAGIC_ERROR = ERROR = 512 +MAGIC_MIME_ENCODING = MIME_ENCODING = 1024 +MAGIC_MIME = MIME = 1040 # MIME_TYPE + MIME_ENCODING +MAGIC_APPLE = APPLE = 2048 + +MAGIC_NO_CHECK_COMPRESS = NO_CHECK_COMPRESS = 4096 +MAGIC_NO_CHECK_TAR = NO_CHECK_TAR = 8192 +MAGIC_NO_CHECK_SOFT = NO_CHECK_SOFT = 16384 +MAGIC_NO_CHECK_APPTYPE = NO_CHECK_APPTYPE = 32768 +MAGIC_NO_CHECK_ELF = NO_CHECK_ELF = 65536 +MAGIC_NO_CHECK_TEXT = NO_CHECK_TEXT = 131072 +MAGIC_NO_CHECK_CDF = NO_CHECK_CDF = 262144 +MAGIC_NO_CHECK_TOKENS = NO_CHECK_TOKENS = 1048576 +MAGIC_NO_CHECK_ENCODING = NO_CHECK_ENCODING = 2097152 + +MAGIC_NO_CHECK_BUILTIN = NO_CHECK_BUILTIN = 4173824 + +MAGIC_PARAM_INDIR_MAX = PARAM_INDIR_MAX = 0 +MAGIC_PARAM_NAME_MAX = PARAM_NAME_MAX = 1 +MAGIC_PARAM_ELF_PHNUM_MAX = PARAM_ELF_PHNUM_MAX = 2 +MAGIC_PARAM_ELF_SHNUM_MAX = PARAM_ELF_SHNUM_MAX = 3 +MAGIC_PARAM_ELF_NOTES_MAX = PARAM_ELF_NOTES_MAX = 4 +MAGIC_PARAM_REGEX_MAX = PARAM_REGEX_MAX = 5 +MAGIC_PARAM_BYTES_MAX = PARAM_BYTES_MAX = 6 + +FileMagic = namedtuple('FileMagic', ('mime_type', 'encoding', 'name')) + + +class magic_set(Structure): + pass +magic_set._fields_ = [] +magic_t = POINTER(magic_set) + +_open = _libraries['magic'].magic_open +_open.restype = magic_t +_open.argtypes = [c_int] + +_close = _libraries['magic'].magic_close +_close.restype = None +_close.argtypes = [magic_t] + +_file = _libraries['magic'].magic_file +_file.restype = c_char_p +_file.argtypes = [magic_t, c_char_p] + +_descriptor = _libraries['magic'].magic_descriptor +_descriptor.restype = c_char_p +_descriptor.argtypes = [magic_t, c_int] + +_buffer = _libraries['magic'].magic_buffer +_buffer.restype = c_char_p +_buffer.argtypes = [magic_t, c_void_p, c_size_t] + +_error = _libraries['magic'].magic_error +_error.restype = c_char_p +_error.argtypes = [magic_t] + +_setflags = _libraries['magic'].magic_setflags +_setflags.restype = c_int +_setflags.argtypes = [magic_t, c_int] + +_load = _libraries['magic'].magic_load +_load.restype = c_int +_load.argtypes = [magic_t, c_char_p] + +_compile = _libraries['magic'].magic_compile +_compile.restype = c_int +_compile.argtypes = [magic_t, c_char_p] + +_check = _libraries['magic'].magic_check +_check.restype = c_int +_check.argtypes = [magic_t, c_char_p] + +_list = _libraries['magic'].magic_list +_list.restype = c_int +_list.argtypes = [magic_t, c_char_p] + +_errno = _libraries['magic'].magic_errno +_errno.restype = c_int +_errno.argtypes = [magic_t] + +_getparam = _libraries['magic'].magic_getparam +_getparam.restype = c_int +_getparam.argtypes = [magic_t, c_int, c_void_p] + +_setparam = _libraries['magic'].magic_setparam +_setparam.restype = c_int +_setparam.argtypes = [magic_t, c_int, c_void_p] + + +class Magic(object): + def __init__(self, ms): + self._magic_t = ms + + def close(self): + """ + Closes the magic database and deallocates any resources used. + """ + _close(self._magic_t) + + @staticmethod + def __tostr(s): + if s is None: + return None + if isinstance(s, str): + return s + try: # keep Python 2 compatibility + return str(s, 'utf-8') + except TypeError: + return str(s) + + @staticmethod + def __tobytes(b): + if b is None: + return None + if isinstance(b, bytes): + return b + try: # keep Python 2 compatibility + return bytes(b, 'utf-8') + except TypeError: + return bytes(b) + + def file(self, filename): + """ + Returns a textual description of the contents of the argument passed + as a filename or None if an error occurred and the MAGIC_ERROR flag + is set. A call to errno() will return the numeric error code. + """ + return Magic.__tostr(_file(self._magic_t, Magic.__tobytes(filename))) + + def descriptor(self, fd): + """ + Returns a textual description of the contents of the argument passed + as a file descriptor or None if an error occurred and the MAGIC_ERROR + flag is set. A call to errno() will return the numeric error code. + """ + return Magic.__tostr(_descriptor(self._magic_t, fd)) + + def buffer(self, buf): + """ + Returns a textual description of the contents of the argument passed + as a buffer or None if an error occurred and the MAGIC_ERROR flag + is set. A call to errno() will return the numeric error code. + """ + return Magic.__tostr(_buffer(self._magic_t, buf, len(buf))) + + def error(self): + """ + Returns a textual explanation of the last error or None + if there was no error. + """ + return Magic.__tostr(_error(self._magic_t)) + + def setflags(self, flags): + """ + Set flags on the magic object which determine how magic checking + behaves; a bitwise OR of the flags described in libmagic(3), but + without the MAGIC_ prefix. + + Returns -1 on systems that don't support utime(2) or utimes(2) + when PRESERVE_ATIME is set. + """ + return _setflags(self._magic_t, flags) + + def load(self, filename=None): + """ + Must be called to load entries in the colon separated list of database + files passed as argument or the default database file if no argument + before any magic queries can be performed. + + Returns 0 on success and -1 on failure. + """ + return _load(self._magic_t, Magic.__tobytes(filename)) + + def compile(self, dbs): + """ + Compile entries in the colon separated list of database files + passed as argument or the default database file if no argument. + The compiled files created are named from the basename(1) of each file + argument with ".mgc" appended to it. + + Returns 0 on success and -1 on failure. + """ + return _compile(self._magic_t, Magic.__tobytes(dbs)) + + def check(self, dbs): + """ + Check the validity of entries in the colon separated list of + database files passed as argument or the default database file + if no argument. + + Returns 0 on success and -1 on failure. + """ + return _check(self._magic_t, Magic.__tobytes(dbs)) + + def list(self, dbs): + """ + Check the validity of entries in the colon separated list of + database files passed as argument or the default database file + if no argument. + + Returns 0 on success and -1 on failure. + """ + return _list(self._magic_t, Magic.__tobytes(dbs)) + + def errno(self): + """ + Returns a numeric error code. If return value is 0, an internal + magic error occurred. If return value is non-zero, the value is + an OS error code. Use the errno module or os.strerror() can be used + to provide detailed error information. + """ + return _errno(self._magic_t) + + def getparam(self, param): + """ + Returns the param value if successful and -1 if the parameter + was unknown. + """ + v = c_int() + i = _getparam(self._magic_t, param, byref(v)) + if i == -1: + return -1 + return v.value + + def setparam(self, param, value): + """ + Returns 0 if successful and -1 if the parameter was unknown. + """ + v = c_int(value) + return _setparam(self._magic_t, param, byref(v)) + + +def open(flags): + """ + Returns a magic object on success and None on failure. + Flags argument as for setflags. + """ + magic_t = _open(flags) + if magic_t is None: + return None + return Magic(magic_t) + + +# Objects used by `detect_from_` functions +class error(Exception): + pass + +class MagicDetect(object): + def __init__(self): + self.mime_magic = open(MAGIC_MIME) + if self.mime_magic is None: + raise error + if self.mime_magic.load() == -1: + self.mime_magic.close() + self.mime_magic = None + raise error + self.none_magic = open(MAGIC_NONE) + if self.none_magic is None: + self.mime_magic.close() + self.mime_magic = None + raise error + if self.none_magic.load() == -1: + self.none_magic.close() + self.none_magic = None + self.mime_magic.close() + self.mime_magic = None + raise error + + def __del__(self): + if self.mime_magic is not None: + self.mime_magic.close() + if self.none_magic is not None: + self.none_magic.close() + +threadlocal = threading.local() + +def _detect_make(): + v = getattr(threadlocal, "magic_instance", None) + if v is None: + v = MagicDetect() + setattr(threadlocal, "magic_instance", v) + return v + +def _create_filemagic(mime_detected, type_detected): + try: + mime_type, mime_encoding = mime_detected.split('; ') + except ValueError: + raise ValueError(mime_detected) + + return FileMagic(name=type_detected, mime_type=mime_type, + encoding=mime_encoding.replace('charset=', '')) + + +def detect_from_filename(filename): + '''Detect mime type, encoding and file type from a filename + + Returns a `FileMagic` namedtuple. + ''' + x = _detect_make() + return _create_filemagic(x.mime_magic.file(filename), + x.none_magic.file(filename)) + + +def detect_from_fobj(fobj): + '''Detect mime type, encoding and file type from file-like object + + Returns a `FileMagic` namedtuple. + ''' + + file_descriptor = fobj.fileno() + x = _detect_make() + return _create_filemagic(x.mime_magic.descriptor(file_descriptor), + x.none_magic.descriptor(file_descriptor)) + + +def detect_from_content(byte_content): + '''Detect mime type, encoding and file type from bytes + + Returns a `FileMagic` namedtuple. + ''' + + x = _detect_make() + return _create_filemagic(x.mime_magic.buffer(byte_content), + x.none_magic.buffer(byte_content)) |