summaryrefslogtreecommitdiffstats
path: root/python/magic.py
blob: 4300ee0719a9e790dde11aa499c7ac1abc3614d6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
# coding: utf-8

'''
Python bindings for libmagic
'''

import ctypes
import threading

from collections import namedtuple

from ctypes import *
from ctypes.util import find_library


def _init():
    """
    Loads the shared library through ctypes and returns a library
    L{ctypes.CDLL} instance
    """
    return ctypes.cdll.LoadLibrary(find_library('magic'))

# Loaded shared libraries, keyed by short name.
_libraries = {'magic': _init()}

# Flag constants for open and setflags.
# Every flag is exported twice: with and without the MAGIC_ prefix.
MAGIC_NONE = NONE = 0x000000
MAGIC_DEBUG = DEBUG = 0x000001
MAGIC_SYMLINK = SYMLINK = 0x000002
MAGIC_COMPRESS = COMPRESS = 0x000004
MAGIC_DEVICES = DEVICES = 0x000008
MAGIC_MIME_TYPE = MIME_TYPE = 0x000010
MAGIC_CONTINUE = CONTINUE = 0x000020
MAGIC_CHECK = CHECK = 0x000040
MAGIC_PRESERVE_ATIME = PRESERVE_ATIME = 0x000080
MAGIC_RAW = RAW = 0x000100
MAGIC_ERROR = ERROR = 0x000200
MAGIC_MIME_ENCODING = MIME_ENCODING = 0x000400
MAGIC_MIME = MIME = MAGIC_MIME_TYPE | MAGIC_MIME_ENCODING
MAGIC_APPLE = APPLE = 0x000800

MAGIC_NO_CHECK_COMPRESS = NO_CHECK_COMPRESS = 0x001000
MAGIC_NO_CHECK_TAR = NO_CHECK_TAR = 0x002000
MAGIC_NO_CHECK_SOFT = NO_CHECK_SOFT = 0x004000
MAGIC_NO_CHECK_APPTYPE = NO_CHECK_APPTYPE = 0x008000
MAGIC_NO_CHECK_ELF = NO_CHECK_ELF = 0x010000
MAGIC_NO_CHECK_TEXT = NO_CHECK_TEXT = 0x020000
MAGIC_NO_CHECK_CDF = NO_CHECK_CDF = 0x040000
MAGIC_NO_CHECK_TOKENS = NO_CHECK_TOKENS = 0x100000
MAGIC_NO_CHECK_ENCODING = NO_CHECK_ENCODING = 0x200000

MAGIC_NO_CHECK_BUILTIN = NO_CHECK_BUILTIN = 0x3FB000

# Parameter identifiers for getparam and setparam.
MAGIC_PARAM_INDIR_MAX = PARAM_INDIR_MAX = 0
MAGIC_PARAM_NAME_MAX = PARAM_NAME_MAX = 1
MAGIC_PARAM_ELF_PHNUM_MAX = PARAM_ELF_PHNUM_MAX = 2
MAGIC_PARAM_ELF_SHNUM_MAX = PARAM_ELF_SHNUM_MAX = 3
MAGIC_PARAM_ELF_NOTES_MAX = PARAM_ELF_NOTES_MAX = 4
MAGIC_PARAM_REGEX_MAX = PARAM_REGEX_MAX = 5
MAGIC_PARAM_BYTES_MAX = PARAM_BYTES_MAX = 6

# Result record returned by the detect_from_* functions.
FileMagic = namedtuple('FileMagic', 'mime_type encoding name')


class magic_set(Structure):
    """Opaque libmagic state; no fields are exposed to Python."""
    _fields_ = []


# Handle type passed to every libmagic function bound in this module.
magic_t = POINTER(magic_set)

def _bind(symbol, restype, *argtypes):
    """
    Look up `symbol` in the loaded magic library and declare its return
    type and argument types for ctypes.  Returns the configured function.
    """
    function = getattr(_libraries['magic'], symbol)
    function.restype = restype
    function.argtypes = list(argtypes)
    return function


_open = _bind('magic_open', magic_t, c_int)
_close = _bind('magic_close', None, magic_t)

# Queries: each returns a C string.
_file = _bind('magic_file', c_char_p, magic_t, c_char_p)
_descriptor = _bind('magic_descriptor', c_char_p, magic_t, c_int)
_buffer = _bind('magic_buffer', c_char_p, magic_t, c_void_p, c_size_t)
_error = _bind('magic_error', c_char_p, magic_t)

_setflags = _bind('magic_setflags', c_int, magic_t, c_int)

# Database operations: each takes a C string and returns an int status.
_load = _bind('magic_load', c_int, magic_t, c_char_p)
_compile = _bind('magic_compile', c_int, magic_t, c_char_p)
_check = _bind('magic_check', c_int, magic_t, c_char_p)
_list = _bind('magic_list', c_int, magic_t, c_char_p)

_errno = _bind('magic_errno', c_int, magic_t)

# Parameter access: the value travels through a pointer argument.
_getparam = _bind('magic_getparam', c_int, magic_t, c_int, c_void_p)
_setparam = _bind('magic_setparam', c_int, magic_t, c_int, c_void_p)


class Magic(object):
    """
    Object wrapper around a libmagic handle (magic_t).

    Text arguments (file names, database lists) are encoded to UTF-8
    before being passed to libmagic, and string results are decoded from
    UTF-8 on Python 3.
    """

    def __init__(self, ms):
        # ms is the magic_t handle that every method passes on to libmagic.
        self._magic_t = ms

    def close(self):
        """
        Closes the magic database and deallocates any resources used.

        Calling close() on an already closed object does nothing.
        """
        handle = self._magic_t
        if handle is None:
            return
        # Forget the handle before releasing it so that a second close()
        # (for example from a finalizer) can never free it twice.
        self._magic_t = None
        _close(handle)

    @staticmethod
    def __tostr(s):
        # Convert a libmagic result to the native str type; None passes
        # through unchanged.
        if s is None:
            return None
        if isinstance(s, str):
            return s
        try:  # keep Python 2 compatibility
            return str(s, 'utf-8')
        except TypeError:
            return str(s)

    @staticmethod
    def __tobytes(b):
        # Convert a text argument to bytes for libmagic; None passes
        # through unchanged (ctypes hands it over as a NULL pointer).
        if b is None:
            return None
        if isinstance(b, bytes):
            return b
        try:  # keep Python 2 compatibility
            return bytes(b, 'utf-8')
        except TypeError:
            return bytes(b)

    def file(self, filename):
        """
        Returns a textual description of the contents of the argument passed
        as a filename or None if an error occurred and the MAGIC_ERROR flag
        is set. A call to errno() will return the numeric error code.
        """
        return Magic.__tostr(_file(self._magic_t, Magic.__tobytes(filename)))

    def descriptor(self, fd):
        """
        Returns a textual description of the contents of the argument passed
        as a file descriptor or None if an error occurred and the MAGIC_ERROR
        flag is set. A call to errno() will return the numeric error code.
        """
        return Magic.__tostr(_descriptor(self._magic_t, fd))

    def buffer(self, buf):
        """
        Returns a textual description of the contents of the argument passed
        as a buffer or None if an error occurred and the MAGIC_ERROR flag
        is set. A call to errno() will return the numeric error code.
        """
        return Magic.__tostr(_buffer(self._magic_t, buf, len(buf)))

    def error(self):
        """
        Returns a textual explanation of the last error or None
        if there was no error.
        """
        return Magic.__tostr(_error(self._magic_t))

    def setflags(self, flags):
        """
        Set flags on the magic object which determine how magic checking
        behaves; a bitwise OR of the flags described in libmagic(3), but
        without the MAGIC_ prefix.

        Returns -1 on systems that don't support utime(2) or utimes(2)
        when PRESERVE_ATIME is set.
        """
        return _setflags(self._magic_t, flags)

    def load(self, filename=None):
        """
        Must be called to load entries in the colon separated list of database
        files passed as argument or the default database file if no argument
        before any magic queries can be performed.

        Returns 0 on success and -1 on failure.
        """
        return _load(self._magic_t, Magic.__tobytes(filename))

    def compile(self, dbs=None):
        """
        Compile entries in the colon separated list of database files
        passed as argument or the default database file if no argument.
        The compiled files created are named from the basename(1) of each file
        argument with ".mgc" appended to it.

        Returns 0 on success and -1 on failure.
        """
        return _compile(self._magic_t, Magic.__tobytes(dbs))

    def check(self, dbs=None):
        """
        Check the validity of entries in the colon separated list of
        database files passed as argument or the default database file
        if no argument.

        Returns 0 on success and -1 on failure.
        """
        return _check(self._magic_t, Magic.__tobytes(dbs))

    def list(self, dbs=None):
        """
        Dump all magic entries, in a human readable format, from the colon
        separated list of database files passed as argument or the default
        database file if no argument.

        Returns 0 on success and -1 on failure.
        """
        return _list(self._magic_t, Magic.__tobytes(dbs))

    def errno(self):
        """
        Returns a numeric error code. If return value is 0, an internal
        magic error occurred. If return value is non-zero, the value is
        an OS error code. The errno module or os.strerror() can be used
        to provide detailed error information.
        """
        return _errno(self._magic_t)

    def getparam(self, param):
        """
        Returns the param value if successful and -1 if the parameter
        was unknown.
        """
        # libmagic stores the value through the pointer as a size_t, so the
        # receiving buffer must be size_t wide (a c_int is too small on
        # platforms where size_t is 64 bits).
        v = c_size_t()
        i = _getparam(self._magic_t, param, byref(v))
        if i == -1:
            return -1
        return v.value

    def setparam(self, param, value):
        """
        Returns 0 if successful and -1 if the parameter was unknown.
        """
        # libmagic reads the value through the pointer as a size_t.
        v = c_size_t(value)
        return _setparam(self._magic_t, param, byref(v))


def open(flags):
    """
    Returns a magic object on success and None on failure.
    Flags argument as for setflags.
    """
    cookie = _open(flags)
    # _open is declared with a POINTER restype, so ctypes hands back a
    # pointer object even for a NULL result; such a pointer is never None
    # but evaluates as false.  Test truthiness to detect failure.
    if not cookie:
        return None
    return Magic(cookie)


# Objects used by `detect_from_` functions
class error(Exception):
    """Raised when a magic handle cannot be opened or loaded."""

class MagicDetect(object):
    """
    Holds the two loaded magic handles used by the detect_from_* functions:
    mime_magic (opened with MAGIC_MIME) and none_magic (opened with
    MAGIC_NONE).

    Raises error if either handle cannot be opened or loaded; any handle
    that was already opened is closed first.
    """

    def __init__(self):
        # Define both attributes up front so __del__ stays safe even when
        # construction fails part-way through.
        self.mime_magic = None
        self.none_magic = None
        try:
            self.mime_magic = self._open_loaded(MAGIC_MIME)
            self.none_magic = self._open_loaded(MAGIC_NONE)
        except error:
            self._close_all()
            raise

    @staticmethod
    def _open_loaded(flags):
        # Open a handle with the given flags and load the default database.
        handle = open(flags)
        if handle is None:
            raise error('unable to open magic handle (flags=%d)' % flags)
        if handle.load() == -1:
            # Fetch the message before closing; the handle is unusable after.
            message = handle.error()
            handle.close()
            raise error(message or
                        'unable to load magic database (flags=%d)' % flags)
        return handle

    def _close_all(self):
        # Close whichever handles are still open and forget them, so that a
        # later call (for example from __del__) cannot close them twice.
        for attr in ('mime_magic', 'none_magic'):
            handle = getattr(self, attr, None)
            if handle is not None:
                setattr(self, attr, None)
                handle.close()

    def __del__(self):
        self._close_all()

# Per-thread storage; _detect_make() keeps its MagicDetect instance here
# under the attribute name "magic_instance".
threadlocal = threading.local()

def _detect_make():
    """
    Return the MagicDetect instance stored on `threadlocal`, creating and
    storing a new one when none is present yet.
    """
    detector = getattr(threadlocal, "magic_instance", None)
    if detector is not None:
        return detector
    detector = MagicDetect()
    threadlocal.magic_instance = detector
    return detector

def _create_filemagic(mime_detected, type_detected):
    try:
        mime_type, mime_encoding = mime_detected.split('; ')
    except ValueError:
        raise ValueError(mime_detected)

    return FileMagic(name=type_detected, mime_type=mime_type,
                     encoding=mime_encoding.replace('charset=', ''))


def detect_from_filename(filename):
    '''Detect mime type, encoding and file type of the file named `filename`

    Returns a `FileMagic` namedtuple.
    '''
    detector = _detect_make()
    mime_description = detector.mime_magic.file(filename)
    type_description = detector.none_magic.file(filename)
    return _create_filemagic(mime_description, type_description)


def detect_from_fobj(fobj):
    '''Detect mime type, encoding and file type from a file-like object

    The object must provide `fileno()`; detection runs on that descriptor.

    Returns a `FileMagic` namedtuple.
    '''
    # Ask for the descriptor first so an unsuitable object fails before
    # any detector is created.
    fd = fobj.fileno()
    detector = _detect_make()
    mime_description = detector.mime_magic.descriptor(fd)
    type_description = detector.none_magic.descriptor(fd)
    return _create_filemagic(mime_description, type_description)


def detect_from_content(byte_content):
    '''Detect mime type, encoding and file type from in-memory bytes

    Returns a `FileMagic` namedtuple.
    '''
    detector = _detect_make()
    mime_description = detector.mime_magic.buffer(byte_content)
    type_description = detector.none_magic.buffer(byte_content)
    return _create_filemagic(mime_description, type_description)