summaryrefslogtreecommitdiffstats
path: root/python/tarfile.cc
diff options
context:
space:
mode:
Diffstat (limited to 'python/tarfile.cc')
-rw-r--r--python/tarfile.cc521
1 files changed, 521 insertions, 0 deletions
diff --git a/python/tarfile.cc b/python/tarfile.cc
new file mode 100644
index 0000000..b87fa71
--- /dev/null
+++ b/python/tarfile.cc
@@ -0,0 +1,521 @@
+/*
+ * arfile.cc - Wrapper around ExtractTar which behaves like Python's tarfile.
+ *
+ * Copyright 2009 Julian Andres Klode <jak@debian.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include "generic.h"
+#include "apt_instmodule.h"
+#include <apt-pkg/extracttar.h>
+#include <apt-pkg/error.h>
+#include <apt-pkg/dirstream.h>
+
+/**
+ * A subclass of pkgDirStream which calls a Python callback.
+ *
+ * This calls a Python callback in FinishedFile() with the Item as the first
+ * argument and the data as the second argument.
+ *
+ * It can also work without a callback, in which case it just sets the
+ * 'py_member' and 'py_data' members. This can be combined with setting
+ * 'member' to extract a single member into the memory.
+ */
+class PyDirStream : public pkgDirStream
+{
+
+public:
+ PyObject *callback;
+ PyObject *py_data;
+ // The requested member or NULL.
+ const char *member;
+ // Set to true if an error occurred in the Python callback, or a file
+ // was too large to read in extractdata.
+ bool error;
+ // Place where the copy of the data is stored.
+ char *copy;
+ // The size of the copy
+ size_t copy_size;
+
+ virtual bool DoItem(Item &Itm,int &Fd);
+ virtual bool FinishedFile(Item &Itm,int Fd);
+#if (APT_PKG_MAJOR >= 5)
+ virtual bool Process(Item &Itm,const unsigned char *Data,
+ unsigned long long Size,unsigned long long Pos);
+#else
+ virtual bool Process(Item &Itm,const unsigned char *Data,
+ unsigned long Size,unsigned long Pos);
+#endif
+ PyDirStream(PyObject *callback, const char *member=0) : callback(callback),
+ py_data(0), member(member), error(false), copy(0), copy_size(0)
+ {
+ Py_XINCREF(callback);
+ }
+
+ virtual ~PyDirStream() {
+ Py_XDECREF(callback);
+ Py_XDECREF(py_data);
+ delete[] copy;
+ }
+};
+
+bool PyDirStream::DoItem(Item &Itm, int &Fd)
+{
+ if (!member || strcmp(Itm.Name, member) == 0) {
+ // Allocate a new buffer if the old one is too small.
+ if (Itm.Size > SIZE_MAX)
+ goto to_large;
+ if (copy == NULL || copy_size < Itm.Size) {
+ delete[] copy;
+ copy = new (std::nothrow) char[Itm.Size];
+ if (copy == NULL)
+ goto to_large;
+ copy_size = Itm.Size;
+ }
+ Fd = -2;
+ } else {
+ Fd = -1;
+ }
+ return true;
+to_large:
+ delete[] copy;
+ copy = NULL;
+ copy_size = 0;
+ /* If we are looking for a specific member, abort reading now */
+ if (member) {
+ error = true;
+ PyErr_Format(PyExc_MemoryError,
+ "The member %s was too large to read into memory",
+ Itm.Name);
+ return false;
+ }
+ return true;
+}
+
+#if (APT_PKG_MAJOR >= 5)
+bool PyDirStream::Process(Item &Itm,const unsigned char *Data,
+ unsigned long long Size,unsigned long long Pos)
+#else
+bool PyDirStream::Process(Item &Itm,const unsigned char *Data,
+ unsigned long Size,unsigned long Pos)
+#endif
+{
+ if (copy != NULL)
+ memcpy(copy + Pos, Data,Size);
+ return true;
+}
+
+bool PyDirStream::FinishedFile(Item &Itm,int Fd)
+{
+ if (member && strcmp(Itm.Name, member) != 0)
+ // Skip non-matching Items, if a specific one is requested.
+ return true;
+
+ Py_XDECREF(py_data);
+ if (copy == NULL) {
+ Py_INCREF(Py_None);
+ py_data = Py_None;
+ } else {
+ py_data = PyBytes_FromStringAndSize(copy, Itm.Size);
+ }
+
+ if (!callback)
+ return true;
+
+ // The current member and data.
+ CppPyObject<Item> *py_member;
+ py_member = CppPyObject_NEW<Item>(0, &PyTarMember_Type);
+ // Clone our object, including the strings in it.
+ py_member->Object = Itm;
+ py_member->Object.Name = new char[strlen(Itm.Name)+1];
+ py_member->Object.LinkTarget = new char[strlen(Itm.LinkTarget)+1];
+ strcpy(py_member->Object.Name, Itm.Name);
+ strcpy(py_member->Object.LinkTarget,Itm.LinkTarget);
+ py_member->NoDelete = true;
+ error = PyObject_CallFunctionObjArgs(callback, py_member, py_data, 0) == 0;
+ // Clear the old objects and create new ones.
+ Py_XDECREF(py_member);
+ return (!error);
+}
+
+void tarmember_dealloc(PyObject *self) {
+ // We cloned those strings, delete them again.
+ delete[] GetCpp<pkgDirStream::Item>(self).Name;
+ delete[] GetCpp<pkgDirStream::Item>(self).LinkTarget;
+ CppDealloc<pkgDirStream::Item>(self);
+}
+
+// The tarfile.TarInfo interface for our TarMember class.
+static PyObject *tarmember_isblk(PyObject *self, PyObject *args)
+{
+ return PyBool_FromLong(GetCpp<pkgDirStream::Item>(self).Type ==
+ pkgDirStream::Item::BlockDevice);
+}
+static PyObject *tarmember_ischr(PyObject *self, PyObject *args)
+{
+ return PyBool_FromLong(GetCpp<pkgDirStream::Item>(self).Type ==
+ pkgDirStream::Item::CharDevice);
+}
+static PyObject *tarmember_isdev(PyObject *self, PyObject *args)
+{
+ pkgDirStream::Item::Type_t type = GetCpp<pkgDirStream::Item>(self).Type;
+ return PyBool_FromLong(type == pkgDirStream::Item::CharDevice ||
+ type == pkgDirStream::Item::BlockDevice ||
+ type == pkgDirStream::Item::FIFO);
+}
+
+static PyObject *tarmember_isdir(PyObject *self, PyObject *args)
+{
+ return PyBool_FromLong(GetCpp<pkgDirStream::Item>(self).Type ==
+ pkgDirStream::Item::Directory);
+}
+
+static PyObject *tarmember_isfifo(PyObject *self, PyObject *args)
+{
+ return PyBool_FromLong(GetCpp<pkgDirStream::Item>(self).Type ==
+ pkgDirStream::Item::FIFO);
+}
+
+static PyObject *tarmember_isfile(PyObject *self, PyObject *args)
+{
+ return PyBool_FromLong(GetCpp<pkgDirStream::Item>(self).Type ==
+ pkgDirStream::Item::File);
+}
+static PyObject *tarmember_islnk(PyObject *self, PyObject *args)
+{
+ return PyBool_FromLong(GetCpp<pkgDirStream::Item>(self).Type ==
+ pkgDirStream::Item::HardLink);
+}
+static PyObject *tarmember_isreg(PyObject *self, PyObject *args)
+{
+ return tarmember_isfile(self, NULL);
+}
+static PyObject *tarmember_issym(PyObject *self, PyObject *args)
+{
+ return PyBool_FromLong(GetCpp<pkgDirStream::Item>(self).Type ==
+ pkgDirStream::Item::SymbolicLink);
+}
+
+static PyObject *tarmember_get_name(PyObject *self, void *closure)
+{
+ return CppPyPath(GetCpp<pkgDirStream::Item>(self).Name);
+}
+
+static PyObject *tarmember_get_linkname(PyObject *self, void *closure)
+{
+ return CppPyPath(GetCpp<pkgDirStream::Item>(self).LinkTarget);
+}
+
+static PyObject *tarmember_get_mode(PyObject *self, void *closure)
+{
+ return MkPyNumber(GetCpp<pkgDirStream::Item>(self).Mode);
+}
+
+static PyObject *tarmember_get_uid(PyObject *self, void *closure)
+{
+ return MkPyNumber(GetCpp<pkgDirStream::Item>(self).UID);
+}
+static PyObject *tarmember_get_gid(PyObject *self, void *closure)
+{
+ return MkPyNumber(GetCpp<pkgDirStream::Item>(self).GID);
+}
+static PyObject *tarmember_get_size(PyObject *self, void *closure)
+{
+ return MkPyNumber(GetCpp<pkgDirStream::Item>(self).Size);
+}
+
+static PyObject *tarmember_get_mtime(PyObject *self, void *closure)
+{
+ return MkPyNumber(GetCpp<pkgDirStream::Item>(self).MTime);
+}
+
+static PyObject *tarmember_get_major(PyObject *self, void *closure)
+{
+ return MkPyNumber(GetCpp<pkgDirStream::Item>(self).Major);
+}
+
+static PyObject *tarmember_get_minor(PyObject *self, void *closure)
+{
+ return MkPyNumber(GetCpp<pkgDirStream::Item>(self).Minor);
+}
+
+static PyObject *tarmember_repr(PyObject *self)
+{
+ return PyString_FromFormat("<%s object: name:'%s'>",
+ self->ob_type->tp_name,
+ GetCpp<pkgDirStream::Item>(self).Name);
+
+}
+
+
+static PyMethodDef tarmember_methods[] = {
+ {"isblk",tarmember_isblk,METH_NOARGS,
+ "Determine whether the member is a block device."},
+ {"ischr",tarmember_ischr,METH_NOARGS,
+ "Determine whether the member is a character device."},
+ {"isdev",tarmember_isdev,METH_NOARGS,
+ "Determine whether the member is a device (block, character or FIFO)."},
+ {"isdir",tarmember_isdir,METH_NOARGS,
+ "Determine whether the member is a directory."},
+ {"isfifo",tarmember_isfifo,METH_NOARGS,
+ "Determine whether the member is a FIFO."},
+ {"isfile",tarmember_isfile,METH_NOARGS,
+ "Determine whether the member is a regular file."},
+ {"islnk",tarmember_islnk,METH_NOARGS,
+ "Determine whether the member is a hardlink."},
+ {"isreg",tarmember_isreg,METH_NOARGS,
+ "Determine whether the member is a regular file, same as isfile()."},
+ {"issym",tarmember_issym,METH_NOARGS,
+ "Determine whether the member is a symbolic link."},
+ {NULL}
+};
+
+static PyGetSetDef tarmember_getset[] = {
+ {"gid",tarmember_get_gid,0,"The owner's group ID."},
+ {"linkname",tarmember_get_linkname,0,"The target of the link."},
+ {"major",tarmember_get_major,0,"The major ID of the device."},
+ {"minor",tarmember_get_minor,0,"The minor ID of the device."},
+ {"mode",tarmember_get_mode,0,"The mode (permissions)."},
+ {"mtime",tarmember_get_mtime,0,"Last time of modification."},
+ {"name",tarmember_get_name,0,"The name of the file."},
+ {"size",tarmember_get_size,0,"The size of the file."},
+ {"uid",tarmember_get_uid,0,"The owner's user ID."},
+ {NULL}
+};
+
+static const char *tarmember_doc =
+ "Represent a single member of a 'tar' archive.\n\n"
+ "This class, which has been modelled after 'tarfile.TarInfo', represents\n"
+ "information about a given member in an archive.";
+PyTypeObject PyTarMember_Type = {
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
+ "apt_inst.TarMember", // tp_name
+ sizeof(CppPyObject<pkgDirStream::Item>), // tp_basicsize
+ 0, // tp_itemsize
+ // Methods
+ tarmember_dealloc, // tp_dealloc
+ 0, // tp_print
+ 0, // tp_getattr
+ 0, // tp_setattr
+ 0, // tp_compare
+ tarmember_repr, // tp_repr
+ 0, // tp_as_number
+ 0, // tp_as_sequence
+ 0, // tp_as_mapping
+ 0, // tp_hash
+ 0, // tp_call
+ 0, // tp_str
+ 0, // tp_getattro
+ 0, // tp_setattro
+ 0, // tp_as_buffer
+ Py_TPFLAGS_DEFAULT | // tp_flags
+ Py_TPFLAGS_HAVE_GC,
+ tarmember_doc, // tp_doc
+ CppTraverse<pkgDirStream::Item>, // tp_traverse
+ CppClear<pkgDirStream::Item>, // tp_clear
+ 0, // tp_richcompare
+ 0, // tp_weaklistoffset
+ 0, // tp_iter
+ 0, // tp_iternext
+ tarmember_methods, // tp_methods
+ 0, // tp_members
+ tarmember_getset // tp_getset
+};
+
+
+
+static PyObject *tarfile_new(PyTypeObject *type,PyObject *args,PyObject *kwds)
+{
+ PyObject *file;
+ PyApt_Filename filename;
+ int fileno;
+ int min = 0;
+ int max = 0xFFFFFFFF;
+ char *comp = "gzip";
+
+ static char *kwlist[] = {"file","min","max","comp",NULL};
+ if (PyArg_ParseTupleAndKeywords(args, kwds, "O|iis", kwlist, &file, &min,
+ &max,&comp) == 0)
+ return 0;
+
+ PyApt_UniqueObject<PyTarFileObject> self((PyTarFileObject*)CppPyObject_NEW<ExtractTar*>(file,type));
+
+ // We receive a filename.
+ if (filename.init(file))
+ new (&self->Fd) FileFd((const char *) filename,FileFd::ReadOnly);
+ else if ((fileno = PyObject_AsFileDescriptor(file)) != -1) {
+ // clear the error set by PyObject_AsString().
+ PyErr_Clear();
+ new (&self->Fd) FileFd(fileno,false);
+ }
+ else {
+ return 0;
+ }
+
+ self->min = min;
+ self->Object = new ExtractTar(self->Fd,max,comp);
+ if (_error->PendingError() == true)
+ return HandleErrors(self.release());
+ return self.release();
+}
+
+static const char *tarfile_extractall_doc =
+ "extractall([rootdir: str]) -> True\n\n"
+ "Extract the archive in the current directory. The argument 'rootdir'\n"
+ "can be used to change the target directory.";
+static PyObject *tarfile_extractall(PyObject *self, PyObject *args)
+{
+ std::string cwd = SafeGetCWD();
+ PyApt_Filename rootdir;
+ if (PyArg_ParseTuple(args,"|O&:extractall", PyApt_Filename::Converter, &rootdir) == 0)
+ return 0;
+
+ if (rootdir) {
+ if (chdir(rootdir) == -1)
+ return PyErr_SetFromErrnoWithFilename(PyExc_OSError, rootdir);
+ }
+
+ pkgDirStream Extract;
+
+ ((PyTarFileObject*)self)->Fd.Seek(((PyTarFileObject*)self)->min);
+ bool res = GetCpp<ExtractTar*>(self)->Go(Extract);
+
+
+
+ if (rootdir) {
+ if (chdir(cwd.c_str()) == -1)
+ return PyErr_SetFromErrnoWithFilename(PyExc_OSError,
+ (char*)cwd.c_str());
+ }
+ return HandleErrors(PyBool_FromLong(res));
+}
+
+static const char *tarfile_go_doc =
+ "go(callback: callable[, member: str]) -> True\n\n"
+ "Go through the archive and call the callable 'callback' for each\n"
+ "member with 2 arguments. The first argument is the TarMember and\n"
+ "the second one is the data, as bytes.\n\n"
+ "The optional parameter 'member' can be used to specify the member for\n"
+ "which to call the callback. If not specified, it will be called for all\n"
+ "members. If specified and not found, LookupError will be raised.";
+static PyObject *tarfile_go(PyObject *self, PyObject *args)
+{
+ PyObject *callback;
+ PyApt_Filename member;
+ if (PyArg_ParseTuple(args,"O|O&",&callback, PyApt_Filename::Converter, &member) == 0)
+ return 0;
+ if (member && strcmp(member, "") == 0)
+ member = 0;
+ pkgDirStream Extract;
+ PyDirStream stream(callback, member);
+ ((PyTarFileObject*)self)->Fd.Seek(((PyTarFileObject*)self)->min);
+ bool res = GetCpp<ExtractTar*>(self)->Go(stream);
+ if (stream.error)
+ return 0;
+ if (member && !stream.py_data)
+ return PyErr_Format(PyExc_LookupError, "There is no member named '%s'",
+ member.path);
+ return HandleErrors(PyBool_FromLong(res));
+}
+
+static const char *tarfile_extractdata_doc =
+ "extractdata(member: str) -> bytes\n\n"
+ "Return the contents of the member, as a bytes object. Raise\n"
+ "LookupError if there is no member with the given name.";
+static PyObject *tarfile_extractdata(PyObject *self, PyObject *args)
+{
+ PyApt_Filename member;
+ if (PyArg_ParseTuple(args,"O&", PyApt_Filename::Converter, &member) == 0)
+ return 0;
+ PyDirStream stream(NULL, member);
+ ((PyTarFileObject*)self)->Fd.Seek(((PyTarFileObject*)self)->min);
+ // Go through the stream.
+ GetCpp<ExtractTar*>(self)->Go(stream);
+
+ if (stream.error)
+ return 0;
+
+ if (!stream.py_data)
+ return PyErr_Format(PyExc_LookupError, "There is no member named '%s'",
+ member.path);
+ return Py_INCREF(stream.py_data), stream.py_data;
+}
+
+static PyMethodDef tarfile_methods[] = {
+ {"extractdata",tarfile_extractdata,METH_VARARGS,tarfile_extractdata_doc},
+ {"extractall",tarfile_extractall,METH_VARARGS,tarfile_extractall_doc},
+ {"go",tarfile_go,METH_VARARGS,tarfile_go_doc},
+ {NULL}
+};
+
+static PyObject *tarfile_repr(PyObject *self)
+{
+ return PyString_FromFormat("<%s object: %s>", self->ob_type->tp_name,
+ PyString_AsString(PyObject_Repr(GetOwner<ExtractTar*>(self))));
+}
+
+static const char *tarfile_doc =
+ "TarFile(file: str/int/file[, min: int, max: int, comp: str])\n\n"
+ "The parameter 'file' may be a string specifying the path of a file, or\n"
+ "a file-like object providing the fileno() method. It may also be an int\n"
+ "specifying a file descriptor (returned by e.g. os.open()).\n\n"
+ "The parameter 'min' describes the offset in the file where the archive\n"
+ "begins and the parameter 'max' is the size of the archive.\n\n"
+ "The compression of the archive is set by the parameter 'comp'. It can\n"
+ "be set to any program supporting the -d switch, the default being gzip.";
+PyTypeObject PyTarFile_Type = {
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
+ "apt_inst.TarFile", // tp_name
+ sizeof(PyTarFileObject), // tp_basicsize
+ 0, // tp_itemsize
+ // Methods
+ CppDealloc<ExtractTar*>, // tp_dealloc
+ 0, // tp_print
+ 0, // tp_getattr
+ 0, // tp_setattr
+ 0, // tp_compare
+ tarfile_repr, // tp_repr
+ 0, // tp_as_number
+ 0, // tp_as_sequence
+ 0, // tp_as_mapping
+ 0, // tp_hash
+ 0, // tp_call
+ 0, // tp_str
+ 0, // tp_getattro
+ 0, // tp_setattro
+ 0, // tp_as_buffer
+ Py_TPFLAGS_DEFAULT | // tp_flags
+ Py_TPFLAGS_HAVE_GC,
+ tarfile_doc, // tp_doc
+ CppTraverse<ExtractTar*>, // tp_traverse
+ CppClear<ExtractTar*>, // tp_clear
+ 0, // tp_richcompare
+ 0, // tp_weaklistoffset
+ 0, // tp_iter
+ 0, // tp_iternext
+ tarfile_methods, // tp_methods
+ 0, // tp_members
+ 0, // tp_getset
+ 0, // tp_base
+ 0, // tp_dict
+ 0, // tp_descr_get
+ 0, // tp_descr_set
+ 0, // tp_dictoffset
+ 0, // tp_init
+ 0, // tp_alloc
+ tarfile_new // tp_new
+};