diff options
Diffstat (limited to 'apt-pkg/contrib/extracttar.cc')
-rw-r--r-- | apt-pkg/contrib/extracttar.cc | 325 |
1 files changed, 325 insertions, 0 deletions
diff --git a/apt-pkg/contrib/extracttar.cc b/apt-pkg/contrib/extracttar.cc new file mode 100644 index 0000000..dc24375 --- /dev/null +++ b/apt-pkg/contrib/extracttar.cc @@ -0,0 +1,325 @@ +// -*- mode: cpp; mode: fold -*- +// Description /*{{{*/ +/* ###################################################################### + + Extract a Tar - Tar Extractor + + Some performance measurements showed that zlib performed quite poorly + in comparison to a forked gzip process. This tar extractor makes use + of the fact that dup'd file descriptors have the same seek pointer + and that gzip will not read past the end of a compressed stream, + even if there is more data. We use the dup property to track extraction + progress and the gzip feature to just feed gzip a fd in the middle + of an AR file. + + ##################################################################### */ + /*}}}*/ +// Include Files /*{{{*/ +#include <config.h> + +#include <apt-pkg/configuration.h> +#include <apt-pkg/dirstream.h> +#include <apt-pkg/error.h> +#include <apt-pkg/extracttar.h> +#include <apt-pkg/fileutl.h> +#include <apt-pkg/strutl.h> + +#include <algorithm> +#include <iostream> +#include <string> +#include <fcntl.h> +#include <signal.h> +#include <string.h> +#include <unistd.h> + +#include <apti18n.h> + /*}}}*/ + +using namespace std; + +// The on disk header for a tar file. +struct ExtractTar::TarHeader +{ + char Name[100]; + char Mode[8]; + char UserID[8]; + char GroupID[8]; + char Size[12]; + char MTime[12]; + char Checksum[8]; + char LinkFlag; + char LinkName[100]; + char MagicNumber[8]; + char UserName[32]; + char GroupName[32]; + char Major[8]; + char Minor[8]; +}; + +// We need to read long names (names and link targets) into memory, so let's +// have a limit (shamelessly stolen from libarchive) to avoid people OOMing +// us with large streams. +static const unsigned long long APT_LONGNAME_LIMIT = 1048576llu; + +// A file size limit that we allow extracting. Currently, that's 128 GB. +// We also should leave some wiggle room for code adding files to it, and +// possibly conversion for signed, so this should not be larger than like 2**62. +static const unsigned long long APT_FILESIZE_LIMIT = 1llu << 37; + +// ExtractTar::ExtractTar - Constructor /*{{{*/ +// --------------------------------------------------------------------- +/* */ +ExtractTar::ExtractTar(FileFd &Fd,unsigned long long Max,string DecompressionProgram) + : File(Fd), MaxInSize(Max), DecompressProg(DecompressionProgram) +{ + GZPid = -1; + Eof = false; +} + /*}}}*/ +// ExtractTar::ExtractTar - Destructor /*{{{*/ +// --------------------------------------------------------------------- +/* */ +ExtractTar::~ExtractTar() +{ + // Error close + Done(); +} + /*}}}*/ +// ExtractTar::Done - Reap the gzip sub process /*{{{*/ +bool ExtractTar::Done() +{ + return InFd.Close(); +} + /*}}}*/ +// ExtractTar::StartGzip - Startup gzip /*{{{*/ +// --------------------------------------------------------------------- +/* This creates a gzip sub process that has its input as the file itself. + If this tar file is embedded into something like an ar file then + gzip will efficiently ignore the extra bits. */ +bool ExtractTar::StartGzip() +{ + if (DecompressProg.empty()) + { + InFd.OpenDescriptor(File.Fd(), FileFd::ReadOnly, FileFd::None, false); + return true; + } + + std::vector<APT::Configuration::Compressor> const compressors = APT::Configuration::getCompressors(); + std::vector<APT::Configuration::Compressor>::const_iterator compressor = compressors.begin(); + for (; compressor != compressors.end(); ++compressor) { + if (compressor->Name == DecompressProg) { + return InFd.OpenDescriptor(File.Fd(), FileFd::ReadOnly, *compressor, false); + } + } + + return _error->Error(_("Cannot find a configured compressor for '%s'"), + DecompressProg.c_str()); + +} + /*}}}*/ +// ExtractTar::Go - Perform extraction /*{{{*/ +// --------------------------------------------------------------------- +/* This reads each 512 byte block from the archive and extracts the header + information into the Item structure. Then it resolves the UID/GID and + invokes the correct processing function. */ +bool ExtractTar::Go(pkgDirStream &Stream) +{ + if (StartGzip() == false) + return false; + + // Loop over all blocks + string LastLongLink, ItemLink; + string LastLongName, ItemName; + while (1) + { + bool BadRecord = false; + unsigned char Block[512]; + if (InFd.Read(Block,sizeof(Block),true) == false) + return false; + + if (InFd.Eof() == true) + break; + + // Get the checksum + TarHeader *Tar = (TarHeader *)Block; + unsigned long CheckSum; + if (StrToNum(Tar->Checksum,CheckSum,sizeof(Tar->Checksum),8) == false) + return _error->Error(_("Corrupted archive")); + + /* Compute the checksum field. The actual checksum is blanked out + with spaces so it is not included in the computation */ + unsigned long NewSum = 0; + memset(Tar->Checksum,' ',sizeof(Tar->Checksum)); + for (int I = 0; I != sizeof(Block); I++) + NewSum += Block[I]; + + /* Check for a block of nulls - in this case we kill gzip, GNU tar + does this.. */ + if (NewSum == ' '*sizeof(Tar->Checksum)) + return Done(); + + if (NewSum != CheckSum) + return _error->Error(_("Tar checksum failed, archive corrupted")); + + // Decode all of the fields + pkgDirStream::Item Itm; + if (StrToNum(Tar->Mode,Itm.Mode,sizeof(Tar->Mode),8) == false || + (Base256ToNum(Tar->UserID,Itm.UID,8) == false && + StrToNum(Tar->UserID,Itm.UID,sizeof(Tar->UserID),8) == false) || + (Base256ToNum(Tar->GroupID,Itm.GID,8) == false && + StrToNum(Tar->GroupID,Itm.GID,sizeof(Tar->GroupID),8) == false) || + (Base256ToNum(Tar->Size,Itm.Size,12) == false && + StrToNum(Tar->Size,Itm.Size,sizeof(Tar->Size),8) == false) || + (Base256ToNum(Tar->MTime,Itm.MTime,12) == false && + StrToNum(Tar->MTime,Itm.MTime,sizeof(Tar->MTime),8) == false) || + StrToNum(Tar->Major,Itm.Major,sizeof(Tar->Major),8) == false || + StrToNum(Tar->Minor,Itm.Minor,sizeof(Tar->Minor),8) == false) + return _error->Error(_("Corrupted archive")); + + // Security check. Prevents overflows below the code when rounding up in skip/copy code, + // and provides modest protection against decompression bombs. + if (Itm.Size > APT_FILESIZE_LIMIT) + return _error->Error("Tar member too large: %llu > %llu bytes", Itm.Size, APT_FILESIZE_LIMIT); + + // Grab the filename and link target: use last long name if one was + // set, otherwise use the header value as-is, but remember that it may + // fill the entire 100-byte block and needs to be zero-terminated. + // See Debian Bug #689582. + if (LastLongName.empty() == false) + Itm.Name = (char *)LastLongName.c_str(); + else + Itm.Name = (char *)ItemName.assign(Tar->Name, sizeof(Tar->Name)).c_str(); + if (Itm.Name[0] == '.' && Itm.Name[1] == '/' && Itm.Name[2] != 0) + Itm.Name += 2; + + if (LastLongLink.empty() == false) + Itm.LinkTarget = (char *)LastLongLink.c_str(); + else + Itm.LinkTarget = (char *)ItemLink.assign(Tar->LinkName, sizeof(Tar->LinkName)).c_str(); + + // Convert the type over + switch (Tar->LinkFlag) + { + case NormalFile0: + case NormalFile: + Itm.Type = pkgDirStream::Item::File; + break; + + case HardLink: + Itm.Type = pkgDirStream::Item::HardLink; + break; + + case SymbolicLink: + Itm.Type = pkgDirStream::Item::SymbolicLink; + break; + + case CharacterDevice: + Itm.Type = pkgDirStream::Item::CharDevice; + break; + + case BlockDevice: + Itm.Type = pkgDirStream::Item::BlockDevice; + break; + + case Directory: + Itm.Type = pkgDirStream::Item::Directory; + break; + + case FIFO: + Itm.Type = pkgDirStream::Item::FIFO; + break; + + case GNU_LongLink: + { + unsigned long long Length = Itm.Size; + unsigned char Block[512]; + if (Length > APT_LONGNAME_LIMIT) + return _error->Error("Long name to large: %llu bytes > %llu bytes", Length, APT_LONGNAME_LIMIT); + while (Length > 0) + { + if (InFd.Read(Block,sizeof(Block),true) == false) + return false; + if (Length <= sizeof(Block)) + { + LastLongLink.append(Block,Block+sizeof(Block)); + break; + } + LastLongLink.append(Block,Block+sizeof(Block)); + Length -= sizeof(Block); + } + continue; + } + + case GNU_LongName: + { + unsigned long long Length = Itm.Size; + unsigned char Block[512]; + if (Length > APT_LONGNAME_LIMIT) + return _error->Error("Long name to large: %llu bytes > %llu bytes", Length, APT_LONGNAME_LIMIT); + while (Length > 0) + { + if (InFd.Read(Block,sizeof(Block),true) == false) + return false; + if (Length < sizeof(Block)) + { + LastLongName.append(Block,Block+sizeof(Block)); + break; + } + LastLongName.append(Block,Block+sizeof(Block)); + Length -= sizeof(Block); + } + continue; + } + + default: + BadRecord = true; + _error->Warning(_("Unknown TAR header type %u"), (unsigned)Tar->LinkFlag); + break; + } + + int Fd = -1; + if (not BadRecord && not Stream.DoItem(Itm, Fd)) + return false; + + if (Fd == -1 || Fd < -2 || BadRecord) + { + if (Itm.Size > 0 && not InFd.Skip(((Itm.Size + (sizeof(Block) - 1)) / sizeof(Block)) * sizeof(Block))) + return false; + } + else if (Itm.Size != 0) + { + // Copy the file over the FD + auto Size = Itm.Size; + unsigned char Junk[32*1024]; + do + { + auto const Read = std::min<unsigned long long>(Size, sizeof(Junk)); + if (not InFd.Read(Junk, ((Read + (sizeof(Block) - 1)) / sizeof(Block)) * sizeof(Block))) + return false; + + if (Fd > 0) + { + if (not FileFd::Write(Fd, Junk, Read)) + return Stream.Fail(Itm, Fd); + } + // An Fd of -2 means to send to a special processing function + else if (Fd == -2) + { + if (not Stream.Process(Itm, Junk, Read, Itm.Size - Size)) + return Stream.Fail(Itm, Fd); + } + + Size -= Read; + } while (Size != 0); + } + + // And finish up + if (not BadRecord && not Stream.FinishedFile(Itm, Fd)) + return false; + LastLongName.erase(); + LastLongLink.erase(); + } + + return Done(); +} + /*}}}*/ |