summaryrefslogtreecommitdiffstats
path: root/ftparchive/multicompress.cc
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--ftparchive/multicompress.cc360
1 files changed, 360 insertions, 0 deletions
diff --git a/ftparchive/multicompress.cc b/ftparchive/multicompress.cc
new file mode 100644
index 0000000..ac85670
--- /dev/null
+++ b/ftparchive/multicompress.cc
@@ -0,0 +1,360 @@
+// -*- mode: cpp; mode: fold -*-
+// Description /*{{{*/
+/* ######################################################################
+
+ MultiCompressor
+
+ This class is very complicated in order to optimize for the common
+ case of its use, writing a large set of compressed files that are
+ different from the old set. It spawns off compressors in parallel
+ to maximize compression throughput and has a separate task managing
+ the data going into the compressors.
+
+ ##################################################################### */
+ /*}}}*/
+// Include Files /*{{{*/
+#include <config.h>
+
+#include <apt-pkg/aptconfiguration.h>
+#include <apt-pkg/error.h>
+#include <apt-pkg/fileutl.h>
+#include <apt-pkg/hashes.h>
+#include <apt-pkg/strutl.h>
+
+#include <cctype>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <vector>
+
+#include "multicompress.h"
+#include <apti18n.h>
+ /*}}}*/
+
+using namespace std;
+
+static std::vector<APT::Configuration::Compressor>::const_iterator findMatchingCompressor(std::string::const_iterator &I,
+ std::string::const_iterator const &End, std::vector<APT::Configuration::Compressor> const &Compressors)
+{
+ // Grab a word (aka: a compressor name)
+ for (; I != End && isspace(*I); ++I);
+ string::const_iterator Start = I;
+ for (; I != End && !isspace(*I); ++I);
+
+ auto const Comp = std::find_if(Compressors.begin(), Compressors.end(),
+ [&](APT::Configuration::Compressor const &C) { return stringcmp(Start, I, C.Name.c_str()) == 0;
+ });
+ if (Comp == Compressors.end())
+ _error->Warning(_("Unknown compression algorithm '%s'"),string(Start,I).c_str());
+ return Comp;
+}
+
+// MultiCompress::MultiCompress - Constructor /*{{{*/
+// ---------------------------------------------------------------------
+/* Setup the file outputs, compression modes and fork the writer child */
+MultiCompress::MultiCompress(string const &Output, string const &Compress,
+ mode_t const &Permissions, bool const &Write) : Outputter{-1}, Permissions(Permissions)
+{
+ Outputs = 0;
+ UpdateMTime = 0;
+
+ auto const Compressors = APT::Configuration::getCompressors();
+ // Parse the compression string, a space separated lists of compression types
+ for (auto I = Compress.cbegin(); I != Compress.cend();)
+ {
+ auto const Comp = findMatchingCompressor(I, Compress.cend(), Compressors);
+ if (Comp == Compressors.end())
+ continue;
+
+ // Create and link in a new output
+ Files *NewOut = new Files;
+ NewOut->Next = Outputs;
+ Outputs = NewOut;
+ NewOut->CompressProg = *Comp;
+ NewOut->Output = Output + Comp->Extension;
+
+ struct stat St;
+ if (stat(NewOut->Output.c_str(),&St) == 0)
+ NewOut->OldMTime = St.st_mtime;
+ else
+ NewOut->OldMTime = 0;
+ }
+
+ if (Write == false)
+ return;
+
+ /* Open all the temp files now so we can report any errors. File is
+ made unreable to prevent people from touching it during creating. */
+ for (Files *I = Outputs; I != 0; I = I->Next)
+ I->TmpFile.Open(I->Output + ".new", FileFd::WriteOnly | FileFd::Create | FileFd::Empty, FileFd::Extension, 0600);
+ if (_error->PendingError() == true)
+ return;
+
+ if (Outputs == 0)
+ {
+ _error->Error(_("Compressed output %s needs a compression set"),Output.c_str());
+ return;
+ }
+
+ Start();
+}
+ /*}}}*/
+// MultiCompress::~MultiCompress - Destructor /*{{{*/
+// ---------------------------------------------------------------------
+/* Just erase the file linked list. */
+MultiCompress::~MultiCompress()
+{
+ Die();
+
+ for (; Outputs != 0;)
+ {
+ Files *Tmp = Outputs->Next;
+ delete Outputs;
+ Outputs = Tmp;
+ }
+}
+ /*}}}*/
+// MultiCompress::GetStat - Get stat information for compressed files /*{{{*/
+// ---------------------------------------------------------------------
+/* This checks each compressed file to make sure it exists and returns
+ stat information for a random file from the collection. False means
+ one or more of the files is missing. */
+bool MultiCompress::GetStat(string const &Output,string const &Compress,struct stat &St)
+{
+ auto const Compressors = APT::Configuration::getCompressors();
+
+ // Parse the compression string, a space separated lists of compression types
+ bool DidStat = false;
+ for (auto I = Compress.cbegin(); I != Compress.cend();)
+ {
+ auto const Comp = findMatchingCompressor(I, Compress.cend(), Compressors);
+ if (Comp == Compressors.end())
+ continue;
+
+ string Name = Output + Comp->Extension;
+ if (stat(Name.c_str(),&St) != 0)
+ return false;
+ DidStat = true;
+ }
+ return DidStat;
+}
+ /*}}}*/
+// MultiCompress::Start - Start up the writer child /*{{{*/
+// ---------------------------------------------------------------------
+/* Fork a child and setup the communication pipe. */
+bool MultiCompress::Start()
+{
+ // Create a data pipe
+ int Pipe[2] = {-1,-1};
+ if (pipe(Pipe) != 0)
+ return _error->Errno("pipe",_("Failed to create IPC pipe to subprocess"));
+ for (int I = 0; I != 2; I++)
+ SetCloseExec(Pipe[I],true);
+
+ // The child..
+ Outputter = fork();
+ if (Outputter == 0)
+ {
+ close(Pipe[1]);
+ Child(Pipe[0]);
+ if (_error->PendingError() == true)
+ {
+ _error->DumpErrors();
+ _exit(100);
+ }
+ _exit(0);
+ };
+
+ close(Pipe[0]);
+ if (Input.OpenDescriptor(Pipe[1], FileFd::WriteOnly, true) == false)
+ return false;
+
+ if (Outputter == -1)
+ return _error->Errno("fork",_("Failed to fork"));
+ return true;
+}
+ /*}}}*/
+// MultiCompress::Die - Clean up the writer /*{{{*/
+// ---------------------------------------------------------------------
+/* */
+bool MultiCompress::Die()
+{
+ if (Input.IsOpen() == false)
+ return true;
+
+ Input.Close();
+ bool Res = ExecWait(Outputter,_("Compress child"),false);
+ Outputter = -1;
+ return Res;
+}
+ /*}}}*/
+// MultiCompress::Finalize - Finish up writing /*{{{*/
+// ---------------------------------------------------------------------
+/* This is only necessary for statistics reporting. */
+bool MultiCompress::Finalize(unsigned long long &OutSize)
+{
+ OutSize = 0;
+ if (Input.IsOpen() == false || Die() == false)
+ return false;
+
+ time_t Now;
+ time(&Now);
+
+ // Check the mtimes to see if the files were replaced.
+ bool Changed = false;
+ for (Files *I = Outputs; I != 0; I = I->Next)
+ {
+ struct stat St;
+ if (stat(I->Output.c_str(),&St) != 0)
+ return _error->Error(_("Internal error, failed to create %s"),
+ I->Output.c_str());
+
+ if (I->OldMTime != St.st_mtime)
+ Changed = true;
+ else
+ {
+ // Update the mtime if necessary
+ if (UpdateMTime > 0 &&
+ (Now - St.st_mtime > (signed)UpdateMTime || St.st_mtime > Now))
+ {
+ utimes(I->Output.c_str(), NULL);
+ Changed = true;
+ }
+ }
+
+ // Force the file permissions
+ if (St.st_mode != Permissions)
+ chmod(I->Output.c_str(),Permissions);
+
+ OutSize += St.st_size;
+ }
+
+ if (Changed == false)
+ OutSize = 0;
+
+ return true;
+}
+ /*}}}*/
+// MultiCompress::OpenOld - Open an old file /*{{{*/
+// ---------------------------------------------------------------------
+/* This opens one of the original output files, possibly decompressing it. */
+bool MultiCompress::OpenOld(FileFd &Fd)
+{
+ Files *Best = Outputs;
+ for (Files *I = Outputs; I != 0; I = I->Next)
+ if (Best->CompressProg.Cost > I->CompressProg.Cost)
+ Best = I;
+
+ // Open the file
+ return Fd.Open(Best->Output, FileFd::ReadOnly, FileFd::Extension);
+}
+ /*}}}*/
+// MultiCompress::Child - The writer child /*{{{*/
+// ---------------------------------------------------------------------
+/* The child process forks a bunch of compression children and takes
+ input on FD and passes it to all the compressor child. On the way it
+ computes the MD5 of the raw data. After this the raw data in the
+ original files is compared to see if this data is new. If the data
+ is new then the temp files are renamed, otherwise they are erased. */
+bool MultiCompress::Child(int const &FD)
+{
+ /* Okay, now we just feed data from FD to all the other FDs. Also
+ stash a hash of the data to use later. */
+ SetNonBlock(FD,false);
+ unsigned char Buffer[32*1024];
+ unsigned long long FileSize = 0;
+ Hashes MD5(Hashes::MD5SUM);
+ while (1)
+ {
+ WaitFd(FD,false);
+ int Res = read(FD,Buffer,sizeof(Buffer));
+ if (Res == 0)
+ break;
+ if (Res < 0)
+ continue;
+
+ MD5.Add(Buffer,Res);
+ FileSize += Res;
+ for (Files *I = Outputs; I != 0; I = I->Next)
+ {
+ if (I->TmpFile.Write(Buffer, Res) == false)
+ {
+ _error->Errno("write",_("IO to subprocess/file failed"));
+ break;
+ }
+ }
+ }
+
+ if (_error->PendingError() == true)
+ return false;
+
+ /* Now we have to copy the files over, or erase them if they
+ have not changed. First find the cheapest decompressor */
+ bool Missing = false;
+ for (Files *I = Outputs; I != 0; I = I->Next)
+ {
+ if (I->OldMTime == 0)
+ {
+ Missing = true;
+ break;
+ }
+ }
+
+ // Check the MD5 of the lowest cost entity.
+ while (Missing == false)
+ {
+ FileFd CompFd;
+ if (OpenOld(CompFd) == false)
+ {
+ _error->Discard();
+ break;
+ }
+
+ // Compute the hash
+ Hashes OldMD5(Hashes::MD5SUM);
+ unsigned long long NewFileSize = 0;
+ while (1)
+ {
+ unsigned long long Res = 0;
+ if (CompFd.Read(Buffer,sizeof(Buffer), &Res) == false)
+ return _error->Errno("read",_("Failed to read while computing MD5"));
+ if (Res == 0)
+ break;
+ NewFileSize += Res;
+ OldMD5.Add(Buffer,Res);
+ }
+ CompFd.Close();
+
+ // Check the hash
+ if (OldMD5.GetHashString(Hashes::MD5SUM) == MD5.GetHashString(Hashes::MD5SUM) &&
+ FileSize == NewFileSize)
+ {
+ for (Files *I = Outputs; I != 0; I = I->Next)
+ {
+ I->TmpFile.Close();
+ RemoveFile("MultiCompress::Child", I->TmpFile.Name());
+ }
+ return !_error->PendingError();
+ }
+ break;
+ }
+
+ // Finalize
+ for (Files *I = Outputs; I != 0; I = I->Next)
+ {
+ // Set the correct file modes
+ chmod(I->TmpFile.Name().c_str(),Permissions);
+
+ if (rename(I->TmpFile.Name().c_str(),I->Output.c_str()) != 0)
+ _error->Errno("rename",_("Failed to rename %s to %s"),
+ I->TmpFile.Name().c_str(),I->Output.c_str());
+ I->TmpFile.Close();
+ }
+
+ return !_error->PendingError();
+}
+ /*}}}*/
+