summaryrefslogtreecommitdiffstats
path: root/ftparchive/contents.cc
blob: 8a7adfd8e721997151c1846d75370a0f6f207a89 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
// -*- mode: cpp; mode: fold -*-
// Description								/*{{{*/
/* ######################################################################
   
   contents - Archive contents generator
   
   The GenContents class is a back end for an archive contents generator. 
   It takes a list of per-deb file name and merges it into a memory 
   database of all previous output. This database is stored as a set
   of binary trees linked across directories to form a tree of all files+dirs
   given to it. The tree will also be sorted as it is built up thus 
   removing the massive sort time overhead.
   
   By breaking all the pathnames into components and storing them 
   separately a space saving is realized by not duplicating the string
   over and over again. Ultimately this saving is sacrificed to storage of
   the tree structure itself but the tree structure yields a speed gain
   in the sorting and processing. Ultimately it takes about 5 seconds to
   do 141000 nodes and about 5 meg of ram.

   The tree looks something like:
   
     usr/
      / \             / libslang
   bin/ lib/ --> libc6
        /   \         \ libfoo
   games/  sbin/
   
   The ---> is the DirDown link
   
   
   ##################################################################### */
									/*}}}*/
// Include Files							/*{{{*/
#include <config.h>

#include <apt-pkg/debfile.h>
#include <apt-pkg/dirstream.h>
#include <apt-pkg/error.h>
#include <apt-pkg/fileutl.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "contents.h"

#include <apti18n.h>
									/*}}}*/

// GenContents::~GenContents - Free allocated memory			/*{{{*/
// ---------------------------------------------------------------------
/* Since all our allocations are static big-block allocations all that is 
   needed is to free all of them. */
GenContents::~GenContents()
{
   while (BlockList != 0)
   {
      BigBlock *Old = BlockList;
      BlockList = Old->Next;
      free(Old->Block);
      delete Old;
   }   
}
									/*}}}*/
// GenContents::Mystrdup - Custom strdup				/*{{{*/
// ---------------------------------------------------------------------
/* This strdup also uses a large block allocator to eliminate glibc
   overhead */
char *GenContents::Mystrdup(const char *From)
{
   unsigned int Len = strlen(From) + 1;
   if (StrLeft <= Len)
   {
      StrLeft = 4096*10;
      StrPool = (char *)malloc(StrLeft);
      
      BigBlock *Block = new BigBlock;
      Block->Block = StrPool;
      Block->Next = BlockList;
      BlockList = Block;
   }
   
   memcpy(StrPool,From,Len);
   StrLeft -= Len;
   
   char *Res = StrPool;
   StrPool += Len;
   return Res;
}
									/*}}}*/
// GenContents::Node::operator new - Big block allocator		/*{{{*/
// ---------------------------------------------------------------------
/* This eliminates glibc's malloc overhead by allocating large blocks and
   having a continuous set of Nodes. This takes about 8 bytes off each nodes
   space needs. Freeing is not supported. */
void *GenContents::Node::operator new(size_t Amount,GenContents *Owner)
{
   if (Owner->NodeLeft == 0)
   {
      Owner->NodeLeft = 10000;
      Owner->NodePool = static_cast<Node *>(malloc(Amount*Owner->NodeLeft));
      BigBlock *Block = new BigBlock;
      Block->Block = Owner->NodePool;
      Block->Next = Owner->BlockList;
      Owner->BlockList = Block;
   }
   
   Owner->NodeLeft--;
   return Owner->NodePool++;
}
									/*}}}*/
// GenContents::Grab - Grab a new node representing Name under Top	/*{{{*/
// ---------------------------------------------------------------------
/* This grabs a new node representing the pathname component Name under
   the node Top. The node is given the name Package. It is assumed that Name
   is inside of top. If a duplicate already entered name is found then 
   a note is made on the Dup list and the previous in-tree node is returned. */
GenContents::Node *GenContents::Grab(GenContents::Node *Top,const char *Name,
			const char *Package)
{
   /* We drop down to the next dir level each call. This simplifies
      the calling routine */
   if (Top->DirDown == 0)
   {
      Node *Item = new(this) Node;
      Item->Path = Mystrdup(Name);
      Item->Package = Package;
      Top->DirDown = Item;
      return Item;
   }
   Top = Top->DirDown;
   
   int Res;
   while (1)
   {
      Res = strcmp(Name,Top->Path);
      
      // Collision!
      if (Res == 0)
      {
	 // See if this is the same package (multi-version dup)
	 if (Top->Package == Package ||
	     strcasecmp(Top->Package,Package) == 0)
	    return Top;
	 
	 // Look for an already existing Dup
	 for (Node *I = Top->Dups; I != 0; I = I->Dups)
	    if (I->Package == Package || 
		strcasecmp(I->Package,Package) == 0)
	       return Top;

	 // Add the dup in
	 Node *Item = new(this) Node;
	 Item->Path = Top->Path;
	 Item->Package = Package;
	 Item->Dups = Top->Dups;
	 Top->Dups = Item;
	 return Top;
      }
      
      // Continue to traverse the tree
      if (Res < 0)
      {
	 if (Top->BTreeLeft == 0)
	    break;
	 Top = Top->BTreeLeft;
      }      
      else
      {
	 if (Top->BTreeRight == 0)
	    break;
	 Top = Top->BTreeRight;
      }      
   }

   // The item was not found in the tree
   Node *Item = new(this) Node;
   Item->Path = Mystrdup(Name);
   Item->Package = Package;
   
   // Link it into the tree
   if (Res < 0)
   {
      Item->BTreeLeft = Top->BTreeLeft;
      Top->BTreeLeft = Item;
   }
   else
   {
      Item->BTreeRight = Top->BTreeRight;
      Top->BTreeRight = Item;
   }
   
   return Item;
}
									/*}}}*/
// GenContents::Add - Add a path to the tree				/*{{{*/
// ---------------------------------------------------------------------
/* This takes a full pathname and adds it into the tree. We split the
   pathname into directory fragments adding each one as we go. Technically
   in output from tar this should result in hitting previous items. */
void GenContents::Add(const char *Dir,const char *Package)
{
   Node *Root = &this->Root;
   
   // Drop leading slashes
   while (*Dir == '/')
      Dir++;
   
   // Run over the string and grab out each bit up to and including a /
   const char *Start = Dir;
   const char *I = Dir;
   while (*I != 0)
   {
      if (*I != '/' || I - Start <= 1)
      {
	 I++;
	 continue;
      }      
      I++;
      
      // Copy the path fragment over
      char Tmp[1024];
      strncpy(Tmp,Start,I - Start);
      Tmp[I - Start] = 0;
      
      // Grab a node for it
      Root = Grab(Root,Tmp,Package);
      
      Start = I;
   }
   
   // The final component if it does not have a trailing /
   if (I - Start >= 1)
      Grab(Root,Start,Package);
}
									/*}}}*/
// GenContents::WriteSpace - Write a given number of white space chars	/*{{{*/
// ---------------------------------------------------------------------
/* We mod 8 it and write tabs where possible. */
void GenContents::WriteSpace(std::string &out, size_t Current, size_t Target)
{
   if (Target <= Current)
      Target = Current + 1;

   /* Now we write tabs so long as the next tab stop would not pass
      the target */
   for (; (Current/8 + 1)*8 < Target; Current = (Current/8 + 1)*8)
      out.append("\t");

   // Fill the last bit with spaces
   for (; Current < Target; Current++)
      out.append(" ");
}
									/*}}}*/
// GenContents::Print - Display the tree				/*{{{*/
// ---------------------------------------------------------------------
/* This is the final result function. It takes the tree and recursively
   calls itself and runs over each section of the tree printing out
   the pathname and the hit packages. We use Buf to build the pathname
   summed over all the directory parents of this node. */
void GenContents::Print(FileFd &Out)
{
   char Buffer[1024];
   Buffer[0] = 0;
   DoPrint(Out,&Root,Buffer);
}
void GenContents::DoPrint(FileFd &Out,GenContents::Node *Top, char *Buf)
{
   if (Top == 0)
      return;

   // Go left
   DoPrint(Out,Top->BTreeLeft,Buf);

   // Print the current dir location and then descend to lower dirs
   char *OldEnd = Buf + strlen(Buf);
   if (Top->Path != 0)
   {
      strcat(Buf,Top->Path);

      // Do not show the item if it is a directory with dups
      if (Top->Path[strlen(Top->Path)-1] != '/' /*|| Top->Dups == 0*/)
      {
	 std::string out = Buf;
	 WriteSpace(out, out.length(), 60);
	 for (Node *I = Top; I != 0; I = I->Dups)
	 {
	    if (I != Top)
	       out.append(",");
	    out.append(I->Package);
	 }
         out.append("\n");
	 Out.Write(out.c_str(), out.length());
      }
   }

   // Go along the directory link
   DoPrint(Out,Top->DirDown,Buf);
   *OldEnd = 0;

   // Go right
   DoPrint(Out,Top->BTreeRight,Buf);  
}
									/*}}}*/
// ContentsExtract Constructor						/*{{{*/
ContentsExtract::ContentsExtract()
   : Data(0), MaxSize(0), CurSize(0)
{
}
									/*}}}*/
// ContentsExtract Destructor						/*{{{*/
ContentsExtract::~ContentsExtract()
{
   free(Data);
}
									/*}}}*/
// ContentsExtract::Read - Read the archive				/*{{{*/
// ---------------------------------------------------------------------
/* */
bool ContentsExtract::Read(debDebFile &Deb)
{
   Reset();
   return Deb.ExtractArchive(*this);
}
									/*}}}*/
// ContentsExtract::DoItem - Extract an item				/*{{{*/
// ---------------------------------------------------------------------
/* This just tacks the name onto the end of our memory buffer */
bool ContentsExtract::DoItem(Item &Itm, int &/*Fd*/)
{
   unsigned long Len = strlen(Itm.Name);
   
   // Strip leading ./'s
   if (Itm.Name[0] == '.' && Itm.Name[1] == '/')
   {
      // == './'
      if (Len == 2)
	 return true;
      
      Len -= 2;
      Itm.Name += 2;
   }

   // Allocate more storage for the string list
   if (CurSize + Len + 2 >= MaxSize || Data == 0)
   {
      if (MaxSize == 0)
	 MaxSize = 512*1024/2;
      char *NewData = (char *)realloc(Data,MaxSize*2);
      if (NewData == 0)
	 return _error->Error(_("realloc - Failed to allocate memory"));
      Data = NewData;
      MaxSize *= 2;
   }
   
   strcpy(Data+CurSize,Itm.Name);   
   CurSize += Len + 1;
   return true;
}
									/*}}}*/
// ContentsExtract::TakeContents - Load the contents data		/*{{{*/
// ---------------------------------------------------------------------
/* */
bool ContentsExtract::TakeContents(const void *NewData,unsigned long long Length)
{
   if (Length == 0)
   {
      CurSize = 0;
      return true;
   }

   // Allocate more storage for the string list
   if (Length + 2 >= MaxSize || Data == 0)
   {
      if (MaxSize == 0)
	 MaxSize = 512*1024/2;
      while (MaxSize*2 <= Length)
	 MaxSize *= 2;
      
      char *NewData = (char *)realloc(Data,MaxSize*2);
      if (NewData == 0)
	 return _error->Error(_("realloc - Failed to allocate memory"));
      Data = NewData;
      MaxSize *= 2;
   }
   memcpy(Data,NewData,Length);
   CurSize = Length;
   
   return Data[CurSize-1] == 0;
}
									/*}}}*/
// ContentsExtract::Add - Read the contents data into the sorter	/*{{{*/
// ---------------------------------------------------------------------
/* */
void ContentsExtract::Add(GenContents &Contents,std::string const &Package)
{
   const char *Start = Data;
   char *Pkg = Contents.Mystrdup(Package.c_str());
   for (const char *I = Data; I < Data + CurSize; I++)
   {
      if (*I == 0)
      {
	 Contents.Add(Start,Pkg);
	 Start = ++I;
      }      
   }   
}
									/*}}}*/