summaryrefslogtreecommitdiffstats
path: root/file_index.cc
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--file_index.cc143
1 files changed, 143 insertions, 0 deletions
diff --git a/file_index.cc b/file_index.cc
new file mode 100644
index 0000000..5cdba46
--- /dev/null
+++ b/file_index.cc
@@ -0,0 +1,143 @@
+/* Plzip - A parallel compressor compatible with lzip
+ Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <algorithm>
+#include <cerrno>
+#include <cstdio>
+#include <cstring>
+#include <string>
+#include <vector>
+#include <stdint.h>
+#include <unistd.h>
+
+#include "lzip.h"
+#include "file_index.h"
+
+
+int seek_read( const int fd, uint8_t * const buf, const int size,
+ const long long pos )
+ {
+ if( lseek( fd, pos, SEEK_SET ) == pos )
+ return readblock( fd, buf, size );
+ return 0;
+ }
+
+
+const char * format_num( unsigned long long num,
+ unsigned long long limit = -1ULL,
+ const int set_prefix = 0 )
+ {
+ const char * const si_prefix[8] =
+ { "k", "M", "G", "T", "P", "E", "Z", "Y" };
+ const char * const binary_prefix[8] =
+ { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
+ static bool si = true;
+ static char buf[32];
+
+ if( set_prefix ) si = ( set_prefix > 0 );
+ const unsigned factor = ( si ? 1000 : 1024 );
+ const char * const * prefix = ( si ? si_prefix : binary_prefix );
+ const char * p = "";
+ bool exact = ( num % factor == 0 );
+
+ for( int i = 0; i < 8 && ( num > limit || ( exact && num >= factor ) ); ++i )
+ { num /= factor; if( num % factor != 0 ) exact = false; p = prefix[i]; }
+ snprintf( buf, sizeof buf, "%llu %s", num, p );
+ return buf;
+ }
+
+
+File_index::File_index( const int infd ) : retval_( 0 )
+ {
+ const long long isize = lseek( infd, 0, SEEK_END );
+ if( isize < 0 )
+ { error_ = "Input file is not seekable :";
+ error_ += std::strerror( errno ); retval_ = 1; return; }
+ if( isize > INT64_MAX )
+ { error_ = "Input file is too long (2^63 bytes or more).";
+ retval_ = 2; return; }
+ long long pos = isize; // always points to a header or EOF
+ File_header header;
+ File_trailer trailer;
+
+ if( isize < min_member_size )
+ { error_ = "Input file is too short."; retval_ = 2; return; }
+ if( seek_read( infd, header.data, File_header::size, 0 ) != File_header::size )
+ { error_ = "Error reading member header :";
+ error_ += std::strerror( errno ); retval_ = 1; return; }
+ if( !header.verify_magic() )
+ { error_ = "Bad magic number (file not in lzip format).";
+ retval_ = 2; return; }
+ if( !header.verify_version() )
+ { error_ = "Version "; error_ += format_num( header.version() );
+ error_ += "member format not supported."; retval_ = 2; return; }
+
+ while( pos >= min_member_size )
+ {
+ if( seek_read( infd, trailer.data, File_trailer::size,
+ pos - File_trailer::size ) != File_trailer::size )
+ { error_ = "Error reading member trailer :";
+ error_ += std::strerror( errno ); retval_ = 1; break; }
+ const long long member_size = trailer.member_size();
+ if( member_size < min_member_size || member_size > pos )
+ {
+ if( member_vector.size() == 0 ) // maybe trailing garbage
+ { --pos; continue; }
+ error_ = "Member size in trailer is corrupt at pos ";
+ error_ += format_num( pos - 8 ); retval_ = 2; break;
+ }
+ if( seek_read( infd, header.data, File_header::size,
+ pos - member_size ) != File_header::size )
+ { error_ = "Error reading member header :";
+ error_ += std::strerror( errno ); retval_ = 1; break; }
+ if( !header.verify_magic() || !header.verify_version() )
+ {
+ if( member_vector.size() == 0 ) // maybe trailing garbage
+ { --pos; continue; }
+ error_ = "Bad header at pos ";
+ error_ += format_num( pos - member_size ); retval_ = 2; break;
+ }
+ if( member_vector.size() == 0 && isize - pos > File_header::size &&
+ seek_read( infd, header.data, File_header::size, pos ) == File_header::size &&
+ header.verify_magic() && header.verify_version() )
+ { // last trailer is corrupt
+ error_ = "Member size in trailer is corrupt at pos ";
+ error_ += format_num( isize - 8 ); retval_ = 2; break;
+ }
+ pos -= member_size;
+ member_vector.push_back( Member( 0, trailer.data_size(),
+ pos, member_size ) );
+ }
+ if( pos != 0 || member_vector.size() == 0 )
+ {
+ member_vector.clear();
+ if( retval_ == 0 ) { error_ = "Can't create file index."; retval_ = 2; }
+ return;
+ }
+ std::reverse( member_vector.begin(), member_vector.end() );
+ for( unsigned i = 0; i < member_vector.size() - 1; ++i )
+ {
+ const long long end = member_vector[i].dblock.end();
+ if( end < 0 || end > INT64_MAX )
+ {
+ member_vector.clear();
+ error_ = "Data in input file is too long (2^63 bytes or more).";
+ retval_ = 2; return;
+ }
+ member_vector[i+1].dblock.pos( end );
+ }
+ }