summaryrefslogtreecommitdiffstats
path: root/extended.cc
blob: 7e0cb3053acb8079de224e69e926e9ded0251244 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
/*  Tarlz - Archiver with multimember lzip compression
    Copyright (C) 2013-2019 Antonio Diaz Diaz.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#define _FILE_OFFSET_BITS 64

#include <cctype>
#include <climits>
#include <cstdlib>
#include <cstring>
#include <string>
#include <vector>
#include <stdint.h>

#include "tarlz.h"


namespace {

// Returns how many decimal digits are needed to write 'value'.
// The result is never less than 1, so 0 counts as one digit.
unsigned decimal_digits( unsigned long long value )
  {
  unsigned count = 0;
  do { ++count; value /= 10; } while( value != 0 );
  return count;
  }


// Returns the full length of a record "<length> keyword=value\n", where
// <length> is the decimal length of the whole record, its own digits
// included. Returns 0 if the length does not fit below INT_MAX.
int record_size( const unsigned keyword_size, const unsigned long value_size )
  {
  // payload = ' ' + keyword + '=' + value + '\n'
  const unsigned long long payload = 1 + keyword_size + 1 + value_size + 1;
  // Adding the digit count of the payload before counting digits again
  // accounts for the case where the length field itself adds a digit.
  const unsigned guess = decimal_digits( payload );
  const unsigned long long total = payload + decimal_digits( guess + payload );
  return ( total >= INT_MAX ) ? 0 : total;	// 0 = overflows snprintf size
  }


/* Parses an unsigned decimal number from the first 'size' bytes at 'ptr'.
   Leading whitespace is skipped. Parsing stops at the first non-digit or
   after 'size' bytes, whichever comes first; bytes at or beyond
   ptr[size] are never read.

   ptr    start of the field (need not be null-terminated).
   tailp  if non-null, receives a pointer to the first byte not consumed
          on success, or 'ptr' itself on error.
   size   number of readable bytes at 'ptr'.

   Returns the parsed value. Returns 0 with *tailp == ptr if no digit is
   found or if the value is larger than LLONG_MAX.
*/
unsigned long long parse_decimal( const char * const ptr,
                                  const char ** const tailp,
                                  const unsigned long long size )
  {
  const unsigned long long limit = LLONG_MAX;
  unsigned long long result = 0;
  unsigned long long i = 0;
  // <cctype> functions require a value representable as unsigned char
  while( i < size && std::isspace( (unsigned char)ptr[i] ) ) ++i;
  // an empty or all-blank field is an error; do not look at ptr[size]
  if( i >= size || !std::isdigit( (unsigned char)ptr[i] ) )
    { if( tailp ) *tailp = ptr; return 0; }
  for( ; i < size && std::isdigit( (unsigned char)ptr[i] ); ++i )
    {
    const unsigned digit = ptr[i] - '0';
    // Test before multiplying: a wrapped product can land between the
    // previous value and LLONG_MAX, so comparing afterwards is unreliable.
    if( result > ( limit - digit ) / 10 )		// overflow
      { if( tailp ) *tailp = ptr; return 0; }
    result = result * 10 + digit;
    }
  if( tailp ) *tailp = ptr + i;
  return result;
  }


// Converts the 8 hexadecimal digits at 'ptr' (either case) to a 32-bit
// value. Returns 0 as soon as a byte that is not a hex digit is found.
uint32_t parse_record_crc( const char * const ptr )
  {
  uint32_t value = 0;
  for( const char * p = ptr; p != ptr + 8; ++p )
    {
    const char ch = *p;
    unsigned nibble;
    if( ch >= '0' && ch <= '9' ) nibble = ch - '0';
    else if( ch >= 'A' && ch <= 'F' ) nibble = ch - 'A' + 10;
    else if( ch >= 'a' && ch <= 'f' ) nibble = ch - 'a' + 10;
    else return 0;			// invalid digit in crc string
    value = ( value << 4 ) + nibble;
    }
  return value;
  }

} // end namespace


// Reference "GNU.crc32" record. The string is 22 bytes long, matching its
// own leading length field; Extended::parse rejects any GNU.crc32 record
// whose length differs from crc_record.size().
// NOTE(review): the eight zeros look like a placeholder for the CRC
// digits - confirm against the code that writes the record.
const std::string Extended::crc_record( "22 GNU.crc32=00000000\n" );

// Returns the length of the "linkpath" record, or 0 if linkpath_ is empty.
// The value is computed on first use and cached in recsize_linkpath_
// (a negative cached value means "not computed yet").
int Extended::recsize_linkpath() const
  {
  if( recsize_linkpath_ >= 0 ) return recsize_linkpath_;
  if( linkpath_.size() == 0 ) recsize_linkpath_ = 0;
  else recsize_linkpath_ = record_size( 8, linkpath_.size() );	// "linkpath"
  return recsize_linkpath_;
  }

// Returns the length of the "path" record, or 0 if path_ is empty.
// The value is computed on first use and cached in recsize_path_
// (a negative cached value means "not computed yet").
int Extended::recsize_path() const
  {
  if( recsize_path_ >= 0 ) return recsize_path_;
  if( path_.size() == 0 ) recsize_path_ = 0;
  else recsize_path_ = record_size( 4, path_.size() );		// "path"
  return recsize_path_;
  }

int Extended::recsize_file_size() const
  {
  if( recsize_file_size_ < 0 ) recsize_file_size_ =
    ( file_size_ > 0 ) ? record_size( 4, file_size_ ) : 0;
  return recsize_file_size_;
  }


/* Parses the 'edsize' bytes of extended records at 'buf' into this object.
   Each record has the form "<length> keyword=value\n", where <length> is
   the decimal length of the whole record. The keywords "path", "linkpath",
   "size" and "GNU.crc32" are recognized; records with any other keyword
   are skipped.

   buf         start of the extended data.
   edsize      number of bytes of extended data at 'buf'.
   permissive  if true, a repeated keyword overrides the earlier value
               instead of making the parse fail.

   Returns false if a record is malformed, if a keyword is repeated and
   'permissive' is false, if a "size" value is invalid or smaller than
   2^33, or if the stored CRC does not match the computed one. The object
   may be left partially filled when false is returned.
*/
bool Extended::parse( const char * const buf, const unsigned long long edsize,
                      const bool permissive )
  {
  reset();
  for( unsigned long long pos = 0; pos < edsize; )	// parse records
    {
    const char * tail;
    const unsigned long long rsize =
      parse_decimal( buf + pos, &tail, edsize - pos );
    if( rsize == 0 || rsize > edsize - pos ) return false;
    // parse_decimal skips leading whitespace, so the length field may be
    // wider than the record it announces. Require the separating space to
    // lie before the final newline; this keeps tail[0] inside the record
    // and prevents 'rest' from underflowing below.
    const unsigned long long lsize = tail - ( buf + pos );
    if( lsize + 1 >= rsize || tail[0] != ' ' ||
        buf[pos+rsize-1] != '\n' ) return false;
    ++tail;	// point to keyword
    // rest = length of (keyword + '=' + value) without the final newline
    const unsigned long long rest = ( buf + ( pos + rsize - 1 ) ) - tail;
    if( rest > 5 && std::memcmp( tail, "path=", 5 ) == 0 )
      { if( path_.size() && !permissive ) return false;
        path_.assign( tail + 5, rest - 5 ); }
    else if( rest > 9 && std::memcmp( tail, "linkpath=", 9 ) == 0 )
      { if( linkpath_.size() && !permissive ) return false;
        linkpath_.assign( tail + 9, rest - 9 ); }
    else if( rest > 5 && std::memcmp( tail, "size=", 5 ) == 0 )
      {
      if( file_size_ != 0 && !permissive ) return false;
      file_size_ = parse_decimal( tail + 5, &tail, rest - 5 );
      // parse error or size fits in ustar header
      if( file_size_ < 1ULL << 33 || tail != buf + ( pos + rsize - 1 ) )
        return false;
      }
    else if( rest > 10 && std::memcmp( tail, "GNU.crc32=", 10 ) == 0 )
      {
      if( crc_present_ && !permissive ) return false;
      // the CRC record has a fixed length (see crc_record)
      if( rsize != crc_record.size() ) return false;
      const uint32_t stored_crc = parse_record_crc( tail + 10 );
      // pos + rsize - 9 is the offset of the 8 CRC digits in this record
      const uint32_t computed_crc =
        crc32c.windowed_crc( (const uint8_t *)buf, pos + rsize - 9, edsize );
      crc_present_ = true;
      if( stored_crc != computed_crc ) return false;
      }
    pos += rsize;
    }
  full_size_ = header_size + round_up( edsize );
  return true;
  }