summaryrefslogtreecommitdiffstats
path: root/src/vfs/tar/tar-internal.c
blob: 2c4a0ad99816a330ad3e8d240338ac64c7788bbb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
/*
   Virtual File System: GNU Tar file system.

   Copyright (C) 2023-2024
   Free Software Foundation, Inc.

   Written by:
   Andrew Borodin <aborodin@vmail.ru>, 2023

   This file is part of the Midnight Commander.

   The Midnight Commander is free software: you can redistribute it
   and/or modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation, either version 3 of the License,
   or (at your option) any later version.

   The Midnight Commander is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * \file
 * \brief Source: Virtual File System: GNU Tar file system
 * \author Andrew Borodin
 * \date 2022
 */

#include <config.h>

#include <inttypes.h>           /* uintmax_t */
#include <stdint.h>             /* UINTMAX_MAX, etc */

#include "lib/global.h"
#include "lib/widget.h"         /* message() */
#include "lib/vfs/vfs.h"        /* mc_read() */

#include "tar-internal.h"

/*** global variables ****************************************************************************/

/*** file scope macro definitions ****************************************************************/

/* Log base 2 of common values.
   tar_from_header() uses them as per-digit shift widths:
   LG_8 for octal, LG_64 for base-64 and LG_256 for base-256 fields. */
#define LG_8 3
#define LG_64 6
#define LG_256 8

/*** file scope type declarations ****************************************************************/

/*** forward declarations (file scope functions) *************************************************/

/*** file scope variables ************************************************************************/

/* Base 64 digits; see RFC 2045 Table 1.
   The array index is the numeric value of the digit stored at that index;
   tar_base64_init() inverts this table into base64_map. */
static char const base_64_digits[64] = {
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
};

/* Table of base 64 digit values indexed by unsigned chars.
   The value is 64 for unsigned chars that are not base 64 digits.
   The table has one entry per possible unsigned char value and is filled in
   by tar_base64_init(); until that call every entry is zero. */
static char base64_map[1 + (unsigned char) (-1)];

/* --------------------------------------------------------------------------------------------- */
/*** file scope functions ************************************************************************/
/* --------------------------------------------------------------------------------------------- */

/**
 * Top up the record buffer after a read that returned fewer bytes than a whole record.
 *
 * @status is the number of bytes already stored at the start of the record buffer.
 * Reading continues until the record is full, the data read so far ends on
 * a block boundary and the source reports end of data, or an error occurs.
 * On success record_end is set just past the last complete block in the buffer.
 *
 * @return FALSE if mc_read() returns -1, TRUE otherwise.
 */
static gboolean
tar_short_read (size_t status, tar_super_t * archive)
{
    char *cursor = archive->record_start->buffer + status;      /* where the next byte goes */
    size_t remaining = record_size - status;    /* bytes still missing from the record */

    while (remaining % BLOCKSIZE != 0 || (remaining != 0 && status != 0))
    {
        ssize_t got;

        /* The previous read returned nothing: there is no more data to wait for. */
        if (status == 0)
            break;

        got = mc_read (archive->fd, cursor, remaining);
        if (got == -1)
            return FALSE;
        if (got == 0)
            break;

        status = (size_t) got;
        cursor += status;
        remaining -= status;
    }

    /* Only whole blocks are made available; a trailing partial block is not counted. */
    record_end = archive->record_start + (record_size - remaining) / BLOCKSIZE;

    return TRUE;
}

/* --------------------------------------------------------------------------------------------- */

/**
 * Read the next record from the archive into the record buffer.
 *
 * A full record is the normal case.  A shorter (possibly empty) read is
 * handed over to tar_short_read() to be completed.
 *
 * The result of mc_read() is kept signed: an error return must not be
 * mistaken for a huge byte count, which tar_short_read() would otherwise use
 * to build an out-of-range pointer and length.
 *
 * @return FALSE on read error, TRUE otherwise.
 */
static gboolean
tar_flush_read (tar_super_t * archive)
{
    ssize_t status;

    status = mc_read (archive->fd, archive->record_start->buffer, record_size);
    if (status < 0)
        return FALSE;

    if ((size_t) status == record_size)
        return TRUE;

    return tar_short_read ((size_t) status, archive);
}

/* --------------------------------------------------------------------------------------------- */

/**
 * Discard the current record and load the next one from the archive.
 *
 * The block ordinal of the record start is advanced by the number of blocks
 * the old record held, current_block is rewound to the start of the buffer
 * and record_end is preset to a full record before the read is issued.
 *
 * @return the result of tar_flush_read().
 */
static gboolean
tar_flush_archive (tar_super_t * archive)
{
    union block *const first = archive->record_start;

    /* Must use the old record_end, so account for the consumed blocks first. */
    record_start_block += record_end - first;
    record_end = first + blocking_factor;
    current_block = first;

    return tar_flush_read (archive);
}

/* --------------------------------------------------------------------------------------------- */

/**
 * Try to skip most of @size bytes by seeking in the archive instead of reading.
 *
 * Only whole records beyond the current record position are skipped; the caller
 * is expected to consume whatever remains block by block.
 *
 * @return the number of blocks skipped (0 if seeking is not worthwhile),
 *         or the negative value returned by mc_lseek() on failure.
 */
static off_t
tar_seek_archive (tar_super_t * archive, off_t size)
{
    off_t in_buffer;            /* bytes from current_block to the end of a full record */
    off_t records;              /* whole records that can be jumped over */
    off_t first_block;          /* block ordinal before the seek */
    off_t pos;                  /* archive position after the seek */

    in_buffer = (blocking_factor - (current_block - archive->record_start)) * BLOCKSIZE;
    if (size <= in_buffer)
        return 0;

    records = (size - in_buffer) / record_size;
    if (records == 0)
        return 0;

    first_block = tar_current_block_ordinal (archive);

    pos = mc_lseek (archive->fd, records * record_size, SEEK_CUR);
    if (pos < 0)
        return pos;

#if 0
    if ((pos % record_size) != 0)
    {
        message (D_ERROR, MSG_ERROR, _("tar: mc_lseek not stopped at a record boundary"));
        return -1;
    }
#endif

    /* From here on the position is counted in blocks, not bytes. */
    pos /= BLOCKSIZE;

    /* Make the buffer look exhausted so that the next block request reads a new record. */
    record_start_block = pos - blocking_factor;
    current_block = record_end;

    return pos - first_block;
}

/* --------------------------------------------------------------------------------------------- */
/*** public functions ****************************************************************************/
/* --------------------------------------------------------------------------------------------- */

/**
 * Tell whether @c is one of the characters '0' through '7'.
 */
gboolean
is_octal_digit (char c)
{
    return !(c < '0' || c > '7');
}

/* --------------------------------------------------------------------------------------------- */

/**
 * Build base64_map, the reverse lookup table for base_64_digits.
 */
void
tar_base64_init (void)
{
    size_t digit;

    /* 64 marks "not a base 64 digit"; real digits are overwritten below. */
    memset (base64_map, 64, sizeof base64_map);

    for (digit = 0; digit < sizeof base_64_digits; digit++)
    {
        const int ch = (int) base_64_digits[digit];

        base64_map[ch] = digit;
    }
}

/* --------------------------------------------------------------------------------------------- */

/**
 * Replace *@string with @value, releasing the previous string with g_free().
 * The pointer @value itself is stored; no copy is made.
 */
void
tar_assign_string (char **string, char *value)
{
    char *previous = *string;

    *string = value;
    g_free (previous);
}

/* --------------------------------------------------------------------------------------------- */

/**
 * Replace *@string with a newly allocated copy of @value.
 *
 * The copy is taken before the old string is released, so @value may be
 * *@string itself or point into it.
 */
void
tar_assign_string_dup (char **string, const char *value)
{
    char *copy;

    copy = g_strdup (value);
    g_free (*string);
    *string = copy;
}

/* --------------------------------------------------------------------------------------------- */

/**
 * Replace *@string with a newly allocated copy of at most @n bytes of @value.
 *
 * The copy is taken before the old string is released, so @value may be
 * *@string itself or point into it.
 */
void
tar_assign_string_dup_n (char **string, const char *value, size_t n)
{
    char *copy;

    copy = g_strndup (value, n);
    g_free (*string);
    *string = copy;
}

/* --------------------------------------------------------------------------------------------- */

/**
 * Convert buffer at @where0 of size @digs from external format to intmax_t.
 * @digs must be positive.
 *
 * After an optional leading NUL and leading white space, the following
 * encodings are recognized:
 *  - octal digits;
 *  - two's complement octal, as written by old tars for negative values
 *    (only if @type is non-NULL and the leading digit is 2 or more);
 *  - base-64 introduced by '+' or '-' (not if @octal_only);
 *  - base-256 introduced by '\200' or '\377' (not if @octal_only).
 *
 * @type names the data type of the field; in this function it is only tested
 * against NULL.
 * The buffer must represent a value in the range @minval through @maxval;
 * if the mathematically correct result V would be greater than INTMAX_MAX,
 * return a negative integer V such that (uintmax_t) V yields the correct result.
 * If @octal_only, allow only octal numbers instead of the other GNU extensions.
 *
 * Result is -1 if the field is invalid.
 */
#if !(INTMAX_MAX <= UINTMAX_MAX && - (INTMAX_MIN + 1) <= UINTMAX_MAX)
#error "tar_from_header() internally represents intmax_t as uintmax_t + sign"
#endif
#if !(UINTMAX_MAX / 2 <= INTMAX_MAX)
#error "tar_from_header() returns intmax_t to represent uintmax_t"
#endif
intmax_t
tar_from_header (const char *where0, size_t digs, char const *type, intmax_t minval,
                 uintmax_t maxval, gboolean octal_only)
{
    /* The magnitude is accumulated unsigned; the sign is tracked separately. */
    uintmax_t value = 0;
    uintmax_t uminval = minval;
    uintmax_t minus_minval = -uminval;
    const char *where = where0;
    char const *lim = where + digs;
    gboolean negative = FALSE;

    /* Accommodate buggy tar of unknown vintage, which outputs leading
       NUL if the previous field overflows. */
    if (*where == '\0')
        where++;

    /* Accommodate older tars, which output leading spaces. */
    while (TRUE)
    {
        if (where == lim)
            return (-1);

        if (!g_ascii_isspace (*where))
            break;

        where++;
    }

    if (is_octal_digit (*where))
    {
        char const *where1 = where;
        gboolean overflow = FALSE;

        while (TRUE)
        {
            value += *where++ - '0';
            if (where == lim || !is_octal_digit (*where))
                break;
            /* Shifting would drop high bits if the round trip changes the value. */
            overflow |= value != (value << LG_8 >> LG_8);
            value <<= LG_8;
        }

        /* Parse the output of older, unportable tars, which generate
           negative values in two's complement octal. If the leading
           nonzero digit is 1, we can't recover the original value
           reliably; so do this only if the digit is 2 or more. This
           catches the common case of 32-bit negative time stamps.
           Note that the comparison is against the character '2': comparing
           with the integer 2 would be true for every octal digit. */
        if ((overflow || maxval < value) && *where1 >= '2' && type != NULL)
        {
            /* Compute the negative of the input value, assuming two's complement. */
            int digit;

            /* Force the top bit of the leading digit on before complementing. */
            digit = (*where1 - '0') | 4;
            overflow = FALSE;
            value = 0;
            where = where1;

            while (TRUE)
            {
                value += 7 - digit;
                where++;
                if (where == lim || !is_octal_digit (*where))
                    break;
                digit = *where - '0';
                overflow |= value != (value << LG_8 >> LG_8);
                value <<= LG_8;
            }

            /* One's complement + 1 gives the two's complement magnitude. */
            value++;
            overflow |= value == 0;

            if (!overflow && value <= minus_minval)
                negative = TRUE;
        }

        if (overflow)
            return (-1);
    }
    else if (octal_only)
    {
        /* Suppress the following extensions. */
    }
    else if (*where == '-' || *where == '+')
    {
        /* Parse base-64 output produced only by tar test versions
           1.13.6 (1999-08-11) through 1.13.11 (1999-08-23).
           Support for this will be withdrawn in future tar releases. */
        int dig;

        negative = *where++ == '-';

        while (where != lim && (dig = base64_map[(unsigned char) *where]) < 64)
        {
            if (value << LG_64 >> LG_64 != value)
                return (-1);
            value = (value << LG_64) | dig;
            where++;
        }
    }
    else if (where <= lim - 2 && (*where == '\200'      /* positive base-256 */
                                  || *where == '\377' /* negative base-256 */ ))
    {
        /* Parse base-256 output.  A nonnegative number N is
           represented as (256**DIGS)/2 + N; a negative number -N is
           represented as (256**DIGS) - N, i.e. as two's complement.
           The representation guarantees that the leading bit is
           always on, so that we don't confuse this format with the
           others (assuming ASCII bytes of 8 bits or more). */

        int signbit;
        uintmax_t topbits;

        signbit = *where & (1 << (LG_256 - 2));
        topbits =
            (((uintmax_t) - signbit) << (CHAR_BIT * sizeof (uintmax_t) - LG_256 - (LG_256 - 2)));

        value = (*where++ & ((1 << (LG_256 - 2)) - 1)) - signbit;

        while (TRUE)
        {
            value = (value << LG_256) + (unsigned char) *where++;
            if (where == lim)
                break;

            /* The next shift must not lose significant (non sign-extension) bits. */
            if (((value << LG_256 >> LG_256) | topbits) != value)
                return (-1);
        }

        negative = signbit != 0;
        if (negative)
            value = -value;
    }

    /* Whatever follows the number must be a terminator: end of field, NUL or white space. */
    if (where != lim && *where != '\0' && !g_ascii_isspace (*where))
        return (-1);

    if (value <= (negative ? minus_minval : maxval))
        return tar_represent_uintmax (negative ? -value : value);

    return (-1);
}

/* --------------------------------------------------------------------------------------------- */

/**
 * Decode the @s-byte header field at @p as a file offset.
 *
 * Negative offsets are not allowed in tar files, so the accepted range
 * starts at 0 rather than at the minimum value of off_t.
 */
off_t
off_from_header (const char *p, size_t s)
{
    const uintmax_t largest = TYPE_MAXIMUM (off_t);

    return tar_from_header (p, s, "off_t", 0, largest, FALSE);
}

/* --------------------------------------------------------------------------------------------- */

/**
 * Return the block at the current position, loading the next record from
 * the archive when the buffered one has been used up.
 *
 * @return NULL at end of archive or when the next record cannot be read
 *         (an error message is shown in the latter case).
 */
union block *
tar_find_next_block (tar_super_t * archive)
{
    /* Fast path: the buffered record still has unread blocks. */
    if (current_block != record_end)
        return current_block;

    if (hit_eof)
        return NULL;

    if (!tar_flush_archive (archive))
    {
        message (D_ERROR, MSG_ERROR, _("Inconsistent tar archive"));
        return NULL;
    }

    /* The refill produced no blocks at all: remember that the end was reached. */
    if (current_block == record_end)
    {
        hit_eof = TRUE;
        return NULL;
    }

    return current_block;
}

/* --------------------------------------------------------------------------------------------- */

/**
 * Indicate that we have used all blocks up thru @block.
 */
gboolean
tar_set_next_block_after (union block * block)
{
    while (block >= current_block)
        current_block++;

    /* Do *not* flush the archive here. If we do, the same argument to tar_set_next_block_after()
       could mean the next block (if the input record is exactly one block long), which is not
       what is intended.  */

    return !(current_block > record_end);
}

/* --------------------------------------------------------------------------------------------- */

/**
 * Return the ordinal of current_block: the ordinal of the first block of the
 * current record plus the position of current_block inside that record.
 */
off_t
tar_current_block_ordinal (const tar_super_t * archive)
{
    const off_t in_record = current_block - archive->record_start;

    return record_start_block + in_record;
}

/* --------------------------------------------------------------------------------------------- */

/**
 * Skip over @size bytes of data in blocks in the archive.
 *
 * As much as possible is skipped by seeking; the rest is consumed one block
 * at a time.
 *
 * @return FALSE if the archive ends before @size bytes were skipped, TRUE otherwise.
 */
gboolean
tar_skip_file (tar_super_t * archive, off_t size)
{
    const off_t seeked = tar_seek_archive (archive, size);

    /* A negative result means the seek failed; fall back to reading everything. */
    if (seeked >= 0)
        size -= seeked * BLOCKSIZE;

    for (; size > 0; size -= BLOCKSIZE)
    {
        union block *blk = tar_find_next_block (archive);

        if (blk == NULL)
            return FALSE;

        tar_set_next_block_after (blk);
    }

    return TRUE;
}

/* --------------------------------------------------------------------------------------------- */