summaryrefslogtreecommitdiffstats
path: root/wiretap/file_wrappers.c
diff options
context:
space:
mode:
Diffstat (limited to 'wiretap/file_wrappers.c')
-rw-r--r--wiretap/file_wrappers.c2236
1 files changed, 2236 insertions, 0 deletions
diff --git a/wiretap/file_wrappers.c b/wiretap/file_wrappers.c
new file mode 100644
index 00000000..67979a5b
--- /dev/null
+++ b/wiretap/file_wrappers.c
@@ -0,0 +1,2236 @@
+/* file_wrappers.c
+ *
+ * Wiretap Library
+ * Copyright (c) 1998 by Gilbert Ramirez <gram@alumni.rice.edu>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+/* file_access interface based heavily on zlib gzread.c and gzlib.c from zlib
+ * Copyright (C) 1995-2010 Jean-loup Gailly and Mark Adler
+ * under licence:
+ *
+ * SPDX-License-Identifier: Zlib
+ *
+ */
+
+#include "config.h"
+#include "file_wrappers.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <string.h>
+#include "wtap-int.h"
+
+#include <wsutil/file_util.h>
+
+#ifdef HAVE_ZLIB
+#define ZLIB_CONST
+#include <zlib.h>
+#endif /* HAVE_ZLIB */
+
+#ifdef HAVE_ZSTD
+#include <zstd.h>
+#endif
+
+#ifdef HAVE_LZ4
+#include <lz4.h>
+
+#if LZ4_VERSION_NUMBER >= 10703
+#define USE_LZ4
+#include <lz4frame.h>
+#endif
+#endif
+
+/*
+ * See RFC 1952:
+ *
+ * https://tools.ietf.org/html/rfc1952
+ *
+ * for a description of the gzip file format.
+ *
+ * Some other compressed file formats we might want to support:
+ *
+ * XZ format: https://tukaani.org/xz/
+ *
+ * Bzip2 format: https://www.sourceware.org/bzip2/
+ *
+ * Lzip format: https://www.nongnu.org/lzip/
+ */
+
+/*
+ * List of compression types supported.
+ */
+static struct compression_type {
+ wtap_compression_type type;
+ const char *extension;
+ const char *description;
+} compression_types[] = {
+#ifdef HAVE_ZLIB
+ { WTAP_GZIP_COMPRESSED, "gz", "gzip compressed" },
+#endif
+#ifdef HAVE_ZSTD
+ { WTAP_ZSTD_COMPRESSED, "zst", "zstd compressed" },
+#endif
+#ifdef USE_LZ4
+ { WTAP_LZ4_COMPRESSED, "lz4", "lz4 compressed" },
+#endif
+ { WTAP_UNCOMPRESSED, NULL, NULL }
+};
+
+static wtap_compression_type file_get_compression_type(FILE_T stream);
+
+wtap_compression_type
+wtap_get_compression_type(wtap *wth)
+{
+ return file_get_compression_type((wth->fh == NULL) ? wth->random_fh : wth->fh);
+}
+
+const char *
+wtap_compression_type_description(wtap_compression_type compression_type)
+{
+ for (struct compression_type *p = compression_types;
+ p->type != WTAP_UNCOMPRESSED; p++) {
+ if (p->type == compression_type)
+ return p->description;
+ }
+ return NULL;
+}
+
+const char *
+wtap_compression_type_extension(wtap_compression_type compression_type)
+{
+ for (struct compression_type *p = compression_types;
+ p->type != WTAP_UNCOMPRESSED; p++) {
+ if (p->type == compression_type)
+ return p->extension;
+ }
+ return NULL;
+}
+
+GSList *
+wtap_get_all_compression_type_extensions_list(void)
+{
+ GSList *extensions;
+
+ extensions = NULL; /* empty list, to start with */
+
+ for (struct compression_type *p = compression_types;
+ p->type != WTAP_UNCOMPRESSED; p++)
+ extensions = g_slist_prepend(extensions, (gpointer)p->extension);
+
+ return extensions;
+}
+
+/* #define GZBUFSIZE 8192 */
+#define GZBUFSIZE 4096
+
+/* values for wtap_reader compression */
+typedef enum {
+ UNKNOWN, /* unknown - look for a compression header */
+ UNCOMPRESSED, /* uncompressed - copy input directly */
+ ZLIB, /* decompress a zlib stream */
+ GZIP_AFTER_HEADER,
+ ZSTD,
+ LZ4,
+} compression_t;
+
+/*
+ * We limit the size of our input and output buffers to 2^30 bytes,
+ * because:
+ *
+ * 1) on Windows with MSVC, the return value of _read() is int,
+ * so the biggest read you can do is INT_MAX, and the biggest
+ * power of 2 below that is 2^30;
+ *
+ * 2) the "avail_in" and "avail_out" values in a z_stream structure
+ * in zlib are uInts, and those are unsigned ints, and that
+ * imposes a limit on the buffer size when we're reading a
+ * gzipped file.
+ *
+ * Thus, we use guint for the buffer sizes, offsets, amount available
+ * from the buffer, etc.
+ *
+ * If we want an even bigger buffer for uncompressed data, or for
+ * some other form of compression, then the guint-sized values should
+ * be in structure values used only for reading gzipped files, and
+ * other values should be used for uncompressed data or data
+ * compressed using other algorithms (e.g., in a union).
+ */
+#define MAX_READ_BUF_SIZE (1U << 30)
+
+struct wtap_reader_buf {
+ guint8 *buf; /* buffer */
+ guint8 *next; /* next byte to deliver from buffer */
+ guint avail; /* number of bytes available to deliver at next */
+};
+
+struct wtap_reader {
+ int fd; /* file descriptor */
+ gint64 raw_pos; /* current position in file (just to not call lseek()) */
+ gint64 pos; /* current position in uncompressed data */
+ guint size; /* buffer size */
+
+ struct wtap_reader_buf in; /* input buffer, containing compressed data */
+ struct wtap_reader_buf out; /* output buffer, containing uncompressed data */
+
+ gboolean eof; /* TRUE if end of input file reached */
+ gint64 start; /* where the gzip data started, for rewinding */
+ gint64 raw; /* where the raw data started, for seeking */
+ compression_t compression; /* type of compression, if any */
+ compression_t last_compression; /* last known compression type */
+ gboolean is_compressed; /* FALSE if completely uncompressed, TRUE otherwise */
+
+ /* seek request */
+ gint64 skip; /* amount to skip (already rewound if backwards) */
+ gboolean seek_pending; /* TRUE if seek request pending */
+
+ /* error information */
+ int err; /* error code */
+ const char *err_info; /* additional error information string for some errors */
+
+#ifdef HAVE_ZLIB
+ /* zlib inflate stream */
+ z_stream strm; /* stream structure in-place (not a pointer) */
+ gboolean dont_check_crc; /* TRUE if we aren't supposed to check the CRC */
+#endif
+ /* fast seeking */
+ GPtrArray *fast_seek;
+ void *fast_seek_cur;
+#ifdef HAVE_ZSTD
+ ZSTD_DCtx *zstd_dctx;
+#endif
+#ifdef USE_LZ4
+ LZ4F_dctx *lz4_dctx;
+#endif
+};
+
+/* Current read offset within a buffer. */
+static guint
+offset_in_buffer(struct wtap_reader_buf *buf)
+{
+ /* buf->next points to the next byte to read, and buf->buf points
+ to the first byte in the buffer, so the difference between them
+ is the offset.
+
+ This will fit in an unsigned int, because it can't be bigger
+ than the size of the buffer, which is an unsigned int. */
+ return (guint)(buf->next - buf->buf);
+}
+
+/* Number of bytes of data that are in a buffer. */
+static guint
+bytes_in_buffer(struct wtap_reader_buf *buf)
+{
+ /* buf->next + buf->avail points just past the last byte of data in
+ the buffer.
+ Thus, (buf->next + buf->avail) - buf->buf is the number of bytes
+ of data in the buffer.
+
+ This will fit in an guint, because it can't be bigger
+ than the size of the buffer, which is a guint. */
+ return (guint)((buf->next + buf->avail) - buf->buf);
+}
+
+/* Reset a buffer, discarding all data in the buffer, so we read into
+ it starting at the beginning. */
+static void
+buf_reset(struct wtap_reader_buf *buf)
+{
+ buf->next = buf->buf;
+ buf->avail = 0;
+}
+
+static int
+buf_read(FILE_T state, struct wtap_reader_buf *buf)
+{
+ guint space_left, to_read;
+ unsigned char *read_ptr;
+ ssize_t ret;
+
+ /* How much space is left at the end of the buffer?
+ XXX - the output buffer actually has state->size * 2 bytes. */
+ space_left = state->size - bytes_in_buffer(buf);
+ if (space_left == 0) {
+ /* There's no space left, so we start fresh at the beginning
+ of the buffer. */
+ buf_reset(buf);
+
+ read_ptr = buf->buf;
+ to_read = state->size;
+ } else {
+ /* There's some space left; try to read as much data as we
+ can into that space. We may get less than that if we're
+ reading from a pipe or if we're near the end of the file. */
+ read_ptr = buf->next + buf->avail;
+ to_read = space_left;
+ }
+
+ ret = ws_read(state->fd, read_ptr, to_read);
+ if (ret < 0) {
+ state->err = errno;
+ state->err_info = NULL;
+ return -1;
+ }
+ if (ret == 0)
+ state->eof = TRUE;
+ state->raw_pos += ret;
+ buf->avail += (guint)ret;
+ return 0;
+}
+
+static int /* gz_avail */
+fill_in_buffer(FILE_T state)
+{
+ if (state->err != 0)
+ return -1;
+ if (!state->eof) {
+ if (buf_read(state, &state->in) < 0)
+ return -1;
+ }
+ return 0;
+}
+
+#define ZLIB_WINSIZE 32768
+
+struct fast_seek_point {
+ gint64 out; /* corresponding offset in uncompressed data */
+ gint64 in; /* offset in input file of first full byte */
+
+ compression_t compression;
+ union {
+ struct {
+#ifdef HAVE_INFLATEPRIME
+ int bits; /* number of bits (1-7) from byte at in - 1, or 0 */
+#endif
+ unsigned char window[ZLIB_WINSIZE]; /* preceding 32K of uncompressed data */
+
+ /* be gentle with Z_STREAM_END, 8 bytes more... Another solution would be to comment checks out */
+ guint32 adler;
+ guint32 total_out;
+ } zlib;
+ } data;
+};
+
+struct zlib_cur_seek_point {
+ unsigned char window[ZLIB_WINSIZE]; /* preceding 32K of uncompressed data */
+ unsigned int pos;
+ unsigned int have;
+};
+
+#define SPAN G_GINT64_CONSTANT(1048576)
+static struct fast_seek_point *
+fast_seek_find(FILE_T file, gint64 pos)
+{
+ struct fast_seek_point *smallest = NULL;
+ struct fast_seek_point *item;
+ guint low, i, max;
+
+ if (!file->fast_seek)
+ return NULL;
+
+ for (low = 0, max = file->fast_seek->len; low < max; ) {
+ i = (low + max) / 2;
+ item = (struct fast_seek_point *)file->fast_seek->pdata[i];
+
+ if (pos < item->out)
+ max = i;
+ else if (pos > item->out) {
+ smallest = item;
+ low = i + 1;
+ } else {
+ return item;
+ }
+ }
+ return smallest;
+}
+
+static void
+fast_seek_header(FILE_T file, gint64 in_pos, gint64 out_pos,
+ compression_t compression)
+{
+ struct fast_seek_point *item = NULL;
+
+ if (file->fast_seek->len != 0)
+ item = (struct fast_seek_point *)file->fast_seek->pdata[file->fast_seek->len - 1];
+
+ if (!item || item->out < out_pos) {
+ struct fast_seek_point *val = g_new(struct fast_seek_point,1);
+ val->in = in_pos;
+ val->out = out_pos;
+ val->compression = compression;
+
+ g_ptr_array_add(file->fast_seek, val);
+ }
+}
+
+static void
+fast_seek_reset(
+#ifdef HAVE_ZLIB
+ FILE_T state)
+#else
+ FILE_T state _U_)
+#endif
+{
+#ifdef HAVE_ZLIB
+ if (state->compression == ZLIB && state->fast_seek_cur != NULL) {
+ struct zlib_cur_seek_point *cur = (struct zlib_cur_seek_point *) state->fast_seek_cur;
+
+ cur->have = 0;
+ }
+#endif
+}
+
+#ifdef HAVE_ZLIB
+
+/* Get next byte from input, or -1 if end or error.
+ *
+ * Note:
+ *
+ * 1) errors from buf_read(), and thus from fill_in_buffer(), are
+ * "sticky", and fill_in_buffer() won't do any reading if there's
+ * an error;
+ *
+ * 2) GZ_GETC() returns -1 on an EOF;
+ *
+ * so it's safe to make multiple GZ_GETC() calls and only check the
+ * last one for an error. */
+#define GZ_GETC() ((state->in.avail == 0 && fill_in_buffer(state) == -1) ? -1 : \
+ (state->in.avail == 0 ? -1 : \
+ (state->in.avail--, *(state->in.next)++)))
+
+/* Get a one-byte integer and return 0 on success and the value in *ret.
+ Otherwise -1 is returned, state->err is set, and *ret is not modified. */
+static int
+gz_next1(FILE_T state, guint8 *ret)
+{
+ int ch;
+
+ ch = GZ_GETC();
+ if (ch == -1) {
+ if (state->err == 0) {
+ /* EOF */
+ state->err = WTAP_ERR_SHORT_READ;
+ state->err_info = NULL;
+ }
+ return -1;
+ }
+ *ret = ch;
+ return 0;
+}
+
+/* Get a two-byte little-endian integer and return 0 on success and the value
+ in *ret. Otherwise -1 is returned, state->err is set, and *ret is not
+ modified. */
+static int
+gz_next2(FILE_T state, guint16 *ret)
+{
+ guint16 val;
+ int ch;
+
+ val = GZ_GETC();
+ ch = GZ_GETC();
+ if (ch == -1) {
+ if (state->err == 0) {
+ /* EOF */
+ state->err = WTAP_ERR_SHORT_READ;
+ state->err_info = NULL;
+ }
+ return -1;
+ }
+ val += (guint16)ch << 8;
+ *ret = val;
+ return 0;
+}
+
+/* Get a four-byte little-endian integer and return 0 on success and the value
+ in *ret. Otherwise -1 is returned, state->err is set, and *ret is not
+ modified. */
+static int
+gz_next4(FILE_T state, guint32 *ret)
+{
+ guint32 val;
+ int ch;
+
+ val = GZ_GETC();
+ val += (unsigned)GZ_GETC() << 8;
+ val += (guint32)GZ_GETC() << 16;
+ ch = GZ_GETC();
+ if (ch == -1) {
+ if (state->err == 0) {
+ /* EOF */
+ state->err = WTAP_ERR_SHORT_READ;
+ state->err_info = NULL;
+ }
+ return -1;
+ }
+ val += (guint32)ch << 24;
+ *ret = val;
+ return 0;
+}
+
+/* Skip the specified number of bytes and return 0 on success. Otherwise -1
+ is returned. */
+static int
+gz_skipn(FILE_T state, size_t n)
+{
+ while (n != 0) {
+ if (GZ_GETC() == -1) {
+ if (state->err == 0) {
+ /* EOF */
+ state->err = WTAP_ERR_SHORT_READ;
+ state->err_info = NULL;
+ }
+ return -1;
+ }
+ n--;
+ }
+ return 0;
+}
+
+/* Skip a null-terminated string and return 0 on success. Otherwise -1
+ is returned. */
+static int
+gz_skipzstr(FILE_T state)
+{
+ int ch;
+
+ /* It's null-terminated, so scan until we read a byte with
+ the value 0 or get an error. */
+ while ((ch = GZ_GETC()) > 0)
+ ;
+ if (ch == -1) {
+ if (state->err == 0) {
+ /* EOF */
+ state->err = WTAP_ERR_SHORT_READ;
+ state->err_info = NULL;
+ }
+ return -1;
+ }
+ return 0;
+}
+
+static void
+zlib_fast_seek_add(FILE_T file, struct zlib_cur_seek_point *point, int bits, gint64 in_pos, gint64 out_pos)
+{
+ /* it's for sure after gzip header, so file->fast_seek->len != 0 */
+ struct fast_seek_point *item = (struct fast_seek_point *)file->fast_seek->pdata[file->fast_seek->len - 1];
+
+#ifndef HAVE_INFLATEPRIME
+ if (bits)
+ return;
+#endif
+
+ /* Glib has got Balanced Binary Trees (GTree) but I couldn't find a way to do quick search for nearest (and smaller) value to seek (It's what fast_seek_find() do)
+ * Inserting value in middle of sorted array is expensive, so we want to add only in the end.
+ * It's not big deal, cause first-read don't usually invoke seeking
+ */
+ if (item->out + SPAN < out_pos) {
+ struct fast_seek_point *val = g_new(struct fast_seek_point,1);
+ val->in = in_pos;
+ val->out = out_pos;
+ val->compression = ZLIB;
+#ifdef HAVE_INFLATEPRIME
+ val->data.zlib.bits = bits;
+#endif
+ if (point->pos != 0) {
+ unsigned int left = ZLIB_WINSIZE - point->pos;
+
+ memcpy(val->data.zlib.window, point->window + point->pos, left);
+ memcpy(val->data.zlib.window + left, point->window, point->pos);
+ } else
+ memcpy(val->data.zlib.window, point->window, ZLIB_WINSIZE);
+
+ /*
+ * XXX - strm.adler is a uLong in at least some versions
+ * of zlib, and uLong is an unsigned long in at least
+ * some of those versions, which means it's 64-bit
+ * on LP64 platforms, even though the checksum is
+ * 32-bit. We assume the actual Adler checksum
+ * is in the lower 32 bits of strm.adler; as the
+ * checksum in the file is only 32 bits, we save only
+ * those lower 32 bits, and cast away any additional
+ * bits to squelch warnings.
+ *
+ * The same applies to strm.total_out.
+ */
+ val->data.zlib.adler = (guint32) file->strm.adler;
+ val->data.zlib.total_out = (guint32) file->strm.total_out;
+ g_ptr_array_add(file->fast_seek, val);
+ }
+}
+
+static void /* gz_decomp */
+zlib_read(FILE_T state, unsigned char *buf, unsigned int count)
+{
+ int ret = 0; /* XXX */
+ guint32 crc, len;
+ z_streamp strm = &(state->strm);
+
+ unsigned char *buf2 = buf;
+ unsigned int count2 = count;
+
+ strm->avail_out = count;
+ strm->next_out = buf;
+
+ /* fill output buffer up to end of deflate stream or error */
+ do {
+ /* get more input for inflate() */
+ if (state->in.avail == 0 && fill_in_buffer(state) == -1)
+ break;
+ if (state->in.avail == 0) {
+ /* EOF */
+ state->err = WTAP_ERR_SHORT_READ;
+ state->err_info = NULL;
+ break;
+ }
+
+ strm->avail_in = state->in.avail;
+ strm->next_in = state->in.next;
+ /* decompress and handle errors */
+#ifdef Z_BLOCK
+ ret = inflate(strm, Z_BLOCK);
+#else
+ ret = inflate(strm, Z_NO_FLUSH);
+#endif
+ state->in.avail = strm->avail_in;
+#ifdef z_const
+DIAG_OFF(cast-qual)
+ state->in.next = (unsigned char *)strm->next_in;
+DIAG_ON(cast-qual)
+#else
+ state->in.next = strm->next_in;
+#endif
+ if (ret == Z_STREAM_ERROR) {
+ state->err = WTAP_ERR_DECOMPRESS;
+ state->err_info = strm->msg;
+ break;
+ }
+ if (ret == Z_NEED_DICT) {
+ state->err = WTAP_ERR_DECOMPRESS;
+ state->err_info = "preset dictionary needed";
+ break;
+ }
+ if (ret == Z_MEM_ERROR) {
+ /* This means "not enough memory". */
+ state->err = ENOMEM;
+ state->err_info = NULL;
+ break;
+ }
+ if (ret == Z_DATA_ERROR) { /* deflate stream invalid */
+ state->err = WTAP_ERR_DECOMPRESS;
+ state->err_info = strm->msg;
+ break;
+ }
+ /*
+ * XXX - Z_BUF_ERROR?
+ */
+
+ strm->adler = crc32(strm->adler, buf2, count2 - strm->avail_out);
+#ifdef Z_BLOCK
+ if (state->fast_seek_cur != NULL) {
+ struct zlib_cur_seek_point *cur = (struct zlib_cur_seek_point *) state->fast_seek_cur;
+ unsigned int ready = count2 - strm->avail_out;
+
+ if (ready < ZLIB_WINSIZE) {
+ guint left = ZLIB_WINSIZE - cur->pos;
+
+ if (ready >= left) {
+ memcpy(cur->window + cur->pos, buf2, left);
+ if (ready != left)
+ memcpy(cur->window, buf2 + left, ready - left);
+
+ cur->pos = ready - left;
+ cur->have += ready;
+ } else {
+ memcpy(cur->window + cur->pos, buf2, ready);
+ cur->pos += ready;
+ cur->have += ready;
+ }
+
+ if (cur->have >= ZLIB_WINSIZE)
+ cur->have = ZLIB_WINSIZE;
+
+ } else {
+ memcpy(cur->window, buf2 + (ready - ZLIB_WINSIZE), ZLIB_WINSIZE);
+ cur->pos = 0;
+ cur->have = ZLIB_WINSIZE;
+ }
+
+ if (cur->have >= ZLIB_WINSIZE && ret != Z_STREAM_END && (strm->data_type & 128) && !(strm->data_type & 64))
+ zlib_fast_seek_add(state, cur, (strm->data_type & 7), state->raw_pos - strm->avail_in, state->pos + (count - strm->avail_out));
+ }
+#endif
+ buf2 = (buf2 + count2 - strm->avail_out);
+ count2 = strm->avail_out;
+
+ } while (strm->avail_out && ret != Z_STREAM_END);
+
+ /* update available output and crc check value */
+ state->out.next = buf;
+ state->out.avail = count - strm->avail_out;
+
+ /* Check gzip trailer if at end of deflate stream.
+ We don't fail immediately here, we just set an error
+ indication, so that we try to process what data we
+ got before the error. The next attempt to read
+ something past that data will get the error. */
+ if (ret == Z_STREAM_END) {
+ if (gz_next4(state, &crc) != -1 &&
+ gz_next4(state, &len) != -1) {
+ if (crc != strm->adler && !state->dont_check_crc) {
+ state->err = WTAP_ERR_DECOMPRESS;
+ state->err_info = "bad CRC";
+ } else if (len != (strm->total_out & 0xffffffffUL)) {
+ state->err = WTAP_ERR_DECOMPRESS;
+ state->err_info = "length field wrong";
+ }
+ }
+ state->last_compression = state->compression;
+ state->compression = UNKNOWN; /* ready for next stream, once have is 0 */
+ g_free(state->fast_seek_cur);
+ state->fast_seek_cur = NULL;
+ }
+}
+#endif
+
+static int
+gz_head(FILE_T state)
+{
+ guint already_read;
+
+ /* get some data in the input buffer */
+ if (state->in.avail == 0) {
+ if (fill_in_buffer(state) == -1)
+ return -1;
+ if (state->in.avail == 0)
+ return 0;
+ }
+
+ /* look for the gzip magic header bytes 31 and 139 */
+ if (state->in.next[0] == 31) {
+ state->in.avail--;
+ state->in.next++;
+
+ /* Make sure the byte after the first byte is present */
+ if (state->in.avail == 0 && fill_in_buffer(state) == -1) {
+ /* Read error. */
+ return -1;
+ }
+ if (state->in.avail != 0) {
+ if (state->in.next[0] == 139) {
+ /*
+ * We have what looks like the ID1 and ID2 bytes of a gzip
+ * header.
+ * Continue processing the file.
+ *
+ * XXX - some capture file formats (I'M LOOKING AT YOU,
+ * ENDACE!) can have 31 in the first byte of the file
+ * and 139 in the second byte of the file. For now, in
+ * those cases, you lose.
+ */
+#ifdef HAVE_ZLIB
+ guint8 cm;
+ guint8 flags;
+ guint16 len;
+ guint16 hcrc;
+
+ state->in.avail--;
+ state->in.next++;
+
+ /* read rest of header */
+
+ /* compression method (CM) */
+ if (gz_next1(state, &cm) == -1)
+ return -1;
+ if (cm != 8) {
+ state->err = WTAP_ERR_DECOMPRESS;
+ state->err_info = "unknown compression method";
+ return -1;
+ }
+
+ /* flags (FLG) */
+ if (gz_next1(state, &flags) == -1) {
+ /* Read error. */
+ return -1;
+ }
+ if (flags & 0xe0) { /* reserved flag bits */
+ state->err = WTAP_ERR_DECOMPRESS;
+ state->err_info = "reserved flag bits set";
+ return -1;
+ }
+
+ /* modification time (MTIME) */
+ if (gz_skipn(state, 4) == -1) {
+ /* Read error. */
+ return -1;
+ }
+
+ /* extra flags (XFL) */
+ if (gz_skipn(state, 1) == -1) {
+ /* Read error. */
+ return -1;
+ }
+
+ /* operating system (OS) */
+ if (gz_skipn(state, 1) == -1) {
+ /* Read error. */
+ return -1;
+ }
+
+ if (flags & 4) {
+ /* extra field - get XLEN */
+ if (gz_next2(state, &len) == -1) {
+ /* Read error. */
+ return -1;
+ }
+
+ /* skip the extra field */
+ if (gz_skipn(state, len) == -1) {
+ /* Read error. */
+ return -1;
+ }
+ }
+ if (flags & 8) {
+ /* file name */
+ if (gz_skipzstr(state) == -1) {
+ /* Read error. */
+ return -1;
+ }
+ }
+ if (flags & 16) {
+ /* comment */
+ if (gz_skipzstr(state) == -1) {
+ /* Read error. */
+ return -1;
+ }
+ }
+ if (flags & 2) {
+ /* header crc */
+ if (gz_next2(state, &hcrc) == -1) {
+ /* Read error. */
+ return -1;
+ }
+ /* XXX - check the CRC? */
+ }
+
+ /* set up for decompression */
+ inflateReset(&(state->strm));
+ state->strm.adler = crc32(0L, Z_NULL, 0);
+ state->compression = ZLIB;
+ state->is_compressed = TRUE;
+#ifdef Z_BLOCK
+ if (state->fast_seek) {
+ struct zlib_cur_seek_point *cur = g_new(struct zlib_cur_seek_point,1);
+
+ cur->pos = cur->have = 0;
+ g_free(state->fast_seek_cur);
+ state->fast_seek_cur = cur;
+ fast_seek_header(state, state->raw_pos - state->in.avail, state->pos, GZIP_AFTER_HEADER);
+ }
+#endif /* Z_BLOCK */
+ return 0;
+#else /* HAVE_ZLIB */
+ state->err = WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED;
+ state->err_info = "reading gzip-compressed files isn't supported";
+ return -1;
+#endif /* HAVE_ZLIB */
+ }
+
+ /*
+ * Not a gzip file. "Unget" the first character; either:
+ *
+ * 1) we read both of the first two bytes into the
+ * buffer with the first ws_read, so we can just back
+ * up by one byte;
+ *
+ * 2) we only read the first byte into the buffer with
+ * the first ws_read (e.g., because we're reading from
+ * a pipe and only the first byte had been written to
+ * the pipe at that point), and read the second byte
+ * into the buffer after the first byte in the
+ * fill_in_buffer call, so we now have two bytes in
+ * the buffer, and can just back up by one byte.
+ */
+ state->in.avail++;
+ state->in.next--;
+ }
+ }
+#ifdef HAVE_LIBXZ
+ /* { 0xFD, '7', 'z', 'X', 'Z', 0x00 } */
+ /* FD 37 7A 58 5A 00 */
+#endif
+
+ if (state->in.avail >= 4
+ && state->in.buf[0] == 0x28 && state->in.buf[1] == 0xb5
+ && state->in.buf[2] == 0x2f && state->in.buf[3] == 0xfd) {
+#ifdef HAVE_ZSTD
+ const size_t ret = ZSTD_initDStream(state->zstd_dctx);
+ if (ZSTD_isError(ret)) {
+ state->err = WTAP_ERR_DECOMPRESS;
+ state->err_info = ZSTD_getErrorName(ret);
+ return -1;
+ }
+
+ state->compression = ZSTD;
+ state->is_compressed = TRUE;
+ return 0;
+#else
+ state->err = WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED;
+ state->err_info = "reading zstd-compressed files isn't supported";
+ return -1;
+#endif
+ }
+
+ if (state->in.avail >= 4
+ && state->in.buf[0] == 0x04 && state->in.buf[1] == 0x22
+ && state->in.buf[2] == 0x4d && state->in.buf[3] == 0x18) {
+#ifdef USE_LZ4
+#if LZ4_VERSION_NUMBER >= 10800
+ LZ4F_resetDecompressionContext(state->lz4_dctx);
+#else
+ LZ4F_freeDecompressionContext(state->lz4_dctx);
+ const LZ4F_errorCode_t ret = LZ4F_createDecompressionContext(&state->lz4_dctx, LZ4F_VERSION);
+ if (LZ4F_isError(ret)) {
+ state->err = WTAP_ERR_INTERNAL;
+ state->err_info = LZ4F_getErrorName(ret);
+ return -1;
+ }
+#endif
+ state->compression = LZ4;
+ state->is_compressed = TRUE;
+ return 0;
+#else
+ state->err = WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED;
+ state->err_info = "reading lz4-compressed files isn't supported";
+ return -1;
+#endif
+ }
+
+ if (state->fast_seek)
+ fast_seek_header(state, state->raw_pos - state->in.avail - state->out.avail, state->pos, UNCOMPRESSED);
+
+ /* doing raw i/o, save start of raw data for seeking, copy any leftover
+ input to output -- this assumes that the output buffer is larger than
+ the input buffer, which also assures space for gzungetc() */
+ state->raw = state->pos;
+ state->out.next = state->out.buf;
+ /* not a compressed file -- copy everything we've read into the
+ input buffer to the output buffer and fall to raw i/o */
+ already_read = bytes_in_buffer(&state->in);
+ if (already_read != 0) {
+ memcpy(state->out.buf, state->in.buf, already_read);
+ state->out.avail = already_read;
+
+ /* Now discard everything in the input buffer */
+ buf_reset(&state->in);
+ }
+ state->compression = UNCOMPRESSED;
+ return 0;
+}
+
+static int /* gz_make */
+fill_out_buffer(FILE_T state)
+{
+ if (state->compression == UNKNOWN) { /* look for compression header */
+ if (gz_head(state) == -1)
+ return -1;
+ if (state->out.avail != 0) /* got some data from gz_head() */
+ return 0;
+ }
+ if (state->compression == UNCOMPRESSED) { /* straight copy */
+ if (buf_read(state, &state->out) < 0)
+ return -1;
+ }
+#ifdef HAVE_ZLIB
+ else if (state->compression == ZLIB) { /* decompress */
+ zlib_read(state, state->out.buf, state->size << 1);
+ }
+#endif
+#ifdef HAVE_ZSTD
+ else if (state->compression == ZSTD) {
+ ws_assert(state->out.avail == 0);
+
+ if (state->in.avail == 0 && fill_in_buffer(state) == -1)
+ return -1;
+
+ ZSTD_outBuffer output = {state->out.buf, state->size << 1, 0};
+ ZSTD_inBuffer input = {state->in.next, state->in.avail, 0};
+ const size_t ret = ZSTD_decompressStream(state->zstd_dctx, &output, &input);
+ if (ZSTD_isError(ret)) {
+ state->err = WTAP_ERR_DECOMPRESS;
+ state->err_info = ZSTD_getErrorName(ret);
+ return -1;
+ }
+
+ state->in.next = state->in.next + input.pos;
+ state->in.avail -= (guint)input.pos;
+
+ state->out.next = output.dst;
+ state->out.avail = (guint)output.pos;
+
+ if (ret == 0) {
+ state->last_compression = state->compression;
+ state->compression = UNKNOWN;
+ }
+ }
+#endif
+#ifdef USE_LZ4
+ else if (state->compression == LZ4) {
+ ws_assert(state->out.avail == 0);
+
+ if (state->in.avail == 0 && fill_in_buffer(state) == -1)
+ return -1;
+
+ size_t outBufSize = state->size << 1;
+ size_t inBufSize = state->in.avail;
+ const size_t ret = LZ4F_decompress(state->lz4_dctx, state->out.buf, &outBufSize, state->in.next, &inBufSize, NULL);
+ if (LZ4F_isError(ret)) {
+ state->err = WTAP_ERR_DECOMPRESS;
+ state->err_info = LZ4F_getErrorName(ret);
+ return -1;
+ }
+
+ /*
+ * We assume LZ4F_decompress() will not set inBufSize to a
+ * value > state->in.avail.
+ */
+ state->in.next = state->in.next + inBufSize;
+ state->in.avail -= (guint)inBufSize;
+
+ state->out.next = state->out.buf;
+ state->out.avail = (guint)outBufSize;
+
+ if (ret == 0) {
+ state->last_compression = state->compression;
+ state->compression = UNKNOWN;
+ }
+ }
+#endif
+ return 0;
+}
+
+static int
+gz_skip(FILE_T state, gint64 len)
+{
+ guint n;
+
+ /* skip over len bytes or reach end-of-file, whichever comes first */
+ while (len)
+ if (state->out.avail != 0) {
+ /* We have stuff in the output buffer; skip over
+ it. */
+ n = (gint64)state->out.avail > len ? (unsigned)len : state->out.avail;
+ state->out.avail -= n;
+ state->out.next += n;
+ state->pos += n;
+ len -= n;
+ } else if (state->err != 0) {
+ /* We have nothing in the output buffer, and
+ we have an error that may not have been
+ reported yet; that means we can't generate
+ any more data into the output buffer, so
+ return an error indication. */
+ return -1;
+ } else if (state->eof && state->in.avail == 0) {
+ /* We have nothing in the output buffer, and
+ we're at the end of the input; just return. */
+ break;
+ } else {
+ /* We have nothing in the output buffer, and
+ we can generate more data; get more output,
+ looking for header if required. */
+ if (fill_out_buffer(state) == -1)
+ return -1;
+ }
+ return 0;
+}
+
+static void
+gz_reset(FILE_T state)
+{
+ buf_reset(&state->out); /* no output data available */
+ state->eof = FALSE; /* not at end of file */
+ state->compression = UNKNOWN; /* look for compression header */
+
+ state->seek_pending = FALSE; /* no seek request pending */
+ state->err = 0; /* clear error */
+ state->err_info = NULL;
+ state->pos = 0; /* no uncompressed data yet */
+ buf_reset(&state->in); /* no input data yet */
+}
+
+FILE_T
+file_fdopen(int fd)
+{
+ /*
+ * XXX - we now check whether we have st_blksize in struct stat;
+ * it's not available on all platforms.
+ *
+ * I'm not sure why we're testing _STATBUF_ST_BLKSIZE; it's not
+ * set on all platforms that have st_blksize in struct stat.
+ * (Not all platforms have st_blksize in struct stat.)
+ *
+ * Is there some reason *not* to make the buffer size the maximum
+ * of GBUFSIZE and st_blksize? On most UN*Xes, the standard I/O
+ * library does I/O with st_blksize as the buffer size; on others,
+ * and on Windows, it's a 4K buffer size. If st_blksize is bigger
+ * than GBUFSIZE (which is currently 4KB), that's probably a
+ * hint that reading in st_blksize chunks is considered a good
+ * idea (e.g., an 8K/1K Berkeley fast file system with st_blksize
+ * being 8K, or APFS, where st_blksize is big on at least some
+ * versions of macOS).
+ */
+#ifdef _STATBUF_ST_BLKSIZE
+ ws_statb64 st;
+#endif
+#ifdef HAVE_ZSTD
+ size_t zstd_buf_size;
+#endif
+ guint want = GZBUFSIZE;
+ FILE_T state;
+#ifdef USE_LZ4
+ size_t ret;
+#endif
+
+ if (fd == -1)
+ return NULL;
+
+ /* allocate FILE_T structure to return */
+ state = (FILE_T)g_try_malloc0(sizeof *state);
+ if (state == NULL)
+ return NULL;
+
+ state->fast_seek_cur = NULL;
+ state->fast_seek = NULL;
+
+ /* open the file with the appropriate mode (or just use fd) */
+ state->fd = fd;
+
+ /* we don't yet know whether it's compressed */
+ state->is_compressed = FALSE;
+ state->last_compression = UNKNOWN;
+
+ /* save the current position for rewinding (only if reading) */
+ state->start = ws_lseek64(state->fd, 0, SEEK_CUR);
+ if (state->start == -1) state->start = 0;
+ state->raw_pos = state->start;
+
+ /* initialize stream */
+ gz_reset(state);
+
+#ifdef _STATBUF_ST_BLKSIZE
+ /*
+ * See what I/O size the file system recommends using, and if
+ * it's bigger than what we're using and isn't too big, use
+ * it.
+ */
+ if (ws_fstat64(fd, &st) >= 0) {
+ /*
+ * Yes, st_blksize can be bigger than an int; apparently,
+ * it's a long on LP64 Linux, for example.
+ *
+ * If the value is too big to fit into a guint,
+ * just use the maximum read buffer size.
+ *
+ * On top of that, the Single UNIX Speification says that
+ * st_blksize is of type blksize_t, which is a *signed*
+ * integer type, and, at minimum, macOS 11.6 and Linux 5.14.11's
+ * include/uapi/asm-generic/stat.h define it as such.
+ *
+ * However, other OSes might make it unsigned, and older versions
+ * of OSes that currently make it signed might make it unsigned,
+ * so we try to avoid warnings from that.
+ *
+ * We cast MAX_READ_BUF_SIZE to long in order to avoid the
+ * warning, although it might introduce warnings on platforms
+ * where st_blocksize is unsigned; we'll deal with that if
+ * it ever shows up as an issue.
+ *
+ * MAX_READ_BUF_SIZE is < the largest *signed* 32-bt integer,
+ * so casting it to long won't turn it into a negative number.
+ * (We only support 32-bit and 64-bit 2's-complement platforms.)
+ */
+ if (st.st_blksize <= (long)MAX_READ_BUF_SIZE)
+ want = (guint)st.st_blksize;
+ else
+ want = MAX_READ_BUF_SIZE;
+ /* XXX, verify result? */
+ }
+#endif
+#ifdef HAVE_ZSTD
+ /* we should have separate input and output buf sizes */
+ zstd_buf_size = ZSTD_DStreamInSize();
+ if (zstd_buf_size > want) {
+ if (zstd_buf_size <= MAX_READ_BUF_SIZE)
+ want = (guint)zstd_buf_size;
+ else
+ want = MAX_READ_BUF_SIZE;
+ }
+ zstd_buf_size = ZSTD_DStreamOutSize();
+ if (zstd_buf_size > want) {
+ if (zstd_buf_size <= MAX_READ_BUF_SIZE)
+ want = (guint)zstd_buf_size;
+ else
+ want = MAX_READ_BUF_SIZE;
+ }
+#endif
+ /* allocate buffers */
+ state->in.buf = (unsigned char *)g_try_malloc(want);
+ state->in.next = state->in.buf;
+ state->in.avail = 0;
+ state->out.buf = (unsigned char *)g_try_malloc(want << 1);
+ state->out.next = state->out.buf;
+ state->out.avail = 0;
+ state->size = want;
+ if (state->in.buf == NULL || state->out.buf == NULL) {
+ goto err;
+ }
+
+#ifdef HAVE_ZLIB
+ /* allocate inflate memory */
+ state->strm.zalloc = Z_NULL;
+ state->strm.zfree = Z_NULL;
+ state->strm.opaque = Z_NULL;
+ state->strm.avail_in = 0;
+ state->strm.next_in = Z_NULL;
+ if (inflateInit2(&(state->strm), -15) != Z_OK) { /* raw inflate */
+ goto err;
+ }
+
+ /* for now, assume we should check the crc */
+ state->dont_check_crc = FALSE;
+#endif
+
+#ifdef HAVE_ZSTD
+ state->zstd_dctx = ZSTD_createDCtx();
+ if (state->zstd_dctx == NULL) {
+ goto err;
+ }
+#endif
+
+#ifdef USE_LZ4
+ ret = LZ4F_createDecompressionContext(&state->lz4_dctx, LZ4F_VERSION);
+ if (LZ4F_isError(ret)) {
+ goto err;
+ }
+#endif
+
+ /* return stream */
+ return state;
+
+err:
+#ifdef HAVE_ZLIB
+ inflateEnd(&state->strm);
+#endif
+#ifdef HAVE_ZSTD
+ ZSTD_freeDCtx(state->zstd_dctx);
+#endif
+#ifdef USE_LZ4
+ LZ4F_freeDecompressionContext(state->lz4_dctx);
+#endif
+ g_free(state->out.buf);
+ g_free(state->in.buf);
+ g_free(state);
+ errno = ENOMEM;
+ return NULL;
+}
+
+FILE_T
+file_open(const char *path)
+{
+ int fd;
+ FILE_T ft;
+#ifdef HAVE_ZLIB
+ const char *suffixp;
+#endif
+
+ /* open file and do correct filename conversions.
+
+ XXX - do we need O_LARGEFILE? On UN*X, if we need to do
+ something special to get large file support, the configure
+ script should have set us up with the appropriate #defines,
+ so we should be getting a large-file-enabled file descriptor
+ here. Pre-Large File Summit UN*Xes, and possibly even some
+ post-LFS UN*Xes, might require O_LARGEFILE here, though.
+ If so, we should probably handle that in ws_open(). */
+ if ((fd = ws_open(path, O_RDONLY|O_BINARY, 0000)) == -1)
+ return NULL;
+
+ /* open file handle */
+ ft = file_fdopen(fd);
+ if (ft == NULL) {
+ ws_close(fd);
+ return NULL;
+ }
+
+#ifdef HAVE_ZLIB
+ /*
+ * If this file's name ends in ".caz", it's probably a compressed
+ * Windows Sniffer file. The compression is gzip, but if we
+ * process the CRC as specified by RFC 1952, the computed CRC
+ * doesn't match the stored CRC.
+ *
+ * Compressed Windows Sniffer files don't all have the same CRC
+ * value; is it just random crap, or are they running the CRC on
+ * a different set of data than you're supposed to (e.g., not
+ * CRCing some of the data), or something such as that?
+ *
+ * For now, we just set a flag to ignore CRC errors.
+ */
+ suffixp = strrchr(path, '.');
+ if (suffixp != NULL) {
+ if (g_ascii_strcasecmp(suffixp, ".caz") == 0)
+ ft->dont_check_crc = TRUE;
+ }
+#endif
+
+ return ft;
+}
+
+void
+file_set_random_access(FILE_T stream, gboolean random_flag _U_, GPtrArray *seek)
+{
+ stream->fast_seek = seek;
+}
+
+gint64
+file_seek(FILE_T file, gint64 offset, int whence, int *err)
+{
+ struct fast_seek_point *here;
+ guint n;
+
+ if (whence != SEEK_SET && whence != SEEK_CUR && whence != SEEK_END) {
+ ws_assert_not_reached();
+/*
+ *err = EINVAL;
+ return -1;
+*/
+ }
+
+ /* Normalize offset to a SEEK_CUR specification */
+ if (whence == SEEK_END) {
+ /* Seek relative to the end of the file; given that we might be
+ reading from a compressed file, we do that by seeking to the
+ end of the file, making an offset relative to the end of
+ the file an offset relative to the current position.
+
+ XXX - we don't actually use this yet, but, for uncompressed
+ files, we could optimize it, if desired, by directly using
+ ws_lseek64(). */
+ if (gz_skip(file, G_MAXINT64) == -1) {
+ *err = file->err;
+ return -1;
+ }
+ if (offset == 0) {
+ /* We are done */
+ return file->pos;
+ }
+ } else if (whence == SEEK_SET)
+ offset -= file->pos;
+ else if (file->seek_pending) {
+ /* There's a forward-skip pending, so file->pos doesn't reflect
+ the actual file position, it represents the position from
+ which we're skipping; update the offset to include that. */
+ offset += file->skip;
+ }
+ file->seek_pending = FALSE;
+
+ /*
+ * Are we moving at all?
+ */
+ if (offset == 0) {
+ /* No. Just return the current position. */
+ return file->pos;
+ }
+
+ /*
+ * Are we seeking backwards?
+ */
+ if (offset < 0) {
+ /*
+ * Yes.
+ *
+ * Do we have enough data before the current position in the
+ * buffer that we can seek backwards within the buffer?
+ */
+ if (-offset <= offset_in_buffer(&file->out)) {
+ /*
+ * Yes. Adjust appropriately.
+ *
+ * offset is negative, so -offset is non-negative, and
+ * -offset is <= an unsigned and thus fits in an unsigned.
+ * Get that value and adjust appropriately.
+ *
+ * (Casting offset to unsigned makes it positive, which
+ * is not what we would want, so we cast -offset instead.)
+ *
+ * XXX - this won't work with -offset = 2^63, as its
+ * negative isn't a valid 64-bit integer, but we are
+ * not at all likely to see files big enough to ever
+ * see a negative offset that large.
+ */
+ guint adjustment = (unsigned)(-offset);
+
+ file->out.avail += adjustment;
+ file->out.next -= adjustment;
+ file->pos -= adjustment;
+ return file->pos;
+ }
+ } else {
+ /*
+ * No. Offset is positive; we're seeking forwards.
+ *
+ * Do we have enough data after the current position in the
+ * buffer that we can seek forwards within the buffer?
+ */
+ if (offset < file->out.avail) {
+ /*
+ * Yes. Adjust appropriately.
+ *
+ * offset is < an unsigned and thus fits in an unsigned,
+ * so we can cast it to guint safely.
+ */
+ file->out.avail -= (guint)offset;
+ file->out.next += offset;
+ file->pos += offset;
+ return file->pos;
+ }
+ }
+
+ /*
+ * We're not seeking within the buffer. Do we have "fast seek" data
+ * for the location to which we will be seeking, and is the offset
+ * outside the span for compressed files or is this an uncompressed
+ * file?
+ *
+ * XXX, profile
+ */
+ if ((here = fast_seek_find(file, file->pos + offset)) &&
+ (offset < 0 || offset > SPAN || here->compression == UNCOMPRESSED)) {
+ gint64 off, off2;
+
+ /*
+ * Yes. Use that data to do the seek.
+ * Note that this will be true only if file_set_random_access()
+ * has been called on this file, which should never be the case
+ * for a pipe.
+ */
+#ifdef HAVE_ZLIB
+ if (here->compression == ZLIB) {
+#ifdef HAVE_INFLATEPRIME
+ off = here->in - (here->data.zlib.bits ? 1 : 0);
+#else
+ off = here->in;
+#endif
+ off2 = here->out;
+ } else if (here->compression == GZIP_AFTER_HEADER) {
+ off = here->in;
+ off2 = here->out;
+ } else
+#endif
+ {
+ off2 = (file->pos + offset);
+ off = here->in + (off2 - here->out);
+ }
+
+ if (ws_lseek64(file->fd, off, SEEK_SET) == -1) {
+ *err = errno;
+ return -1;
+ }
+ fast_seek_reset(file);
+
+ file->raw_pos = off;
+ buf_reset(&file->out);
+ file->eof = FALSE;
+ file->seek_pending = FALSE;
+ file->err = 0;
+ file->err_info = NULL;
+ buf_reset(&file->in);
+
+#ifdef HAVE_ZLIB
+ if (here->compression == ZLIB) {
+ z_stream *strm = &file->strm;
+
+ inflateReset(strm);
+ strm->adler = here->data.zlib.adler;
+ strm->total_out = here->data.zlib.total_out;
+#ifdef HAVE_INFLATEPRIME
+ if (here->data.zlib.bits) {
+ FILE_T state = file;
+ int ret = GZ_GETC();
+
+ if (ret == -1) {
+ if (state->err == 0) {
+ /* EOF */
+ *err = WTAP_ERR_SHORT_READ;
+ } else
+ *err = state->err;
+ return -1;
+ }
+ (void)inflatePrime(strm, here->data.zlib.bits, ret >> (8 - here->data.zlib.bits));
+ }
+#endif
+ (void)inflateSetDictionary(strm, here->data.zlib.window, ZLIB_WINSIZE);
+ file->compression = ZLIB;
+ } else if (here->compression == GZIP_AFTER_HEADER) {
+ z_stream *strm = &file->strm;
+
+ inflateReset(strm);
+ strm->adler = crc32(0L, Z_NULL, 0);
+ file->compression = ZLIB;
+ } else
+#endif
+ file->compression = here->compression;
+
+ offset = (file->pos + offset) - off2;
+ file->pos = off2;
+ /* g_print("OK! %ld\n", offset); */
+
+ if (offset) {
+ /* Don't skip forward yet, wait until we want to read from
+ the file; that way, if we do multiple seeks in a row,
+ all involving forward skips, they will be combined. */
+ file->seek_pending = TRUE;
+ file->skip = offset;
+ }
+ return file->pos + offset;
+ }
+
+ /*
+ * Is this an uncompressed file, are we within the raw area,
+ * are we either seeking backwards or seeking past the end
+ * of the buffer, and are we set up for random access with
+ * file_set_random_access()?
+ *
+ * Again, note that this will never be true on a pipe, as
+ * file_set_random_access() should never be called if we're
+ * reading from a pipe.
+ */
+ if (file->compression == UNCOMPRESSED && file->pos + offset >= file->raw
+ && (offset < 0 || offset >= file->out.avail)
+ && (file->fast_seek != NULL))
+ {
+ /*
+ * Yes. Just seek there within the file.
+ */
+ if (ws_lseek64(file->fd, offset - file->out.avail, SEEK_CUR) == -1) {
+ *err = errno;
+ return -1;
+ }
+ file->raw_pos += (offset - file->out.avail);
+ buf_reset(&file->out);
+ file->eof = FALSE;
+ file->seek_pending = FALSE;
+ file->err = 0;
+ file->err_info = NULL;
+ buf_reset(&file->in);
+ file->pos += offset;
+ return file->pos;
+ }
+
+ /*
+ * Are we seeking backwards?
+ */
+ if (offset < 0) {
+ /*
+ * Yes. We have no fast seek data, so we have to rewind and
+ * seek forward.
+ * XXX - true only for compressed files.
+ *
+ * Calculate the amount to skip forward after rewinding.
+ */
+ offset += file->pos;
+ if (offset < 0) { /* before start of file! */
+ *err = EINVAL;
+ return -1;
+ }
+ /* rewind, then skip to offset */
+
+ /* back up and start over */
+ if (ws_lseek64(file->fd, file->start, SEEK_SET) == -1) {
+ *err = errno;
+ return -1;
+ }
+ fast_seek_reset(file);
+ file->raw_pos = file->start;
+ gz_reset(file);
+ }
+
+ /*
+ * Either we're seeking backwards, but have rewound and now need to
+ * skip forwards, or we're seeking forwards.
+ *
+ * Skip what's in output buffer (one less gzgetc() check).
+ */
+ n = (gint64)file->out.avail > offset ? (unsigned)offset : file->out.avail;
+ file->out.avail -= n;
+ file->out.next += n;
+ file->pos += n;
+ offset -= n;
+
+ /* request skip (if not zero) */
+ if (offset) {
+ /* Don't skip forward yet, wait until we want to read from
+ the file; that way, if we do multiple seeks in a row,
+ all involving forward skips, they will be combined. */
+ file->seek_pending = TRUE;
+ file->skip = offset;
+ }
+ return file->pos + offset;
+}
+
+gint64
+file_tell(FILE_T stream)
+{
+ /* return position */
+ return stream->pos + (stream->seek_pending ? stream->skip : 0);
+}
+
+gint64
+file_tell_raw(FILE_T stream)
+{
+ return stream->raw_pos;
+}
+
+int
+file_fstat(FILE_T stream, ws_statb64 *statb, int *err)
+{
+ if (ws_fstat64(stream->fd, statb) == -1) {
+ if (err != NULL)
+ *err = errno;
+ return -1;
+ }
+ return 0;
+}
+
+gboolean
+file_iscompressed(FILE_T stream)
+{
+ return stream->is_compressed;
+}
+
+/* Returns a wtap compression type. If we don't know the compression type,
+ * return WTAP_UNCOMPRESSED, but if our compression state is temporarily
+ * UNKNOWN because we need to reread compression headers, return the last
+ * known compression type.
+ */
+static wtap_compression_type
+file_get_compression_type(FILE_T stream)
+{
+ if (stream->is_compressed) {
+ switch ((stream->compression == UNKNOWN) ? stream->last_compression : stream->compression) {
+
+ case ZLIB:
+ case GZIP_AFTER_HEADER:
+ return WTAP_GZIP_COMPRESSED;
+
+ case ZSTD:
+ return WTAP_ZSTD_COMPRESSED;
+
+ case LZ4:
+ return WTAP_LZ4_COMPRESSED;
+
+ case UNCOMPRESSED:
+ return WTAP_UNCOMPRESSED;
+
+ default: /* UNKNOWN, should never happen if is_compressed is set */
+ ws_assert_not_reached();
+ return WTAP_UNCOMPRESSED;
+ }
+ }
+ return WTAP_UNCOMPRESSED;
+}
+
+int
+file_read(void *buf, unsigned int len, FILE_T file)
+{
+ guint got, n;
+
+ /* if len is zero, avoid unnecessary operations */
+ if (len == 0)
+ return 0;
+
+ /* process a skip request */
+ if (file->seek_pending) {
+ file->seek_pending = FALSE;
+ if (gz_skip(file, file->skip) == -1)
+ return -1;
+ }
+
+ /*
+ * Get len bytes to buf, or less than len if at the end;
+ * if buf is null, just throw the bytes away.
+ */
+ got = 0;
+ do {
+ if (file->out.avail != 0) {
+ /* We have stuff in the output buffer; copy
+ what we have. */
+ n = file->out.avail > len ? len : file->out.avail;
+ if (buf != NULL) {
+ memcpy(buf, file->out.next, n);
+ buf = (char *)buf + n;
+ }
+ file->out.next += n;
+ file->out.avail -= n;
+ len -= n;
+ got += n;
+ file->pos += n;
+ } else if (file->err != 0) {
+ /* We have nothing in the output buffer, and
+ we have an error that may not have been
+ reported yet; that means we can't generate
+ any more data into the output buffer, so
+ return an error indication. */
+ return -1;
+ } else if (file->eof && file->in.avail == 0) {
+ /* We have nothing in the output buffer, and
+ we're at the end of the input; just return
+ with what we've gotten so far. */
+ break;
+ } else {
+ /* We have nothing in the output buffer, and
+ we can generate more data; get more output,
+ looking for header if required, and
+ keep looping to process the new stuff
+ in the output buffer. */
+ if (fill_out_buffer(file) == -1)
+ return -1;
+ }
+ } while (len);
+
+ return (int)got;
+}
+
+/*
+ * XXX - this *peeks* at next byte, not a character.
+ */
+int
+file_peekc(FILE_T file)
+{
+ int ret = 0;
+
+ /* check that we're reading and that there's no error */
+ if (file->err != 0)
+ return -1;
+
+ /* try output buffer (no need to check for skip request) */
+ if (file->out.avail != 0) {
+ return *(file->out.next);
+ }
+
+ /* process a skip request */
+ if (file->seek_pending) {
+ file->seek_pending = FALSE;
+ if (gz_skip(file, file->skip) == -1)
+ return -1;
+ }
+ /* if we processed a skip request, there may be data in the buffer,
+ * or an error could have occurred; likewise if we didn't do seek but
+ * now call fill_out_buffer, the errors can occur. So we do this while
+ * loop to check before and after - this is basically the logic from
+ * file_read() but only for peeking not consuming a byte
+ */
+ while (1) {
+ if (file->out.avail != 0) {
+ return *(file->out.next);
+ }
+ else if (file->err != 0) {
+ return -1;
+ }
+ else if (file->eof && file->in.avail == 0) {
+ return -1;
+ }
+ else if (fill_out_buffer(file) == -1) {
+ return -1;
+ }
+ }
+ /* it's actually impossible to get here */
+ return ret;
+}
+
+/*
+ * XXX - this gets a byte, not a character.
+ */
+int
+file_getc(FILE_T file)
+{
+ unsigned char buf[1];
+ int ret;
+
+ /* check that we're reading and that there's no error */
+ if (file->err != 0)
+ return -1;
+
+ /* try output buffer (no need to check for skip request) */
+ if (file->out.avail != 0) {
+ file->out.avail--;
+ file->pos++;
+ return *(file->out.next)++;
+ }
+
+ ret = file_read(buf, 1, file);
+ return ret < 1 ? -1 : buf[0];
+}
+
+/* Like file_gets, but returns a pointer to the terminating NUL. */
+char *
+file_getsp(char *buf, int len, FILE_T file)
+{
+ guint left, n;
+ char *str;
+ unsigned char *eol;
+
+ /* check parameters */
+ if (buf == NULL || len < 1)
+ return NULL;
+
+ /* check that there's no error */
+ if (file->err != 0)
+ return NULL;
+
+ /* process a skip request */
+ if (file->seek_pending) {
+ file->seek_pending = FALSE;
+ if (gz_skip(file, file->skip) == -1)
+ return NULL;
+ }
+
+ /* copy output bytes up to new line or len - 1, whichever comes first --
+ append a terminating zero to the string (we don't check for a zero in
+ the contents, let the user worry about that) */
+ str = buf;
+ left = (unsigned)len - 1;
+ if (left) do {
+ /* assure that something is in the output buffer */
+ if (file->out.avail == 0) {
+ /* We have nothing in the output buffer. */
+ if (file->err != 0) {
+ /* We have an error that may not have
+ been reported yet; that means we
+ can't generate any more data into
+ the output buffer, so return an
+ error indication. */
+ return NULL;
+ }
+ if (fill_out_buffer(file) == -1)
+ return NULL; /* error */
+ if (file->out.avail == 0) { /* end of file */
+ if (buf == str) /* got bupkus */
+ return NULL;
+ break; /* got something -- return it */
+ }
+ }
+
+ /* look for end-of-line in current output buffer */
+ n = file->out.avail > left ? left : file->out.avail;
+ eol = (unsigned char *)memchr(file->out.next, '\n', n);
+ if (eol != NULL)
+ n = (unsigned)(eol - file->out.next) + 1;
+
+ /* copy through end-of-line, or remainder if not found */
+ memcpy(buf, file->out.next, n);
+ file->out.avail -= n;
+ file->out.next += n;
+ file->pos += n;
+ left -= n;
+ buf += n;
+ } while (left && eol == NULL);
+
+ /* found end-of-line or out of space -- add a terminator and return
+ a pointer to it */
+ buf[0] = 0;
+ return buf;
+}
+
+char *
+file_gets(char *buf, int len, FILE_T file)
+{
+ if (!file_getsp(buf, len, file)) return NULL;
+ return buf;
+}
+
+int
+file_eof(FILE_T file)
+{
+ /* return end-of-file state */
+ return (file->eof && file->in.avail == 0 && file->out.avail == 0);
+}
+
+/*
+ * Routine to return a Wiretap error code (0 for no error, an errno
+ * for a file error, or a WTAP_ERR_ code for other errors) for an
+ * I/O stream. Also returns an error string for some errors.
+ */
+int
+file_error(FILE_T fh, gchar **err_info)
+{
+ if (fh->err!=0 && err_info) {
+ /* g_strdup() returns NULL for NULL argument */
+ *err_info = g_strdup(fh->err_info);
+ }
+ return fh->err;
+}
+
+void
+file_clearerr(FILE_T stream)
+{
+ /* clear error and end-of-file */
+ stream->err = 0;
+ stream->err_info = NULL;
+ stream->eof = FALSE;
+}
+
+void
+file_fdclose(FILE_T file)
+{
+ if (file->fd != -1)
+ ws_close(file->fd);
+ file->fd = -1;
+}
+
+gboolean
+file_fdreopen(FILE_T file, const char *path)
+{
+ int fd;
+
+ if ((fd = ws_open(path, O_RDONLY|O_BINARY, 0000)) == -1)
+ return FALSE;
+ file->fd = fd;
+ return TRUE;
+}
+
+void
+file_close(FILE_T file)
+{
+ int fd = file->fd;
+
+ /* free memory and close file */
+ if (file->size) {
+#ifdef HAVE_ZLIB
+ inflateEnd(&(file->strm));
+#endif
+#ifdef HAVE_ZSTD
+ ZSTD_freeDCtx(file->zstd_dctx);
+#endif
+#ifdef USE_LZ4
+ LZ4F_freeDecompressionContext(file->lz4_dctx);
+#endif
+ g_free(file->out.buf);
+ g_free(file->in.buf);
+ }
+ g_free(file->fast_seek_cur);
+ file->err = 0;
+ file->err_info = NULL;
+ g_free(file);
+ /*
+ * If fd is -1, somebody's done a file_closefd() on us, so
+ * we don't need to close the FD itself, and shouldn't do
+ * so.
+ */
+ if (fd != -1)
+ ws_close(fd);
+}
+
+#ifdef HAVE_ZLIB
+/* internal gzip file state data structure for writing */
+struct wtap_writer {
+ int fd; /* file descriptor */
+ gint64 pos; /* current position in uncompressed data */
+ guint size; /* buffer size, zero if not allocated yet */
+ guint want; /* requested buffer size, default is GZBUFSIZE */
+ unsigned char *in; /* input buffer */
+ unsigned char *out; /* output buffer (double-sized when reading) */
+ unsigned char *next; /* next output data to deliver or write */
+ int level; /* compression level */
+ int strategy; /* compression strategy */
+ int err; /* error code */
+ const char *err_info; /* additional error information string for some errors */
+ /* zlib deflate stream */
+ z_stream strm; /* stream structure in-place (not a pointer) */
+};
+
+GZWFILE_T
+gzwfile_open(const char *path)
+{
+ int fd;
+ GZWFILE_T state;
+ int save_errno;
+
+ fd = ws_open(path, O_BINARY|O_WRONLY|O_CREAT|O_TRUNC, 0666);
+ if (fd == -1)
+ return NULL;
+ state = gzwfile_fdopen(fd);
+ if (state == NULL) {
+ save_errno = errno;
+ ws_close(fd);
+ errno = save_errno;
+ }
+ return state;
+}
+
+GZWFILE_T
+gzwfile_fdopen(int fd)
+{
+ GZWFILE_T state;
+
+ /* allocate wtap_writer structure to return */
+ state = (GZWFILE_T)g_try_malloc(sizeof *state);
+ if (state == NULL)
+ return NULL;
+ state->fd = fd;
+ state->size = 0; /* no buffers allocated yet */
+ state->want = GZBUFSIZE; /* requested buffer size */
+
+ state->level = Z_DEFAULT_COMPRESSION;
+ state->strategy = Z_DEFAULT_STRATEGY;
+
+ /* initialize stream */
+ state->err = Z_OK; /* clear error */
+ state->err_info = NULL; /* clear additional error information */
+ state->pos = 0; /* no uncompressed data yet */
+ state->strm.avail_in = 0; /* no input data yet */
+
+ /* return stream */
+ return state;
+}
+
+/* Initialize state for writing a gzip file. Mark initialization by setting
+ state->size to non-zero. Return -1, and set state->err and possibly
+ state->err_info, on failure; return 0 on success. */
+static int
+gz_init(GZWFILE_T state)
+{
+ int ret;
+ z_streamp strm = &(state->strm);
+
+ /* allocate input and output buffers */
+ state->in = (unsigned char *)g_try_malloc(state->want);
+ state->out = (unsigned char *)g_try_malloc(state->want);
+ if (state->in == NULL || state->out == NULL) {
+ g_free(state->out);
+ g_free(state->in);
+ state->err = ENOMEM;
+ return -1;
+ }
+
+ /* allocate deflate memory, set up for gzip compression */
+ strm->zalloc = Z_NULL;
+ strm->zfree = Z_NULL;
+ strm->opaque = Z_NULL;
+ ret = deflateInit2(strm, state->level, Z_DEFLATED,
+ 15 + 16, 8, state->strategy);
+ if (ret != Z_OK) {
+ g_free(state->out);
+ g_free(state->in);
+ if (ret == Z_MEM_ERROR) {
+ /* This means "not enough memory". */
+ state->err = ENOMEM;
+ } else {
+ /* This "shouldn't happen". */
+ state->err = WTAP_ERR_INTERNAL;
+ state->err_info = "Unknown error from deflateInit2()";
+ }
+ return -1;
+ }
+
+ /* mark state as initialized */
+ state->size = state->want;
+
+ /* initialize write buffer */
+ strm->avail_out = state->size;
+ strm->next_out = state->out;
+ state->next = strm->next_out;
+ return 0;
+}
+
+/* Compress whatever is at avail_in and next_in and write to the output file.
+ Return -1, and set state->err and possibly state->err_info, if there is
+ an error writing to the output file; return 0 on success.
+ flush is assumed to be a valid deflate() flush value. If flush is Z_FINISH,
+ then the deflate() state is reset to start a new gzip stream. */
+static int
+gz_comp(GZWFILE_T state, int flush)
+{
+ int ret;
+ ssize_t got;
+ ptrdiff_t have;
+ z_streamp strm = &(state->strm);
+
+ /* allocate memory if this is the first time through */
+ if (state->size == 0 && gz_init(state) == -1)
+ return -1;
+
+ /* run deflate() on provided input until it produces no more output */
+ ret = Z_OK;
+ do {
+ /* write out current buffer contents if full, or if flushing, but if
+ doing Z_FINISH then don't write until we get to Z_STREAM_END */
+ if (strm->avail_out == 0 || (flush != Z_NO_FLUSH &&
+ (flush != Z_FINISH || ret == Z_STREAM_END))) {
+ have = strm->next_out - state->next;
+ if (have) {
+ got = ws_write(state->fd, state->next, (unsigned int)have);
+ if (got < 0) {
+ state->err = errno;
+ return -1;
+ }
+ if ((ptrdiff_t)got != have) {
+ state->err = WTAP_ERR_SHORT_WRITE;
+ return -1;
+ }
+ }
+ if (strm->avail_out == 0) {
+ strm->avail_out = state->size;
+ strm->next_out = state->out;
+ }
+ state->next = strm->next_out;
+ }
+
+ /* compress */
+ have = strm->avail_out;
+ ret = deflate(strm, flush);
+ if (ret == Z_STREAM_ERROR) {
+ /* This "shouldn't happen". */
+ state->err = WTAP_ERR_INTERNAL;
+ state->err_info = "Z_STREAM_ERROR from deflate()";
+ return -1;
+ }
+ have -= strm->avail_out;
+ } while (have);
+
+ /* if that completed a deflate stream, allow another to start */
+ if (flush == Z_FINISH)
+ deflateReset(strm);
+
+ /* all done, no errors */
+ return 0;
+}
+
+/* Write out len bytes from buf. Return 0, and set state->err, on
+ failure or on an attempt to write 0 bytes (in which case state->err
+ is Z_OK); return the number of bytes written on success. */
+unsigned
+gzwfile_write(GZWFILE_T state, const void *buf, guint len)
+{
+ guint put = len;
+ guint n;
+ z_streamp strm;
+
+ strm = &(state->strm);
+
+ /* check that there's no error */
+ if (state->err != Z_OK)
+ return 0;
+
+ /* if len is zero, avoid unnecessary operations */
+ if (len == 0)
+ return 0;
+
+ /* allocate memory if this is the first time through */
+ if (state->size == 0 && gz_init(state) == -1)
+ return 0;
+
+ /* for small len, copy to input buffer, otherwise compress directly */
+ if (len < state->size) {
+ /* copy to input buffer, compress when full */
+ do {
+ if (strm->avail_in == 0)
+ strm->next_in = state->in;
+ n = state->size - strm->avail_in;
+ if (n > len)
+ n = len;
+#ifdef z_const
+DIAG_OFF(cast-qual)
+ memcpy((Bytef *)strm->next_in + strm->avail_in, buf, n);
+DIAG_ON(cast-qual)
+#else
+ memcpy(strm->next_in + strm->avail_in, buf, n);
+#endif
+ strm->avail_in += n;
+ state->pos += n;
+ buf = (const char *)buf + n;
+ len -= n;
+ if (len && gz_comp(state, Z_NO_FLUSH) == -1)
+ return 0;
+ } while (len);
+ }
+ else {
+ /* consume whatever's left in the input buffer */
+ if (strm->avail_in != 0 && gz_comp(state, Z_NO_FLUSH) == -1)
+ return 0;
+
+ /* directly compress user buffer to file */
+ strm->avail_in = len;
+#ifdef z_const
+ strm->next_in = (z_const Bytef *)buf;
+#else
+DIAG_OFF(cast-qual)
+ strm->next_in = (Bytef *)buf;
+DIAG_ON(cast-qual)
+#endif
+ state->pos += len;
+ if (gz_comp(state, Z_NO_FLUSH) == -1)
+ return 0;
+ }
+
+ /* input was all buffered or compressed (put will fit in int) */
+ return (int)put;
+}
+
+/* Flush out what we've written so far. Returns -1, and sets state->err,
+ on failure; returns 0 on success. */
+int
+gzwfile_flush(GZWFILE_T state)
+{
+ /* check that there's no error */
+ if (state->err != Z_OK)
+ return -1;
+
+ /* compress remaining data with Z_SYNC_FLUSH */
+ gz_comp(state, Z_SYNC_FLUSH);
+ if (state->err != Z_OK)
+ return -1;
+ return 0;
+}
+
+/* Flush out all data written, and close the file. Returns a Wiretap
+ error on failure; returns 0 on success. */
+int
+gzwfile_close(GZWFILE_T state)
+{
+ int ret = 0;
+
+ /* flush, free memory, and close file */
+ if (gz_comp(state, Z_FINISH) == -1)
+ ret = state->err;
+ (void)deflateEnd(&(state->strm));
+ g_free(state->out);
+ g_free(state->in);
+ state->err = Z_OK;
+ if (ws_close(state->fd) == -1 && ret == 0)
+ ret = errno;
+ g_free(state);
+ return ret;
+}
+
+int
+gzwfile_geterr(GZWFILE_T state)
+{
+ return state->err;
+}
+#endif
+
+/*
+ * Editor modelines - https://www.wireshark.org/tools/modelines.html
+ *
+ * Local variables:
+ * c-basic-offset: 4
+ * tab-width: 8
+ * indent-tabs-mode: nil
+ * End:
+ *
+ * vi: set shiftwidth=4 tabstop=8 expandtab:
+ * :indentSize=4:tabSize=8:noTabs=true:
+ */