summaryrefslogtreecommitdiffstats
path: root/src/bin/pg_dump/compress_io.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-13 13:44:03 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-13 13:44:03 +0000
commit293913568e6a7a86fd1479e1cff8e2ecb58d6568 (patch)
treefc3b469a3ec5ab71b36ea97cc7aaddb838423a0c /src/bin/pg_dump/compress_io.c
parentInitial commit. (diff)
downloadpostgresql-16-293913568e6a7a86fd1479e1cff8e2ecb58d6568.tar.xz
postgresql-16-293913568e6a7a86fd1479e1cff8e2ecb58d6568.zip
Adding upstream version 16.2.upstream/16.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/bin/pg_dump/compress_io.c')
-rw-r--r--src/bin/pg_dump/compress_io.c299
1 files changed, 299 insertions, 0 deletions
diff --git a/src/bin/pg_dump/compress_io.c b/src/bin/pg_dump/compress_io.c
new file mode 100644
index 0000000..efedc53
--- /dev/null
+++ b/src/bin/pg_dump/compress_io.c
@@ -0,0 +1,299 @@
+/*-------------------------------------------------------------------------
+ *
+ * compress_io.c
+ * Routines for archivers to write an uncompressed or compressed data
+ * stream.
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * This file includes two APIs for dealing with compressed data. The first
+ * provides more flexibility, using callbacks to read/write data from the
+ * underlying stream. The second API is a wrapper around fopen and
+ * friends, providing an interface similar to those, but abstracts away
+ * the possible compression. The second API is aimed for the resulting
+ * files to be easily manipulated with an external compression utility
+ * program.
+ *
+ * Compressor API
+ * --------------
+ *
+ * The interface for writing to an archive consists of three functions:
+ * AllocateCompressor, writeData, and EndCompressor. First you call
+ * AllocateCompressor, then write all the data by calling writeData as many
+ * times as needed, and finally EndCompressor. writeData will call the
+ * WriteFunc that was provided to AllocateCompressor for each chunk of
+ * compressed data.
+ *
+ * The interface for reading an archive consists of the same three functions:
+ * AllocateCompressor, readData, and EndCompressor. First you call
+ * AllocateCompressor, then read all the data by calling readData to read the
+ * whole compressed stream which repeatedly calls the given ReadFunc. ReadFunc
+ * returns the compressed data one chunk at a time. Then readData decompresses
+ * it and passes the decompressed data to ahwrite(), until ReadFunc returns 0
+ * to signal EOF. The interface is the same for compressed and uncompressed
+ * streams.
+ *
+ * Compressed stream API
+ * ----------------------
+ *
+ * The compressed stream API is providing a set of function pointers for
+ * opening, reading, writing, and finally closing files. The implemented
+ * function pointers are documented in the corresponding header file and are
+ * common for all streams. It allows the caller to use the same functions for
+ * both compressed and uncompressed streams.
+ *
+ * The interface consists of three functions, InitCompressFileHandle,
+ * InitDiscoverCompressFileHandle, and EndCompressFileHandle. If the
+ * compression is known, then start by calling InitCompressFileHandle,
+ * otherwise discover it by using InitDiscoverCompressFileHandle. Then call
+ * the function pointers as required for the read/write operations. Finally
+ * call EndCompressFileHandle to end the stream.
+ *
+ * InitDiscoverCompressFileHandle tries to infer the compression by the
+ * filename suffix. If the suffix is not yet known then it tries to simply
+ * open the file and if it fails, it tries to open the same file with
+ * compressed suffixes (.gz, .lz4 and .zst, in this order).
+ *
+ * IDENTIFICATION
+ * src/bin/pg_dump/compress_io.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres_fe.h"
+
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "compress_gzip.h"
+#include "compress_io.h"
+#include "compress_lz4.h"
+#include "compress_none.h"
+#include "compress_zstd.h"
+#include "pg_backup_utils.h"
+
+/*----------------------
+ * Generic functions
+ *----------------------
+ */
+
+/*
+ * Checks whether support for a compression algorithm is implemented in
+ * pg_dump/restore.
+ *
+ * On success returns NULL, otherwise returns a malloc'ed string which can be
+ * used by the caller in an error message.
+ */
+char *
+supports_compression(const pg_compress_specification compression_spec)
+{
+ const pg_compress_algorithm algorithm = compression_spec.algorithm;
+ bool supported = false;
+
+ if (algorithm == PG_COMPRESSION_NONE)
+ supported = true;
+#ifdef HAVE_LIBZ
+ if (algorithm == PG_COMPRESSION_GZIP)
+ supported = true;
+#endif
+#ifdef USE_LZ4
+ if (algorithm == PG_COMPRESSION_LZ4)
+ supported = true;
+#endif
+#ifdef USE_ZSTD
+ if (algorithm == PG_COMPRESSION_ZSTD)
+ supported = true;
+#endif
+
+ if (!supported)
+ return psprintf(_("this build does not support compression with %s"),
+ get_compress_algorithm_name(algorithm));
+
+ return NULL;
+}
+
+/*----------------------
+ * Compressor API
+ *----------------------
+ */
+
+/*
+ * Allocate a new compressor.
+ */
+CompressorState *
+AllocateCompressor(const pg_compress_specification compression_spec,
+ ReadFunc readF, WriteFunc writeF)
+{
+ CompressorState *cs;
+
+ cs = (CompressorState *) pg_malloc0(sizeof(CompressorState));
+ cs->readF = readF;
+ cs->writeF = writeF;
+
+ if (compression_spec.algorithm == PG_COMPRESSION_NONE)
+ InitCompressorNone(cs, compression_spec);
+ else if (compression_spec.algorithm == PG_COMPRESSION_GZIP)
+ InitCompressorGzip(cs, compression_spec);
+ else if (compression_spec.algorithm == PG_COMPRESSION_LZ4)
+ InitCompressorLZ4(cs, compression_spec);
+ else if (compression_spec.algorithm == PG_COMPRESSION_ZSTD)
+ InitCompressorZstd(cs, compression_spec);
+
+ return cs;
+}
+
+/*
+ * Terminate compression library context and flush its buffers.
+ */
+void
+EndCompressor(ArchiveHandle *AH, CompressorState *cs)
+{
+ cs->end(AH, cs);
+ pg_free(cs);
+}
+
+/*----------------------
+ * Compressed stream API
+ *----------------------
+ */
+
+/*
+ * Private routines
+ */
+static int
+hasSuffix(const char *filename, const char *suffix)
+{
+ int filenamelen = strlen(filename);
+ int suffixlen = strlen(suffix);
+
+ if (filenamelen < suffixlen)
+ return 0;
+
+ return memcmp(&filename[filenamelen - suffixlen],
+ suffix,
+ suffixlen) == 0;
+}
+
+/* free() without changing errno; useful in several places below */
+static void
+free_keep_errno(void *p)
+{
+ int save_errno = errno;
+
+ free(p);
+ errno = save_errno;
+}
+
+/*
+ * Public interface
+ */
+
+/*
+ * Initialize a compress file handle for the specified compression algorithm.
+ */
+CompressFileHandle *
+InitCompressFileHandle(const pg_compress_specification compression_spec)
+{
+ CompressFileHandle *CFH;
+
+ CFH = pg_malloc0(sizeof(CompressFileHandle));
+
+ if (compression_spec.algorithm == PG_COMPRESSION_NONE)
+ InitCompressFileHandleNone(CFH, compression_spec);
+ else if (compression_spec.algorithm == PG_COMPRESSION_GZIP)
+ InitCompressFileHandleGzip(CFH, compression_spec);
+ else if (compression_spec.algorithm == PG_COMPRESSION_LZ4)
+ InitCompressFileHandleLZ4(CFH, compression_spec);
+ else if (compression_spec.algorithm == PG_COMPRESSION_ZSTD)
+ InitCompressFileHandleZstd(CFH, compression_spec);
+
+ return CFH;
+}
+
+/*
+ * Checks if a compressed file (with the specified extension) exists.
+ *
+ * The filename of the tested file is stored to fname buffer (the existing
+ * buffer is freed, new buffer is allocated and returned through the pointer).
+ */
+static bool
+check_compressed_file(const char *path, char **fname, char *ext)
+{
+ free_keep_errno(*fname);
+ *fname = psprintf("%s.%s", path, ext);
+ return (access(*fname, F_OK) == 0);
+}
+
+/*
+ * Open a file for reading. 'path' is the file to open, and 'mode' should
+ * be either "r" or "rb".
+ *
+ * If the file at 'path' contains the suffix of a supported compression method,
+ * currently this includes ".gz", ".lz4" and ".zst", then this compression will be used
+ * throughout. Otherwise the compression will be inferred by iteratively trying
+ * to open the file at 'path', first as is, then by appending known compression
+ * suffixes. So if you pass "foo" as 'path', this will open either "foo" or
+ * "foo.{gz,lz4,zst}", trying in that order.
+ *
+ * On failure, return NULL with an error code in errno.
+ */
+CompressFileHandle *
+InitDiscoverCompressFileHandle(const char *path, const char *mode)
+{
+ CompressFileHandle *CFH = NULL;
+ struct stat st;
+ char *fname;
+ pg_compress_specification compression_spec = {0};
+
+ compression_spec.algorithm = PG_COMPRESSION_NONE;
+
+ Assert(strcmp(mode, PG_BINARY_R) == 0);
+
+ fname = pg_strdup(path);
+
+ if (hasSuffix(fname, ".gz"))
+ compression_spec.algorithm = PG_COMPRESSION_GZIP;
+ else if (hasSuffix(fname, ".lz4"))
+ compression_spec.algorithm = PG_COMPRESSION_LZ4;
+ else if (hasSuffix(fname, ".zst"))
+ compression_spec.algorithm = PG_COMPRESSION_ZSTD;
+ else
+ {
+ if (stat(path, &st) == 0)
+ compression_spec.algorithm = PG_COMPRESSION_NONE;
+ else if (check_compressed_file(path, &fname, "gz"))
+ compression_spec.algorithm = PG_COMPRESSION_GZIP;
+ else if (check_compressed_file(path, &fname, "lz4"))
+ compression_spec.algorithm = PG_COMPRESSION_LZ4;
+ else if (check_compressed_file(path, &fname, "zst"))
+ compression_spec.algorithm = PG_COMPRESSION_ZSTD;
+ }
+
+ CFH = InitCompressFileHandle(compression_spec);
+ if (!CFH->open_func(fname, -1, mode, CFH))
+ {
+ free_keep_errno(CFH);
+ CFH = NULL;
+ }
+ free_keep_errno(fname);
+
+ return CFH;
+}
+
+/*
+ * Close an open file handle and release its memory.
+ *
+ * On failure, returns false and sets errno appropriately.
+ */
+bool
+EndCompressFileHandle(CompressFileHandle *CFH)
+{
+ bool ret = false;
+
+ if (CFH->private_data)
+ ret = CFH->close_func(CFH);
+
+ free_keep_errno(CFH);
+
+ return ret;
+}