diff options
Diffstat (limited to 'src/bin/pg_basebackup/bbstreamer_tar.c')
-rw-r--r-- | src/bin/pg_basebackup/bbstreamer_tar.c | 516 |
1 files changed, 516 insertions, 0 deletions
diff --git a/src/bin/pg_basebackup/bbstreamer_tar.c b/src/bin/pg_basebackup/bbstreamer_tar.c new file mode 100644 index 0000000..ef5586c --- /dev/null +++ b/src/bin/pg_basebackup/bbstreamer_tar.c @@ -0,0 +1,516 @@ +/*------------------------------------------------------------------------- + * + * bbstreamer_tar.c + * + * This module implements three types of tar processing. A tar parser + * expects unlabelled chunks of data (e.g. BBSTREAMER_UNKNOWN) and splits + * it into labelled chunks (any other value of bbstreamer_archive_context). + * A tar archiver does the reverse: it takes a bunch of labelled chunks + * and produces a tarfile, optionally replacing member headers and trailers + * so that upstream bbstreamer objects can perform surgery on the tarfile + * contents without knowing the details of the tar format. A tar terminator + * just adds two blocks of NUL bytes to the end of the file, since older + * server versions produce files with this terminator omitted. + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/bin/pg_basebackup/bbstreamer_tar.c + *------------------------------------------------------------------------- + */ + +#include "postgres_fe.h" + +#include <time.h> + +#include "bbstreamer.h" +#include "common/logging.h" +#include "pgtar.h" + +typedef struct bbstreamer_tar_parser +{ + bbstreamer base; + bbstreamer_archive_context next_context; + bbstreamer_member member; + size_t file_bytes_sent; + size_t pad_bytes_expected; +} bbstreamer_tar_parser; + +typedef struct bbstreamer_tar_archiver +{ + bbstreamer base; + bool rearchive_member; +} bbstreamer_tar_archiver; + +static void bbstreamer_tar_parser_content(bbstreamer *streamer, + bbstreamer_member *member, + const char *data, int len, + bbstreamer_archive_context context); +static void bbstreamer_tar_parser_finalize(bbstreamer *streamer); +static void bbstreamer_tar_parser_free(bbstreamer *streamer); +static bool bbstreamer_tar_header(bbstreamer_tar_parser *mystreamer); + +const bbstreamer_ops bbstreamer_tar_parser_ops = { + .content = bbstreamer_tar_parser_content, + .finalize = bbstreamer_tar_parser_finalize, + .free = bbstreamer_tar_parser_free +}; + +static void bbstreamer_tar_archiver_content(bbstreamer *streamer, + bbstreamer_member *member, + const char *data, int len, + bbstreamer_archive_context context); +static void bbstreamer_tar_archiver_finalize(bbstreamer *streamer); +static void bbstreamer_tar_archiver_free(bbstreamer *streamer); + +const bbstreamer_ops bbstreamer_tar_archiver_ops = { + .content = bbstreamer_tar_archiver_content, + .finalize = bbstreamer_tar_archiver_finalize, + .free = bbstreamer_tar_archiver_free +}; + +static void bbstreamer_tar_terminator_content(bbstreamer *streamer, + bbstreamer_member *member, + const char *data, int len, + bbstreamer_archive_context context); +static void bbstreamer_tar_terminator_finalize(bbstreamer *streamer); +static void bbstreamer_tar_terminator_free(bbstreamer *streamer); + +const bbstreamer_ops bbstreamer_tar_terminator_ops = { + .content = bbstreamer_tar_terminator_content, + .finalize = bbstreamer_tar_terminator_finalize, + .free = bbstreamer_tar_terminator_free +}; + +/* + * Create a bbstreamer that can parse a stream of content as tar data. + * + * The input should be a series of BBSTREAMER_UNKNOWN chunks; the bbstreamer + * specified by 'next' will receive a series of typed chunks, as per the + * conventions described in bbstreamer.h. + */ +extern bbstreamer * +bbstreamer_tar_parser_new(bbstreamer *next) +{ + bbstreamer_tar_parser *streamer; + + streamer = palloc0(sizeof(bbstreamer_tar_parser)); + *((const bbstreamer_ops **) &streamer->base.bbs_ops) = + &bbstreamer_tar_parser_ops; + streamer->base.bbs_next = next; + initStringInfo(&streamer->base.bbs_buffer); + streamer->next_context = BBSTREAMER_MEMBER_HEADER; + + return &streamer->base; +} + +/* + * Parse unknown content as tar data. + */ +static void +bbstreamer_tar_parser_content(bbstreamer *streamer, bbstreamer_member *member, + const char *data, int len, + bbstreamer_archive_context context) +{ + bbstreamer_tar_parser *mystreamer = (bbstreamer_tar_parser *) streamer; + size_t nbytes; + + /* Expect unparsed input. */ + Assert(member == NULL); + Assert(context == BBSTREAMER_UNKNOWN); + + while (len > 0) + { + switch (mystreamer->next_context) + { + case BBSTREAMER_MEMBER_HEADER: + + /* + * If we're expecting an archive member header, accumulate a + * full block of data before doing anything further. + */ + if (!bbstreamer_buffer_until(streamer, &data, &len, + TAR_BLOCK_SIZE)) + return; + + /* + * Now we can process the header and get ready to process the + * file contents; however, we might find out that what we + * thought was the next file header is actually the start of + * the archive trailer. Switch modes accordingly. + */ + if (bbstreamer_tar_header(mystreamer)) + { + if (mystreamer->member.size == 0) + { + /* No content; trailer is zero-length. */ + bbstreamer_content(mystreamer->base.bbs_next, + &mystreamer->member, + NULL, 0, + BBSTREAMER_MEMBER_TRAILER); + + /* Expect next header. */ + mystreamer->next_context = BBSTREAMER_MEMBER_HEADER; + } + else + { + /* Expect contents. */ + mystreamer->next_context = BBSTREAMER_MEMBER_CONTENTS; + } + mystreamer->base.bbs_buffer.len = 0; + mystreamer->file_bytes_sent = 0; + } + else + mystreamer->next_context = BBSTREAMER_ARCHIVE_TRAILER; + break; + + case BBSTREAMER_MEMBER_CONTENTS: + + /* + * Send as much content as we have, but not more than the + * remaining file length. + */ + Assert(mystreamer->file_bytes_sent < mystreamer->member.size); + nbytes = mystreamer->member.size - mystreamer->file_bytes_sent; + nbytes = Min(nbytes, len); + Assert(nbytes > 0); + bbstreamer_content(mystreamer->base.bbs_next, + &mystreamer->member, + data, nbytes, + BBSTREAMER_MEMBER_CONTENTS); + mystreamer->file_bytes_sent += nbytes; + data += nbytes; + len -= nbytes; + + /* + * If we've not yet sent the whole file, then there's more + * content to come; otherwise, it's time to expect the file + * trailer. + */ + Assert(mystreamer->file_bytes_sent <= mystreamer->member.size); + if (mystreamer->file_bytes_sent == mystreamer->member.size) + { + if (mystreamer->pad_bytes_expected == 0) + { + /* Trailer is zero-length. */ + bbstreamer_content(mystreamer->base.bbs_next, + &mystreamer->member, + NULL, 0, + BBSTREAMER_MEMBER_TRAILER); + + /* Expect next header. */ + mystreamer->next_context = BBSTREAMER_MEMBER_HEADER; + } + else + { + /* Trailer is not zero-length. */ + mystreamer->next_context = BBSTREAMER_MEMBER_TRAILER; + } + mystreamer->base.bbs_buffer.len = 0; + } + break; + + case BBSTREAMER_MEMBER_TRAILER: + + /* + * If we're expecting an archive member trailer, accumulate + * the expected number of padding bytes before sending + * anything onward. + */ + if (!bbstreamer_buffer_until(streamer, &data, &len, + mystreamer->pad_bytes_expected)) + return; + + /* OK, now we can send it. */ + bbstreamer_content(mystreamer->base.bbs_next, + &mystreamer->member, + data, mystreamer->pad_bytes_expected, + BBSTREAMER_MEMBER_TRAILER); + + /* Expect next file header. */ + mystreamer->next_context = BBSTREAMER_MEMBER_HEADER; + mystreamer->base.bbs_buffer.len = 0; + break; + + case BBSTREAMER_ARCHIVE_TRAILER: + + /* + * We've seen an end-of-archive indicator, so anything more is + * buffered and sent as part of the archive trailer. But we + * don't expect more than 2 blocks. + */ + bbstreamer_buffer_bytes(streamer, &data, &len, len); + if (len > 2 * TAR_BLOCK_SIZE) + pg_fatal("tar file trailer exceeds 2 blocks"); + return; + + default: + /* Shouldn't happen. */ + pg_fatal("unexpected state while parsing tar archive"); + } + } +} + +/* + * Parse a file header within a tar stream. + * + * The return value is true if we found a file header and passed it on to the + * next bbstreamer; it is false if we have reached the archive trailer. + */ +static bool +bbstreamer_tar_header(bbstreamer_tar_parser *mystreamer) +{ + bool has_nonzero_byte = false; + int i; + bbstreamer_member *member = &mystreamer->member; + char *buffer = mystreamer->base.bbs_buffer.data; + + Assert(mystreamer->base.bbs_buffer.len == TAR_BLOCK_SIZE); + + /* Check whether we've got a block of all zero bytes. */ + for (i = 0; i < TAR_BLOCK_SIZE; ++i) + { + if (buffer[i] != '\0') + { + has_nonzero_byte = true; + break; + } + } + + /* + * If the entire block was zeros, this is the end of the archive, not the + * start of the next file. + */ + if (!has_nonzero_byte) + return false; + + /* + * Parse key fields out of the header. + * + * FIXME: It's terrible that we use hard-coded values here instead of some + * more principled approach. It's been like this for a long time, but we + * ought to do better. + */ + strlcpy(member->pathname, &buffer[0], MAXPGPATH); + if (member->pathname[0] == '\0') + pg_fatal("tar member has empty name"); + member->size = read_tar_number(&buffer[124], 12); + member->mode = read_tar_number(&buffer[100], 8); + member->uid = read_tar_number(&buffer[108], 8); + member->gid = read_tar_number(&buffer[116], 8); + member->is_directory = (buffer[156] == '5'); + member->is_link = (buffer[156] == '2'); + if (member->is_link) + strlcpy(member->linktarget, &buffer[157], 100); + + /* Compute number of padding bytes. */ + mystreamer->pad_bytes_expected = tarPaddingBytesRequired(member->size); + + /* Forward the entire header to the next bbstreamer. */ + bbstreamer_content(mystreamer->base.bbs_next, member, + buffer, TAR_BLOCK_SIZE, + BBSTREAMER_MEMBER_HEADER); + + return true; +} + +/* + * End-of-stream processing for a tar parser. + */ +static void +bbstreamer_tar_parser_finalize(bbstreamer *streamer) +{ + bbstreamer_tar_parser *mystreamer = (bbstreamer_tar_parser *) streamer; + + if (mystreamer->next_context != BBSTREAMER_ARCHIVE_TRAILER && + (mystreamer->next_context != BBSTREAMER_MEMBER_HEADER || + mystreamer->base.bbs_buffer.len > 0)) + pg_fatal("COPY stream ended before last file was finished"); + + /* Send the archive trailer, even if empty. */ + bbstreamer_content(streamer->bbs_next, NULL, + streamer->bbs_buffer.data, streamer->bbs_buffer.len, + BBSTREAMER_ARCHIVE_TRAILER); + + /* Now finalize successor. */ + bbstreamer_finalize(streamer->bbs_next); +} + +/* + * Free memory associated with a tar parser. + */ +static void +bbstreamer_tar_parser_free(bbstreamer *streamer) +{ + pfree(streamer->bbs_buffer.data); + bbstreamer_free(streamer->bbs_next); +} + +/* + * Create an bbstreamer that can generate a tar archive. + * + * This is intended to be usable either for generating a brand-new tar archive + * or for modifying one on the fly. The input should be a series of typed + * chunks (i.e. not BBSTREAMER_UNKNOWN). See also the comments for + * bbstreamer_tar_parser_content. + */ +extern bbstreamer * +bbstreamer_tar_archiver_new(bbstreamer *next) +{ + bbstreamer_tar_archiver *streamer; + + streamer = palloc0(sizeof(bbstreamer_tar_archiver)); + *((const bbstreamer_ops **) &streamer->base.bbs_ops) = + &bbstreamer_tar_archiver_ops; + streamer->base.bbs_next = next; + + return &streamer->base; +} + +/* + * Fix up the stream of input chunks to create a valid tar file. + * + * If a BBSTREAMER_MEMBER_HEADER chunk is of size 0, it is replaced with a + * newly-constructed tar header. If it is of size TAR_BLOCK_SIZE, it is + * passed through without change. Any other size is a fatal error (and + * indicates a bug). + * + * Whenever a new BBSTREAMER_MEMBER_HEADER chunk is constructed, the + * corresponding BBSTREAMER_MEMBER_TRAILER chunk is also constructed from + * scratch. Specifically, we construct a block of zero bytes sufficient to + * pad out to a block boundary, as required by the tar format. Other + * BBSTREAMER_MEMBER_TRAILER chunks are passed through without change. + * + * Any BBSTREAMER_MEMBER_CONTENTS chunks are passed through without change. + * + * The BBSTREAMER_ARCHIVE_TRAILER chunk is replaced with two + * blocks of zero bytes. Not all tar programs require this, but apparently + * some do. The server does not supply this trailer. If no archive trailer is + * present, one will be added by bbstreamer_tar_parser_finalize. + */ +static void +bbstreamer_tar_archiver_content(bbstreamer *streamer, + bbstreamer_member *member, + const char *data, int len, + bbstreamer_archive_context context) +{ + bbstreamer_tar_archiver *mystreamer = (bbstreamer_tar_archiver *) streamer; + char buffer[2 * TAR_BLOCK_SIZE]; + + Assert(context != BBSTREAMER_UNKNOWN); + + if (context == BBSTREAMER_MEMBER_HEADER && len != TAR_BLOCK_SIZE) + { + Assert(len == 0); + + /* Replace zero-length tar header with a newly constructed one. */ + tarCreateHeader(buffer, member->pathname, NULL, + member->size, member->mode, member->uid, member->gid, + time(NULL)); + data = buffer; + len = TAR_BLOCK_SIZE; + + /* Also make a note to replace padding, in case size changed. */ + mystreamer->rearchive_member = true; + } + else if (context == BBSTREAMER_MEMBER_TRAILER && + mystreamer->rearchive_member) + { + int pad_bytes = tarPaddingBytesRequired(member->size); + + /* Also replace padding, if we regenerated the header. */ + memset(buffer, 0, pad_bytes); + data = buffer; + len = pad_bytes; + + /* Don't do this again unless we replace another header. */ + mystreamer->rearchive_member = false; + } + else if (context == BBSTREAMER_ARCHIVE_TRAILER) + { + /* Trailer should always be two blocks of zero bytes. */ + memset(buffer, 0, 2 * TAR_BLOCK_SIZE); + data = buffer; + len = 2 * TAR_BLOCK_SIZE; + } + + bbstreamer_content(streamer->bbs_next, member, data, len, context); +} + +/* + * End-of-stream processing for a tar archiver. + */ +static void +bbstreamer_tar_archiver_finalize(bbstreamer *streamer) +{ + bbstreamer_finalize(streamer->bbs_next); +} + +/* + * Free memory associated with a tar archiver. + */ +static void +bbstreamer_tar_archiver_free(bbstreamer *streamer) +{ + bbstreamer_free(streamer->bbs_next); + pfree(streamer); +} + +/* + * Create a bbstreamer that blindly adds two blocks of NUL bytes to the + * end of an incomplete tarfile that the server might send us. + */ +bbstreamer * +bbstreamer_tar_terminator_new(bbstreamer *next) +{ + bbstreamer *streamer; + + streamer = palloc0(sizeof(bbstreamer)); + *((const bbstreamer_ops **) &streamer->bbs_ops) = + &bbstreamer_tar_terminator_ops; + streamer->bbs_next = next; + + return streamer; +} + +/* + * Pass all the content through without change. + */ +static void +bbstreamer_tar_terminator_content(bbstreamer *streamer, + bbstreamer_member *member, + const char *data, int len, + bbstreamer_archive_context context) +{ + /* Expect unparsed input. */ + Assert(member == NULL); + Assert(context == BBSTREAMER_UNKNOWN); + + /* Just forward it. */ + bbstreamer_content(streamer->bbs_next, member, data, len, context); +} + +/* + * At the end, blindly add the two blocks of NUL bytes which the server fails + * to supply. + */ +static void +bbstreamer_tar_terminator_finalize(bbstreamer *streamer) +{ + char buffer[2 * TAR_BLOCK_SIZE]; + + memset(buffer, 0, 2 * TAR_BLOCK_SIZE); + bbstreamer_content(streamer->bbs_next, NULL, buffer, + 2 * TAR_BLOCK_SIZE, BBSTREAMER_UNKNOWN); + bbstreamer_finalize(streamer->bbs_next); +} + +/* + * Free memory associated with a tar terminator. + */ +static void +bbstreamer_tar_terminator_free(bbstreamer *streamer) +{ + bbstreamer_free(streamer->bbs_next); + pfree(streamer); +} |