summaryrefslogtreecommitdiffstats
path: root/src/include/access/xlogrecord.h
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 12:15:05 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 12:15:05 +0000
commit46651ce6fe013220ed397add242004d764fc0153 (patch)
tree6e5299f990f88e60174a1d3ae6e48eedd2688b2b /src/include/access/xlogrecord.h
parentInitial commit. (diff)
downloadpostgresql-14-46651ce6fe013220ed397add242004d764fc0153.tar.xz
postgresql-14-46651ce6fe013220ed397add242004d764fc0153.zip
Adding upstream version 14.5.upstream/14.5upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/include/access/xlogrecord.h')
-rw-r--r--src/include/access/xlogrecord.h229
1 files changed, 229 insertions, 0 deletions
diff --git a/src/include/access/xlogrecord.h b/src/include/access/xlogrecord.h
new file mode 100644
index 0000000..f68cb18
--- /dev/null
+++ b/src/include/access/xlogrecord.h
@@ -0,0 +1,229 @@
+/*
+ * xlogrecord.h
+ *
+ * Definitions for the WAL record format.
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/access/xlogrecord.h
+ */
+#ifndef XLOGRECORD_H
+#define XLOGRECORD_H
+
+#include "access/rmgr.h"
+#include "access/xlogdefs.h"
+#include "port/pg_crc32c.h"
+#include "storage/block.h"
+#include "storage/relfilenode.h"
+
+/*
+ * The overall layout of an XLOG record is:
+ * Fixed-size header (XLogRecord struct)
+ * XLogRecordBlockHeader struct
+ * XLogRecordBlockHeader struct
+ * ...
+ * XLogRecordDataHeader[Short|Long] struct
+ * block data
+ * block data
+ * ...
+ * main data
+ *
+ * There can be zero or more XLogRecordBlockHeaders, and 0 or more bytes of
+ * rmgr-specific data not associated with a block. XLogRecord structs
+ * always start on MAXALIGN boundaries in the WAL files, but the rest of
+ * the fields are not aligned.
+ *
+ * The XLogRecordBlockHeader, XLogRecordDataHeaderShort and
+ * XLogRecordDataHeaderLong structs all begin with a single 'id' byte. It's
+ * used to distinguish between block references, and the main data structs.
+ */
+typedef struct XLogRecord
+{
+ uint32 xl_tot_len; /* total len of entire record */
+ TransactionId xl_xid; /* xact id */
+ XLogRecPtr xl_prev; /* ptr to previous record in log */
+ uint8 xl_info; /* flag bits, see below */
+ RmgrId xl_rmid; /* resource manager for this record */
+ /* 2 bytes of padding here, initialize to zero */
+ pg_crc32c xl_crc; /* CRC for this record */
+
+ /* XLogRecordBlockHeaders and XLogRecordDataHeader follow, no padding */
+
+} XLogRecord;
+
+#define SizeOfXLogRecord (offsetof(XLogRecord, xl_crc) + sizeof(pg_crc32c))
+
+/*
+ * The high 4 bits in xl_info may be used freely by rmgr. The
+ * XLR_SPECIAL_REL_UPDATE and XLR_CHECK_CONSISTENCY bits can be passed by
+ * XLogInsert caller. The rest are set internally by XLogInsert.
+ */
+#define XLR_INFO_MASK 0x0F
+#define XLR_RMGR_INFO_MASK 0xF0
+
+/*
+ * If a WAL record modifies any relation files, in ways not covered by the
+ * usual block references, this flag is set. This is not used for anything
+ * by PostgreSQL itself, but it allows external tools that read WAL and keep
+ * track of modified blocks to recognize such special record types.
+ */
+#define XLR_SPECIAL_REL_UPDATE 0x01
+
+/*
+ * Enforces consistency checks of replayed WAL at recovery. If enabled,
+ * each record will log a full-page write for each block modified by the
+ * record and will reuse it afterwards for consistency checks. The caller
+ * of XLogInsert can use this value if necessary, but if
+ * wal_consistency_checking is enabled for a rmgr this is set unconditionally.
+ */
+#define XLR_CHECK_CONSISTENCY 0x02
+
+/*
+ * Header info for block data appended to an XLOG record.
+ *
+ * 'data_length' is the length of the rmgr-specific payload data associated
+ * with this block. It does not include the possible full page image, nor
+ * XLogRecordBlockHeader struct itself.
+ *
+ * Note that we don't attempt to align the XLogRecordBlockHeader struct!
+ * So, the struct must be copied to aligned local storage before use.
+ */
+typedef struct XLogRecordBlockHeader
+{
+ uint8 id; /* block reference ID */
+ uint8 fork_flags; /* fork within the relation, and flags */
+ uint16 data_length; /* number of payload bytes (not including page
+ * image) */
+
+ /* If BKPBLOCK_HAS_IMAGE, an XLogRecordBlockImageHeader struct follows */
+ /* If BKPBLOCK_SAME_REL is not set, a RelFileNode follows */
+ /* BlockNumber follows */
+} XLogRecordBlockHeader;
+
+#define SizeOfXLogRecordBlockHeader (offsetof(XLogRecordBlockHeader, data_length) + sizeof(uint16))
+
+/*
+ * Additional header information when a full-page image is included
+ * (i.e. when BKPBLOCK_HAS_IMAGE is set).
+ *
+ * The XLOG code is aware that PG data pages usually contain an unused "hole"
+ * in the middle, which contains only zero bytes. Since we know that the
+ * "hole" is all zeros, we remove it from the stored data (and it's not counted
+ * in the XLOG record's CRC, either). Hence, the amount of block data actually
+ * present is (BLCKSZ - <length of "hole" bytes>).
+ *
+ * Additionally, when wal_compression is enabled, we will try to compress full
+ * page images using the PGLZ compression algorithm, after removing the "hole".
+ * This can reduce the WAL volume, but at some extra cost of CPU spent
+ * on the compression during WAL logging. In this case, since the "hole"
+ * length cannot be calculated by subtracting the number of page image bytes
+ * from BLCKSZ, basically it needs to be stored as an extra information.
+ * But when no "hole" exists, we can assume that the "hole" length is zero
+ * and no such an extra information needs to be stored. Note that
+ * the original version of page image is stored in WAL instead of the
+ * compressed one if the number of bytes saved by compression is less than
+ * the length of extra information. Hence, when a page image is successfully
+ * compressed, the amount of block data actually present is less than
+ * BLCKSZ - the length of "hole" bytes - the length of extra information.
+ */
+typedef struct XLogRecordBlockImageHeader
+{
+ uint16 length; /* number of page image bytes */
+ uint16 hole_offset; /* number of bytes before "hole" */
+ uint8 bimg_info; /* flag bits, see below */
+
+ /*
+ * If BKPIMAGE_HAS_HOLE and BKPIMAGE_IS_COMPRESSED, an
+ * XLogRecordBlockCompressHeader struct follows.
+ */
+} XLogRecordBlockImageHeader;
+
+#define SizeOfXLogRecordBlockImageHeader \
+ (offsetof(XLogRecordBlockImageHeader, bimg_info) + sizeof(uint8))
+
+/* Information stored in bimg_info */
+#define BKPIMAGE_HAS_HOLE 0x01 /* page image has "hole" */
+#define BKPIMAGE_IS_COMPRESSED 0x02 /* page image is compressed */
+#define BKPIMAGE_APPLY 0x04 /* page image should be restored during
+ * replay */
+
+/*
+ * Extra header information used when page image has "hole" and
+ * is compressed.
+ */
+typedef struct XLogRecordBlockCompressHeader
+{
+ uint16 hole_length; /* number of bytes in "hole" */
+} XLogRecordBlockCompressHeader;
+
+#define SizeOfXLogRecordBlockCompressHeader \
+ sizeof(XLogRecordBlockCompressHeader)
+
+/*
+ * Maximum size of the header for a block reference. This is used to size a
+ * temporary buffer for constructing the header.
+ */
+#define MaxSizeOfXLogRecordBlockHeader \
+ (SizeOfXLogRecordBlockHeader + \
+ SizeOfXLogRecordBlockImageHeader + \
+ SizeOfXLogRecordBlockCompressHeader + \
+ sizeof(RelFileNode) + \
+ sizeof(BlockNumber))
+
+/*
+ * The fork number fits in the lower 4 bits in the fork_flags field. The upper
+ * bits are used for flags.
+ */
+#define BKPBLOCK_FORK_MASK 0x0F
+#define BKPBLOCK_FLAG_MASK 0xF0
+#define BKPBLOCK_HAS_IMAGE 0x10 /* block data is an XLogRecordBlockImage */
+#define BKPBLOCK_HAS_DATA 0x20
+#define BKPBLOCK_WILL_INIT 0x40 /* redo will re-init the page */
+#define BKPBLOCK_SAME_REL 0x80 /* RelFileNode omitted, same as previous */
+
+/*
+ * XLogRecordDataHeaderShort/Long are used for the "main data" portion of
+ * the record. If the length of the data is less than 256 bytes, the short
+ * form is used, with a single byte to hold the length. Otherwise the long
+ * form is used.
+ *
+ * (These structs are currently not used in the code, they are here just for
+ * documentation purposes).
+ */
+typedef struct XLogRecordDataHeaderShort
+{
+ uint8 id; /* XLR_BLOCK_ID_DATA_SHORT */
+ uint8 data_length; /* number of payload bytes */
+} XLogRecordDataHeaderShort;
+
+#define SizeOfXLogRecordDataHeaderShort (sizeof(uint8) * 2)
+
+typedef struct XLogRecordDataHeaderLong
+{
+ uint8 id; /* XLR_BLOCK_ID_DATA_LONG */
+ /* followed by uint32 data_length, unaligned */
+} XLogRecordDataHeaderLong;
+
+#define SizeOfXLogRecordDataHeaderLong (sizeof(uint8) + sizeof(uint32))
+
+/*
+ * Block IDs used to distinguish different kinds of record fragments. Block
+ * references are numbered from 0 to XLR_MAX_BLOCK_ID. A rmgr is free to use
+ * any ID number in that range (although you should stick to small numbers,
+ * because the WAL machinery is optimized for that case). A few ID
+ * numbers are reserved to denote the "main" data portion of the record,
+ * as well as replication-supporting transaction metadata.
+ *
+ * The maximum is currently set at 32, quite arbitrarily. Most records only
+ * need a handful of block references, but there are a few exceptions that
+ * need more.
+ */
+#define XLR_MAX_BLOCK_ID 32
+
+#define XLR_BLOCK_ID_DATA_SHORT 255
+#define XLR_BLOCK_ID_DATA_LONG 254
+#define XLR_BLOCK_ID_ORIGIN 253
+#define XLR_BLOCK_ID_TOPLEVEL_XID 252
+
+#endif /* XLOGRECORD_H */