diff options
Diffstat (limited to 'src/include/access/xlogreader.h')
-rw-r--r-- | src/include/access/xlogreader.h | 443 |
1 files changed, 443 insertions, 0 deletions
diff --git a/src/include/access/xlogreader.h b/src/include/access/xlogreader.h new file mode 100644 index 0000000..9e63162 --- /dev/null +++ b/src/include/access/xlogreader.h @@ -0,0 +1,443 @@ +/*------------------------------------------------------------------------- + * + * xlogreader.h + * Definitions for the generic XLog reading facility + * + * Portions Copyright (c) 2013-2022, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/include/access/xlogreader.h + * + * NOTES + * See the definition of the XLogReaderState struct for instructions on + * how to use the XLogReader infrastructure. + * + * The basic idea is to allocate an XLogReaderState via + * XLogReaderAllocate(), position the reader to the first record with + * XLogBeginRead() or XLogFindNextRecord(), and call XLogReadRecord() + * until it returns NULL. + * + * Callers supply a page_read callback if they want to call + * XLogReadRecord or XLogFindNextRecord; it can be passed in as NULL + * otherwise. The WALRead function can be used as a helper to write + * page_read callbacks, but it is not mandatory; callers that use it, + * must supply segment_open callbacks. The segment_close callback + * must always be supplied. + * + * After reading a record with XLogReadRecord(), it's decomposed into + * the per-block and main data parts, and the parts can be accessed + * with the XLogRec* macros and functions. You can also decode a + * record that's already constructed in memory, without reading from + * disk, by calling the DecodeXLogRecord() function. + *------------------------------------------------------------------------- + */ +#ifndef XLOGREADER_H +#define XLOGREADER_H + +#ifndef FRONTEND +#include "access/transam.h" +#endif + +#include "access/xlogrecord.h" +#include "storage/buf.h" + +/* WALOpenSegment represents a WAL segment being read. */ +typedef struct WALOpenSegment +{ + int ws_file; /* segment file descriptor */ + XLogSegNo ws_segno; /* segment number */ + TimeLineID ws_tli; /* timeline ID of the currently open file */ +} WALOpenSegment; + +/* WALSegmentContext carries context information about WAL segments to read */ +typedef struct WALSegmentContext +{ + char ws_dir[MAXPGPATH]; + int ws_segsize; +} WALSegmentContext; + +typedef struct XLogReaderState XLogReaderState; + +/* Function type definitions for various xlogreader interactions */ +typedef int (*XLogPageReadCB) (XLogReaderState *xlogreader, + XLogRecPtr targetPagePtr, + int reqLen, + XLogRecPtr targetRecPtr, + char *readBuf); +typedef void (*WALSegmentOpenCB) (XLogReaderState *xlogreader, + XLogSegNo nextSegNo, + TimeLineID *tli_p); +typedef void (*WALSegmentCloseCB) (XLogReaderState *xlogreader); + +typedef struct XLogReaderRoutine +{ + /* + * Data input callback + * + * This callback shall read at least reqLen valid bytes of the xlog page + * starting at targetPagePtr, and store them in readBuf. The callback + * shall return the number of bytes read (never more than XLOG_BLCKSZ), or + * -1 on failure. The callback shall sleep, if necessary, to wait for the + * requested bytes to become available. The callback will not be invoked + * again for the same page unless more than the returned number of bytes + * are needed. + * + * targetRecPtr is the position of the WAL record we're reading. Usually + * it is equal to targetPagePtr + reqLen, but sometimes xlogreader needs + * to read and verify the page or segment header, before it reads the + * actual WAL record it's interested in. In that case, targetRecPtr can + * be used to determine which timeline to read the page from. + * + * The callback shall set ->seg.ws_tli to the TLI of the file the page was + * read from. + */ + XLogPageReadCB page_read; + + /* + * Callback to open the specified WAL segment for reading. ->seg.ws_file + * shall be set to the file descriptor of the opened segment. In case of + * failure, an error shall be raised by the callback and it shall not + * return. + * + * "nextSegNo" is the number of the segment to be opened. + * + * "tli_p" is an input/output argument. WALRead() uses it to pass the + * timeline in which the new segment should be found, but the callback can + * use it to return the TLI that it actually opened. + */ + WALSegmentOpenCB segment_open; + + /* + * WAL segment close callback. ->seg.ws_file shall be set to a negative + * number. + */ + WALSegmentCloseCB segment_close; +} XLogReaderRoutine; + +#define XL_ROUTINE(...) &(XLogReaderRoutine){__VA_ARGS__} + +typedef struct +{ + /* Is this block ref in use? */ + bool in_use; + + /* Identify the block this refers to */ + RelFileNode rnode; + ForkNumber forknum; + BlockNumber blkno; + + /* Prefetching workspace. */ + Buffer prefetch_buffer; + + /* copy of the fork_flags field from the XLogRecordBlockHeader */ + uint8 flags; + + /* Information on full-page image, if any */ + bool has_image; /* has image, even for consistency checking */ + bool apply_image; /* has image that should be restored */ + char *bkp_image; + uint16 hole_offset; + uint16 hole_length; + uint16 bimg_len; + uint8 bimg_info; + + /* Buffer holding the rmgr-specific data associated with this block */ + bool has_data; + char *data; + uint16 data_len; + uint16 data_bufsz; +} DecodedBkpBlock; + +/* + * The decoded contents of a record. This occupies a contiguous region of + * memory, with main_data and blocks[n].data pointing to memory after the + * members declared here. + */ +typedef struct DecodedXLogRecord +{ + /* Private member used for resource management. */ + size_t size; /* total size of decoded record */ + bool oversized; /* outside the regular decode buffer? */ + struct DecodedXLogRecord *next; /* decoded record queue link */ + + /* Public members. */ + XLogRecPtr lsn; /* location */ + XLogRecPtr next_lsn; /* location of next record */ + XLogRecord header; /* header */ + RepOriginId record_origin; + TransactionId toplevel_xid; /* XID of top-level transaction */ + char *main_data; /* record's main data portion */ + uint32 main_data_len; /* main data portion's length */ + int max_block_id; /* highest block_id in use (-1 if none) */ + DecodedBkpBlock blocks[FLEXIBLE_ARRAY_MEMBER]; +} DecodedXLogRecord; + +struct XLogReaderState +{ + /* + * Operational callbacks + */ + XLogReaderRoutine routine; + + /* ---------------------------------------- + * Public parameters + * ---------------------------------------- + */ + + /* + * System identifier of the xlog files we're about to read. Set to zero + * (the default value) if unknown or unimportant. + */ + uint64 system_identifier; + + /* + * Opaque data for callbacks to use. Not used by XLogReader. + */ + void *private_data; + + /* + * Start and end point of last record read. EndRecPtr is also used as the + * position to read next. Calling XLogBeginRead() sets EndRecPtr to the + * starting position and ReadRecPtr to invalid. + * + * Start and end point of last record returned by XLogReadRecord(). These + * are also available as record->lsn and record->next_lsn. + */ + XLogRecPtr ReadRecPtr; /* start of last record read */ + XLogRecPtr EndRecPtr; /* end+1 of last record read */ + + /* + * Set at the end of recovery: the start point of a partial record at the + * end of WAL (InvalidXLogRecPtr if there wasn't one), and the start + * location of its first contrecord that went missing. + */ + XLogRecPtr abortedRecPtr; + XLogRecPtr missingContrecPtr; + /* Set when XLP_FIRST_IS_OVERWRITE_CONTRECORD is found */ + XLogRecPtr overwrittenRecPtr; + + + /* ---------------------------------------- + * Decoded representation of current record + * + * Use XLogRecGet* functions to investigate the record; these fields + * should not be accessed directly. + * ---------------------------------------- + * Start and end point of the last record read and decoded by + * XLogReadRecordInternal(). NextRecPtr is also used as the position to + * decode next. Calling XLogBeginRead() sets NextRecPtr and EndRecPtr to + * the requested starting position. + */ + XLogRecPtr DecodeRecPtr; /* start of last record decoded */ + XLogRecPtr NextRecPtr; /* end+1 of last record decoded */ + XLogRecPtr PrevRecPtr; /* start of previous record decoded */ + + /* Last record returned by XLogReadRecord(). */ + DecodedXLogRecord *record; + + /* ---------------------------------------- + * private/internal state + * ---------------------------------------- + */ + + /* + * Buffer for decoded records. This is a circular buffer, though + * individual records can't be split in the middle, so some space is often + * wasted at the end. Oversized records that don't fit in this space are + * allocated separately. + */ + char *decode_buffer; + size_t decode_buffer_size; + bool free_decode_buffer; /* need to free? */ + char *decode_buffer_head; /* data is read from the head */ + char *decode_buffer_tail; /* new data is written at the tail */ + + /* + * Queue of records that have been decoded. This is a linked list that + * usually consists of consecutive records in decode_buffer, but may also + * contain oversized records allocated with palloc(). + */ + DecodedXLogRecord *decode_queue_head; /* oldest decoded record */ + DecodedXLogRecord *decode_queue_tail; /* newest decoded record */ + + /* + * Buffer for currently read page (XLOG_BLCKSZ bytes, valid up to at least + * readLen bytes) + */ + char *readBuf; + uint32 readLen; + + /* last read XLOG position for data currently in readBuf */ + WALSegmentContext segcxt; + WALOpenSegment seg; + uint32 segoff; + + /* + * beginning of prior page read, and its TLI. Doesn't necessarily + * correspond to what's in readBuf; used for timeline sanity checks. + */ + XLogRecPtr latestPagePtr; + TimeLineID latestPageTLI; + + /* beginning of the WAL record being read. */ + XLogRecPtr currRecPtr; + /* timeline to read it from, 0 if a lookup is required */ + TimeLineID currTLI; + + /* + * Safe point to read to in currTLI if current TLI is historical + * (tliSwitchPoint) or InvalidXLogRecPtr if on current timeline. + * + * Actually set to the start of the segment containing the timeline switch + * that ends currTLI's validity, not the LSN of the switch its self, since + * we can't assume the old segment will be present. + */ + XLogRecPtr currTLIValidUntil; + + /* + * If currTLI is not the most recent known timeline, the next timeline to + * read from when currTLIValidUntil is reached. + */ + TimeLineID nextTLI; + + /* + * Buffer for current ReadRecord result (expandable), used when a record + * crosses a page boundary. + */ + char *readRecordBuf; + uint32 readRecordBufSize; + + /* Buffer to hold error message */ + char *errormsg_buf; + bool errormsg_deferred; + + /* + * Flag to indicate to XLogPageReadCB that it should not block waiting for + * data. + */ + bool nonblocking; +}; + +/* + * Check if XLogNextRecord() has any more queued records or an error to return. + */ +static inline bool +XLogReaderHasQueuedRecordOrError(XLogReaderState *state) +{ + return (state->decode_queue_head != NULL) || state->errormsg_deferred; +} + +/* Get a new XLogReader */ +extern XLogReaderState *XLogReaderAllocate(int wal_segment_size, + const char *waldir, + XLogReaderRoutine *routine, + void *private_data); +extern XLogReaderRoutine *LocalXLogReaderRoutine(void); + +/* Free an XLogReader */ +extern void XLogReaderFree(XLogReaderState *state); + +/* Optionally provide a circular decoding buffer to allow readahead. */ +extern void XLogReaderSetDecodeBuffer(XLogReaderState *state, + void *buffer, + size_t size); + +/* Position the XLogReader to given record */ +extern void XLogBeginRead(XLogReaderState *state, XLogRecPtr RecPtr); +extern XLogRecPtr XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr); + +/* Return values from XLogPageReadCB. */ +typedef enum XLogPageReadResult +{ + XLREAD_SUCCESS = 0, /* record is successfully read */ + XLREAD_FAIL = -1, /* failed during reading a record */ + XLREAD_WOULDBLOCK = -2 /* nonblocking mode only, no data */ +} XLogPageReadResult; + +/* Read the next XLog record. Returns NULL on end-of-WAL or failure */ +extern struct XLogRecord *XLogReadRecord(XLogReaderState *state, + char **errormsg); + +/* Consume the next record or error. */ +extern DecodedXLogRecord *XLogNextRecord(XLogReaderState *state, + char **errormsg); + +/* Release the previously returned record, if necessary. */ +extern XLogRecPtr XLogReleasePreviousRecord(XLogReaderState *state); + +/* Try to read ahead, if there is data and space. */ +extern DecodedXLogRecord *XLogReadAhead(XLogReaderState *state, + bool nonblocking); + +/* Validate a page */ +extern bool XLogReaderValidatePageHeader(XLogReaderState *state, + XLogRecPtr recptr, char *phdr); + +/* Forget error produced by XLogReaderValidatePageHeader(). */ +extern void XLogReaderResetError(XLogReaderState *state); + +/* + * Error information from WALRead that both backend and frontend caller can + * process. Currently only errors from pg_pread can be reported. + */ +typedef struct WALReadError +{ + int wre_errno; /* errno set by the last pg_pread() */ + int wre_off; /* Offset we tried to read from. */ + int wre_req; /* Bytes requested to be read. */ + int wre_read; /* Bytes read by the last read(). */ + WALOpenSegment wre_seg; /* Segment we tried to read from. */ +} WALReadError; + +extern bool WALRead(XLogReaderState *state, + char *buf, XLogRecPtr startptr, Size count, + TimeLineID tli, WALReadError *errinfo); + +/* Functions for decoding an XLogRecord */ + +extern size_t DecodeXLogRecordRequiredSpace(size_t xl_tot_len); +extern bool DecodeXLogRecord(XLogReaderState *state, + DecodedXLogRecord *decoded, + XLogRecord *record, + XLogRecPtr lsn, + char **errmsg); + +/* + * Macros that provide access to parts of the record most recently returned by + * XLogReadRecord() or XLogNextRecord(). + */ +#define XLogRecGetTotalLen(decoder) ((decoder)->record->header.xl_tot_len) +#define XLogRecGetPrev(decoder) ((decoder)->record->header.xl_prev) +#define XLogRecGetInfo(decoder) ((decoder)->record->header.xl_info) +#define XLogRecGetRmid(decoder) ((decoder)->record->header.xl_rmid) +#define XLogRecGetXid(decoder) ((decoder)->record->header.xl_xid) +#define XLogRecGetOrigin(decoder) ((decoder)->record->record_origin) +#define XLogRecGetTopXid(decoder) ((decoder)->record->toplevel_xid) +#define XLogRecGetData(decoder) ((decoder)->record->main_data) +#define XLogRecGetDataLen(decoder) ((decoder)->record->main_data_len) +#define XLogRecHasAnyBlockRefs(decoder) ((decoder)->record->max_block_id >= 0) +#define XLogRecMaxBlockId(decoder) ((decoder)->record->max_block_id) +#define XLogRecGetBlock(decoder, i) (&(decoder)->record->blocks[(i)]) +#define XLogRecHasBlockRef(decoder, block_id) \ + (((decoder)->record->max_block_id >= (block_id)) && \ + ((decoder)->record->blocks[block_id].in_use)) +#define XLogRecHasBlockImage(decoder, block_id) \ + ((decoder)->record->blocks[block_id].has_image) +#define XLogRecBlockImageApply(decoder, block_id) \ + ((decoder)->record->blocks[block_id].apply_image) + +#ifndef FRONTEND +extern FullTransactionId XLogRecGetFullXid(XLogReaderState *record); +#endif + +extern bool RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page); +extern char *XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len); +extern void XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id, + RelFileNode *rnode, ForkNumber *forknum, + BlockNumber *blknum); +extern bool XLogRecGetBlockTagExtended(XLogReaderState *record, uint8 block_id, + RelFileNode *rnode, ForkNumber *forknum, + BlockNumber *blknum, + Buffer *prefetch_buffer); + +#endif /* XLOGREADER_H */ |