diff options
Diffstat (limited to 'src/backend/access/transam/generic_xlog.c')
-rw-r--r-- | src/backend/access/transam/generic_xlog.c | 544 |
1 files changed, 544 insertions, 0 deletions
diff --git a/src/backend/access/transam/generic_xlog.c b/src/backend/access/transam/generic_xlog.c new file mode 100644 index 0000000..5164a1c --- /dev/null +++ b/src/backend/access/transam/generic_xlog.c @@ -0,0 +1,544 @@ +/*------------------------------------------------------------------------- + * + * generic_xlog.c + * Implementation of generic xlog records. + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/backend/access/transam/generic_xlog.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/bufmask.h" +#include "access/generic_xlog.h" +#include "access/xlogutils.h" +#include "miscadmin.h" +#include "utils/memutils.h" + +/*------------------------------------------------------------------------- + * Internally, a delta between pages consists of a set of fragments. Each + * fragment represents changes made in a given region of a page. A fragment + * is made up as follows: + * + * - offset of page region (OffsetNumber) + * - length of page region (OffsetNumber) + * - data - the data to place into the region ('length' number of bytes) + * + * Unchanged regions of a page are not represented in its delta. As a result, + * a delta can be more compact than the full page image. But having an + * unchanged region between two fragments that is smaller than the fragment + * header (offset+length) does not pay off in terms of the overall size of + * the delta. For this reason, we merge adjacent fragments if the unchanged + * region between them is <= MATCH_THRESHOLD bytes. + * + * We do not bother to merge fragments across the "lower" and "upper" parts + * of a page; it's very seldom the case that pd_lower and pd_upper are within + * MATCH_THRESHOLD bytes of each other, and handling that infrequent case + * would complicate and slow down the delta-computation code unduly. + * Therefore, the worst-case delta size includes two fragment headers plus + * a full page's worth of data. + *------------------------------------------------------------------------- + */ +#define FRAGMENT_HEADER_SIZE (2 * sizeof(OffsetNumber)) +#define MATCH_THRESHOLD FRAGMENT_HEADER_SIZE +#define MAX_DELTA_SIZE (BLCKSZ + 2 * FRAGMENT_HEADER_SIZE) + +/* Struct of generic xlog data for single page */ +typedef struct +{ + Buffer buffer; /* registered buffer */ + int flags; /* flags for this buffer */ + int deltaLen; /* space consumed in delta field */ + char *image; /* copy of page image for modification, do not + * do it in-place to have aligned memory chunk */ + char delta[MAX_DELTA_SIZE]; /* delta between page images */ +} PageData; + +/* State of generic xlog record construction */ +struct GenericXLogState +{ + /* Info about each page, see above */ + PageData pages[MAX_GENERIC_XLOG_PAGES]; + bool isLogged; + /* Page images (properly aligned) */ + PGAlignedBlock images[MAX_GENERIC_XLOG_PAGES]; +}; + +static void writeFragment(PageData *pageData, OffsetNumber offset, + OffsetNumber len, const char *data); +static void computeRegionDelta(PageData *pageData, + const char *curpage, const char *targetpage, + int targetStart, int targetEnd, + int validStart, int validEnd); +static void computeDelta(PageData *pageData, Page curpage, Page targetpage); +static void applyPageRedo(Page page, const char *delta, Size deltaSize); + + +/* + * Write next fragment into pageData's delta. + * + * The fragment has the given offset and length, and data points to the + * actual data (of length length). + */ +static void +writeFragment(PageData *pageData, OffsetNumber offset, OffsetNumber length, + const char *data) +{ + char *ptr = pageData->delta + pageData->deltaLen; + + /* Verify we have enough space */ + Assert(pageData->deltaLen + sizeof(offset) + + sizeof(length) + length <= sizeof(pageData->delta)); + + /* Write fragment data */ + memcpy(ptr, &offset, sizeof(offset)); + ptr += sizeof(offset); + memcpy(ptr, &length, sizeof(length)); + ptr += sizeof(length); + memcpy(ptr, data, length); + ptr += length; + + pageData->deltaLen = ptr - pageData->delta; +} + +/* + * Compute the XLOG fragments needed to transform a region of curpage into the + * corresponding region of targetpage, and append them to pageData's delta + * field. The region to transform runs from targetStart to targetEnd-1. + * Bytes in curpage outside the range validStart to validEnd-1 should be + * considered invalid, and always overwritten with target data. + * + * This function is a hot spot, so it's worth being as tense as possible + * about the data-matching loops. + */ +static void +computeRegionDelta(PageData *pageData, + const char *curpage, const char *targetpage, + int targetStart, int targetEnd, + int validStart, int validEnd) +{ + int i, + loopEnd, + fragmentBegin = -1, + fragmentEnd = -1; + + /* Deal with any invalid start region by including it in first fragment */ + if (validStart > targetStart) + { + fragmentBegin = targetStart; + targetStart = validStart; + } + + /* We'll deal with any invalid end region after the main loop */ + loopEnd = Min(targetEnd, validEnd); + + /* Examine all the potentially matchable bytes */ + i = targetStart; + while (i < loopEnd) + { + if (curpage[i] != targetpage[i]) + { + /* On unmatched byte, start new fragment if not already in one */ + if (fragmentBegin < 0) + fragmentBegin = i; + /* Mark unmatched-data endpoint as uncertain */ + fragmentEnd = -1; + /* Extend the fragment as far as possible in a tight loop */ + i++; + while (i < loopEnd && curpage[i] != targetpage[i]) + i++; + if (i >= loopEnd) + break; + } + + /* Found a matched byte, so remember end of unmatched fragment */ + fragmentEnd = i; + + /* + * Extend the match as far as possible in a tight loop. (On typical + * workloads, this inner loop is the bulk of this function's runtime.) + */ + i++; + while (i < loopEnd && curpage[i] == targetpage[i]) + i++; + + /* + * There are several possible cases at this point: + * + * 1. We have no unwritten fragment (fragmentBegin < 0). There's + * nothing to write; and it doesn't matter what fragmentEnd is. + * + * 2. We found more than MATCH_THRESHOLD consecutive matching bytes. + * Dump out the unwritten fragment, stopping at fragmentEnd. + * + * 3. The match extends to loopEnd. We'll do nothing here, exit the + * loop, and then dump the unwritten fragment, after merging it with + * the invalid end region if any. If we don't so merge, fragmentEnd + * establishes how much the final writeFragment call needs to write. + * + * 4. We found an unmatched byte before loopEnd. The loop will repeat + * and will enter the unmatched-byte stanza above. So in this case + * also, it doesn't matter what fragmentEnd is. The matched bytes + * will get merged into the continuing unmatched fragment. + * + * Only in case 3 do we reach the bottom of the loop with a meaningful + * fragmentEnd value, which is why it's OK that we unconditionally + * assign "fragmentEnd = i" above. + */ + if (fragmentBegin >= 0 && i - fragmentEnd > MATCH_THRESHOLD) + { + writeFragment(pageData, fragmentBegin, + fragmentEnd - fragmentBegin, + targetpage + fragmentBegin); + fragmentBegin = -1; + fragmentEnd = -1; /* not really necessary */ + } + } + + /* Deal with any invalid end region by including it in final fragment */ + if (loopEnd < targetEnd) + { + if (fragmentBegin < 0) + fragmentBegin = loopEnd; + fragmentEnd = targetEnd; + } + + /* Write final fragment if any */ + if (fragmentBegin >= 0) + { + if (fragmentEnd < 0) + fragmentEnd = targetEnd; + writeFragment(pageData, fragmentBegin, + fragmentEnd - fragmentBegin, + targetpage + fragmentBegin); + } +} + +/* + * Compute the XLOG delta record needed to transform curpage into targetpage, + * and store it in pageData's delta field. + */ +static void +computeDelta(PageData *pageData, Page curpage, Page targetpage) +{ + int targetLower = ((PageHeader) targetpage)->pd_lower, + targetUpper = ((PageHeader) targetpage)->pd_upper, + curLower = ((PageHeader) curpage)->pd_lower, + curUpper = ((PageHeader) curpage)->pd_upper; + + pageData->deltaLen = 0; + + /* Compute delta records for lower part of page ... */ + computeRegionDelta(pageData, curpage, targetpage, + 0, targetLower, + 0, curLower); + /* ... and for upper part, ignoring what's between */ + computeRegionDelta(pageData, curpage, targetpage, + targetUpper, BLCKSZ, + curUpper, BLCKSZ); + + /* + * If xlog debug is enabled, then check produced delta. Result of delta + * application to curpage should be equivalent to targetpage. + */ +#ifdef WAL_DEBUG + if (XLOG_DEBUG) + { + PGAlignedBlock tmp; + + memcpy(tmp.data, curpage, BLCKSZ); + applyPageRedo(tmp.data, pageData->delta, pageData->deltaLen); + if (memcmp(tmp.data, targetpage, targetLower) != 0 || + memcmp(tmp.data + targetUpper, targetpage + targetUpper, + BLCKSZ - targetUpper) != 0) + elog(ERROR, "result of generic xlog apply does not match"); + } +#endif +} + +/* + * Start new generic xlog record for modifications to specified relation. + */ +GenericXLogState * +GenericXLogStart(Relation relation) +{ + GenericXLogState *state; + int i; + + state = (GenericXLogState *) palloc(sizeof(GenericXLogState)); + state->isLogged = RelationNeedsWAL(relation); + + for (i = 0; i < MAX_GENERIC_XLOG_PAGES; i++) + { + state->pages[i].image = state->images[i].data; + state->pages[i].buffer = InvalidBuffer; + } + + return state; +} + +/* + * Register new buffer for generic xlog record. + * + * Returns pointer to the page's image in the GenericXLogState, which + * is what the caller should modify. + * + * If the buffer is already registered, just return its existing entry. + * (It's not very clear what to do with the flags in such a case, but + * for now we stay with the original flags.) + */ +Page +GenericXLogRegisterBuffer(GenericXLogState *state, Buffer buffer, int flags) +{ + int block_id; + + /* Search array for existing entry or first unused slot */ + for (block_id = 0; block_id < MAX_GENERIC_XLOG_PAGES; block_id++) + { + PageData *page = &state->pages[block_id]; + + if (BufferIsInvalid(page->buffer)) + { + /* Empty slot, so use it (there cannot be a match later) */ + page->buffer = buffer; + page->flags = flags; + memcpy(page->image, BufferGetPage(buffer), BLCKSZ); + return (Page) page->image; + } + else if (page->buffer == buffer) + { + /* + * Buffer is already registered. Just return the image, which is + * already prepared. + */ + return (Page) page->image; + } + } + + elog(ERROR, "maximum number %d of generic xlog buffers is exceeded", + MAX_GENERIC_XLOG_PAGES); + /* keep compiler quiet */ + return NULL; +} + +/* + * Apply changes represented by GenericXLogState to the actual buffers, + * and emit a generic xlog record. + */ +XLogRecPtr +GenericXLogFinish(GenericXLogState *state) +{ + XLogRecPtr lsn; + int i; + + if (state->isLogged) + { + /* Logged relation: make xlog record in critical section. */ + XLogBeginInsert(); + + START_CRIT_SECTION(); + + for (i = 0; i < MAX_GENERIC_XLOG_PAGES; i++) + { + PageData *pageData = &state->pages[i]; + Page page; + PageHeader pageHeader; + + if (BufferIsInvalid(pageData->buffer)) + continue; + + page = BufferGetPage(pageData->buffer); + pageHeader = (PageHeader) pageData->image; + + if (pageData->flags & GENERIC_XLOG_FULL_IMAGE) + { + /* + * A full-page image does not require us to supply any xlog + * data. Just apply the image, being careful to zero the + * "hole" between pd_lower and pd_upper in order to avoid + * divergence between actual page state and what replay would + * produce. + */ + memcpy(page, pageData->image, pageHeader->pd_lower); + memset(page + pageHeader->pd_lower, 0, + pageHeader->pd_upper - pageHeader->pd_lower); + memcpy(page + pageHeader->pd_upper, + pageData->image + pageHeader->pd_upper, + BLCKSZ - pageHeader->pd_upper); + + XLogRegisterBuffer(i, pageData->buffer, + REGBUF_FORCE_IMAGE | REGBUF_STANDARD); + } + else + { + /* + * In normal mode, calculate delta and write it as xlog data + * associated with this page. + */ + computeDelta(pageData, page, (Page) pageData->image); + + /* Apply the image, with zeroed "hole" as above */ + memcpy(page, pageData->image, pageHeader->pd_lower); + memset(page + pageHeader->pd_lower, 0, + pageHeader->pd_upper - pageHeader->pd_lower); + memcpy(page + pageHeader->pd_upper, + pageData->image + pageHeader->pd_upper, + BLCKSZ - pageHeader->pd_upper); + + XLogRegisterBuffer(i, pageData->buffer, REGBUF_STANDARD); + XLogRegisterBufData(i, pageData->delta, pageData->deltaLen); + } + } + + /* Insert xlog record */ + lsn = XLogInsert(RM_GENERIC_ID, 0); + + /* Set LSN and mark buffers dirty */ + for (i = 0; i < MAX_GENERIC_XLOG_PAGES; i++) + { + PageData *pageData = &state->pages[i]; + + if (BufferIsInvalid(pageData->buffer)) + continue; + PageSetLSN(BufferGetPage(pageData->buffer), lsn); + MarkBufferDirty(pageData->buffer); + } + END_CRIT_SECTION(); + } + else + { + /* Unlogged relation: skip xlog-related stuff */ + START_CRIT_SECTION(); + for (i = 0; i < MAX_GENERIC_XLOG_PAGES; i++) + { + PageData *pageData = &state->pages[i]; + + if (BufferIsInvalid(pageData->buffer)) + continue; + memcpy(BufferGetPage(pageData->buffer), + pageData->image, + BLCKSZ); + /* We don't worry about zeroing the "hole" in this case */ + MarkBufferDirty(pageData->buffer); + } + END_CRIT_SECTION(); + /* We don't have a LSN to return, in this case */ + lsn = InvalidXLogRecPtr; + } + + pfree(state); + + return lsn; +} + +/* + * Abort generic xlog record construction. No changes are applied to buffers. + * + * Note: caller is responsible for releasing locks/pins on buffers, if needed. + */ +void +GenericXLogAbort(GenericXLogState *state) +{ + pfree(state); +} + +/* + * Apply delta to given page image. + */ +static void +applyPageRedo(Page page, const char *delta, Size deltaSize) +{ + const char *ptr = delta; + const char *end = delta + deltaSize; + + while (ptr < end) + { + OffsetNumber offset, + length; + + memcpy(&offset, ptr, sizeof(offset)); + ptr += sizeof(offset); + memcpy(&length, ptr, sizeof(length)); + ptr += sizeof(length); + + memcpy(page + offset, ptr, length); + + ptr += length; + } +} + +/* + * Redo function for generic xlog record. + */ +void +generic_redo(XLogReaderState *record) +{ + XLogRecPtr lsn = record->EndRecPtr; + Buffer buffers[MAX_GENERIC_XLOG_PAGES]; + uint8 block_id; + + /* Protect limited size of buffers[] array */ + Assert(record->max_block_id < MAX_GENERIC_XLOG_PAGES); + + /* Iterate over blocks */ + for (block_id = 0; block_id <= record->max_block_id; block_id++) + { + XLogRedoAction action; + + if (!XLogRecHasBlockRef(record, block_id)) + { + buffers[block_id] = InvalidBuffer; + continue; + } + + action = XLogReadBufferForRedo(record, block_id, &buffers[block_id]); + + /* Apply redo to given block if needed */ + if (action == BLK_NEEDS_REDO) + { + Page page; + PageHeader pageHeader; + char *blockDelta; + Size blockDeltaSize; + + page = BufferGetPage(buffers[block_id]); + blockDelta = XLogRecGetBlockData(record, block_id, &blockDeltaSize); + applyPageRedo(page, blockDelta, blockDeltaSize); + + /* + * Since the delta contains no information about what's in the + * "hole" between pd_lower and pd_upper, set that to zero to + * ensure we produce the same page state that application of the + * logged action by GenericXLogFinish did. + */ + pageHeader = (PageHeader) page; + memset(page + pageHeader->pd_lower, 0, + pageHeader->pd_upper - pageHeader->pd_lower); + + PageSetLSN(page, lsn); + MarkBufferDirty(buffers[block_id]); + } + } + + /* Changes are done: unlock and release all buffers */ + for (block_id = 0; block_id <= record->max_block_id; block_id++) + { + if (BufferIsValid(buffers[block_id])) + UnlockReleaseBuffer(buffers[block_id]); + } +} + +/* + * Mask a generic page before performing consistency checks on it. + */ +void +generic_mask(char *page, BlockNumber blkno) +{ + mask_page_lsn_and_checksum(page); + + mask_unused_space(page); +} |