diff options
Diffstat (limited to 'src/backend/storage/freespace/fsmpage.c')
-rw-r--r-- | src/backend/storage/freespace/fsmpage.c | 374 |
1 files changed, 374 insertions, 0 deletions
diff --git a/src/backend/storage/freespace/fsmpage.c b/src/backend/storage/freespace/fsmpage.c new file mode 100644 index 0000000..50f0ada --- /dev/null +++ b/src/backend/storage/freespace/fsmpage.c @@ -0,0 +1,374 @@ +/*------------------------------------------------------------------------- + * + * fsmpage.c + * routines to search and manipulate one FSM page. + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/storage/freespace/fsmpage.c + * + * NOTES: + * + * The public functions in this file form an API that hides the internal + * structure of a FSM page. This allows freespace.c to treat each FSM page + * as a black box with SlotsPerPage "slots". fsm_set_avail() and + * fsm_get_avail() let you get/set the value of a slot, and + * fsm_search_avail() lets you search for a slot with value >= X. + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "storage/bufmgr.h" +#include "storage/fsm_internals.h" + +/* Macros to navigate the tree within a page. Root has index zero. */ +#define leftchild(x) (2 * (x) + 1) +#define rightchild(x) (2 * (x) + 2) +#define parentof(x) (((x) - 1) / 2) + +/* + * Find right neighbor of x, wrapping around within the level + */ +static int +rightneighbor(int x) +{ + /* + * Move right. This might wrap around, stepping to the leftmost node at + * the next level. + */ + x++; + + /* + * Check if we stepped to the leftmost node at next level, and correct if + * so. The leftmost nodes at each level are numbered x = 2^level - 1, so + * check if (x + 1) is a power of two, using a standard + * twos-complement-arithmetic trick. + */ + if (((x + 1) & x) == 0) + x = parentof(x); + + return x; +} + +/* + * Sets the value of a slot on page. Returns true if the page was modified. + * + * The caller must hold an exclusive lock on the page. + */ +bool +fsm_set_avail(Page page, int slot, uint8 value) +{ + int nodeno = NonLeafNodesPerPage + slot; + FSMPage fsmpage = (FSMPage) PageGetContents(page); + uint8 oldvalue; + + Assert(slot < LeafNodesPerPage); + + oldvalue = fsmpage->fp_nodes[nodeno]; + + /* If the value hasn't changed, we don't need to do anything */ + if (oldvalue == value && value <= fsmpage->fp_nodes[0]) + return false; + + fsmpage->fp_nodes[nodeno] = value; + + /* + * Propagate up, until we hit the root or a node that doesn't need to be + * updated. + */ + do + { + uint8 newvalue = 0; + int lchild; + int rchild; + + nodeno = parentof(nodeno); + lchild = leftchild(nodeno); + rchild = lchild + 1; + + newvalue = fsmpage->fp_nodes[lchild]; + if (rchild < NodesPerPage) + newvalue = Max(newvalue, + fsmpage->fp_nodes[rchild]); + + oldvalue = fsmpage->fp_nodes[nodeno]; + if (oldvalue == newvalue) + break; + + fsmpage->fp_nodes[nodeno] = newvalue; + } while (nodeno > 0); + + /* + * sanity check: if the new value is (still) higher than the value at the + * top, the tree is corrupt. If so, rebuild. + */ + if (value > fsmpage->fp_nodes[0]) + fsm_rebuild_page(page); + + return true; +} + +/* + * Returns the value of given slot on page. + * + * Since this is just a read-only access of a single byte, the page doesn't + * need to be locked. + */ +uint8 +fsm_get_avail(Page page, int slot) +{ + FSMPage fsmpage = (FSMPage) PageGetContents(page); + + Assert(slot < LeafNodesPerPage); + + return fsmpage->fp_nodes[NonLeafNodesPerPage + slot]; +} + +/* + * Returns the value at the root of a page. + * + * Since this is just a read-only access of a single byte, the page doesn't + * need to be locked. + */ +uint8 +fsm_get_max_avail(Page page) +{ + FSMPage fsmpage = (FSMPage) PageGetContents(page); + + return fsmpage->fp_nodes[0]; +} + +/* + * Searches for a slot with category at least minvalue. + * Returns slot number, or -1 if none found. + * + * The caller must hold at least a shared lock on the page, and this + * function can unlock and lock the page again in exclusive mode if it + * needs to be updated. exclusive_lock_held should be set to true if the + * caller is already holding an exclusive lock, to avoid extra work. + * + * If advancenext is false, fp_next_slot is set to point to the returned + * slot, and if it's true, to the slot after the returned slot. + */ +int +fsm_search_avail(Buffer buf, uint8 minvalue, bool advancenext, + bool exclusive_lock_held) +{ + Page page = BufferGetPage(buf); + FSMPage fsmpage = (FSMPage) PageGetContents(page); + int nodeno; + int target; + uint16 slot; + +restart: + + /* + * Check the root first, and exit quickly if there's no leaf with enough + * free space + */ + if (fsmpage->fp_nodes[0] < minvalue) + return -1; + + /* + * Start search using fp_next_slot. It's just a hint, so check that it's + * sane. (This also handles wrapping around when the prior call returned + * the last slot on the page.) + */ + target = fsmpage->fp_next_slot; + if (target < 0 || target >= LeafNodesPerPage) + target = 0; + target += NonLeafNodesPerPage; + + /*---------- + * Start the search from the target slot. At every step, move one + * node to the right, then climb up to the parent. Stop when we reach + * a node with enough free space (as we must, since the root has enough + * space). + * + * The idea is to gradually expand our "search triangle", that is, all + * nodes covered by the current node, and to be sure we search to the + * right from the start point. At the first step, only the target slot + * is examined. When we move up from a left child to its parent, we are + * adding the right-hand subtree of that parent to the search triangle. + * When we move right then up from a right child, we are dropping the + * current search triangle (which we know doesn't contain any suitable + * page) and instead looking at the next-larger-size triangle to its + * right. So we never look left from our original start point, and at + * each step the size of the search triangle doubles, ensuring it takes + * only log2(N) work to search N pages. + * + * The "move right" operation will wrap around if it hits the right edge + * of the tree, so the behavior is still good if we start near the right. + * Note also that the move-and-climb behavior ensures that we can't end + * up on one of the missing nodes at the right of the leaf level. + * + * For example, consider this tree: + * + * 7 + * 7 6 + * 5 7 6 5 + * 4 5 5 7 2 6 5 2 + * T + * + * Assume that the target node is the node indicated by the letter T, + * and we're searching for a node with value of 6 or higher. The search + * begins at T. At the first iteration, we move to the right, then to the + * parent, arriving at the rightmost 5. At the second iteration, we move + * to the right, wrapping around, then climb up, arriving at the 7 on the + * third level. 7 satisfies our search, so we descend down to the bottom, + * following the path of sevens. This is in fact the first suitable page + * to the right of (allowing for wraparound) our start point. + *---------- + */ + nodeno = target; + while (nodeno > 0) + { + if (fsmpage->fp_nodes[nodeno] >= minvalue) + break; + + /* + * Move to the right, wrapping around on same level if necessary, then + * climb up. + */ + nodeno = parentof(rightneighbor(nodeno)); + } + + /* + * We're now at a node with enough free space, somewhere in the middle of + * the tree. Descend to the bottom, following a path with enough free + * space, preferring to move left if there's a choice. + */ + while (nodeno < NonLeafNodesPerPage) + { + int childnodeno = leftchild(nodeno); + + if (childnodeno < NodesPerPage && + fsmpage->fp_nodes[childnodeno] >= minvalue) + { + nodeno = childnodeno; + continue; + } + childnodeno++; /* point to right child */ + if (childnodeno < NodesPerPage && + fsmpage->fp_nodes[childnodeno] >= minvalue) + { + nodeno = childnodeno; + } + else + { + /* + * Oops. The parent node promised that either left or right child + * has enough space, but neither actually did. This can happen in + * case of a "torn page", IOW if we crashed earlier while writing + * the page to disk, and only part of the page made it to disk. + * + * Fix the corruption and restart. + */ + RelFileNode rnode; + ForkNumber forknum; + BlockNumber blknum; + + BufferGetTag(buf, &rnode, &forknum, &blknum); + elog(DEBUG1, "fixing corrupt FSM block %u, relation %u/%u/%u", + blknum, rnode.spcNode, rnode.dbNode, rnode.relNode); + + /* make sure we hold an exclusive lock */ + if (!exclusive_lock_held) + { + LockBuffer(buf, BUFFER_LOCK_UNLOCK); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + exclusive_lock_held = true; + } + fsm_rebuild_page(page); + MarkBufferDirtyHint(buf, false); + goto restart; + } + } + + /* We're now at the bottom level, at a node with enough space. */ + slot = nodeno - NonLeafNodesPerPage; + + /* + * Update the next-target pointer. Note that we do this even if we're only + * holding a shared lock, on the grounds that it's better to use a shared + * lock and get a garbled next pointer every now and then, than take the + * concurrency hit of an exclusive lock. + * + * Wrap-around is handled at the beginning of this function. + */ + fsmpage->fp_next_slot = slot + (advancenext ? 1 : 0); + + return slot; +} + +/* + * Sets the available space to zero for all slots numbered >= nslots. + * Returns true if the page was modified. + */ +bool +fsm_truncate_avail(Page page, int nslots) +{ + FSMPage fsmpage = (FSMPage) PageGetContents(page); + uint8 *ptr; + bool changed = false; + + Assert(nslots >= 0 && nslots < LeafNodesPerPage); + + /* Clear all truncated leaf nodes */ + ptr = &fsmpage->fp_nodes[NonLeafNodesPerPage + nslots]; + for (; ptr < &fsmpage->fp_nodes[NodesPerPage]; ptr++) + { + if (*ptr != 0) + changed = true; + *ptr = 0; + } + + /* Fix upper nodes. */ + if (changed) + fsm_rebuild_page(page); + + return changed; +} + +/* + * Reconstructs the upper levels of a page. Returns true if the page + * was modified. + */ +bool +fsm_rebuild_page(Page page) +{ + FSMPage fsmpage = (FSMPage) PageGetContents(page); + bool changed = false; + int nodeno; + + /* + * Start from the lowest non-leaf level, at last node, working our way + * backwards, through all non-leaf nodes at all levels, up to the root. + */ + for (nodeno = NonLeafNodesPerPage - 1; nodeno >= 0; nodeno--) + { + int lchild = leftchild(nodeno); + int rchild = lchild + 1; + uint8 newvalue = 0; + + /* The first few nodes we examine might have zero or one child. */ + if (lchild < NodesPerPage) + newvalue = fsmpage->fp_nodes[lchild]; + + if (rchild < NodesPerPage) + newvalue = Max(newvalue, + fsmpage->fp_nodes[rchild]); + + if (fsmpage->fp_nodes[nodeno] != newvalue) + { + fsmpage->fp_nodes[nodeno] = newvalue; + changed = true; + } + } + + return changed; +} |