summaryrefslogtreecommitdiffstats
path: root/src/backend/access/transam/timeline.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/access/transam/timeline.c')
-rw-r--r--src/backend/access/transam/timeline.c600
1 files changed, 600 insertions, 0 deletions
diff --git a/src/backend/access/transam/timeline.c b/src/backend/access/transam/timeline.c
new file mode 100644
index 0000000..be21968
--- /dev/null
+++ b/src/backend/access/transam/timeline.c
@@ -0,0 +1,600 @@
+/*-------------------------------------------------------------------------
+ *
+ * timeline.c
+ * Functions for reading and writing timeline history files.
+ *
+ * A timeline history file lists the timeline changes of the timeline, in
+ * a simple text format. They are archived along with the WAL segments.
+ *
+ * The files are named like "<tli>.history". For example, if the database
+ * starts up and switches to timeline 5, the timeline history file would be
+ * called "00000005.history".
+ *
+ * Each line in the file represents a timeline switch:
+ *
+ * <parentTLI> <switchpoint> <reason>
+ *
+ * parentTLI ID of the parent timeline
+ * switchpoint XLogRecPtr of the WAL location where the switch happened
+ * reason human-readable explanation of why the timeline was changed
+ *
+ * The fields are separated by tabs. Lines beginning with # are comments, and
+ * are ignored. Empty lines are also ignored.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/backend/access/transam/timeline.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "access/timeline.h"
+#include "access/xlog.h"
+#include "access/xlog_internal.h"
+#include "access/xlogarchive.h"
+#include "access/xlogdefs.h"
+#include "pgstat.h"
+#include "storage/fd.h"
+
+/*
+ * Copies all timeline history files with id's between 'begin' and 'end'
+ * from archive to pg_wal.
+ */
+void
+restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end)
+{
+ char path[MAXPGPATH];
+ char histfname[MAXFNAMELEN];
+ TimeLineID tli;
+
+ for (tli = begin; tli < end; tli++)
+ {
+ if (tli == 1)
+ continue;
+
+ TLHistoryFileName(histfname, tli);
+ if (RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false))
+ KeepFileRestoredFromArchive(path, histfname);
+ }
+}
+
+/*
+ * Try to read a timeline's history file.
+ *
+ * If successful, return the list of component TLIs (the given TLI followed by
+ * its ancestor TLIs). If we can't find the history file, assume that the
+ * timeline has no parents, and return a list of just the specified timeline
+ * ID.
+ */
+List *
+readTimeLineHistory(TimeLineID targetTLI)
+{
+ List *result;
+ char path[MAXPGPATH];
+ char histfname[MAXFNAMELEN];
+ FILE *fd;
+ TimeLineHistoryEntry *entry;
+ TimeLineID lasttli = 0;
+ XLogRecPtr prevend;
+ bool fromArchive = false;
+
+ /* Timeline 1 does not have a history file, so no need to check */
+ if (targetTLI == 1)
+ {
+ entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
+ entry->tli = targetTLI;
+ entry->begin = entry->end = InvalidXLogRecPtr;
+ return list_make1(entry);
+ }
+
+ if (ArchiveRecoveryRequested)
+ {
+ TLHistoryFileName(histfname, targetTLI);
+ fromArchive =
+ RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
+ }
+ else
+ TLHistoryFilePath(path, targetTLI);
+
+ fd = AllocateFile(path, "r");
+ if (fd == NULL)
+ {
+ if (errno != ENOENT)
+ ereport(FATAL,
+ (errcode_for_file_access(),
+ errmsg("could not open file \"%s\": %m", path)));
+ /* Not there, so assume no parents */
+ entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
+ entry->tli = targetTLI;
+ entry->begin = entry->end = InvalidXLogRecPtr;
+ return list_make1(entry);
+ }
+
+ result = NIL;
+
+ /*
+ * Parse the file...
+ */
+ prevend = InvalidXLogRecPtr;
+ for (;;)
+ {
+ char fline[MAXPGPATH];
+ char *res;
+ char *ptr;
+ TimeLineID tli;
+ uint32 switchpoint_hi;
+ uint32 switchpoint_lo;
+ int nfields;
+
+ pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_READ);
+ res = fgets(fline, sizeof(fline), fd);
+ pgstat_report_wait_end();
+ if (res == NULL)
+ {
+ if (ferror(fd))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\": %m", path)));
+
+ break;
+ }
+
+ /* skip leading whitespace and check for # comment */
+ for (ptr = fline; *ptr; ptr++)
+ {
+ if (!isspace((unsigned char) *ptr))
+ break;
+ }
+ if (*ptr == '\0' || *ptr == '#')
+ continue;
+
+ nfields = sscanf(fline, "%u\t%X/%X", &tli, &switchpoint_hi, &switchpoint_lo);
+
+ if (nfields < 1)
+ {
+ /* expect a numeric timeline ID as first field of line */
+ ereport(FATAL,
+ (errmsg("syntax error in history file: %s", fline),
+ errhint("Expected a numeric timeline ID.")));
+ }
+ if (nfields != 3)
+ ereport(FATAL,
+ (errmsg("syntax error in history file: %s", fline),
+ errhint("Expected a write-ahead log switchpoint location.")));
+
+ if (result && tli <= lasttli)
+ ereport(FATAL,
+ (errmsg("invalid data in history file: %s", fline),
+ errhint("Timeline IDs must be in increasing sequence.")));
+
+ lasttli = tli;
+
+ entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
+ entry->tli = tli;
+ entry->begin = prevend;
+ entry->end = ((uint64) (switchpoint_hi)) << 32 | (uint64) switchpoint_lo;
+ prevend = entry->end;
+
+ /* Build list with newest item first */
+ result = lcons(entry, result);
+
+ /* we ignore the remainder of each line */
+ }
+
+ FreeFile(fd);
+
+ if (result && targetTLI <= lasttli)
+ ereport(FATAL,
+ (errmsg("invalid data in history file \"%s\"", path),
+ errhint("Timeline IDs must be less than child timeline's ID.")));
+
+ /*
+ * Create one more entry for the "tip" of the timeline, which has no entry
+ * in the history file.
+ */
+ entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
+ entry->tli = targetTLI;
+ entry->begin = prevend;
+ entry->end = InvalidXLogRecPtr;
+
+ result = lcons(entry, result);
+
+ /*
+ * If the history file was fetched from archive, save it in pg_wal for
+ * future reference.
+ */
+ if (fromArchive)
+ KeepFileRestoredFromArchive(path, histfname);
+
+ return result;
+}
+
+/*
+ * Probe whether a timeline history file exists for the given timeline ID
+ */
+bool
+existsTimeLineHistory(TimeLineID probeTLI)
+{
+ char path[MAXPGPATH];
+ char histfname[MAXFNAMELEN];
+ FILE *fd;
+
+ /* Timeline 1 does not have a history file, so no need to check */
+ if (probeTLI == 1)
+ return false;
+
+ if (ArchiveRecoveryRequested)
+ {
+ TLHistoryFileName(histfname, probeTLI);
+ RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
+ }
+ else
+ TLHistoryFilePath(path, probeTLI);
+
+ fd = AllocateFile(path, "r");
+ if (fd != NULL)
+ {
+ FreeFile(fd);
+ return true;
+ }
+ else
+ {
+ if (errno != ENOENT)
+ ereport(FATAL,
+ (errcode_for_file_access(),
+ errmsg("could not open file \"%s\": %m", path)));
+ return false;
+ }
+}
+
+/*
+ * Find the newest existing timeline, assuming that startTLI exists.
+ *
+ * Note: while this is somewhat heuristic, it does positively guarantee
+ * that (result + 1) is not a known timeline, and therefore it should
+ * be safe to assign that ID to a new timeline.
+ */
+TimeLineID
+findNewestTimeLine(TimeLineID startTLI)
+{
+ TimeLineID newestTLI;
+ TimeLineID probeTLI;
+
+ /*
+ * The algorithm is just to probe for the existence of timeline history
+ * files. XXX is it useful to allow gaps in the sequence?
+ */
+ newestTLI = startTLI;
+
+ for (probeTLI = startTLI + 1;; probeTLI++)
+ {
+ if (existsTimeLineHistory(probeTLI))
+ {
+ newestTLI = probeTLI; /* probeTLI exists */
+ }
+ else
+ {
+ /* doesn't exist, assume we're done */
+ break;
+ }
+ }
+
+ return newestTLI;
+}
+
+/*
+ * Create a new timeline history file.
+ *
+ * newTLI: ID of the new timeline
+ * parentTLI: ID of its immediate parent
+ * switchpoint: WAL location where the system switched to the new timeline
+ * reason: human-readable explanation of why the timeline was switched
+ *
+ * Currently this is only used at the end recovery, and so there are no locking
+ * considerations. But we should be just as tense as XLogFileInit to avoid
+ * emplacing a bogus file.
+ */
+void
+writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
+ XLogRecPtr switchpoint, char *reason)
+{
+ char path[MAXPGPATH];
+ char tmppath[MAXPGPATH];
+ char histfname[MAXFNAMELEN];
+ char buffer[BLCKSZ];
+ int srcfd;
+ int fd;
+ int nbytes;
+
+ Assert(newTLI > parentTLI); /* else bad selection of newTLI */
+
+ /*
+ * Write into a temp file name.
+ */
+ snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
+
+ unlink(tmppath);
+
+ /* do not use get_sync_bit() here --- want to fsync only at end of fill */
+ fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL);
+ if (fd < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not create file \"%s\": %m", tmppath)));
+
+ /*
+ * If a history file exists for the parent, copy it verbatim
+ */
+ if (ArchiveRecoveryRequested)
+ {
+ TLHistoryFileName(histfname, parentTLI);
+ RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
+ }
+ else
+ TLHistoryFilePath(path, parentTLI);
+
+ srcfd = OpenTransientFile(path, O_RDONLY);
+ if (srcfd < 0)
+ {
+ if (errno != ENOENT)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not open file \"%s\": %m", path)));
+ /* Not there, so assume parent has no parents */
+ }
+ else
+ {
+ for (;;)
+ {
+ errno = 0;
+ pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_READ);
+ nbytes = (int) read(srcfd, buffer, sizeof(buffer));
+ pgstat_report_wait_end();
+ if (nbytes < 0 || errno != 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\": %m", path)));
+ if (nbytes == 0)
+ break;
+ errno = 0;
+ pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_WRITE);
+ if ((int) write(fd, buffer, nbytes) != nbytes)
+ {
+ int save_errno = errno;
+
+ /*
+ * If we fail to make the file, delete it to release disk
+ * space
+ */
+ unlink(tmppath);
+
+ /*
+ * if write didn't set errno, assume problem is no disk space
+ */
+ errno = save_errno ? save_errno : ENOSPC;
+
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not write to file \"%s\": %m", tmppath)));
+ }
+ pgstat_report_wait_end();
+ }
+
+ if (CloseTransientFile(srcfd) != 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not close file \"%s\": %m", path)));
+ }
+
+ /*
+ * Append one line with the details of this timeline split.
+ *
+ * If we did have a parent file, insert an extra newline just in case the
+ * parent file failed to end with one.
+ */
+ snprintf(buffer, sizeof(buffer),
+ "%s%u\t%X/%X\t%s\n",
+ (srcfd < 0) ? "" : "\n",
+ parentTLI,
+ LSN_FORMAT_ARGS(switchpoint),
+ reason);
+
+ nbytes = strlen(buffer);
+ errno = 0;
+ pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_WRITE);
+ if ((int) write(fd, buffer, nbytes) != nbytes)
+ {
+ int save_errno = errno;
+
+ /*
+ * If we fail to make the file, delete it to release disk space
+ */
+ unlink(tmppath);
+ /* if write didn't set errno, assume problem is no disk space */
+ errno = save_errno ? save_errno : ENOSPC;
+
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not write to file \"%s\": %m", tmppath)));
+ }
+ pgstat_report_wait_end();
+
+ pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_SYNC);
+ if (pg_fsync(fd) != 0)
+ ereport(data_sync_elevel(ERROR),
+ (errcode_for_file_access(),
+ errmsg("could not fsync file \"%s\": %m", tmppath)));
+ pgstat_report_wait_end();
+
+ if (CloseTransientFile(fd) != 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not close file \"%s\": %m", tmppath)));
+
+ /*
+ * Now move the completed history file into place with its final name.
+ */
+ TLHistoryFilePath(path, newTLI);
+
+ /*
+ * Perform the rename using link if available, paranoidly trying to avoid
+ * overwriting an existing file (there shouldn't be one).
+ */
+ durable_rename_excl(tmppath, path, ERROR);
+
+ /* The history file can be archived immediately. */
+ if (XLogArchivingActive())
+ {
+ TLHistoryFileName(histfname, newTLI);
+ XLogArchiveNotify(histfname);
+ }
+}
+
+/*
+ * Writes a history file for given timeline and contents.
+ *
+ * Currently this is only used in the walreceiver process, and so there are
+ * no locking considerations. But we should be just as tense as XLogFileInit
+ * to avoid emplacing a bogus file.
+ */
+void
+writeTimeLineHistoryFile(TimeLineID tli, char *content, int size)
+{
+ char path[MAXPGPATH];
+ char tmppath[MAXPGPATH];
+ int fd;
+
+ /*
+ * Write into a temp file name.
+ */
+ snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
+
+ unlink(tmppath);
+
+ /* do not use get_sync_bit() here --- want to fsync only at end of fill */
+ fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL);
+ if (fd < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not create file \"%s\": %m", tmppath)));
+
+ errno = 0;
+ pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_FILE_WRITE);
+ if ((int) write(fd, content, size) != size)
+ {
+ int save_errno = errno;
+
+ /*
+ * If we fail to make the file, delete it to release disk space
+ */
+ unlink(tmppath);
+ /* if write didn't set errno, assume problem is no disk space */
+ errno = save_errno ? save_errno : ENOSPC;
+
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not write to file \"%s\": %m", tmppath)));
+ }
+ pgstat_report_wait_end();
+
+ pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_FILE_SYNC);
+ if (pg_fsync(fd) != 0)
+ ereport(data_sync_elevel(ERROR),
+ (errcode_for_file_access(),
+ errmsg("could not fsync file \"%s\": %m", tmppath)));
+ pgstat_report_wait_end();
+
+ if (CloseTransientFile(fd) != 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not close file \"%s\": %m", tmppath)));
+
+ /*
+ * Now move the completed history file into place with its final name.
+ */
+ TLHistoryFilePath(path, tli);
+
+ /*
+ * Perform the rename using link if available, paranoidly trying to avoid
+ * overwriting an existing file (there shouldn't be one).
+ */
+ durable_rename_excl(tmppath, path, ERROR);
+}
+
+/*
+ * Returns true if 'expectedTLEs' contains a timeline with id 'tli'
+ */
+bool
+tliInHistory(TimeLineID tli, List *expectedTLEs)
+{
+ ListCell *cell;
+
+ foreach(cell, expectedTLEs)
+ {
+ if (((TimeLineHistoryEntry *) lfirst(cell))->tli == tli)
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Returns the ID of the timeline in use at a particular point in time, in
+ * the given timeline history.
+ */
+TimeLineID
+tliOfPointInHistory(XLogRecPtr ptr, List *history)
+{
+ ListCell *cell;
+
+ foreach(cell, history)
+ {
+ TimeLineHistoryEntry *tle = (TimeLineHistoryEntry *) lfirst(cell);
+
+ if ((XLogRecPtrIsInvalid(tle->begin) || tle->begin <= ptr) &&
+ (XLogRecPtrIsInvalid(tle->end) || ptr < tle->end))
+ {
+ /* found it */
+ return tle->tli;
+ }
+ }
+
+ /* shouldn't happen. */
+ elog(ERROR, "timeline history was not contiguous");
+ return 0; /* keep compiler quiet */
+}
+
+/*
+ * Returns the point in history where we branched off the given timeline,
+ * and the timeline we branched to (*nextTLI). Returns InvalidXLogRecPtr if
+ * the timeline is current, ie. we have not branched off from it, and throws
+ * an error if the timeline is not part of this server's history.
+ */
+XLogRecPtr
+tliSwitchPoint(TimeLineID tli, List *history, TimeLineID *nextTLI)
+{
+ ListCell *cell;
+
+ if (nextTLI)
+ *nextTLI = 0;
+ foreach(cell, history)
+ {
+ TimeLineHistoryEntry *tle = (TimeLineHistoryEntry *) lfirst(cell);
+
+ if (tle->tli == tli)
+ return tle->end;
+ if (nextTLI)
+ *nextTLI = tle->tli;
+ }
+
+ ereport(ERROR,
+ (errmsg("requested timeline %u is not in this server's history",
+ tli)));
+ return InvalidXLogRecPtr; /* keep compiler quiet */
+}