summaryrefslogtreecommitdiffstats
path: root/src/backend/access/transam/xlogfuncs.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/access/transam/xlogfuncs.c')
-rw-r--r--src/backend/access/transam/xlogfuncs.c648
1 files changed, 648 insertions, 0 deletions
diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c
new file mode 100644
index 0000000..02bd919
--- /dev/null
+++ b/src/backend/access/transam/xlogfuncs.c
@@ -0,0 +1,648 @@
+/*-------------------------------------------------------------------------
+ *
+ * xlogfuncs.c
+ *
+ * PostgreSQL write-ahead log manager user interface functions
+ *
+ * This file contains WAL control and information functions.
+ *
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/backend/access/transam/xlogfuncs.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <unistd.h>
+
+#include "access/htup_details.h"
+#include "access/xlog_internal.h"
+#include "access/xlogrecovery.h"
+#include "access/xlogutils.h"
+#include "catalog/pg_type.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "pgstat.h"
+#include "replication/walreceiver.h"
+#include "storage/fd.h"
+#include "storage/ipc.h"
+#include "storage/smgr.h"
+#include "utils/builtins.h"
+#include "utils/guc.h"
+#include "utils/memutils.h"
+#include "utils/numeric.h"
+#include "utils/pg_lsn.h"
+#include "utils/timestamp.h"
+#include "utils/tuplestore.h"
+
+/*
+ * Store label file and tablespace map during backups.
+ *
+ * pg_backup_start() allocates both buffers in TopMemoryContext and hands
+ * them to do_pg_backup_start().  pg_backup_stop() returns their contents to
+ * the caller, frees them and resets both pointers to NULL.
+ */
+static StringInfo label_file;
+static StringInfo tblspc_map_file;
+
+/*
+ * pg_backup_start: set up for taking an on-line backup dump
+ *
+ * Starts a backup through do_pg_backup_start(), handing it two session-lived
+ * buffers to fill in: one for the backup label and one for the tablespace
+ * map.  The buffers are kept in label_file / tblspc_map_file until
+ * pg_backup_stop() returns their contents to the caller.
+ *
+ * Arguments:
+ *    0 - text: the user-supplied label string (typically this would be used
+ *        to tell where the backup dump will be stored)
+ *    1 - bool: "fast" flag, forwarded unchanged to do_pg_backup_start()
+ *
+ * Returns the starting WAL location of the backup.  Raises an ERROR if this
+ * session already has a backup running.
+ *
+ * Permission checking for this function is managed through the normal
+ * GRANT system.
+ */
+Datum
+pg_backup_start(PG_FUNCTION_ARGS)
+{
+    text       *backupid = PG_GETARG_TEXT_PP(0);
+    bool        fast = PG_GETARG_BOOL(1);
+    char       *backupidstr;
+    XLogRecPtr  startpoint;
+    SessionBackupState status = get_backup_status();
+    MemoryContext oldcontext;
+
+    backupidstr = text_to_cstring(backupid);
+
+    if (status == SESSION_BACKUP_RUNNING)
+        ereport(ERROR,
+                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                 errmsg("a backup is already in progress in this session")));
+
+    /*
+     * No backup is running, so any buffers still referenced here were left
+     * behind by an earlier backup of this session that never reached the
+     * cleanup in pg_backup_stop (for instance because an error was raised).
+     * They live in TopMemoryContext, so release them explicitly; otherwise
+     * overwriting the pointers below would leak them for the rest of the
+     * session.
+     */
+    if (label_file != NULL)
+    {
+        pfree(label_file->data);
+        pfree(label_file);
+        label_file = NULL;
+    }
+    if (tblspc_map_file != NULL)
+    {
+        pfree(tblspc_map_file->data);
+        pfree(tblspc_map_file);
+        tblspc_map_file = NULL;
+    }
+
+    /*
+     * Label file and tablespace map file need to be long-lived, since they
+     * are read in pg_backup_stop.
+     */
+    oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+    label_file = makeStringInfo();
+    tblspc_map_file = makeStringInfo();
+    MemoryContextSwitchTo(oldcontext);
+
+    register_persistent_abort_backup_handler();
+
+    startpoint = do_pg_backup_start(backupidstr, fast, NULL, label_file,
+                                    NULL, tblspc_map_file);
+
+    PG_RETURN_LSN(startpoint);
+}
+
+
+/*
+ * pg_backup_stop: finish taking an on-line backup.
+ *
+ * Argument 0 ('waitforarchive') lets the user choose whether to wait for
+ * the WAL to be archived or to return as soon as the WAL record is written;
+ * it is handed to do_pg_backup_stop() unchanged.
+ *
+ * The result is a three-column row: the stop location, the backup label
+ * text and the tablespace map text.  An ERROR is raised when this session
+ * has no backup in progress.
+ *
+ * Permission checking for this function is managed through the normal
+ * GRANT system.
+ */
+Datum
+pg_backup_stop(PG_FUNCTION_ARGS)
+{
+#define PG_STOP_BACKUP_V2_COLS 3
+    SessionBackupState backup_state = get_backup_status();
+    bool        waitforarchive = PG_GETARG_BOOL(0);
+    Datum       row_values[PG_STOP_BACKUP_V2_COLS] = {0};
+    bool        row_nulls[PG_STOP_BACKUP_V2_COLS] = {false};
+    TupleDesc   rowdesc;
+    HeapTuple   row;
+    XLogRecPtr  stop_lsn;
+
+    /* The declared result type must resolve to a row type */
+    if (get_call_result_type(fcinfo, NULL, &rowdesc) != TYPEFUNC_COMPOSITE)
+        elog(ERROR, "return type must be a row type");
+
+    /* Nothing to stop unless this session started a backup */
+    if (backup_state != SESSION_BACKUP_RUNNING)
+        ereport(ERROR,
+                errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                errmsg("backup is not in progress"),
+                errhint("Did you call pg_backup_start()?"));
+
+    /* Finish the backup; the label buffer is passed along to the stop routine */
+    stop_lsn = do_pg_backup_stop(label_file->data, waitforarchive, NULL);
+
+    /*
+     * Build the output columns.  The two text columns are made from the
+     * session buffers before those buffers are released below, so the
+     * caller can write the label and tablespace map to disk.
+     */
+    row_values[0] = LSNGetDatum(stop_lsn);
+    row_values[1] = CStringGetTextDatum(label_file->data);
+    row_values[2] = CStringGetTextDatum(tblspc_map_file->data);
+
+    /* Release the TopMemoryContext buffers and forget about them */
+    pfree(label_file->data);
+    pfree(label_file);
+    label_file = NULL;
+
+    pfree(tblspc_map_file->data);
+    pfree(tblspc_map_file);
+    tblspc_map_file = NULL;
+
+    /* Assemble the row and hand it back as a Datum */
+    row = heap_form_tuple(rowdesc, row_values, row_nulls);
+
+    PG_RETURN_DATUM(HeapTupleGetDatum(row));
+}
+
+/*
+ * pg_switch_wal: switch to next xlog file
+ *
+ * Raises an ERROR while recovery is in progress.  Otherwise requests a WAL
+ * switch and, as a convenience, returns the WAL location of the switch
+ * record.
+ *
+ * Permission checking for this function is managed through the normal
+ * GRANT system.
+ */
+Datum
+pg_switch_wal(PG_FUNCTION_ARGS)
+{
+    XLogRecPtr  switch_lsn;
+
+    if (RecoveryInProgress())
+        ereport(ERROR,
+                errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                errmsg("recovery is in progress"),
+                errhint("WAL control functions cannot be executed during recovery."));
+
+    switch_lsn = RequestXLogSwitch(false);
+
+    PG_RETURN_LSN(switch_lsn);
+}
+
+/*
+ * pg_create_restore_point: a named point for restore
+ *
+ * Argument 0 is the restore point name (text).  An ERROR is raised when
+ * recovery is in progress, when XLogIsNeeded() reports false, or when the
+ * name does not fit in MAXFNAMELEN - 1 characters.  As a convenience, the
+ * WAL location of the restore point record is returned.
+ *
+ * Permission checking for this function is managed through the normal
+ * GRANT system.
+ */
+Datum
+pg_create_restore_point(PG_FUNCTION_ARGS)
+{
+    text       *restore_name = PG_GETARG_TEXT_PP(0);
+    char       *name_cstr;
+    XLogRecPtr  point_lsn;
+
+    if (RecoveryInProgress())
+        ereport(ERROR,
+                errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                errmsg("recovery is in progress"),
+                errhint("WAL control functions cannot be executed during recovery."));
+
+    if (!XLogIsNeeded())
+        ereport(ERROR,
+                errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                errmsg("WAL level not sufficient for creating a restore point"),
+                errhint("wal_level must be set to \"replica\" or \"logical\" at server start."));
+
+    /* The name is only converted once the state checks have passed */
+    name_cstr = text_to_cstring(restore_name);
+
+    if (strlen(name_cstr) >= MAXFNAMELEN)
+        ereport(ERROR,
+                errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                errmsg("value too long for restore point (maximum %d characters)", MAXFNAMELEN - 1));
+
+    point_lsn = XLogRestorePoint(name_cstr);
+
+    PG_RETURN_LSN(point_lsn);
+}
+
+/*
+ * Report the current WAL write location (same format as pg_backup_start etc)
+ *
+ * This is useful for determining how much of WAL is visible to an external
+ * archiving process.  Note that the data before this point is written out
+ * to the kernel, but is not necessarily synced to disk.
+ *
+ * Raises an ERROR while recovery is in progress.
+ */
+Datum
+pg_current_wal_lsn(PG_FUNCTION_ARGS)
+{
+    XLogRecPtr  write_lsn;
+
+    if (RecoveryInProgress())
+        ereport(ERROR,
+                errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                errmsg("recovery is in progress"),
+                errhint("WAL control functions cannot be executed during recovery."));
+
+    write_lsn = GetXLogWriteRecPtr();
+
+    PG_RETURN_LSN(write_lsn);
+}
+
+/*
+ * Report the current WAL insert location (same format as pg_backup_start etc)
+ *
+ * This function is mostly for debugging purposes.  It raises an ERROR while
+ * recovery is in progress.
+ */
+Datum
+pg_current_wal_insert_lsn(PG_FUNCTION_ARGS)
+{
+    XLogRecPtr  insert_lsn;
+
+    if (RecoveryInProgress())
+        ereport(ERROR,
+                errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                errmsg("recovery is in progress"),
+                errhint("WAL control functions cannot be executed during recovery."));
+
+    insert_lsn = GetXLogInsertRecPtr();
+
+    PG_RETURN_LSN(insert_lsn);
+}
+
+/*
+ * Report the current WAL flush location (same format as pg_backup_start etc)
+ *
+ * This function is mostly for debugging purposes.  It raises an ERROR while
+ * recovery is in progress.
+ */
+Datum
+pg_current_wal_flush_lsn(PG_FUNCTION_ARGS)
+{
+    XLogRecPtr  flush_lsn;
+
+    if (RecoveryInProgress())
+        ereport(ERROR,
+                errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                errmsg("recovery is in progress"),
+                errhint("WAL control functions cannot be executed during recovery."));
+
+    flush_lsn = GetFlushRecPtr(NULL);
+
+    PG_RETURN_LSN(flush_lsn);
+}
+
+/*
+ * Report the last WAL receive location (same format as pg_backup_start etc)
+ *
+ * This is useful for determining how much of WAL is guaranteed to be received
+ * and synced to disk by walreceiver.
+ *
+ * Returns SQL NULL when the reported location is zero.
+ */
+Datum
+pg_last_wal_receive_lsn(PG_FUNCTION_ARGS)
+{
+    XLogRecPtr  received_lsn = GetWalRcvFlushRecPtr(NULL, NULL);
+
+    if (received_lsn != 0)
+        PG_RETURN_LSN(received_lsn);
+
+    PG_RETURN_NULL();
+}
+
+/*
+ * Report the last WAL replay location (same format as pg_backup_start etc)
+ *
+ * This is useful for determining how much of WAL is visible to read-only
+ * connections during recovery.
+ *
+ * Returns SQL NULL when the reported location is zero.
+ */
+Datum
+pg_last_wal_replay_lsn(PG_FUNCTION_ARGS)
+{
+    XLogRecPtr  replayed_lsn = GetXLogReplayRecPtr(NULL);
+
+    if (replayed_lsn != 0)
+        PG_RETURN_LSN(replayed_lsn);
+
+    PG_RETURN_NULL();
+}
+
+/*
+ * Compute an xlog file name and decimal byte offset given a WAL location,
+ * such as is returned by pg_backup_stop() or pg_switch_wal().
+ *
+ * Note that a location exactly at a segment boundary is taken to be in
+ * the previous segment.  This is usually the right thing, since the
+ * expected usage is to determine which xlog file(s) are ready to archive.
+ *
+ * The result is a two-column row (file_name text, file_offset int4).  An
+ * ERROR is raised while recovery is in progress.
+ */
+Datum
+pg_walfile_name_offset(PG_FUNCTION_ARGS)
+{
+    XLogRecPtr  locationpoint = PG_GETARG_LSN(0);
+    XLogSegNo   segno;
+    uint32      seg_offset;
+    char        fname[MAXFNAMELEN];
+    TupleDesc   rowdesc;
+    Datum       cols[2];
+    bool        colnull[2];
+    HeapTuple   row;
+
+    if (RecoveryInProgress())
+        ereport(ERROR,
+                errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                errmsg("recovery is in progress"),
+                errhint("%s cannot be executed during recovery.",
+                        "pg_walfile_name_offset()"));
+
+    /*
+     * Describe the result row.  The column list here has to stay in sync
+     * with this function's pg_proc entry.
+     */
+    rowdesc = CreateTemplateTupleDesc(2);
+    TupleDescInitEntry(rowdesc, (AttrNumber) 1, "file_name",
+                       TEXTOID, -1, 0);
+    TupleDescInitEntry(rowdesc, (AttrNumber) 2, "file_offset",
+                       INT4OID, -1, 0);
+    rowdesc = BlessTupleDesc(rowdesc);
+
+    /* Column 1: name of the segment file, using the "previous segment" rule */
+    XLByteToPrevSeg(locationpoint, segno, wal_segment_size);
+    XLogFileName(fname, GetWALInsertionTimeLine(), segno,
+                 wal_segment_size);
+    cols[0] = CStringGetTextDatum(fname);
+    colnull[0] = false;
+
+    /* Column 2: offset of the location within a segment */
+    seg_offset = XLogSegmentOffset(locationpoint, wal_segment_size);
+    cols[1] = UInt32GetDatum(seg_offset);
+    colnull[1] = false;
+
+    /* Form the tuple from the prepared columns and return it */
+    row = heap_form_tuple(rowdesc, cols, colnull);
+
+    PG_RETURN_DATUM(HeapTupleGetDatum(row));
+}
+
+/*
+ * Compute an xlog file name given a WAL location,
+ * such as is returned by pg_backup_stop() or pg_switch_wal().
+ *
+ * The segment is chosen with XLByteToPrevSeg().  An ERROR is raised while
+ * recovery is in progress.
+ */
+Datum
+pg_walfile_name(PG_FUNCTION_ARGS)
+{
+    XLogRecPtr  locationpoint = PG_GETARG_LSN(0);
+    XLogSegNo   segno;
+    char        fname[MAXFNAMELEN];
+    text       *fname_text;
+
+    if (RecoveryInProgress())
+        ereport(ERROR,
+                errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                errmsg("recovery is in progress"),
+                errhint("%s cannot be executed during recovery.",
+                        "pg_walfile_name()"));
+
+    XLByteToPrevSeg(locationpoint, segno, wal_segment_size);
+    XLogFileName(fname, GetWALInsertionTimeLine(), segno,
+                 wal_segment_size);
+
+    fname_text = cstring_to_text(fname);
+
+    PG_RETURN_TEXT_P(fname_text);
+}
+
+/*
+ * pg_wal_replay_pause - Request to pause recovery
+ *
+ * Raises an ERROR when recovery is not in progress or when promotion has
+ * already been triggered.  Otherwise sets the recovery pause request and
+ * wakes up the recovery process.
+ *
+ * Permission checking for this function is managed through the normal
+ * GRANT system.
+ */
+Datum
+pg_wal_replay_pause(PG_FUNCTION_ARGS)
+{
+    if (!RecoveryInProgress())
+        ereport(ERROR,
+                errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                errmsg("recovery is not in progress"),
+                errhint("Recovery control functions can only be executed during recovery."));
+
+    if (PromoteIsTriggered())
+        ereport(ERROR,
+                errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                errmsg("standby promotion is ongoing"),
+                errhint("%s cannot be executed after promotion is triggered.",
+                        "pg_wal_replay_pause()"));
+
+    SetRecoveryPause(true);
+
+    /* Nudge the recovery process so that it notices the pause request */
+    WakeupRecovery();
+
+    PG_RETURN_VOID();
+}
+
+/*
+ * pg_wal_replay_resume - resume recovery now
+ *
+ * Raises an ERROR when recovery is not in progress or when promotion has
+ * already been triggered.  Otherwise clears the recovery pause request.
+ *
+ * Permission checking for this function is managed through the normal
+ * GRANT system.
+ */
+Datum
+pg_wal_replay_resume(PG_FUNCTION_ARGS)
+{
+    if (!RecoveryInProgress())
+        ereport(ERROR,
+                errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                errmsg("recovery is not in progress"),
+                errhint("Recovery control functions can only be executed during recovery."));
+
+    if (PromoteIsTriggered())
+        ereport(ERROR,
+                errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                errmsg("standby promotion is ongoing"),
+                errhint("%s cannot be executed after promotion is triggered.",
+                        "pg_wal_replay_resume()"));
+
+    SetRecoveryPause(false);
+
+    PG_RETURN_VOID();
+}
+
+/*
+ * pg_is_wal_replay_paused
+ *
+ * Returns true when the recovery pause state is anything other than
+ * RECOVERY_NOT_PAUSED.  Raises an ERROR when recovery is not in progress.
+ */
+Datum
+pg_is_wal_replay_paused(PG_FUNCTION_ARGS)
+{
+    bool        pause_active;
+
+    if (!RecoveryInProgress())
+        ereport(ERROR,
+                errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                errmsg("recovery is not in progress"),
+                errhint("Recovery control functions can only be executed during recovery."));
+
+    pause_active = (GetRecoveryPauseState() != RECOVERY_NOT_PAUSED);
+
+    PG_RETURN_BOOL(pause_active);
+}
+
+/*
+ * pg_get_wal_replay_pause_state - Returns the recovery pause state.
+ *
+ * Returned values:
+ *
+ * 'not paused' - if pause is not requested
+ * 'pause requested' - if pause is requested but recovery is not yet paused
+ * 'paused' - if recovery is paused
+ *
+ * Raises an ERROR when recovery is not in progress.
+ */
+Datum
+pg_get_wal_replay_pause_state(PG_FUNCTION_ARGS)
+{
+    const char *state_name = NULL;
+
+    if (!RecoveryInProgress())
+        ereport(ERROR,
+                errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                errmsg("recovery is not in progress"),
+                errhint("Recovery control functions can only be executed during recovery."));
+
+    /* Translate the pause state into its user-visible name */
+    switch (GetRecoveryPauseState())
+    {
+        case RECOVERY_PAUSED:
+            state_name = "paused";
+            break;
+        case RECOVERY_PAUSE_REQUESTED:
+            state_name = "pause requested";
+            break;
+        case RECOVERY_NOT_PAUSED:
+            state_name = "not paused";
+            break;
+    }
+
+    /* Every state handled above assigns a name */
+    Assert(state_name != NULL);
+    PG_RETURN_TEXT_P(cstring_to_text(state_name));
+}
+
+/*
+ * Returns timestamp of latest processed commit/abort record.
+ *
+ * When the server has been started normally without recovery the function
+ * returns NULL.  In the code below that is the case where GetLatestXTime()
+ * reports zero.
+ */
+Datum
+pg_last_xact_replay_timestamp(PG_FUNCTION_ARGS)
+{
+    TimestampTz latest_xtime = GetLatestXTime();
+
+    if (latest_xtime != 0)
+        PG_RETURN_TIMESTAMPTZ(latest_xtime);
+
+    PG_RETURN_NULL();
+}
+
+/*
+ * Returns bool with current recovery mode, a global state.
+ *
+ * This is the SQL-callable wrapper around RecoveryInProgress().
+ */
+Datum
+pg_is_in_recovery(PG_FUNCTION_ARGS)
+{
+    bool        in_recovery = RecoveryInProgress();
+
+    PG_RETURN_BOOL(in_recovery);
+}
+
+/*
+ * Compute the difference in bytes between two WAL locations.
+ *
+ * Both arguments are passed, still as Datums, to pg_lsn_mi(); the Datum
+ * that call yields is this function's result.
+ */
+Datum
+pg_wal_lsn_diff(PG_FUNCTION_ARGS)
+{
+    Datum       result;
+
+    result = DirectFunctionCall2(pg_lsn_mi,
+                                 PG_GETARG_DATUM(0),
+                                 PG_GETARG_DATUM(1));
+
+    /*
+     * "result" is already a Datum, so return it as such.  A type-specific
+     * return macro would run it through a conversion it does not need.
+     */
+    PG_RETURN_DATUM(result);
+}
+
+/*
+ * Promotes a standby server.
+ *
+ * Arguments:
+ *    0 - bool "wait": whether to wait for promotion to finish
+ *    1 - int4 "wait_seconds": how long to wait at most; must be positive
+ *
+ * A result of "true" means that promotion has been completed if "wait" is
+ * "true", or initiated if "wait" is false.  "false" is returned, after a
+ * WARNING, when the postmaster could not be signaled or when promotion did
+ * not finish within "wait_seconds"; it is also returned, without a WARNING,
+ * when the postmaster dies while waiting.
+ *
+ * Raises an ERROR when recovery is not in progress, when "wait_seconds" is
+ * not positive, or when the promote signal file cannot be created/written.
+ */
+Datum
+pg_promote(PG_FUNCTION_ARGS)
+{
+    bool        wait = PG_GETARG_BOOL(0);
+    int         wait_seconds = PG_GETARG_INT32(1);
+    FILE       *promote_file;
+    int         sec;
+    int         i;
+
+    if (!RecoveryInProgress())
+        ereport(ERROR,
+                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                 errmsg("recovery is not in progress"),
+                 errhint("Recovery control functions can only be executed during recovery.")));
+
+    if (wait_seconds <= 0)
+        ereport(ERROR,
+                (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+                 errmsg("\"wait_seconds\" must not be negative or zero")));
+
+    /* create the promote signal file */
+    promote_file = AllocateFile(PROMOTE_SIGNAL_FILE, "w");
+    if (!promote_file)
+        ereport(ERROR,
+                (errcode_for_file_access(),
+                 errmsg("could not create file \"%s\": %m",
+                        PROMOTE_SIGNAL_FILE)));
+
+    if (FreeFile(promote_file))
+        ereport(ERROR,
+                (errcode_for_file_access(),
+                 errmsg("could not write file \"%s\": %m",
+                        PROMOTE_SIGNAL_FILE)));
+
+    /* signal the postmaster */
+    if (kill(PostmasterPid, SIGUSR1) != 0)
+    {
+        ereport(WARNING,
+                (errmsg("failed to send signal to postmaster: %m")));
+        /* the signal was not delivered, so remove the file created above */
+        (void) unlink(PROMOTE_SIGNAL_FILE);
+        PG_RETURN_BOOL(false);
+    }
+
+    /* return immediately if waiting was not requested */
+    if (!wait)
+        PG_RETURN_BOOL(true);
+
+    /*
+     * Wait for the amount of time wanted until promotion.
+     *
+     * The wait is split into an outer per-second loop and an inner
+     * per-tick loop.  Counting to WAITS_PER_SECOND * wait_seconds in a
+     * single "int" would overflow for large wait_seconds values.
+     */
+#define WAITS_PER_SECOND 10
+    for (sec = 0; sec < wait_seconds; sec++)
+    {
+        for (i = 0; i < WAITS_PER_SECOND; i++)
+        {
+            int         rc;
+
+            ResetLatch(MyLatch);
+
+            if (!RecoveryInProgress())
+                PG_RETURN_BOOL(true);
+
+            CHECK_FOR_INTERRUPTS();
+
+            rc = WaitLatch(MyLatch,
+                           WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+                           1000L / WAITS_PER_SECOND,
+                           WAIT_EVENT_PROMOTE);
+
+            /*
+             * Emergency bailout if postmaster has died.  This is to avoid
+             * the necessity for manual cleanup of all postmaster children.
+             */
+            if (rc & WL_POSTMASTER_DEATH)
+                PG_RETURN_BOOL(false);
+        }
+    }
+
+    ereport(WARNING,
+            (errmsg_plural("server did not promote within %d second",
+                           "server did not promote within %d seconds",
+                           wait_seconds,
+                           wait_seconds)));
+    PG_RETURN_BOOL(false);
+}