summaryrefslogtreecommitdiffstats
path: root/src/backend/access/transam/xlogfuncs.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/access/transam/xlogfuncs.c')
-rw-r--r--src/backend/access/transam/xlogfuncs.c830
1 files changed, 830 insertions, 0 deletions
diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c
new file mode 100644
index 0000000..b98deb7
--- /dev/null
+++ b/src/backend/access/transam/xlogfuncs.c
@@ -0,0 +1,830 @@
+/*-------------------------------------------------------------------------
+ *
+ * xlogfuncs.c
+ *
+ * PostgreSQL write-ahead log manager user interface functions
+ *
+ * This file contains WAL control and information functions.
+ *
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/backend/access/transam/xlogfuncs.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <unistd.h>
+
+#include "access/htup_details.h"
+#include "access/xlog.h"
+#include "access/xlog_internal.h"
+#include "access/xlogutils.h"
+#include "catalog/pg_type.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "pgstat.h"
+#include "replication/walreceiver.h"
+#include "storage/fd.h"
+#include "storage/ipc.h"
+#include "storage/smgr.h"
+#include "utils/builtins.h"
+#include "utils/guc.h"
+#include "utils/memutils.h"
+#include "utils/numeric.h"
+#include "utils/pg_lsn.h"
+#include "utils/timestamp.h"
+#include "utils/tuplestore.h"
+
+/*
+ * Store label file and tablespace map during non-exclusive backups.
+ */
+static StringInfo label_file;
+static StringInfo tblspc_map_file;
+
+/*
+ * pg_start_backup: set up for taking an on-line backup dump
+ *
+ * Essentially what this does is to create a backup label file in $PGDATA,
+ * where it will be archived as part of the backup dump. The label file
+ * contains the user-supplied label string (typically this would be used
+ * to tell where the backup dump will be stored) and the starting time and
+ * starting WAL location for the dump.
+ *
+ * Permission checking for this function is managed through the normal
+ * GRANT system.
+ */
+Datum
+pg_start_backup(PG_FUNCTION_ARGS)
+{
+ text *backupid = PG_GETARG_TEXT_PP(0);
+ bool fast = PG_GETARG_BOOL(1);
+ bool exclusive = PG_GETARG_BOOL(2);
+ char *backupidstr;
+ XLogRecPtr startpoint;
+ SessionBackupState status = get_backup_status();
+
+ backupidstr = text_to_cstring(backupid);
+
+ if (status == SESSION_BACKUP_NON_EXCLUSIVE)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("a backup is already in progress in this session")));
+
+ if (exclusive)
+ {
+ startpoint = do_pg_start_backup(backupidstr, fast, NULL, NULL,
+ NULL, NULL);
+ }
+ else
+ {
+ MemoryContext oldcontext;
+
+ /*
+ * Label file and tablespace map file need to be long-lived, since
+ * they are read in pg_stop_backup.
+ */
+ oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+ label_file = makeStringInfo();
+ tblspc_map_file = makeStringInfo();
+ MemoryContextSwitchTo(oldcontext);
+
+ register_persistent_abort_backup_handler();
+
+ startpoint = do_pg_start_backup(backupidstr, fast, NULL, label_file,
+ NULL, tblspc_map_file);
+ }
+
+ PG_RETURN_LSN(startpoint);
+}
+
+/*
+ * pg_stop_backup: finish taking an on-line backup dump
+ *
+ * We write an end-of-backup WAL record, and remove the backup label file
+ * created by pg_start_backup, creating a backup history file in pg_wal
+ * instead (whence it will immediately be archived). The backup history file
+ * contains the same info found in the label file, plus the backup-end time
+ * and WAL location. Before 9.0, the backup-end time was read from the backup
+ * history file at the beginning of archive recovery, but we now use the WAL
+ * record for that and the file is for informational and debug purposes only.
+ *
+ * Note: different from CancelBackup which just cancels online backup mode.
+ *
+ * Note: this version is only called to stop an exclusive backup. The function
+ * pg_stop_backup_v2 (overloaded as pg_stop_backup in SQL) is called to
+ * stop non-exclusive backups.
+ *
+ * Permission checking for this function is managed through the normal
+ * GRANT system.
+ */
+Datum
+pg_stop_backup(PG_FUNCTION_ARGS)
+{
+ XLogRecPtr stoppoint;
+ SessionBackupState status = get_backup_status();
+
+ if (status == SESSION_BACKUP_NON_EXCLUSIVE)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("non-exclusive backup in progress"),
+ errhint("Did you mean to use pg_stop_backup('f')?")));
+
+ /*
+ * Exclusive backups were typically started in a different connection, so
+ * don't try to verify that status of backup is set to
+ * SESSION_BACKUP_EXCLUSIVE in this function. Actual verification that an
+ * exclusive backup is in fact running is handled inside
+ * do_pg_stop_backup.
+ */
+ stoppoint = do_pg_stop_backup(NULL, true, NULL);
+
+ PG_RETURN_LSN(stoppoint);
+}
+
+
+/*
+ * pg_stop_backup_v2: finish taking exclusive or nonexclusive on-line backup.
+ *
+ * Works the same as pg_stop_backup, except for non-exclusive backups it returns
+ * the backup label and tablespace map files as text fields in as part of the
+ * resultset.
+ *
+ * The first parameter (variable 'exclusive') allows the user to tell us if
+ * this is an exclusive or a non-exclusive backup.
+ *
+ * The second parameter (variable 'waitforarchive'), which is optional,
+ * allows the user to choose if they want to wait for the WAL to be archived
+ * or if we should just return as soon as the WAL record is written.
+ *
+ * Permission checking for this function is managed through the normal
+ * GRANT system.
+ */
+Datum
+pg_stop_backup_v2(PG_FUNCTION_ARGS)
+{
+ ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+ TupleDesc tupdesc;
+ Tuplestorestate *tupstore;
+ MemoryContext per_query_ctx;
+ MemoryContext oldcontext;
+ Datum values[3];
+ bool nulls[3];
+
+ bool exclusive = PG_GETARG_BOOL(0);
+ bool waitforarchive = PG_GETARG_BOOL(1);
+ XLogRecPtr stoppoint;
+ SessionBackupState status = get_backup_status();
+
+ /* check to see if caller supports us returning a tuplestore */
+ if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("set-valued function called in context that cannot accept a set")));
+ if (!(rsinfo->allowedModes & SFRM_Materialize))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("materialize mode required, but it is not allowed in this context")));
+
+ /* Build a tuple descriptor for our result type */
+ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+ elog(ERROR, "return type must be a row type");
+
+ per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
+ oldcontext = MemoryContextSwitchTo(per_query_ctx);
+
+ tupstore = tuplestore_begin_heap(true, false, work_mem);
+ rsinfo->returnMode = SFRM_Materialize;
+ rsinfo->setResult = tupstore;
+ rsinfo->setDesc = tupdesc;
+
+ MemoryContextSwitchTo(oldcontext);
+
+ MemSet(values, 0, sizeof(values));
+ MemSet(nulls, 0, sizeof(nulls));
+
+ if (exclusive)
+ {
+ if (status == SESSION_BACKUP_NON_EXCLUSIVE)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("non-exclusive backup in progress"),
+ errhint("Did you mean to use pg_stop_backup('f')?")));
+
+ /*
+ * Stop the exclusive backup, and since we're in an exclusive backup
+ * return NULL for both backup_label and tablespace_map.
+ */
+ stoppoint = do_pg_stop_backup(NULL, waitforarchive, NULL);
+
+ nulls[1] = true;
+ nulls[2] = true;
+ }
+ else
+ {
+ if (status != SESSION_BACKUP_NON_EXCLUSIVE)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("non-exclusive backup is not in progress"),
+ errhint("Did you mean to use pg_stop_backup('t')?")));
+
+ /*
+ * Stop the non-exclusive backup. Return a copy of the backup label
+ * and tablespace map so they can be written to disk by the caller.
+ */
+ stoppoint = do_pg_stop_backup(label_file->data, waitforarchive, NULL);
+
+ values[1] = CStringGetTextDatum(label_file->data);
+ values[2] = CStringGetTextDatum(tblspc_map_file->data);
+
+ /* Free structures allocated in TopMemoryContext */
+ pfree(label_file->data);
+ pfree(label_file);
+ label_file = NULL;
+ pfree(tblspc_map_file->data);
+ pfree(tblspc_map_file);
+ tblspc_map_file = NULL;
+ }
+
+ /* Stoppoint is included on both exclusive and nonexclusive backups */
+ values[0] = LSNGetDatum(stoppoint);
+
+ tuplestore_putvalues(tupstore, tupdesc, values, nulls);
+ tuplestore_donestoring(tupstore);
+
+ return (Datum) 0;
+}
+
+/*
+ * pg_switch_wal: switch to next xlog file
+ *
+ * Permission checking for this function is managed through the normal
+ * GRANT system.
+ */
+Datum
+pg_switch_wal(PG_FUNCTION_ARGS)
+{
+ XLogRecPtr switchpoint;
+
+ if (RecoveryInProgress())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("recovery is in progress"),
+ errhint("WAL control functions cannot be executed during recovery.")));
+
+ switchpoint = RequestXLogSwitch(false);
+
+ /*
+ * As a convenience, return the WAL location of the switch record
+ */
+ PG_RETURN_LSN(switchpoint);
+}
+
+/*
+ * pg_create_restore_point: a named point for restore
+ *
+ * Permission checking for this function is managed through the normal
+ * GRANT system.
+ */
+Datum
+pg_create_restore_point(PG_FUNCTION_ARGS)
+{
+ text *restore_name = PG_GETARG_TEXT_PP(0);
+ char *restore_name_str;
+ XLogRecPtr restorepoint;
+
+ if (RecoveryInProgress())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("recovery is in progress"),
+ errhint("WAL control functions cannot be executed during recovery.")));
+
+ if (!XLogIsNeeded())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("WAL level not sufficient for creating a restore point"),
+ errhint("wal_level must be set to \"replica\" or \"logical\" at server start.")));
+
+ restore_name_str = text_to_cstring(restore_name);
+
+ if (strlen(restore_name_str) >= MAXFNAMELEN)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("value too long for restore point (maximum %d characters)", MAXFNAMELEN - 1)));
+
+ restorepoint = XLogRestorePoint(restore_name_str);
+
+ /*
+ * As a convenience, return the WAL location of the restore point record
+ */
+ PG_RETURN_LSN(restorepoint);
+}
+
+/*
+ * Report the current WAL write location (same format as pg_start_backup etc)
+ *
+ * This is useful for determining how much of WAL is visible to an external
+ * archiving process. Note that the data before this point is written out
+ * to the kernel, but is not necessarily synced to disk.
+ */
+Datum
+pg_current_wal_lsn(PG_FUNCTION_ARGS)
+{
+ XLogRecPtr current_recptr;
+
+ if (RecoveryInProgress())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("recovery is in progress"),
+ errhint("WAL control functions cannot be executed during recovery.")));
+
+ current_recptr = GetXLogWriteRecPtr();
+
+ PG_RETURN_LSN(current_recptr);
+}
+
+/*
+ * Report the current WAL insert location (same format as pg_start_backup etc)
+ *
+ * This function is mostly for debugging purposes.
+ */
+Datum
+pg_current_wal_insert_lsn(PG_FUNCTION_ARGS)
+{
+ XLogRecPtr current_recptr;
+
+ if (RecoveryInProgress())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("recovery is in progress"),
+ errhint("WAL control functions cannot be executed during recovery.")));
+
+ current_recptr = GetXLogInsertRecPtr();
+
+ PG_RETURN_LSN(current_recptr);
+}
+
+/*
+ * Report the current WAL flush location (same format as pg_start_backup etc)
+ *
+ * This function is mostly for debugging purposes.
+ */
+Datum
+pg_current_wal_flush_lsn(PG_FUNCTION_ARGS)
+{
+ XLogRecPtr current_recptr;
+
+ if (RecoveryInProgress())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("recovery is in progress"),
+ errhint("WAL control functions cannot be executed during recovery.")));
+
+ current_recptr = GetFlushRecPtr();
+
+ PG_RETURN_LSN(current_recptr);
+}
+
+/*
+ * Report the last WAL receive location (same format as pg_start_backup etc)
+ *
+ * This is useful for determining how much of WAL is guaranteed to be received
+ * and synced to disk by walreceiver.
+ */
+Datum
+pg_last_wal_receive_lsn(PG_FUNCTION_ARGS)
+{
+ XLogRecPtr recptr;
+
+ recptr = GetWalRcvFlushRecPtr(NULL, NULL);
+
+ if (recptr == 0)
+ PG_RETURN_NULL();
+
+ PG_RETURN_LSN(recptr);
+}
+
+/*
+ * Report the last WAL replay location (same format as pg_start_backup etc)
+ *
+ * This is useful for determining how much of WAL is visible to read-only
+ * connections during recovery.
+ */
+Datum
+pg_last_wal_replay_lsn(PG_FUNCTION_ARGS)
+{
+ XLogRecPtr recptr;
+
+ recptr = GetXLogReplayRecPtr(NULL);
+
+ if (recptr == 0)
+ PG_RETURN_NULL();
+
+ PG_RETURN_LSN(recptr);
+}
+
+/*
+ * Compute an xlog file name and decimal byte offset given a WAL location,
+ * such as is returned by pg_stop_backup() or pg_switch_wal().
+ *
+ * Note that a location exactly at a segment boundary is taken to be in
+ * the previous segment. This is usually the right thing, since the
+ * expected usage is to determine which xlog file(s) are ready to archive.
+ */
+Datum
+pg_walfile_name_offset(PG_FUNCTION_ARGS)
+{
+ XLogSegNo xlogsegno;
+ uint32 xrecoff;
+ XLogRecPtr locationpoint = PG_GETARG_LSN(0);
+ char xlogfilename[MAXFNAMELEN];
+ Datum values[2];
+ bool isnull[2];
+ TupleDesc resultTupleDesc;
+ HeapTuple resultHeapTuple;
+ Datum result;
+
+ if (RecoveryInProgress())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("recovery is in progress"),
+ errhint("%s cannot be executed during recovery.",
+ "pg_walfile_name_offset()")));
+
+ /*
+ * Construct a tuple descriptor for the result row. This must match this
+ * function's pg_proc entry!
+ */
+ resultTupleDesc = CreateTemplateTupleDesc(2);
+ TupleDescInitEntry(resultTupleDesc, (AttrNumber) 1, "file_name",
+ TEXTOID, -1, 0);
+ TupleDescInitEntry(resultTupleDesc, (AttrNumber) 2, "file_offset",
+ INT4OID, -1, 0);
+
+ resultTupleDesc = BlessTupleDesc(resultTupleDesc);
+
+ /*
+ * xlogfilename
+ */
+ XLByteToPrevSeg(locationpoint, xlogsegno, wal_segment_size);
+ XLogFileName(xlogfilename, ThisTimeLineID, xlogsegno, wal_segment_size);
+
+ values[0] = CStringGetTextDatum(xlogfilename);
+ isnull[0] = false;
+
+ /*
+ * offset
+ */
+ xrecoff = XLogSegmentOffset(locationpoint, wal_segment_size);
+
+ values[1] = UInt32GetDatum(xrecoff);
+ isnull[1] = false;
+
+ /*
+ * Tuple jam: Having first prepared your Datums, then squash together
+ */
+ resultHeapTuple = heap_form_tuple(resultTupleDesc, values, isnull);
+
+ result = HeapTupleGetDatum(resultHeapTuple);
+
+ PG_RETURN_DATUM(result);
+}
+
+/*
+ * Compute an xlog file name given a WAL location,
+ * such as is returned by pg_stop_backup() or pg_switch_wal().
+ */
+Datum
+pg_walfile_name(PG_FUNCTION_ARGS)
+{
+ XLogSegNo xlogsegno;
+ XLogRecPtr locationpoint = PG_GETARG_LSN(0);
+ char xlogfilename[MAXFNAMELEN];
+
+ if (RecoveryInProgress())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("recovery is in progress"),
+ errhint("%s cannot be executed during recovery.",
+ "pg_walfile_name()")));
+
+ XLByteToPrevSeg(locationpoint, xlogsegno, wal_segment_size);
+ XLogFileName(xlogfilename, ThisTimeLineID, xlogsegno, wal_segment_size);
+
+ PG_RETURN_TEXT_P(cstring_to_text(xlogfilename));
+}
+
+/*
+ * pg_wal_replay_pause - Request to pause recovery
+ *
+ * Permission checking for this function is managed through the normal
+ * GRANT system.
+ */
+Datum
+pg_wal_replay_pause(PG_FUNCTION_ARGS)
+{
+ if (!RecoveryInProgress())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("recovery is not in progress"),
+ errhint("Recovery control functions can only be executed during recovery.")));
+
+ if (PromoteIsTriggered())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("standby promotion is ongoing"),
+ errhint("%s cannot be executed after promotion is triggered.",
+ "pg_wal_replay_pause()")));
+
+ SetRecoveryPause(true);
+
+ /* wake up the recovery process so that it can process the pause request */
+ WakeupRecovery();
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * pg_wal_replay_resume - resume recovery now
+ *
+ * Permission checking for this function is managed through the normal
+ * GRANT system.
+ */
+Datum
+pg_wal_replay_resume(PG_FUNCTION_ARGS)
+{
+ if (!RecoveryInProgress())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("recovery is not in progress"),
+ errhint("Recovery control functions can only be executed during recovery.")));
+
+ if (PromoteIsTriggered())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("standby promotion is ongoing"),
+ errhint("%s cannot be executed after promotion is triggered.",
+ "pg_wal_replay_resume()")));
+
+ SetRecoveryPause(false);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * pg_is_wal_replay_paused
+ */
+Datum
+pg_is_wal_replay_paused(PG_FUNCTION_ARGS)
+{
+ if (!RecoveryInProgress())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("recovery is not in progress"),
+ errhint("Recovery control functions can only be executed during recovery.")));
+
+ PG_RETURN_BOOL(GetRecoveryPauseState() != RECOVERY_NOT_PAUSED);
+}
+
+/*
+ * pg_get_wal_replay_pause_state - Returns the recovery pause state.
+ *
+ * Returned values:
+ *
+ * 'not paused' - if pause is not requested
+ * 'pause requested' - if pause is requested but recovery is not yet paused
+ * 'paused' - if recovery is paused
+ */
+Datum
+pg_get_wal_replay_pause_state(PG_FUNCTION_ARGS)
+{
+ char *statestr = NULL;
+
+ if (!RecoveryInProgress())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("recovery is not in progress"),
+ errhint("Recovery control functions can only be executed during recovery.")));
+
+ /* get the recovery pause state */
+ switch (GetRecoveryPauseState())
+ {
+ case RECOVERY_NOT_PAUSED:
+ statestr = "not paused";
+ break;
+ case RECOVERY_PAUSE_REQUESTED:
+ statestr = "pause requested";
+ break;
+ case RECOVERY_PAUSED:
+ statestr = "paused";
+ break;
+ }
+
+ Assert(statestr != NULL);
+ PG_RETURN_TEXT_P(cstring_to_text(statestr));
+}
+
+/*
+ * Returns timestamp of latest processed commit/abort record.
+ *
+ * When the server has been started normally without recovery the function
+ * returns NULL.
+ */
+Datum
+pg_last_xact_replay_timestamp(PG_FUNCTION_ARGS)
+{
+ TimestampTz xtime;
+
+ xtime = GetLatestXTime();
+ if (xtime == 0)
+ PG_RETURN_NULL();
+
+ PG_RETURN_TIMESTAMPTZ(xtime);
+}
+
+/*
+ * Returns bool with current recovery mode, a global state.
+ */
+Datum
+pg_is_in_recovery(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BOOL(RecoveryInProgress());
+}
+
+/*
+ * Compute the difference in bytes between two WAL locations.
+ */
+Datum
+pg_wal_lsn_diff(PG_FUNCTION_ARGS)
+{
+ Datum result;
+
+ result = DirectFunctionCall2(pg_lsn_mi,
+ PG_GETARG_DATUM(0),
+ PG_GETARG_DATUM(1));
+
+ PG_RETURN_NUMERIC(result);
+}
+
+/*
+ * Returns bool with current on-line backup mode, a global state.
+ */
+Datum
+pg_is_in_backup(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BOOL(BackupInProgress());
+}
+
+/*
+ * Returns start time of an online exclusive backup.
+ *
+ * When there's no exclusive backup in progress, the function
+ * returns NULL.
+ */
+Datum
+pg_backup_start_time(PG_FUNCTION_ARGS)
+{
+ Datum xtime;
+ FILE *lfp;
+ char fline[MAXPGPATH];
+ char backup_start_time[30];
+
+ /*
+ * See if label file is present
+ */
+ lfp = AllocateFile(BACKUP_LABEL_FILE, "r");
+ if (lfp == NULL)
+ {
+ if (errno != ENOENT)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\": %m",
+ BACKUP_LABEL_FILE)));
+ PG_RETURN_NULL();
+ }
+
+ /*
+ * Parse the file to find the START TIME line.
+ */
+ backup_start_time[0] = '\0';
+ while (fgets(fline, sizeof(fline), lfp) != NULL)
+ {
+ if (sscanf(fline, "START TIME: %25[^\n]\n", backup_start_time) == 1)
+ break;
+ }
+
+ /* Check for a read error. */
+ if (ferror(lfp))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\": %m", BACKUP_LABEL_FILE)));
+
+ /* Close the backup label file. */
+ if (FreeFile(lfp))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not close file \"%s\": %m", BACKUP_LABEL_FILE)));
+
+ if (strlen(backup_start_time) == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE)));
+
+ /*
+ * Convert the time string read from file to TimestampTz form.
+ */
+ xtime = DirectFunctionCall3(timestamptz_in,
+ CStringGetDatum(backup_start_time),
+ ObjectIdGetDatum(InvalidOid),
+ Int32GetDatum(-1));
+
+ PG_RETURN_DATUM(xtime);
+}
+
+/*
+ * Promotes a standby server.
+ *
+ * A result of "true" means that promotion has been completed if "wait" is
+ * "true", or initiated if "wait" is false.
+ */
+Datum
+pg_promote(PG_FUNCTION_ARGS)
+{
+ bool wait = PG_GETARG_BOOL(0);
+ int wait_seconds = PG_GETARG_INT32(1);
+ FILE *promote_file;
+ int i;
+
+ if (!RecoveryInProgress())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("recovery is not in progress"),
+ errhint("Recovery control functions can only be executed during recovery.")));
+
+ if (wait_seconds <= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("\"wait_seconds\" must not be negative or zero")));
+
+ /* create the promote signal file */
+ promote_file = AllocateFile(PROMOTE_SIGNAL_FILE, "w");
+ if (!promote_file)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not create file \"%s\": %m",
+ PROMOTE_SIGNAL_FILE)));
+
+ if (FreeFile(promote_file))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not write file \"%s\": %m",
+ PROMOTE_SIGNAL_FILE)));
+
+ /* signal the postmaster */
+ if (kill(PostmasterPid, SIGUSR1) != 0)
+ {
+ ereport(WARNING,
+ (errmsg("failed to send signal to postmaster: %m")));
+ (void) unlink(PROMOTE_SIGNAL_FILE);
+ PG_RETURN_BOOL(false);
+ }
+
+ /* return immediately if waiting was not requested */
+ if (!wait)
+ PG_RETURN_BOOL(true);
+
+ /* wait for the amount of time wanted until promotion */
+#define WAITS_PER_SECOND 10
+ for (i = 0; i < WAITS_PER_SECOND * wait_seconds; i++)
+ {
+ int rc;
+
+ ResetLatch(MyLatch);
+
+ if (!RecoveryInProgress())
+ PG_RETURN_BOOL(true);
+
+ CHECK_FOR_INTERRUPTS();
+
+ rc = WaitLatch(MyLatch,
+ WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+ 1000L / WAITS_PER_SECOND,
+ WAIT_EVENT_PROMOTE);
+
+ /*
+ * Emergency bailout if postmaster has died. This is to avoid the
+ * necessity for manual cleanup of all postmaster children.
+ */
+ if (rc & WL_POSTMASTER_DEATH)
+ PG_RETURN_BOOL(false);
+ }
+
+ ereport(WARNING,
+ (errmsg_plural("server did not promote within %d second",
+ "server did not promote within %d seconds",
+ wait_seconds,
+ wait_seconds)));
+ PG_RETURN_BOOL(false);
+}