summaryrefslogtreecommitdiffstats
path: root/src/backend/access/transam/xlogfuncs.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-13 13:44:03 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-13 13:44:03 +0000
commit293913568e6a7a86fd1479e1cff8e2ecb58d6568 (patch)
treefc3b469a3ec5ab71b36ea97cc7aaddb838423a0c /src/backend/access/transam/xlogfuncs.c
parentInitial commit. (diff)
downloadpostgresql-16-293913568e6a7a86fd1479e1cff8e2ecb58d6568.tar.xz
postgresql-16-293913568e6a7a86fd1479e1cff8e2ecb58d6568.zip
Adding upstream version 16.2.upstream/16.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/backend/access/transam/xlogfuncs.c')
-rw-r--r--src/backend/access/transam/xlogfuncs.c756
1 files changed, 756 insertions, 0 deletions
diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c
new file mode 100644
index 0000000..5044ff0
--- /dev/null
+++ b/src/backend/access/transam/xlogfuncs.c
@@ -0,0 +1,756 @@
+/*-------------------------------------------------------------------------
+ *
+ * xlogfuncs.c
+ *
+ * PostgreSQL write-ahead log manager user interface functions
+ *
+ * This file contains WAL control and information functions.
+ *
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/backend/access/transam/xlogfuncs.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <unistd.h>
+
+#include "access/htup_details.h"
+#include "access/xlog_internal.h"
+#include "access/xlogbackup.h"
+#include "access/xlogrecovery.h"
+#include "access/xlogutils.h"
+#include "catalog/pg_type.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "pgstat.h"
+#include "replication/walreceiver.h"
+#include "storage/fd.h"
+#include "storage/ipc.h"
+#include "storage/smgr.h"
+#include "storage/standby.h"
+#include "utils/builtins.h"
+#include "utils/guc.h"
+#include "utils/memutils.h"
+#include "utils/numeric.h"
+#include "utils/pg_lsn.h"
+#include "utils/timestamp.h"
+#include "utils/tuplestore.h"
+
+/*
+ * Backup-related variables.
+ */
+static BackupState *backup_state = NULL;
+static StringInfo tablespace_map = NULL;
+
+/* Session-level context for the SQL-callable backup functions */
+static MemoryContext backupcontext = NULL;
+
+/*
+ * pg_backup_start: set up for taking an on-line backup dump
+ *
+ * Essentially what this does is to create the contents required for the
+ * backup_label file and the tablespace map.
+ *
+ * Permission checking for this function is managed through the normal
+ * GRANT system.
+ */
+Datum
+pg_backup_start(PG_FUNCTION_ARGS)
+{
+ text *backupid = PG_GETARG_TEXT_PP(0);
+ bool fast = PG_GETARG_BOOL(1);
+ char *backupidstr;
+ SessionBackupState status = get_backup_status();
+ MemoryContext oldcontext;
+
+ backupidstr = text_to_cstring(backupid);
+
+ if (status == SESSION_BACKUP_RUNNING)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("a backup is already in progress in this session")));
+
+ /*
+ * backup_state and tablespace_map need to be long-lived as they are used
+ * in pg_backup_stop(). These are allocated in a dedicated memory context
+ * child of TopMemoryContext, deleted at the end of pg_backup_stop(). If
+ * an error happens before ending the backup, memory would be leaked in
+ * this context until pg_backup_start() is called again.
+ */
+ if (backupcontext == NULL)
+ {
+ backupcontext = AllocSetContextCreate(TopMemoryContext,
+ "on-line backup context",
+ ALLOCSET_START_SMALL_SIZES);
+ }
+ else
+ {
+ backup_state = NULL;
+ tablespace_map = NULL;
+ MemoryContextReset(backupcontext);
+ }
+
+ oldcontext = MemoryContextSwitchTo(backupcontext);
+ backup_state = (BackupState *) palloc0(sizeof(BackupState));
+ tablespace_map = makeStringInfo();
+ MemoryContextSwitchTo(oldcontext);
+
+ register_persistent_abort_backup_handler();
+ do_pg_backup_start(backupidstr, fast, NULL, backup_state, tablespace_map);
+
+ PG_RETURN_LSN(backup_state->startpoint);
+}
+
+
+/*
+ * pg_backup_stop: finish taking an on-line backup.
+ *
+ * The first parameter (variable 'waitforarchive'), which is optional,
+ * allows the user to choose if they want to wait for the WAL to be archived
+ * or if we should just return as soon as the WAL record is written.
+ *
+ * This function stops an in-progress backup, creates backup_label contents and
+ * it returns the backup stop LSN, backup_label and tablespace_map contents.
+ *
+ * The backup_label contains the user-supplied label string (typically this
+ * would be used to tell where the backup dump will be stored), the starting
+ * time, starting WAL location for the dump and so on. It is the caller's
+ * responsibility to write the backup_label and tablespace_map files in the
+ * data folder that will be restored from this backup.
+ *
+ * Permission checking for this function is managed through the normal
+ * GRANT system.
+ */
+Datum
+pg_backup_stop(PG_FUNCTION_ARGS)
+{
+#define PG_BACKUP_STOP_V2_COLS 3
+ TupleDesc tupdesc;
+ Datum values[PG_BACKUP_STOP_V2_COLS] = {0};
+ bool nulls[PG_BACKUP_STOP_V2_COLS] = {0};
+ bool waitforarchive = PG_GETARG_BOOL(0);
+ char *backup_label;
+ SessionBackupState status = get_backup_status();
+
+ /* Initialize attributes information in the tuple descriptor */
+ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+ elog(ERROR, "return type must be a row type");
+
+ if (status != SESSION_BACKUP_RUNNING)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("backup is not in progress"),
+ errhint("Did you call pg_backup_start()?")));
+
+ Assert(backup_state != NULL);
+ Assert(tablespace_map != NULL);
+
+ /* Stop the backup */
+ do_pg_backup_stop(backup_state, waitforarchive);
+
+ /* Build the contents of backup_label */
+ backup_label = build_backup_content(backup_state, false);
+
+ values[0] = LSNGetDatum(backup_state->stoppoint);
+ values[1] = CStringGetTextDatum(backup_label);
+ values[2] = CStringGetTextDatum(tablespace_map->data);
+
+ /* Deallocate backup-related variables */
+ pfree(backup_label);
+
+ /* Clean up the session-level state and its memory context */
+ backup_state = NULL;
+ tablespace_map = NULL;
+ MemoryContextDelete(backupcontext);
+ backupcontext = NULL;
+
+ /* Returns the record as Datum */
+ PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
+}
+
+/*
+ * pg_switch_wal: switch to next xlog file
+ *
+ * Permission checking for this function is managed through the normal
+ * GRANT system.
+ */
+Datum
+pg_switch_wal(PG_FUNCTION_ARGS)
+{
+ XLogRecPtr switchpoint;
+
+ if (RecoveryInProgress())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("recovery is in progress"),
+ errhint("WAL control functions cannot be executed during recovery.")));
+
+ switchpoint = RequestXLogSwitch(false);
+
+ /*
+ * As a convenience, return the WAL location of the switch record
+ */
+ PG_RETURN_LSN(switchpoint);
+}
+
+/*
+ * pg_log_standby_snapshot: call LogStandbySnapshot()
+ *
+ * Permission checking for this function is managed through the normal
+ * GRANT system.
+ */
+Datum
+pg_log_standby_snapshot(PG_FUNCTION_ARGS)
+{
+ XLogRecPtr recptr;
+
+ if (RecoveryInProgress())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("recovery is in progress"),
+ errhint("%s cannot be executed during recovery.",
+ "pg_log_standby_snapshot()")));
+
+ if (!XLogStandbyInfoActive())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("pg_log_standby_snapshot() can only be used if wal_level >= replica")));
+
+ recptr = LogStandbySnapshot();
+
+ /*
+ * As a convenience, return the WAL location of the last inserted record
+ */
+ PG_RETURN_LSN(recptr);
+}
+
+/*
+ * pg_create_restore_point: a named point for restore
+ *
+ * Permission checking for this function is managed through the normal
+ * GRANT system.
+ */
+Datum
+pg_create_restore_point(PG_FUNCTION_ARGS)
+{
+ text *restore_name = PG_GETARG_TEXT_PP(0);
+ char *restore_name_str;
+ XLogRecPtr restorepoint;
+
+ if (RecoveryInProgress())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("recovery is in progress"),
+ errhint("WAL control functions cannot be executed during recovery.")));
+
+ if (!XLogIsNeeded())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("WAL level not sufficient for creating a restore point"),
+ errhint("wal_level must be set to \"replica\" or \"logical\" at server start.")));
+
+ restore_name_str = text_to_cstring(restore_name);
+
+ if (strlen(restore_name_str) >= MAXFNAMELEN)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("value too long for restore point (maximum %d characters)", MAXFNAMELEN - 1)));
+
+ restorepoint = XLogRestorePoint(restore_name_str);
+
+ /*
+ * As a convenience, return the WAL location of the restore point record
+ */
+ PG_RETURN_LSN(restorepoint);
+}
+
+/*
+ * Report the current WAL write location (same format as pg_backup_start etc)
+ *
+ * This is useful for determining how much of WAL is visible to an external
+ * archiving process. Note that the data before this point is written out
+ * to the kernel, but is not necessarily synced to disk.
+ */
+Datum
+pg_current_wal_lsn(PG_FUNCTION_ARGS)
+{
+ XLogRecPtr current_recptr;
+
+ if (RecoveryInProgress())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("recovery is in progress"),
+ errhint("WAL control functions cannot be executed during recovery.")));
+
+ current_recptr = GetXLogWriteRecPtr();
+
+ PG_RETURN_LSN(current_recptr);
+}
+
+/*
+ * Report the current WAL insert location (same format as pg_backup_start etc)
+ *
+ * This function is mostly for debugging purposes.
+ */
+Datum
+pg_current_wal_insert_lsn(PG_FUNCTION_ARGS)
+{
+ XLogRecPtr current_recptr;
+
+ if (RecoveryInProgress())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("recovery is in progress"),
+ errhint("WAL control functions cannot be executed during recovery.")));
+
+ current_recptr = GetXLogInsertRecPtr();
+
+ PG_RETURN_LSN(current_recptr);
+}
+
+/*
+ * Report the current WAL flush location (same format as pg_backup_start etc)
+ *
+ * This function is mostly for debugging purposes.
+ */
+Datum
+pg_current_wal_flush_lsn(PG_FUNCTION_ARGS)
+{
+ XLogRecPtr current_recptr;
+
+ if (RecoveryInProgress())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("recovery is in progress"),
+ errhint("WAL control functions cannot be executed during recovery.")));
+
+ current_recptr = GetFlushRecPtr(NULL);
+
+ PG_RETURN_LSN(current_recptr);
+}
+
+/*
+ * Report the last WAL receive location (same format as pg_backup_start etc)
+ *
+ * This is useful for determining how much of WAL is guaranteed to be received
+ * and synced to disk by walreceiver.
+ */
+Datum
+pg_last_wal_receive_lsn(PG_FUNCTION_ARGS)
+{
+ XLogRecPtr recptr;
+
+ recptr = GetWalRcvFlushRecPtr(NULL, NULL);
+
+ if (recptr == 0)
+ PG_RETURN_NULL();
+
+ PG_RETURN_LSN(recptr);
+}
+
+/*
+ * Report the last WAL replay location (same format as pg_backup_start etc)
+ *
+ * This is useful for determining how much of WAL is visible to read-only
+ * connections during recovery.
+ */
+Datum
+pg_last_wal_replay_lsn(PG_FUNCTION_ARGS)
+{
+ XLogRecPtr recptr;
+
+ recptr = GetXLogReplayRecPtr(NULL);
+
+ if (recptr == 0)
+ PG_RETURN_NULL();
+
+ PG_RETURN_LSN(recptr);
+}
+
+/*
+ * Compute an xlog file name and decimal byte offset given a WAL location,
+ * such as is returned by pg_backup_stop() or pg_switch_wal().
+ *
+ * Note that a location exactly at a segment boundary is taken to be in
+ * the previous segment. This is usually the right thing, since the
+ * expected usage is to determine which xlog file(s) are ready to archive.
+ */
+Datum
+pg_walfile_name_offset(PG_FUNCTION_ARGS)
+{
+ XLogSegNo xlogsegno;
+ uint32 xrecoff;
+ XLogRecPtr locationpoint = PG_GETARG_LSN(0);
+ char xlogfilename[MAXFNAMELEN];
+ Datum values[2];
+ bool isnull[2];
+ TupleDesc resultTupleDesc;
+ HeapTuple resultHeapTuple;
+ Datum result;
+
+ if (RecoveryInProgress())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("recovery is in progress"),
+ errhint("%s cannot be executed during recovery.",
+ "pg_walfile_name_offset()")));
+
+ /*
+ * Construct a tuple descriptor for the result row. This must match this
+ * function's pg_proc entry!
+ */
+ resultTupleDesc = CreateTemplateTupleDesc(2);
+ TupleDescInitEntry(resultTupleDesc, (AttrNumber) 1, "file_name",
+ TEXTOID, -1, 0);
+ TupleDescInitEntry(resultTupleDesc, (AttrNumber) 2, "file_offset",
+ INT4OID, -1, 0);
+
+ resultTupleDesc = BlessTupleDesc(resultTupleDesc);
+
+ /*
+ * xlogfilename
+ */
+ XLByteToPrevSeg(locationpoint, xlogsegno, wal_segment_size);
+ XLogFileName(xlogfilename, GetWALInsertionTimeLine(), xlogsegno,
+ wal_segment_size);
+
+ values[0] = CStringGetTextDatum(xlogfilename);
+ isnull[0] = false;
+
+ /*
+ * offset
+ */
+ xrecoff = XLogSegmentOffset(locationpoint, wal_segment_size);
+
+ values[1] = UInt32GetDatum(xrecoff);
+ isnull[1] = false;
+
+ /*
+ * Tuple jam: Having first prepared your Datums, then squash together
+ */
+ resultHeapTuple = heap_form_tuple(resultTupleDesc, values, isnull);
+
+ result = HeapTupleGetDatum(resultHeapTuple);
+
+ PG_RETURN_DATUM(result);
+}
+
+/*
+ * Compute an xlog file name given a WAL location,
+ * such as is returned by pg_backup_stop() or pg_switch_wal().
+ */
+Datum
+pg_walfile_name(PG_FUNCTION_ARGS)
+{
+ XLogSegNo xlogsegno;
+ XLogRecPtr locationpoint = PG_GETARG_LSN(0);
+ char xlogfilename[MAXFNAMELEN];
+
+ if (RecoveryInProgress())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("recovery is in progress"),
+ errhint("%s cannot be executed during recovery.",
+ "pg_walfile_name()")));
+
+ XLByteToPrevSeg(locationpoint, xlogsegno, wal_segment_size);
+ XLogFileName(xlogfilename, GetWALInsertionTimeLine(), xlogsegno,
+ wal_segment_size);
+
+ PG_RETURN_TEXT_P(cstring_to_text(xlogfilename));
+}
+
+/*
+ * Extract the sequence number and the timeline ID from given a WAL file
+ * name.
+ */
+Datum
+pg_split_walfile_name(PG_FUNCTION_ARGS)
+{
+#define PG_SPLIT_WALFILE_NAME_COLS 2
+ char *fname = text_to_cstring(PG_GETARG_TEXT_PP(0));
+ char *fname_upper;
+ char *p;
+ TimeLineID tli;
+ XLogSegNo segno;
+ Datum values[PG_SPLIT_WALFILE_NAME_COLS] = {0};
+ bool isnull[PG_SPLIT_WALFILE_NAME_COLS] = {0};
+ TupleDesc tupdesc;
+ HeapTuple tuple;
+ char buf[256];
+ Datum result;
+
+ fname_upper = pstrdup(fname);
+
+ /* Capitalize WAL file name. */
+ for (p = fname_upper; *p; p++)
+ *p = pg_toupper((unsigned char) *p);
+
+ if (!IsXLogFileName(fname_upper))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid WAL file name \"%s\"", fname)));
+
+ XLogFromFileName(fname_upper, &tli, &segno, wal_segment_size);
+
+ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+ elog(ERROR, "return type must be a row type");
+
+ /* Convert to numeric. */
+ snprintf(buf, sizeof buf, UINT64_FORMAT, segno);
+ values[0] = DirectFunctionCall3(numeric_in,
+ CStringGetDatum(buf),
+ ObjectIdGetDatum(0),
+ Int32GetDatum(-1));
+
+ values[1] = Int64GetDatum(tli);
+
+ tuple = heap_form_tuple(tupdesc, values, isnull);
+ result = HeapTupleGetDatum(tuple);
+
+ PG_RETURN_DATUM(result);
+
+#undef PG_SPLIT_WALFILE_NAME_COLS
+}
+
+/*
+ * pg_wal_replay_pause - Request to pause recovery
+ *
+ * Permission checking for this function is managed through the normal
+ * GRANT system.
+ */
+Datum
+pg_wal_replay_pause(PG_FUNCTION_ARGS)
+{
+ if (!RecoveryInProgress())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("recovery is not in progress"),
+ errhint("Recovery control functions can only be executed during recovery.")));
+
+ if (PromoteIsTriggered())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("standby promotion is ongoing"),
+ errhint("%s cannot be executed after promotion is triggered.",
+ "pg_wal_replay_pause()")));
+
+ SetRecoveryPause(true);
+
+ /* wake up the recovery process so that it can process the pause request */
+ WakeupRecovery();
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * pg_wal_replay_resume - resume recovery now
+ *
+ * Permission checking for this function is managed through the normal
+ * GRANT system.
+ */
+Datum
+pg_wal_replay_resume(PG_FUNCTION_ARGS)
+{
+ if (!RecoveryInProgress())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("recovery is not in progress"),
+ errhint("Recovery control functions can only be executed during recovery.")));
+
+ if (PromoteIsTriggered())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("standby promotion is ongoing"),
+ errhint("%s cannot be executed after promotion is triggered.",
+ "pg_wal_replay_resume()")));
+
+ SetRecoveryPause(false);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * pg_is_wal_replay_paused
+ */
+Datum
+pg_is_wal_replay_paused(PG_FUNCTION_ARGS)
+{
+ if (!RecoveryInProgress())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("recovery is not in progress"),
+ errhint("Recovery control functions can only be executed during recovery.")));
+
+ PG_RETURN_BOOL(GetRecoveryPauseState() != RECOVERY_NOT_PAUSED);
+}
+
+/*
+ * pg_get_wal_replay_pause_state - Returns the recovery pause state.
+ *
+ * Returned values:
+ *
+ * 'not paused' - if pause is not requested
+ * 'pause requested' - if pause is requested but recovery is not yet paused
+ * 'paused' - if recovery is paused
+ */
+Datum
+pg_get_wal_replay_pause_state(PG_FUNCTION_ARGS)
+{
+ char *statestr = NULL;
+
+ if (!RecoveryInProgress())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("recovery is not in progress"),
+ errhint("Recovery control functions can only be executed during recovery.")));
+
+ /* get the recovery pause state */
+ switch (GetRecoveryPauseState())
+ {
+ case RECOVERY_NOT_PAUSED:
+ statestr = "not paused";
+ break;
+ case RECOVERY_PAUSE_REQUESTED:
+ statestr = "pause requested";
+ break;
+ case RECOVERY_PAUSED:
+ statestr = "paused";
+ break;
+ }
+
+ Assert(statestr != NULL);
+ PG_RETURN_TEXT_P(cstring_to_text(statestr));
+}
+
+/*
+ * Returns timestamp of latest processed commit/abort record.
+ *
+ * When the server has been started normally without recovery the function
+ * returns NULL.
+ */
+Datum
+pg_last_xact_replay_timestamp(PG_FUNCTION_ARGS)
+{
+ TimestampTz xtime;
+
+ xtime = GetLatestXTime();
+ if (xtime == 0)
+ PG_RETURN_NULL();
+
+ PG_RETURN_TIMESTAMPTZ(xtime);
+}
+
+/*
+ * Returns bool with current recovery mode, a global state.
+ */
+Datum
+pg_is_in_recovery(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BOOL(RecoveryInProgress());
+}
+
+/*
+ * Compute the difference in bytes between two WAL locations.
+ */
+Datum
+pg_wal_lsn_diff(PG_FUNCTION_ARGS)
+{
+ Datum result;
+
+ result = DirectFunctionCall2(pg_lsn_mi,
+ PG_GETARG_DATUM(0),
+ PG_GETARG_DATUM(1));
+
+ PG_RETURN_DATUM(result);
+}
+
+/*
+ * Promotes a standby server.
+ *
+ * A result of "true" means that promotion has been completed if "wait" is
+ * "true", or initiated if "wait" is false.
+ */
+Datum
+pg_promote(PG_FUNCTION_ARGS)
+{
+ bool wait = PG_GETARG_BOOL(0);
+ int wait_seconds = PG_GETARG_INT32(1);
+ FILE *promote_file;
+ int i;
+
+ if (!RecoveryInProgress())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("recovery is not in progress"),
+ errhint("Recovery control functions can only be executed during recovery.")));
+
+ if (wait_seconds <= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("\"wait_seconds\" must not be negative or zero")));
+
+ /* create the promote signal file */
+ promote_file = AllocateFile(PROMOTE_SIGNAL_FILE, "w");
+ if (!promote_file)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not create file \"%s\": %m",
+ PROMOTE_SIGNAL_FILE)));
+
+ if (FreeFile(promote_file))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not write file \"%s\": %m",
+ PROMOTE_SIGNAL_FILE)));
+
+ /* signal the postmaster */
+ if (kill(PostmasterPid, SIGUSR1) != 0)
+ {
+ ereport(WARNING,
+ (errmsg("failed to send signal to postmaster: %m")));
+ (void) unlink(PROMOTE_SIGNAL_FILE);
+ PG_RETURN_BOOL(false);
+ }
+
+ /* return immediately if waiting was not requested */
+ if (!wait)
+ PG_RETURN_BOOL(true);
+
+ /* wait for the amount of time wanted until promotion */
+#define WAITS_PER_SECOND 10
+ for (i = 0; i < WAITS_PER_SECOND * wait_seconds; i++)
+ {
+ int rc;
+
+ ResetLatch(MyLatch);
+
+ if (!RecoveryInProgress())
+ PG_RETURN_BOOL(true);
+
+ CHECK_FOR_INTERRUPTS();
+
+ rc = WaitLatch(MyLatch,
+ WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+ 1000L / WAITS_PER_SECOND,
+ WAIT_EVENT_PROMOTE);
+
+ /*
+ * Emergency bailout if postmaster has died. This is to avoid the
+ * necessity for manual cleanup of all postmaster children.
+ */
+ if (rc & WL_POSTMASTER_DEATH)
+ PG_RETURN_BOOL(false);
+ }
+
+ ereport(WARNING,
+ (errmsg_plural("server did not promote within %d second",
+ "server did not promote within %d seconds",
+ wait_seconds,
+ wait_seconds)));
+ PG_RETURN_BOOL(false);
+}