diff options
Diffstat (limited to 'src/backend/access/transam/xlogfuncs.c')
-rw-r--r-- | src/backend/access/transam/xlogfuncs.c | 830 |
1 files changed, 830 insertions, 0 deletions
diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c new file mode 100644 index 0000000..b98deb7 --- /dev/null +++ b/src/backend/access/transam/xlogfuncs.c @@ -0,0 +1,830 @@ +/*------------------------------------------------------------------------- + * + * xlogfuncs.c + * + * PostgreSQL write-ahead log manager user interface functions + * + * This file contains WAL control and information functions. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/backend/access/transam/xlogfuncs.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <unistd.h> + +#include "access/htup_details.h" +#include "access/xlog.h" +#include "access/xlog_internal.h" +#include "access/xlogutils.h" +#include "catalog/pg_type.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "pgstat.h" +#include "replication/walreceiver.h" +#include "storage/fd.h" +#include "storage/ipc.h" +#include "storage/smgr.h" +#include "utils/builtins.h" +#include "utils/guc.h" +#include "utils/memutils.h" +#include "utils/numeric.h" +#include "utils/pg_lsn.h" +#include "utils/timestamp.h" +#include "utils/tuplestore.h" + +/* + * Store label file and tablespace map during non-exclusive backups. + */ +static StringInfo label_file; +static StringInfo tblspc_map_file; + +/* + * pg_start_backup: set up for taking an on-line backup dump + * + * Essentially what this does is to create a backup label file in $PGDATA, + * where it will be archived as part of the backup dump. The label file + * contains the user-supplied label string (typically this would be used + * to tell where the backup dump will be stored) and the starting time and + * starting WAL location for the dump. + * + * Permission checking for this function is managed through the normal + * GRANT system. + */ +Datum +pg_start_backup(PG_FUNCTION_ARGS) +{ + text *backupid = PG_GETARG_TEXT_PP(0); + bool fast = PG_GETARG_BOOL(1); + bool exclusive = PG_GETARG_BOOL(2); + char *backupidstr; + XLogRecPtr startpoint; + SessionBackupState status = get_backup_status(); + + backupidstr = text_to_cstring(backupid); + + if (status == SESSION_BACKUP_NON_EXCLUSIVE) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("a backup is already in progress in this session"))); + + if (exclusive) + { + startpoint = do_pg_start_backup(backupidstr, fast, NULL, NULL, + NULL, NULL); + } + else + { + MemoryContext oldcontext; + + /* + * Label file and tablespace map file need to be long-lived, since + * they are read in pg_stop_backup. + */ + oldcontext = MemoryContextSwitchTo(TopMemoryContext); + label_file = makeStringInfo(); + tblspc_map_file = makeStringInfo(); + MemoryContextSwitchTo(oldcontext); + + register_persistent_abort_backup_handler(); + + startpoint = do_pg_start_backup(backupidstr, fast, NULL, label_file, + NULL, tblspc_map_file); + } + + PG_RETURN_LSN(startpoint); +} + +/* + * pg_stop_backup: finish taking an on-line backup dump + * + * We write an end-of-backup WAL record, and remove the backup label file + * created by pg_start_backup, creating a backup history file in pg_wal + * instead (whence it will immediately be archived). The backup history file + * contains the same info found in the label file, plus the backup-end time + * and WAL location. Before 9.0, the backup-end time was read from the backup + * history file at the beginning of archive recovery, but we now use the WAL + * record for that and the file is for informational and debug purposes only. + * + * Note: different from CancelBackup which just cancels online backup mode. + * + * Note: this version is only called to stop an exclusive backup. The function + * pg_stop_backup_v2 (overloaded as pg_stop_backup in SQL) is called to + * stop non-exclusive backups. + * + * Permission checking for this function is managed through the normal + * GRANT system. + */ +Datum +pg_stop_backup(PG_FUNCTION_ARGS) +{ + XLogRecPtr stoppoint; + SessionBackupState status = get_backup_status(); + + if (status == SESSION_BACKUP_NON_EXCLUSIVE) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("non-exclusive backup in progress"), + errhint("Did you mean to use pg_stop_backup('f')?"))); + + /* + * Exclusive backups were typically started in a different connection, so + * don't try to verify that status of backup is set to + * SESSION_BACKUP_EXCLUSIVE in this function. Actual verification that an + * exclusive backup is in fact running is handled inside + * do_pg_stop_backup. + */ + stoppoint = do_pg_stop_backup(NULL, true, NULL); + + PG_RETURN_LSN(stoppoint); +} + + +/* + * pg_stop_backup_v2: finish taking exclusive or nonexclusive on-line backup. + * + * Works the same as pg_stop_backup, except for non-exclusive backups it returns + * the backup label and tablespace map files as text fields in as part of the + * resultset. + * + * The first parameter (variable 'exclusive') allows the user to tell us if + * this is an exclusive or a non-exclusive backup. + * + * The second parameter (variable 'waitforarchive'), which is optional, + * allows the user to choose if they want to wait for the WAL to be archived + * or if we should just return as soon as the WAL record is written. + * + * Permission checking for this function is managed through the normal + * GRANT system. + */ +Datum +pg_stop_backup_v2(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupdesc; + Tuplestorestate *tupstore; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Datum values[3]; + bool nulls[3]; + + bool exclusive = PG_GETARG_BOOL(0); + bool waitforarchive = PG_GETARG_BOOL(1); + XLogRecPtr stoppoint; + SessionBackupState status = get_backup_status(); + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupdesc; + + MemoryContextSwitchTo(oldcontext); + + MemSet(values, 0, sizeof(values)); + MemSet(nulls, 0, sizeof(nulls)); + + if (exclusive) + { + if (status == SESSION_BACKUP_NON_EXCLUSIVE) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("non-exclusive backup in progress"), + errhint("Did you mean to use pg_stop_backup('f')?"))); + + /* + * Stop the exclusive backup, and since we're in an exclusive backup + * return NULL for both backup_label and tablespace_map. + */ + stoppoint = do_pg_stop_backup(NULL, waitforarchive, NULL); + + nulls[1] = true; + nulls[2] = true; + } + else + { + if (status != SESSION_BACKUP_NON_EXCLUSIVE) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("non-exclusive backup is not in progress"), + errhint("Did you mean to use pg_stop_backup('t')?"))); + + /* + * Stop the non-exclusive backup. Return a copy of the backup label + * and tablespace map so they can be written to disk by the caller. + */ + stoppoint = do_pg_stop_backup(label_file->data, waitforarchive, NULL); + + values[1] = CStringGetTextDatum(label_file->data); + values[2] = CStringGetTextDatum(tblspc_map_file->data); + + /* Free structures allocated in TopMemoryContext */ + pfree(label_file->data); + pfree(label_file); + label_file = NULL; + pfree(tblspc_map_file->data); + pfree(tblspc_map_file); + tblspc_map_file = NULL; + } + + /* Stoppoint is included on both exclusive and nonexclusive backups */ + values[0] = LSNGetDatum(stoppoint); + + tuplestore_putvalues(tupstore, tupdesc, values, nulls); + tuplestore_donestoring(tupstore); + + return (Datum) 0; +} + +/* + * pg_switch_wal: switch to next xlog file + * + * Permission checking for this function is managed through the normal + * GRANT system. + */ +Datum +pg_switch_wal(PG_FUNCTION_ARGS) +{ + XLogRecPtr switchpoint; + + if (RecoveryInProgress()) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("recovery is in progress"), + errhint("WAL control functions cannot be executed during recovery."))); + + switchpoint = RequestXLogSwitch(false); + + /* + * As a convenience, return the WAL location of the switch record + */ + PG_RETURN_LSN(switchpoint); +} + +/* + * pg_create_restore_point: a named point for restore + * + * Permission checking for this function is managed through the normal + * GRANT system. + */ +Datum +pg_create_restore_point(PG_FUNCTION_ARGS) +{ + text *restore_name = PG_GETARG_TEXT_PP(0); + char *restore_name_str; + XLogRecPtr restorepoint; + + if (RecoveryInProgress()) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("recovery is in progress"), + errhint("WAL control functions cannot be executed during recovery."))); + + if (!XLogIsNeeded()) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("WAL level not sufficient for creating a restore point"), + errhint("wal_level must be set to \"replica\" or \"logical\" at server start."))); + + restore_name_str = text_to_cstring(restore_name); + + if (strlen(restore_name_str) >= MAXFNAMELEN) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("value too long for restore point (maximum %d characters)", MAXFNAMELEN - 1))); + + restorepoint = XLogRestorePoint(restore_name_str); + + /* + * As a convenience, return the WAL location of the restore point record + */ + PG_RETURN_LSN(restorepoint); +} + +/* + * Report the current WAL write location (same format as pg_start_backup etc) + * + * This is useful for determining how much of WAL is visible to an external + * archiving process. Note that the data before this point is written out + * to the kernel, but is not necessarily synced to disk. + */ +Datum +pg_current_wal_lsn(PG_FUNCTION_ARGS) +{ + XLogRecPtr current_recptr; + + if (RecoveryInProgress()) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("recovery is in progress"), + errhint("WAL control functions cannot be executed during recovery."))); + + current_recptr = GetXLogWriteRecPtr(); + + PG_RETURN_LSN(current_recptr); +} + +/* + * Report the current WAL insert location (same format as pg_start_backup etc) + * + * This function is mostly for debugging purposes. + */ +Datum +pg_current_wal_insert_lsn(PG_FUNCTION_ARGS) +{ + XLogRecPtr current_recptr; + + if (RecoveryInProgress()) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("recovery is in progress"), + errhint("WAL control functions cannot be executed during recovery."))); + + current_recptr = GetXLogInsertRecPtr(); + + PG_RETURN_LSN(current_recptr); +} + +/* + * Report the current WAL flush location (same format as pg_start_backup etc) + * + * This function is mostly for debugging purposes. + */ +Datum +pg_current_wal_flush_lsn(PG_FUNCTION_ARGS) +{ + XLogRecPtr current_recptr; + + if (RecoveryInProgress()) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("recovery is in progress"), + errhint("WAL control functions cannot be executed during recovery."))); + + current_recptr = GetFlushRecPtr(); + + PG_RETURN_LSN(current_recptr); +} + +/* + * Report the last WAL receive location (same format as pg_start_backup etc) + * + * This is useful for determining how much of WAL is guaranteed to be received + * and synced to disk by walreceiver. + */ +Datum +pg_last_wal_receive_lsn(PG_FUNCTION_ARGS) +{ + XLogRecPtr recptr; + + recptr = GetWalRcvFlushRecPtr(NULL, NULL); + + if (recptr == 0) + PG_RETURN_NULL(); + + PG_RETURN_LSN(recptr); +} + +/* + * Report the last WAL replay location (same format as pg_start_backup etc) + * + * This is useful for determining how much of WAL is visible to read-only + * connections during recovery. + */ +Datum +pg_last_wal_replay_lsn(PG_FUNCTION_ARGS) +{ + XLogRecPtr recptr; + + recptr = GetXLogReplayRecPtr(NULL); + + if (recptr == 0) + PG_RETURN_NULL(); + + PG_RETURN_LSN(recptr); +} + +/* + * Compute an xlog file name and decimal byte offset given a WAL location, + * such as is returned by pg_stop_backup() or pg_switch_wal(). + * + * Note that a location exactly at a segment boundary is taken to be in + * the previous segment. This is usually the right thing, since the + * expected usage is to determine which xlog file(s) are ready to archive. + */ +Datum +pg_walfile_name_offset(PG_FUNCTION_ARGS) +{ + XLogSegNo xlogsegno; + uint32 xrecoff; + XLogRecPtr locationpoint = PG_GETARG_LSN(0); + char xlogfilename[MAXFNAMELEN]; + Datum values[2]; + bool isnull[2]; + TupleDesc resultTupleDesc; + HeapTuple resultHeapTuple; + Datum result; + + if (RecoveryInProgress()) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("recovery is in progress"), + errhint("%s cannot be executed during recovery.", + "pg_walfile_name_offset()"))); + + /* + * Construct a tuple descriptor for the result row. This must match this + * function's pg_proc entry! + */ + resultTupleDesc = CreateTemplateTupleDesc(2); + TupleDescInitEntry(resultTupleDesc, (AttrNumber) 1, "file_name", + TEXTOID, -1, 0); + TupleDescInitEntry(resultTupleDesc, (AttrNumber) 2, "file_offset", + INT4OID, -1, 0); + + resultTupleDesc = BlessTupleDesc(resultTupleDesc); + + /* + * xlogfilename + */ + XLByteToPrevSeg(locationpoint, xlogsegno, wal_segment_size); + XLogFileName(xlogfilename, ThisTimeLineID, xlogsegno, wal_segment_size); + + values[0] = CStringGetTextDatum(xlogfilename); + isnull[0] = false; + + /* + * offset + */ + xrecoff = XLogSegmentOffset(locationpoint, wal_segment_size); + + values[1] = UInt32GetDatum(xrecoff); + isnull[1] = false; + + /* + * Tuple jam: Having first prepared your Datums, then squash together + */ + resultHeapTuple = heap_form_tuple(resultTupleDesc, values, isnull); + + result = HeapTupleGetDatum(resultHeapTuple); + + PG_RETURN_DATUM(result); +} + +/* + * Compute an xlog file name given a WAL location, + * such as is returned by pg_stop_backup() or pg_switch_wal(). + */ +Datum +pg_walfile_name(PG_FUNCTION_ARGS) +{ + XLogSegNo xlogsegno; + XLogRecPtr locationpoint = PG_GETARG_LSN(0); + char xlogfilename[MAXFNAMELEN]; + + if (RecoveryInProgress()) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("recovery is in progress"), + errhint("%s cannot be executed during recovery.", + "pg_walfile_name()"))); + + XLByteToPrevSeg(locationpoint, xlogsegno, wal_segment_size); + XLogFileName(xlogfilename, ThisTimeLineID, xlogsegno, wal_segment_size); + + PG_RETURN_TEXT_P(cstring_to_text(xlogfilename)); +} + +/* + * pg_wal_replay_pause - Request to pause recovery + * + * Permission checking for this function is managed through the normal + * GRANT system. + */ +Datum +pg_wal_replay_pause(PG_FUNCTION_ARGS) +{ + if (!RecoveryInProgress()) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("recovery is not in progress"), + errhint("Recovery control functions can only be executed during recovery."))); + + if (PromoteIsTriggered()) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("standby promotion is ongoing"), + errhint("%s cannot be executed after promotion is triggered.", + "pg_wal_replay_pause()"))); + + SetRecoveryPause(true); + + /* wake up the recovery process so that it can process the pause request */ + WakeupRecovery(); + + PG_RETURN_VOID(); +} + +/* + * pg_wal_replay_resume - resume recovery now + * + * Permission checking for this function is managed through the normal + * GRANT system. + */ +Datum +pg_wal_replay_resume(PG_FUNCTION_ARGS) +{ + if (!RecoveryInProgress()) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("recovery is not in progress"), + errhint("Recovery control functions can only be executed during recovery."))); + + if (PromoteIsTriggered()) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("standby promotion is ongoing"), + errhint("%s cannot be executed after promotion is triggered.", + "pg_wal_replay_resume()"))); + + SetRecoveryPause(false); + + PG_RETURN_VOID(); +} + +/* + * pg_is_wal_replay_paused + */ +Datum +pg_is_wal_replay_paused(PG_FUNCTION_ARGS) +{ + if (!RecoveryInProgress()) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("recovery is not in progress"), + errhint("Recovery control functions can only be executed during recovery."))); + + PG_RETURN_BOOL(GetRecoveryPauseState() != RECOVERY_NOT_PAUSED); +} + +/* + * pg_get_wal_replay_pause_state - Returns the recovery pause state. + * + * Returned values: + * + * 'not paused' - if pause is not requested + * 'pause requested' - if pause is requested but recovery is not yet paused + * 'paused' - if recovery is paused + */ +Datum +pg_get_wal_replay_pause_state(PG_FUNCTION_ARGS) +{ + char *statestr = NULL; + + if (!RecoveryInProgress()) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("recovery is not in progress"), + errhint("Recovery control functions can only be executed during recovery."))); + + /* get the recovery pause state */ + switch (GetRecoveryPauseState()) + { + case RECOVERY_NOT_PAUSED: + statestr = "not paused"; + break; + case RECOVERY_PAUSE_REQUESTED: + statestr = "pause requested"; + break; + case RECOVERY_PAUSED: + statestr = "paused"; + break; + } + + Assert(statestr != NULL); + PG_RETURN_TEXT_P(cstring_to_text(statestr)); +} + +/* + * Returns timestamp of latest processed commit/abort record. + * + * When the server has been started normally without recovery the function + * returns NULL. + */ +Datum +pg_last_xact_replay_timestamp(PG_FUNCTION_ARGS) +{ + TimestampTz xtime; + + xtime = GetLatestXTime(); + if (xtime == 0) + PG_RETURN_NULL(); + + PG_RETURN_TIMESTAMPTZ(xtime); +} + +/* + * Returns bool with current recovery mode, a global state. + */ +Datum +pg_is_in_recovery(PG_FUNCTION_ARGS) +{ + PG_RETURN_BOOL(RecoveryInProgress()); +} + +/* + * Compute the difference in bytes between two WAL locations. + */ +Datum +pg_wal_lsn_diff(PG_FUNCTION_ARGS) +{ + Datum result; + + result = DirectFunctionCall2(pg_lsn_mi, + PG_GETARG_DATUM(0), + PG_GETARG_DATUM(1)); + + PG_RETURN_NUMERIC(result); +} + +/* + * Returns bool with current on-line backup mode, a global state. + */ +Datum +pg_is_in_backup(PG_FUNCTION_ARGS) +{ + PG_RETURN_BOOL(BackupInProgress()); +} + +/* + * Returns start time of an online exclusive backup. + * + * When there's no exclusive backup in progress, the function + * returns NULL. + */ +Datum +pg_backup_start_time(PG_FUNCTION_ARGS) +{ + Datum xtime; + FILE *lfp; + char fline[MAXPGPATH]; + char backup_start_time[30]; + + /* + * See if label file is present + */ + lfp = AllocateFile(BACKUP_LABEL_FILE, "r"); + if (lfp == NULL) + { + if (errno != ENOENT) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", + BACKUP_LABEL_FILE))); + PG_RETURN_NULL(); + } + + /* + * Parse the file to find the START TIME line. + */ + backup_start_time[0] = '\0'; + while (fgets(fline, sizeof(fline), lfp) != NULL) + { + if (sscanf(fline, "START TIME: %25[^\n]\n", backup_start_time) == 1) + break; + } + + /* Check for a read error. */ + if (ferror(lfp)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", BACKUP_LABEL_FILE))); + + /* Close the backup label file. */ + if (FreeFile(lfp)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not close file \"%s\": %m", BACKUP_LABEL_FILE))); + + if (strlen(backup_start_time) == 0) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE))); + + /* + * Convert the time string read from file to TimestampTz form. + */ + xtime = DirectFunctionCall3(timestamptz_in, + CStringGetDatum(backup_start_time), + ObjectIdGetDatum(InvalidOid), + Int32GetDatum(-1)); + + PG_RETURN_DATUM(xtime); +} + +/* + * Promotes a standby server. + * + * A result of "true" means that promotion has been completed if "wait" is + * "true", or initiated if "wait" is false. + */ +Datum +pg_promote(PG_FUNCTION_ARGS) +{ + bool wait = PG_GETARG_BOOL(0); + int wait_seconds = PG_GETARG_INT32(1); + FILE *promote_file; + int i; + + if (!RecoveryInProgress()) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("recovery is not in progress"), + errhint("Recovery control functions can only be executed during recovery."))); + + if (wait_seconds <= 0) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("\"wait_seconds\" must not be negative or zero"))); + + /* create the promote signal file */ + promote_file = AllocateFile(PROMOTE_SIGNAL_FILE, "w"); + if (!promote_file) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not create file \"%s\": %m", + PROMOTE_SIGNAL_FILE))); + + if (FreeFile(promote_file)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write file \"%s\": %m", + PROMOTE_SIGNAL_FILE))); + + /* signal the postmaster */ + if (kill(PostmasterPid, SIGUSR1) != 0) + { + ereport(WARNING, + (errmsg("failed to send signal to postmaster: %m"))); + (void) unlink(PROMOTE_SIGNAL_FILE); + PG_RETURN_BOOL(false); + } + + /* return immediately if waiting was not requested */ + if (!wait) + PG_RETURN_BOOL(true); + + /* wait for the amount of time wanted until promotion */ +#define WAITS_PER_SECOND 10 + for (i = 0; i < WAITS_PER_SECOND * wait_seconds; i++) + { + int rc; + + ResetLatch(MyLatch); + + if (!RecoveryInProgress()) + PG_RETURN_BOOL(true); + + CHECK_FOR_INTERRUPTS(); + + rc = WaitLatch(MyLatch, + WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, + 1000L / WAITS_PER_SECOND, + WAIT_EVENT_PROMOTE); + + /* + * Emergency bailout if postmaster has died. This is to avoid the + * necessity for manual cleanup of all postmaster children. + */ + if (rc & WL_POSTMASTER_DEATH) + PG_RETURN_BOOL(false); + } + + ereport(WARNING, + (errmsg_plural("server did not promote within %d second", + "server did not promote within %d seconds", + wait_seconds, + wait_seconds))); + PG_RETURN_BOOL(false); +} |