/*-------------------------------------------------------------------------
 *
 * procsignal.c
 *	  Routines for interprocess signaling
 *
 *
 * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/storage/ipc/procsignal.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <signal.h>
#include <unistd.h>

#include "access/parallel.h"
#include "commands/async.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "replication/walsender.h"
#include "storage/ipc.h"
#include "storage/latch.h"
#include "storage/proc.h"
#include "storage/shmem.h"
#include "storage/sinval.h"
#include "tcop/tcopprot.h"

/*
 * The SIGUSR1 signal is multiplexed to support signaling multiple event
 * types. The specific reason is communicated via flags in shared memory.
 * We keep a boolean flag for each possible "reason", so that different
 * reasons can be signaled to a process concurrently. (However, if the same
 * reason is signaled more than once nearly simultaneously, the process may
 * observe it only once.)
 *
 * Each process that wants to receive signals registers its process ID
 * in the ProcSignalSlots array. The array is indexed by backend ID to make
 * slot allocation simple, and to avoid having to search the array when you
 * know the backend ID of the process you're signaling. (We do support
 * signaling without backend ID, but it's a bit less efficient.)
 *
 * The flags are actually declared as "volatile sig_atomic_t" for maximum
 * portability. This should ensure that loads and stores of the flag
 * values are atomic, allowing us to dispense with any explicit locking.
*
 * pss_signalFlags are intended to be set in cases where we don't need to
 * keep track of whether or not the target process has handled the signal,
 * but sometimes we need confirmation, as when making a global state change
 * that cannot be considered complete until all backends have taken notice
 * of it. For such use cases, we set a bit in pss_barrierCheckMask and then
 * increment the current "barrier generation"; when the new barrier generation
 * (or greater) appears in the pss_barrierGeneration flag of every process,
 * we know that the message has been received everywhere.
 */
typedef struct
{
	/* PID of the process registered in this slot; 0 while the slot is free */
	pid_t		pss_pid;
	/* one pending flag per ProcSignalReason, indexed by the reason value */
	sig_atomic_t pss_signalFlags[NUM_PROCSIGNALS];
	/*
	 * Barrier generation this process has advertised as absorbed; set to
	 * PG_UINT64_MAX while the slot is unused so that barrier waits skip it
	 */
	pg_atomic_uint64 pss_barrierGeneration;
	/* bitmask, one bit per barrier type, of barriers still to be processed */
	pg_atomic_uint32 pss_barrierCheckMask;
} ProcSignalSlot;

/*
 * Information that is global to the entire ProcSignal system can be stored
 * here.
 *
 * psh_barrierGeneration is the highest barrier generation in existence.
 */
typedef struct
{
	/* incremented once per EmitProcSignalBarrier call */
	pg_atomic_uint64 psh_barrierGeneration;
	/* NumProcSignalSlots entries, sized by ProcSignalShmemSize */
	ProcSignalSlot psh_slot[FLEXIBLE_ARRAY_MEMBER];
} ProcSignalHeader;

/*
 * We reserve a slot for each possible BackendId, plus one for each
 * possible auxiliary process type.  (This scheme assumes there is not
 * more than one of any auxiliary process type at a time.)
 */
#define NumProcSignalSlots	(MaxBackends + NUM_AUXPROCTYPES)

/* Check whether the relevant type bit is set in the flags.
*/ #define BARRIER_SHOULD_CHECK(flags, type) \ (((flags) & (((uint32) 1) << (uint32) (type))) != 0) static ProcSignalHeader *ProcSignal = NULL; static volatile ProcSignalSlot *MyProcSignalSlot = NULL; static bool CheckProcSignal(ProcSignalReason reason); static void CleanupProcSignalState(int status, Datum arg); static void ProcessBarrierPlaceholder(void); /* * ProcSignalShmemSize * Compute space needed for procsignal's shared memory */ Size ProcSignalShmemSize(void) { Size size; size = mul_size(NumProcSignalSlots, sizeof(ProcSignalSlot)); size = add_size(size, offsetof(ProcSignalHeader, psh_slot)); return size; } /* * ProcSignalShmemInit * Allocate and initialize procsignal's shared memory */ void ProcSignalShmemInit(void) { Size size = ProcSignalShmemSize(); bool found; ProcSignal = (ProcSignalHeader *) ShmemInitStruct("ProcSignal", size, &found); /* If we're first, initialize. */ if (!found) { int i; pg_atomic_init_u64(&ProcSignal->psh_barrierGeneration, 0); for (i = 0; i < NumProcSignalSlots; ++i) { ProcSignalSlot *slot = &ProcSignal->psh_slot[i]; slot->pss_pid = 0; MemSet(slot->pss_signalFlags, 0, sizeof(slot->pss_signalFlags)); pg_atomic_init_u64(&slot->pss_barrierGeneration, PG_UINT64_MAX); pg_atomic_init_u32(&slot->pss_barrierCheckMask, 0); } } } /* * ProcSignalInit * Register the current process in the procsignal array * * The passed index should be my BackendId if the process has one, * or MaxBackends + aux process type if not. */ void ProcSignalInit(int pss_idx) { volatile ProcSignalSlot *slot; uint64 barrier_generation; Assert(pss_idx >= 1 && pss_idx <= NumProcSignalSlots); slot = &ProcSignal->psh_slot[pss_idx - 1]; /* sanity check */ if (slot->pss_pid != 0) elog(LOG, "process %d taking over ProcSignal slot %d, but it's not empty", MyProcPid, pss_idx); /* Clear out any leftover signal reasons */ MemSet(slot->pss_signalFlags, 0, NUM_PROCSIGNALS * sizeof(sig_atomic_t)); /* * Initialize barrier state. 
Since we're a brand-new process, there * shouldn't be any leftover backend-private state that needs to be * updated. Therefore, we can broadcast the latest barrier generation and * disregard any previously-set check bits. * * NB: This only works if this initialization happens early enough in the * startup sequence that we haven't yet cached any state that might need * to be invalidated. That's also why we have a memory barrier here, to be * sure that any later reads of memory happen strictly after this. */ pg_atomic_write_u32(&slot->pss_barrierCheckMask, 0); barrier_generation = pg_atomic_read_u64(&ProcSignal->psh_barrierGeneration); pg_atomic_write_u64(&slot->pss_barrierGeneration, barrier_generation); pg_memory_barrier(); /* Mark slot with my PID */ slot->pss_pid = MyProcPid; /* Remember slot location for CheckProcSignal */ MyProcSignalSlot = slot; /* Set up to release the slot on process exit */ on_shmem_exit(CleanupProcSignalState, Int32GetDatum(pss_idx)); } /* * CleanupProcSignalState * Remove current process from ProcSignal mechanism * * This function is called via on_shmem_exit() during backend shutdown. */ static void CleanupProcSignalState(int status, Datum arg) { int pss_idx = DatumGetInt32(arg); volatile ProcSignalSlot *slot; slot = &ProcSignal->psh_slot[pss_idx - 1]; Assert(slot == MyProcSignalSlot); /* * Clear MyProcSignalSlot, so that a SIGUSR1 received after this point * won't try to access it after it's no longer ours (and perhaps even * after we've unmapped the shared memory segment). */ MyProcSignalSlot = NULL; /* sanity check */ if (slot->pss_pid != MyProcPid) { /* * don't ERROR here. We're exiting anyway, and don't want to get into * infinite loop trying to exit */ elog(LOG, "process %d releasing ProcSignal slot %d, but it contains %d", MyProcPid, pss_idx, (int) slot->pss_pid); return; /* XXX better to zero the slot anyway? */ } /* * Make this slot look like it's absorbed all possible barriers, so that * no barrier waits block on it. 
*/ pg_atomic_write_u64(&slot->pss_barrierGeneration, PG_UINT64_MAX); slot->pss_pid = 0; } /* * SendProcSignal * Send a signal to a Postgres process * * Providing backendId is optional, but it will speed up the operation. * * On success (a signal was sent), zero is returned. * On error, -1 is returned, and errno is set (typically to ESRCH or EPERM). * * Not to be confused with ProcSendSignal */ int SendProcSignal(pid_t pid, ProcSignalReason reason, BackendId backendId) { volatile ProcSignalSlot *slot; if (backendId != InvalidBackendId) { slot = &ProcSignal->psh_slot[backendId - 1]; /* * Note: Since there's no locking, it's possible that the target * process detaches from shared memory and exits right after this * test, before we set the flag and send signal. And the signal slot * might even be recycled by a new process, so it's remotely possible * that we set a flag for a wrong process. That's OK, all the signals * are such that no harm is done if they're mistakenly fired. */ if (slot->pss_pid == pid) { /* Atomically set the proper flag */ slot->pss_signalFlags[reason] = true; /* Send signal */ return kill(pid, SIGUSR1); } } else { /* * BackendId not provided, so search the array using pid. We search * the array back to front so as to reduce search overhead. Passing * InvalidBackendId means that the target is most likely an auxiliary * process, which will have a slot near the end of the array. */ int i; for (i = NumProcSignalSlots - 1; i >= 0; i--) { slot = &ProcSignal->psh_slot[i]; if (slot->pss_pid == pid) { /* the above note about race conditions applies here too */ /* Atomically set the proper flag */ slot->pss_signalFlags[reason] = true; /* Send signal */ return kill(pid, SIGUSR1); } } } errno = ESRCH; return -1; } /* * EmitProcSignalBarrier * Send a signal to every Postgres process * * The return value of this function is the barrier "generation" created * by this operation. 
This value can be passed to WaitForProcSignalBarrier * to wait until it is known that every participant in the ProcSignal * mechanism has absorbed the signal (or started afterwards). * * Note that it would be a bad idea to use this for anything that happens * frequently, as interrupting every backend could cause a noticeable * performance hit. * * Callers are entitled to assume that this function will not throw ERROR * or FATAL. */ uint64 EmitProcSignalBarrier(ProcSignalBarrierType type) { uint32 flagbit = 1 << (uint32) type; uint64 generation; /* * Set all the flags. * * Note that pg_atomic_fetch_or_u32 has full barrier semantics, so this is * totally ordered with respect to anything the caller did before, and * anything that we do afterwards. (This is also true of the later call to * pg_atomic_add_fetch_u64.) */ for (int i = 0; i < NumProcSignalSlots; i++) { volatile ProcSignalSlot *slot = &ProcSignal->psh_slot[i]; pg_atomic_fetch_or_u32(&slot->pss_barrierCheckMask, flagbit); } /* * Increment the generation counter. */ generation = pg_atomic_add_fetch_u64(&ProcSignal->psh_barrierGeneration, 1); /* * Signal all the processes, so that they update their advertised barrier * generation. * * Concurrency is not a problem here. Backends that have exited don't * matter, and new backends that have joined since we entered this * function must already have current state, since the caller is * responsible for making sure that the relevant state is entirely visible * before calling this function in the first place. We still have to wake * them up - because we can't distinguish between such backends and older * backends that need to update state - but they won't actually need to * change any state. 
*/ for (int i = NumProcSignalSlots - 1; i >= 0; i--) { volatile ProcSignalSlot *slot = &ProcSignal->psh_slot[i]; pid_t pid = slot->pss_pid; if (pid != 0) { /* see SendProcSignal for details */ slot->pss_signalFlags[PROCSIG_BARRIER] = true; kill(pid, SIGUSR1); } } return generation; } /* * WaitForProcSignalBarrier - wait until it is guaranteed that all changes * requested by a specific call to EmitProcSignalBarrier() have taken effect. * * We expect that the barrier will normally be absorbed very quickly by other * backends, so we start by waiting just 1/8 of a second and then back off * by a factor of two every time we time out, to a maximum wait time of * 1 second. */ void WaitForProcSignalBarrier(uint64 generation) { long timeout = 125L; Assert(generation <= pg_atomic_read_u64(&ProcSignal->psh_barrierGeneration)); for (int i = NumProcSignalSlots - 1; i >= 0; i--) { volatile ProcSignalSlot *slot = &ProcSignal->psh_slot[i]; uint64 oldval; oldval = pg_atomic_read_u64(&slot->pss_barrierGeneration); while (oldval < generation) { int events; CHECK_FOR_INTERRUPTS(); events = WaitLatch(MyLatch, WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, timeout, WAIT_EVENT_PROC_SIGNAL_BARRIER); ResetLatch(MyLatch); oldval = pg_atomic_read_u64(&slot->pss_barrierGeneration); if (events & WL_TIMEOUT) timeout = Min(timeout * 2, 1000L); } } /* * The caller is probably calling this function because it wants to read * the shared state or perform further writes to shared state once all * backends are known to have absorbed the barrier. However, the read of * pss_barrierGeneration was performed unlocked; insert a memory barrier * to separate it from whatever follows. */ pg_memory_barrier(); } /* * Handle receipt of an interrupt indicating a global barrier event. * * All the actual work is deferred to ProcessProcSignalBarrier(), because we * cannot safely access the barrier generation inside the signal handler as * 64bit atomics might use spinlock based emulation, even for reads. 
As this * routine only gets called when PROCSIG_BARRIER is sent that won't cause a * lot fo unnecessary work. */ static void HandleProcSignalBarrierInterrupt(void) { InterruptPending = true; ProcSignalBarrierPending = true; /* latch will be set by procsignal_sigusr1_handler */ } /* * Perform global barrier related interrupt checking. * * Any backend that participates in ProcSignal signaling must arrange to * call this function periodically. It is called from CHECK_FOR_INTERRUPTS(), * which is enough for normal backends, but not necessarily for all types of * background processes. */ void ProcessProcSignalBarrier(void) { uint64 local_gen; uint64 shared_gen; uint32 flags; Assert(MyProcSignalSlot); /* Exit quickly if there's no work to do. */ if (!ProcSignalBarrierPending) return; ProcSignalBarrierPending = false; /* * It's not unlikely to process multiple barriers at once, before the * signals for all the barriers have arrived. To avoid unnecessary work in * response to subsequent signals, exit early if we already have processed * all of them. */ local_gen = pg_atomic_read_u64(&MyProcSignalSlot->pss_barrierGeneration); shared_gen = pg_atomic_read_u64(&ProcSignal->psh_barrierGeneration); Assert(local_gen <= shared_gen); if (local_gen == shared_gen) return; /* * Get and clear the flags that are set for this backend. Note that * pg_atomic_exchange_u32 is a full barrier, so we're guaranteed that the * read of the barrier generation above happens before we atomically * extract the flags, and that any subsequent state changes happen * afterward. */ flags = pg_atomic_exchange_u32(&MyProcSignalSlot->pss_barrierCheckMask, 0); /* * Process each type of barrier. It's important that nothing we call from * here throws an error, because pss_barrierCheckMask has already been * cleared. If we jumped out of here before processing all barrier types, * then we'd forget about the need to do so later. 
* * NB: It ought to be OK to call the barrier-processing functions * unconditionally, but it's more efficient to call only the ones that * might need us to do something based on the flags. */ if (BARRIER_SHOULD_CHECK(flags, PROCSIGNAL_BARRIER_PLACEHOLDER)) ProcessBarrierPlaceholder(); /* * State changes related to all types of barriers that might have been * emitted have now been handled, so we can update our notion of the * generation to the one we observed before beginning the updates. If * things have changed further, it'll get fixed up when this function is * next called. */ pg_atomic_write_u64(&MyProcSignalSlot->pss_barrierGeneration, shared_gen); } static void ProcessBarrierPlaceholder(void) { /* * XXX. This is just a placeholder until the first real user of this * machinery gets committed. Rename PROCSIGNAL_BARRIER_PLACEHOLDER to * PROCSIGNAL_BARRIER_SOMETHING_ELSE where SOMETHING_ELSE is something * appropriately descriptive. Get rid of this function and instead have * ProcessBarrierSomethingElse. Most likely, that function should live in * the file pertaining to that subsystem, rather than here. */ } /* * CheckProcSignal - check to see if a particular reason has been * signaled, and clear the signal flag. Should be called after receiving * SIGUSR1. */ static bool CheckProcSignal(ProcSignalReason reason) { volatile ProcSignalSlot *slot = MyProcSignalSlot; if (slot != NULL) { /* Careful here --- don't clear flag if we haven't seen it set */ if (slot->pss_signalFlags[reason]) { slot->pss_signalFlags[reason] = false; return true; } } return false; } /* * procsignal_sigusr1_handler - handle SIGUSR1 signal. 
*/
void
procsignal_sigusr1_handler(SIGNAL_ARGS)
{
	/* Recovery-conflict reasons, listed in the order they are polled. */
	static const ProcSignalReason conflict_reasons[] = {
		PROCSIG_RECOVERY_CONFLICT_DATABASE,
		PROCSIG_RECOVERY_CONFLICT_TABLESPACE,
		PROCSIG_RECOVERY_CONFLICT_LOCK,
		PROCSIG_RECOVERY_CONFLICT_SNAPSHOT,
		PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK,
		PROCSIG_RECOVERY_CONFLICT_BUFFERPIN
	};
	const int	n_conflict_reasons =
		(int) (sizeof(conflict_reasons) / sizeof(conflict_reasons[0]));

	/* errno is saved here and restored on the way out. */
	int			saved_errno = errno;

	/* Each reason whose flag is set gets its own handler called once. */
	if (CheckProcSignal(PROCSIG_CATCHUP_INTERRUPT))
		HandleCatchupInterrupt();

	if (CheckProcSignal(PROCSIG_NOTIFY_INTERRUPT))
		HandleNotifyInterrupt();

	if (CheckProcSignal(PROCSIG_PARALLEL_MESSAGE))
		HandleParallelMessageInterrupt();

	if (CheckProcSignal(PROCSIG_WALSND_INIT_STOPPING))
		HandleWalSndInitStopping();

	if (CheckProcSignal(PROCSIG_BARRIER))
		HandleProcSignalBarrierInterrupt();

	/* All recovery conflicts share one handler, told which reason fired. */
	for (int k = 0; k < n_conflict_reasons; k++)
	{
		if (CheckProcSignal(conflict_reasons[k]))
			RecoveryConflictInterrupt(conflict_reasons[k]);
	}

	/* Set our latch and run the latch code's own SIGUSR1 hook. */
	SetLatch(MyLatch);
	latch_sigusr1_handler();

	errno = saved_errno;
}