/*-------------------------------------------------------------------------
 *
 * timeout.c
 *	  Routines to multiplex SIGALRM interrupts for multiple timeout reasons.
 *
 * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/utils/misc/timeout.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

/* struct itimerval and setitimer(), used by schedule_alarm() */
#include <sys/time.h>

#include "miscadmin.h"
#include "storage/proc.h"
#include "utils/timeout.h"
#include "utils/timestamp.h"


/*
 * Data about any one timeout reason.
 *
 * There is one of these per TimeoutId, stored in all_timeouts[]; entries that
 * are currently scheduled are additionally pointed to from active_timeouts[].
 */
typedef struct timeout_params
{
	TimeoutId	index;			/* identifier of timeout reason */

	/* volatile because these may be changed from the signal handler */
	volatile bool active;		/* true if timeout is in active_timeouts[] */
	volatile bool indicator;	/* true if timeout has occurred */

	/* callback function for timeout, or NULL if timeout not registered */
	timeout_handler_proc timeout_handler;

	TimestampTz start_time;		/* time that timeout was last activated */
	TimestampTz fin_time;		/* time it is, or was last, due to fire */
	int			interval_in_ms; /* time between firings, or 0 if just once */
} timeout_params;

/*
 * List of possible timeout reasons in the order of enum TimeoutId.
 */
static timeout_params all_timeouts[MAX_TIMEOUTS];
static bool all_timeouts_initialized = false;

/*
 * List of active timeouts ordered by their fin_time and priority.
 * This list is subject to change by the interrupt handler, so it's volatile.
 */
static volatile int num_active_timeouts = 0;
static timeout_params *volatile active_timeouts[MAX_TIMEOUTS];

/*
 * Flag controlling whether the signal handler is allowed to do anything.
 * This is useful to avoid race conditions with the handler.  Note in
 * particular that this lets us make changes in the data structures without
 * tediously disabling and re-enabling the timer signal.
Most of the time, * no interrupt would happen anyway during such critical sections, but if * one does, this rule ensures it's safe. Leaving the signal enabled across * multiple operations can greatly reduce the number of kernel calls we make, * too. See comments in schedule_alarm() about that. * * We leave this "false" when we're not expecting interrupts, just in case. */ static volatile sig_atomic_t alarm_enabled = false; #define disable_alarm() (alarm_enabled = false) #define enable_alarm() (alarm_enabled = true) /* * State recording if and when we next expect the interrupt to fire. * (signal_due_at is valid only when signal_pending is true.) * Note that the signal handler will unconditionally reset signal_pending to * false, so that can change asynchronously even when alarm_enabled is false. */ static volatile sig_atomic_t signal_pending = false; static volatile TimestampTz signal_due_at = 0; /***************************************************************************** * Internal helper functions * * For all of these, it is caller's responsibility to protect them from * interruption by the signal handler. Generally, call disable_alarm() * first to prevent interruption, then update state, and last call * schedule_alarm(), which will re-enable the signal handler if needed. *****************************************************************************/ /* * Find the index of a given timeout reason in the active array. * If it's not there, return -1. */ static int find_active_timeout(TimeoutId id) { int i; for (i = 0; i < num_active_timeouts; i++) { if (active_timeouts[i]->index == id) return i; } return -1; } /* * Insert specified timeout reason into the list of active timeouts * at the given index. 
*/
static void
insert_timeout(TimeoutId id, int index)
{
	int			i;

	/* index == num_active_timeouts is legal: it appends at the end */
	if (index < 0 || index > num_active_timeouts)
		elog(FATAL, "timeout index %d out of range 0..%d", index,
			 num_active_timeouts);

	/* A given reason may appear in the active list at most once */
	Assert(!all_timeouts[id].active);
	all_timeouts[id].active = true;

	/* Open a hole at "index" by moving later entries up, back to front */
	for (i = num_active_timeouts - 1; i >= index; i--)
		active_timeouts[i + 1] = active_timeouts[i];

	active_timeouts[index] = &all_timeouts[id];

	num_active_timeouts++;
}

/*
 * Remove the index'th element from the timeout list.
 *
 * The removed entry's "active" flag is cleared; its other fields (including
 * the fired indicator) are left alone.  An out-of-range index is reported
 * with elog(FATAL).
 */
static void
remove_timeout_index(int index)
{
	int			i;

	if (index < 0 || index >= num_active_timeouts)
		elog(FATAL, "timeout index %d out of range 0..%d", index,
			 num_active_timeouts - 1);

	Assert(active_timeouts[index]->active);
	active_timeouts[index]->active = false;

	/* Close the hole by moving later entries down one slot */
	for (i = index + 1; i < num_active_timeouts; i++)
		active_timeouts[i - 1] = active_timeouts[i];

	num_active_timeouts--;
}

/*
 * Enable the specified timeout reason
 *
 * id: the timeout to (re)schedule; asserted to have a registered handler.
 * now: recorded as the timeout's start_time.
 * fin_time: time at which the timeout is due; primary sort key of the list.
 * interval_in_ms: recorded as the timeout's interval_in_ms (per the struct
 *		definition: time between firings, or 0 if just once).
 *
 * This only updates the data structures; it does not touch the timer.
 */
static void
enable_timeout(TimeoutId id, TimestampTz now, TimestampTz fin_time,
			   int interval_in_ms)
{
	int			i;

	/* Assert request is sane */
	Assert(all_timeouts_initialized);
	Assert(all_timeouts[id].timeout_handler != NULL);

	/*
	 * If this timeout was already active, momentarily disable it.  We
	 * interpret the call as a directive to reschedule the timeout.
	 */
	if (all_timeouts[id].active)
		remove_timeout_index(find_active_timeout(id));

	/*
	 * Find out the index where to insert the new timeout.  We sort by
	 * fin_time, and for equal fin_time by priority.  (A numerically smaller
	 * TimeoutId sorts first among entries with equal fin_time.)
	 */
	for (i = 0; i < num_active_timeouts; i++)
	{
		timeout_params *old_timeout = active_timeouts[i];

		if (fin_time < old_timeout->fin_time)
			break;
		if (fin_time == old_timeout->fin_time && id < old_timeout->index)
			break;
	}

	/*
	 * Mark the timeout active, and insert it into the active list.  The
	 * fired indicator is reset as part of (re)enabling.
	 */
	all_timeouts[id].indicator = false;
	all_timeouts[id].start_time = now;
	all_timeouts[id].fin_time = fin_time;
	all_timeouts[id].interval_in_ms = interval_in_ms;

	insert_timeout(id, i);
}

/*
 * Schedule alarm for the next active timeout, if any
 *
 * We assume the caller has obtained the current time, or a close-enough
 * approximation.  (It's okay if a tick or two has passed since "now", or
 * if a little more time elapses before we reach the kernel call; that will
 * cause us to ask for an interrupt a tick or two later than the nearest
 * timeout, which is no big deal.  Passing a "now" value that's in the future
 * would be bad though.)
 *
 * If no timeouts are active this does nothing at all; in particular it does
 * not re-enable the signal handler.
 */
static void
schedule_alarm(TimestampTz now)
{
	if (num_active_timeouts > 0)
	{
		struct itimerval timeval;
		TimestampTz nearest_timeout;
		long		secs;
		int			usecs;

		/* Zeroing it_interval as well, so only it_value is ever nonzero */
		MemSet(&timeval, 0, sizeof(struct itimerval));

		/*
		 * If we think there's a signal pending, but current time is more than
		 * 10ms past when the signal was due, then assume that the timeout
		 * request got lost somehow; clear signal_pending so that we'll reset
		 * the interrupt request below.  (10ms corresponds to the worst-case
		 * timeout granularity on modern systems.)  It won't hurt us if the
		 * interrupt does manage to fire between now and when we reach the
		 * setitimer() call.
		 */
		if (signal_pending && now > signal_due_at + 10 * 1000)
			signal_pending = false;

		/*
		 * Get the time remaining till the nearest pending timeout.  If it is
		 * negative, assume that we somehow missed an interrupt, and clear
		 * signal_pending.  This gives us another chance to recover if the
		 * kernel drops a timeout request for some reason.
		 */
		nearest_timeout = active_timeouts[0]->fin_time;
		if (now > nearest_timeout)
		{
			signal_pending = false;
			/* force an interrupt as soon as possible */
			secs = 0;
			usecs = 1;
		}
		else
		{
			TimestampDifference(now, nearest_timeout,
								&secs, &usecs);

			/*
			 * It's possible that the difference is less than a microsecond;
			 * ensure we don't cancel, rather than set, the interrupt.
			 */
			if (secs == 0 && usecs == 0)
				usecs = 1;
		}

		timeval.it_value.tv_sec = secs;
		timeval.it_value.tv_usec = usecs;

		/*
		 * We must enable the signal handler before calling setitimer(); if we
		 * did it in the other order, we'd have a race condition wherein the
		 * interrupt could occur before we can set alarm_enabled, so that the
		 * signal handler would fail to do anything.
		 *
		 * Because we didn't bother to disable the timer in disable_alarm(),
		 * it's possible that a previously-set interrupt will fire between
		 * enable_alarm() and setitimer().  This is safe, however.  There are
		 * two possible outcomes:
		 *
		 * 1. The signal handler finds nothing to do (because the nearest
		 * timeout event is still in the future).  It will re-set the timer
		 * and return.  Then we'll overwrite the timer value with a new one.
		 * This will mean that the timer fires a little later than we
		 * intended, but only by the amount of time it takes for the signal
		 * handler to do nothing useful, which shouldn't be much.
		 *
		 * 2. The signal handler executes and removes one or more timeout
		 * events.  When it returns, either the queue is now empty or the
		 * frontmost event is later than the one we looked at above.  So we'll
		 * overwrite the timer value with one that is too soon (plus or minus
		 * the signal handler's execution time), causing a useless interrupt
		 * to occur.  But the handler will then re-set the timer and
		 * everything will still work as expected.
		 *
		 * Since these cases are of very low probability (the window here
		 * being quite narrow), it's not worth adding cycles to the mainline
		 * code to prevent occasional wasted interrupts.
		 */
		enable_alarm();

		/*
		 * If there is already an interrupt pending that's at or before the
		 * needed time, we need not do anything more.  The signal handler will
		 * do the right thing in the first case, and re-schedule the interrupt
		 * for later in the second case.  It might seem that the extra
		 * interrupt is wasted work, but it's not terribly much work, and this
		 * method has very significant advantages in the common use-case where
		 * we repeatedly set a timeout that we don't expect to reach and then
		 * cancel it.  Instead of invoking setitimer() every time the timeout
		 * is set or canceled, we perform one interrupt and a re-scheduling
		 * setitimer() call at intervals roughly equal to the timeout delay.
		 * For example, with statement_timeout = 1s and a throughput of
		 * thousands of queries per second, this method requires an interrupt
		 * and setitimer() call roughly once a second, rather than thousands
		 * of setitimer() calls per second.
		 *
		 * Because of the possible passage of time between when we obtained
		 * "now" and when we reach setitimer(), the kernel's opinion of when
		 * to trigger the interrupt is likely to be a bit later than
		 * signal_due_at.  That's fine, for the same reasons described above.
		 */
		if (signal_pending && nearest_timeout >= signal_due_at)
			return;

		/*
		 * As with calling enable_alarm(), we must set signal_pending *before*
		 * calling setitimer(); if we did it after, the signal handler could
		 * trigger before we set it, leaving us with a false opinion that a
		 * signal is still coming.
		 *
		 * Other race conditions involved with setting/checking signal_pending
		 * are okay, for the reasons described above.  One additional point is
		 * that the signal handler could fire after we set signal_due_at, but
		 * still before the setitimer() call.  Then the handler could
		 * overwrite signal_due_at with a value it computes, which will be the
		 * same as or perhaps later than what we just computed.  After we
		 * perform setitimer(), the net effect would be that signal_due_at
		 * gives a time later than when the interrupt will really happen;
		 * which is a safe situation.
		 */
		signal_due_at = nearest_timeout;
		signal_pending = true;

		/* Set the alarm timer */
		if (setitimer(ITIMER_REAL, &timeval, NULL) != 0)
		{
			/*
			 * Clearing signal_pending here is a bit pro forma, but not
			 * entirely so, since something in the FATAL exit path could try
			 * to use timeout facilities.
			 */
			signal_pending = false;
			elog(FATAL, "could not enable SIGALRM timer: %m");
		}
	}
}


/*****************************************************************************
 * Signal handler
 *****************************************************************************/

/*
 * Signal handler for SIGALRM
 *
 * Process any active timeout reasons and then reschedule the interrupt
 * as needed.
 *
 * errno is saved on entry and restored on exit, so the interrupted code
 * does not see any value set while the handler ran.
 */
static void
handle_sig_alarm(SIGNAL_ARGS)
{
	int			save_errno = errno;

	/*
	 * Bump the holdoff counter, to make sure nothing we call will process
	 * interrupts directly. No timeout handler should do that, but these
	 * failures are hard to debug, so better be sure.
	 */
	HOLD_INTERRUPTS();

	/*
	 * SIGALRM is always cause for waking anything waiting on the process
	 * latch.
	 */
	SetLatch(MyLatch);

	/*
	 * Always reset signal_pending, even if !alarm_enabled, since indeed no
	 * signal is now pending.
	 */
	signal_pending = false;

	/*
	 * Fire any pending timeouts, but only if we're enabled to do so.
	 */
	if (alarm_enabled)
	{
		/*
		 * Disable alarms, just in case this platform allows signal handlers
		 * to interrupt themselves.  schedule_alarm() will re-enable if
		 * appropriate.
		 */
		disable_alarm();

		if (num_active_timeouts > 0)
		{
			TimestampTz now = GetCurrentTimestamp();

			/* While the first pending timeout has been reached ... */
			while (num_active_timeouts > 0 &&
				   now >= active_timeouts[0]->fin_time)
			{
				timeout_params *this_timeout = active_timeouts[0];

				/* Remove it from the active list */
				remove_timeout_index(0);

				/* Mark it as fired */
				this_timeout->indicator = true;

				/* And call its handler function */
				this_timeout->timeout_handler();

				/* If it should fire repeatedly, re-enable it. */
				if (this_timeout->interval_in_ms > 0)
				{
					TimestampTz new_fin_time;

					/*
					 * To guard against drift, schedule the next instance of
					 * the timeout based on the intended firing time rather
					 * than the actual firing time. But if the timeout was so
					 * late that we missed an entire cycle, fall back to
					 * scheduling based on the actual firing time.
					 */
					new_fin_time =
						TimestampTzPlusMilliseconds(this_timeout->fin_time,
													this_timeout->interval_in_ms);
					if (new_fin_time < now)
						new_fin_time =
							TimestampTzPlusMilliseconds(now,
														this_timeout->interval_in_ms);
					enable_timeout(this_timeout->index, now, new_fin_time,
								   this_timeout->interval_in_ms);
				}

				/*
				 * The handler might not take negligible time (CheckDeadLock
				 * for instance isn't too cheap), so let's update our idea of
				 * "now" after each one.
				 */
				now = GetCurrentTimestamp();
			}

			/* Done firing timeouts, so reschedule next interrupt if any */
			schedule_alarm(now);
		}
	}

	RESUME_INTERRUPTS();

	errno = save_errno;
}


/*****************************************************************************
 * Public API
 *****************************************************************************/

/*
 * Initialize timeout module.
 *
 * This must be called in every process that wants to use timeouts.
 *
 * If the process was forked from another one that was also using this
 * module, be sure to call this before re-enabling signals; else handlers
 * meant to run in the parent process might get invoked in this one.
*/ void InitializeTimeouts(void) { int i; /* Initialize, or re-initialize, all local state */ disable_alarm(); num_active_timeouts = 0; for (i = 0; i < MAX_TIMEOUTS; i++) { all_timeouts[i].index = i; all_timeouts[i].active = false; all_timeouts[i].indicator = false; all_timeouts[i].timeout_handler = NULL; all_timeouts[i].start_time = 0; all_timeouts[i].fin_time = 0; all_timeouts[i].interval_in_ms = 0; } all_timeouts_initialized = true; /* Now establish the signal handler */ pqsignal(SIGALRM, handle_sig_alarm); } /* * Register a timeout reason * * For predefined timeouts, this just registers the callback function. * * For user-defined timeouts, pass id == USER_TIMEOUT; we then allocate and * return a timeout ID. */ TimeoutId RegisterTimeout(TimeoutId id, timeout_handler_proc handler) { Assert(all_timeouts_initialized); /* There's no need to disable the signal handler here. */ if (id >= USER_TIMEOUT) { /* Allocate a user-defined timeout reason */ for (id = USER_TIMEOUT; id < MAX_TIMEOUTS; id++) if (all_timeouts[id].timeout_handler == NULL) break; if (id >= MAX_TIMEOUTS) ereport(FATAL, (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED), errmsg("cannot add more timeout reasons"))); } Assert(all_timeouts[id].timeout_handler == NULL); all_timeouts[id].timeout_handler = handler; return id; } /* * Reschedule any pending SIGALRM interrupt. * * This can be used during error recovery in case query cancel resulted in loss * of a SIGALRM event (due to longjmp'ing out of handle_sig_alarm before it * could do anything). But note it's not necessary if any of the public * enable_ or disable_timeout functions are called in the same area, since * those all do schedule_alarm() internally if needed. */ void reschedule_timeouts(void) { /* For flexibility, allow this to be called before we're initialized. */ if (!all_timeouts_initialized) return; /* Disable timeout interrupts for safety. */ disable_alarm(); /* Reschedule the interrupt, if any timeouts remain active. 
*/ if (num_active_timeouts > 0) schedule_alarm(GetCurrentTimestamp()); } /* * Enable the specified timeout to fire after the specified delay. * * Delay is given in milliseconds. */ void enable_timeout_after(TimeoutId id, int delay_ms) { TimestampTz now; TimestampTz fin_time; /* Disable timeout interrupts for safety. */ disable_alarm(); /* Queue the timeout at the appropriate time. */ now = GetCurrentTimestamp(); fin_time = TimestampTzPlusMilliseconds(now, delay_ms); enable_timeout(id, now, fin_time, 0); /* Set the timer interrupt. */ schedule_alarm(now); } /* * Enable the specified timeout to fire periodically, with the specified * delay as the time between firings. * * Delay is given in milliseconds. */ void enable_timeout_every(TimeoutId id, TimestampTz fin_time, int delay_ms) { TimestampTz now; /* Disable timeout interrupts for safety. */ disable_alarm(); /* Queue the timeout at the appropriate time. */ now = GetCurrentTimestamp(); enable_timeout(id, now, fin_time, delay_ms); /* Set the timer interrupt. */ schedule_alarm(now); } /* * Enable the specified timeout to fire at the specified time. * * This is provided to support cases where there's a reason to calculate * the timeout by reference to some point other than "now". If there isn't, * use enable_timeout_after(), to avoid calling GetCurrentTimestamp() twice. */ void enable_timeout_at(TimeoutId id, TimestampTz fin_time) { TimestampTz now; /* Disable timeout interrupts for safety. */ disable_alarm(); /* Queue the timeout at the appropriate time. */ now = GetCurrentTimestamp(); enable_timeout(id, now, fin_time, 0); /* Set the timer interrupt. */ schedule_alarm(now); } /* * Enable multiple timeouts at once. * * This works like calling enable_timeout_after() and/or enable_timeout_at() * multiple times. Use this to reduce the number of GetCurrentTimestamp() * and setitimer() calls needed to establish multiple timeouts. 
*/
void
enable_timeouts(const EnableTimeoutParams *timeouts, int count)
{
	TimestampTz now;
	int			n;

	/* Keep the signal handler from acting while we work */
	disable_alarm();

	/* One timestamp serves as the reference point for every request */
	now = GetCurrentTimestamp();

	for (n = 0; n < count; n++)
	{
		const EnableTimeoutParams *req = &timeouts[n];

		switch (req->type)
		{
			case TMPARAM_AFTER:
				/* one-shot, due delay_ms from now */
				enable_timeout(req->id, now,
							   TimestampTzPlusMilliseconds(now, req->delay_ms),
							   0);
				break;

			case TMPARAM_AT:
				/* one-shot, due at the given absolute time */
				enable_timeout(req->id, now, req->fin_time, 0);
				break;

			case TMPARAM_EVERY:
				/* repeating, first due delay_ms from now */
				enable_timeout(req->id, now,
							   TimestampTzPlusMilliseconds(now, req->delay_ms),
							   req->delay_ms);
				break;

			default:
				elog(ERROR, "unrecognized timeout type %d",
					 (int) req->type);
				break;
		}
	}

	/* Arm the timer once, after all requests have been queued */
	schedule_alarm(now);
}

/*
 * Cancel the specified timeout.
 *
 * The timeout's I've-been-fired indicator is reset,
 * unless keep_indicator is true.
 *
 * When a timeout is canceled, any other active timeout remains in force.
 * It's not an error to disable a timeout that is not enabled.
 */
void
disable_timeout(TimeoutId id, bool keep_indicator)
{
	timeout_params *entry = &all_timeouts[id];

	/* Sanity-check the request */
	Assert(all_timeouts_initialized);
	Assert(entry->timeout_handler != NULL);

	/* Keep the signal handler from acting while we work */
	disable_alarm();

	/* Take it out of the active list if it is there */
	if (entry->active)
		remove_timeout_index(find_active_timeout(id));

	/* Clear the fired indicator unless the caller wants it preserved */
	if (!keep_indicator)
		entry->indicator = false;

	/* Re-arm the timer only when something is still queued */
	if (num_active_timeouts > 0)
		schedule_alarm(GetCurrentTimestamp());
}

/*
 * Cancel multiple timeouts at once.
 *
 * The timeouts' I've-been-fired indicators are reset,
 * unless timeouts[i].keep_indicator is true.
 *
 * This works like calling disable_timeout() multiple times.
* Use this to reduce the number of GetCurrentTimestamp() * and setitimer() calls needed to cancel multiple timeouts. */ void disable_timeouts(const DisableTimeoutParams *timeouts, int count) { int i; Assert(all_timeouts_initialized); /* Disable timeout interrupts for safety. */ disable_alarm(); /* Cancel the timeout(s). */ for (i = 0; i < count; i++) { TimeoutId id = timeouts[i].id; Assert(all_timeouts[id].timeout_handler != NULL); if (all_timeouts[id].active) remove_timeout_index(find_active_timeout(id)); if (!timeouts[i].keep_indicator) all_timeouts[id].indicator = false; } /* Reschedule the interrupt, if any timeouts remain active. */ if (num_active_timeouts > 0) schedule_alarm(GetCurrentTimestamp()); } /* * Disable the signal handler, remove all timeouts from the active list, * and optionally reset their timeout indicators. */ void disable_all_timeouts(bool keep_indicators) { int i; disable_alarm(); /* * We used to disable the timer interrupt here, but in common usage * patterns it's cheaper to leave it enabled; that may save us from having * to enable it again shortly. See comments in schedule_alarm(). */ num_active_timeouts = 0; for (i = 0; i < MAX_TIMEOUTS; i++) { all_timeouts[i].active = false; if (!keep_indicators) all_timeouts[i].indicator = false; } } /* * Return true if the timeout is active (enabled and not yet fired) * * This is, of course, subject to race conditions, as the timeout could fire * immediately after we look. */ bool get_timeout_active(TimeoutId id) { return all_timeouts[id].active; } /* * Return the timeout's I've-been-fired indicator * * If reset_indicator is true, reset the indicator when returning true. * To avoid missing timeouts due to race conditions, we are careful not to * reset the indicator when returning false. 
*/
bool
get_timeout_indicator(TimeoutId id, bool reset_indicator)
{
	/* Not fired: report that and leave the flag untouched */
	if (!all_timeouts[id].indicator)
		return false;

	/* Fired: consume the flag only when the caller asked for that */
	if (reset_indicator)
		all_timeouts[id].indicator = false;

	return true;
}

/*
 * Return the time when the timeout was most recently activated
 *
 * Note: will return 0 if timeout has never been activated in this process.
 * However, we do *not* reset the start_time when a timeout occurs, so as
 * not to create a race condition if SIGALRM fires just as some code is
 * about to fetch the value.
 */
TimestampTz
get_timeout_start_time(TimeoutId id)
{
	const timeout_params *entry = &all_timeouts[id];

	return entry->start_time;
}

/*
 * Return the time when the timeout is, or most recently was, due to fire
 *
 * Note: will return 0 if timeout has never been activated in this process.
 * However, we do *not* reset the fin_time when a timeout occurs, so as
 * not to create a race condition if SIGALRM fires just as some code is
 * about to fetch the value.
 */
TimestampTz
get_timeout_finish_time(TimeoutId id)
{
	const timeout_params *entry = &all_timeouts[id];

	return entry->fin_time;
}