diff options
Diffstat (limited to '')
-rw-r--r-- | src/VBox/Runtime/r0drv/linux/timer-r0drv-linux.c | 1727 |
1 files changed, 1727 insertions, 0 deletions
diff --git a/src/VBox/Runtime/r0drv/linux/timer-r0drv-linux.c b/src/VBox/Runtime/r0drv/linux/timer-r0drv-linux.c new file mode 100644 index 00000000..8d70c1ab --- /dev/null +++ b/src/VBox/Runtime/r0drv/linux/timer-r0drv-linux.c @@ -0,0 +1,1727 @@ +/* $Id: timer-r0drv-linux.c $ */ +/** @file + * IPRT - Timers, Ring-0 Driver, Linux. + */ + +/* + * Copyright (C) 2006-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL) only, as it comes in the "COPYING.CDDL" file of the + * VirtualBox OSE distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#include "the-linux-kernel.h" +#include "internal/iprt.h" + +#include <iprt/timer.h> +#include <iprt/time.h> +#include <iprt/mp.h> +#include <iprt/cpuset.h> +#include <iprt/spinlock.h> +#include <iprt/err.h> +#include <iprt/asm.h> +#include <iprt/assert.h> +#include <iprt/alloc.h> + +#include "internal/magics.h" + +/** @def RTTIMER_LINUX_WITH_HRTIMER + * Whether to use high resolution timers. */ +#if !defined(RTTIMER_LINUX_WITH_HRTIMER) \ + && defined(IPRT_LINUX_HAS_HRTIMER) +# define RTTIMER_LINUX_WITH_HRTIMER +#endif + +#if RTLNX_VER_MAX(2,6,31) +# define mod_timer_pinned mod_timer +# define HRTIMER_MODE_ABS_PINNED HRTIMER_MODE_ABS +#endif + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +/** + * Timer state machine. + * + * This is used to try handle the issues with MP events and + * timers that runs on all CPUs. It's relatively nasty :-/ + */ +typedef enum RTTIMERLNXSTATE +{ + /** Stopped. */ + RTTIMERLNXSTATE_STOPPED = 0, + /** Transient state; next ACTIVE. */ + RTTIMERLNXSTATE_STARTING, + /** Transient state; next ACTIVE. (not really necessary) */ + RTTIMERLNXSTATE_MP_STARTING, + /** Active. */ + RTTIMERLNXSTATE_ACTIVE, + /** Active and in callback; next ACTIVE, STOPPED or CALLBACK_DESTROYING. */ + RTTIMERLNXSTATE_CALLBACK, + /** Stopped while in the callback; next STOPPED. */ + RTTIMERLNXSTATE_CB_STOPPING, + /** Restarted while in the callback; next ACTIVE, STOPPED, DESTROYING. */ + RTTIMERLNXSTATE_CB_RESTARTING, + /** The callback shall destroy the timer; next STOPPED. */ + RTTIMERLNXSTATE_CB_DESTROYING, + /** Transient state; next STOPPED. */ + RTTIMERLNXSTATE_STOPPING, + /** Transient state; next STOPPED. */ + RTTIMERLNXSTATE_MP_STOPPING, + /** The usual 32-bit hack. */ + RTTIMERLNXSTATE_32BIT_HACK = 0x7fffffff +} RTTIMERLNXSTATE; + + +/** + * A Linux sub-timer. + */ +typedef struct RTTIMERLNXSUBTIMER +{ + /** Timer specific data. */ + union + { +#if defined(RTTIMER_LINUX_WITH_HRTIMER) + /** High resolution timer. */ + struct + { + /** The linux timer structure. */ + struct hrtimer LnxTimer; + } Hr; +#endif + /** Standard timer. */ + struct + { + /** The linux timer structure. */ + struct timer_list LnxTimer; + /** The start of the current run (ns). + * This is used to calculate when the timer ought to fire the next time. */ + uint64_t u64NextTS; + /** When the timer was started. */ + uint64_t nsStartTS; + /** The u64NextTS in jiffies. */ + unsigned long ulNextJiffies; + /** Set when starting or changing the timer so that u64StartTs + * and u64NextTS gets reinitialized (eliminating some jitter). */ + bool volatile fFirstAfterChg; + } Std; + } u; + /** The current tick number. */ + uint64_t iTick; + /** Restart the single shot timer at this specific time. + * Used when a single shot timer is restarted from the callback. */ + uint64_t volatile uNsRestartAt; + /** Pointer to the parent timer. */ + PRTTIMER pParent; + /** The current sub-timer state. */ + RTTIMERLNXSTATE volatile enmState; +} RTTIMERLNXSUBTIMER; +/** Pointer to a linux sub-timer. */ +typedef RTTIMERLNXSUBTIMER *PRTTIMERLNXSUBTIMER; + + +/** + * The internal representation of an Linux timer handle. + */ +typedef struct RTTIMER +{ + /** Magic. + * This is RTTIMER_MAGIC, but changes to something else before the timer + * is destroyed to indicate clearly that thread should exit. */ + uint32_t volatile u32Magic; + /** Spinlock synchronizing the fSuspended and MP event handling. + * This is NIL_RTSPINLOCK if cCpus == 1. */ + RTSPINLOCK hSpinlock; + /** Flag indicating that the timer is suspended. */ + bool volatile fSuspended; + /** Whether the timer must run on one specific CPU or not. */ + bool fSpecificCpu; +#ifdef CONFIG_SMP + /** Whether the timer must run on all CPUs or not. */ + bool fAllCpus; +#endif /* else: All -> specific on non-SMP kernels */ + /** Whether it is a high resolution timer or a standard one. */ + bool fHighRes; + /** The id of the CPU it must run on if fSpecificCpu is set. */ + RTCPUID idCpu; + /** The number of CPUs this timer should run on. */ + RTCPUID cCpus; + /** Callback. */ + PFNRTTIMER pfnTimer; + /** User argument. */ + void *pvUser; + /** The timer interval. 0 if one-shot. */ + uint64_t volatile u64NanoInterval; + /** This is set to the number of jiffies between ticks if the interval is + * an exact number of jiffies. (Standard timers only.) */ + unsigned long volatile cJiffies; + /** The change interval spinlock for standard timers only. */ + spinlock_t ChgIntLock; + /** Workqueue item for delayed destruction. */ + RTR0LNXWORKQUEUEITEM DtorWorkqueueItem; + /** Sub-timers. + * Normally there is just one, but for RTTIMER_FLAGS_CPU_ALL this will contain + * an entry for all possible cpus. In that case the index will be the same as + * for the RTCpuSet. */ + RTTIMERLNXSUBTIMER aSubTimers[1]; +} RTTIMER; + + +/** + * A rtTimerLinuxStartOnCpu and rtTimerLinuxStartOnCpu argument package. + */ +typedef struct RTTIMERLINUXSTARTONCPUARGS +{ + /** The current time (RTTimeSystemNanoTS). */ + uint64_t u64Now; + /** When to start firing (delta). */ + uint64_t u64First; +} RTTIMERLINUXSTARTONCPUARGS; +/** Pointer to a rtTimerLinuxStartOnCpu argument package. */ +typedef RTTIMERLINUXSTARTONCPUARGS *PRTTIMERLINUXSTARTONCPUARGS; + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +#ifdef CONFIG_SMP +static DECLCALLBACK(void) rtTimerLinuxMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser); +#endif + +#if 0 +#define DEBUG_HACKING +#include <iprt/string.h> +#include <iprt/asm-amd64-x86.h> +static void myLogBackdoorPrintf(const char *pszFormat, ...) +{ + char szTmp[256]; + va_list args; + size_t cb; + + cb = RTStrPrintf(szTmp, sizeof(szTmp) - 10, "%d: ", RTMpCpuId()); + va_start(args, pszFormat); + cb += RTStrPrintfV(&szTmp[cb], sizeof(szTmp) - cb, pszFormat, args); + va_end(args); + + ASMOutStrU8(0x504, (uint8_t *)&szTmp[0], cb); +} +# define RTAssertMsg1Weak(pszExpr, uLine, pszFile, pszFunction) \ + myLogBackdoorPrintf("\n!!Guest Assertion failed!!\n%s(%d) %s\n%s\n", uLine, pszFile, pszFunction, (pszExpr)) +# define RTAssertMsg2Weak myLogBackdoorPrintf +# define RTTIMERLNX_LOG(a) myLogBackdoorPrintf a +#else +# define RTTIMERLNX_LOG(a) do { } while (0) +#endif + +/** + * Sets the state. + */ +DECLINLINE(void) rtTimerLnxSetState(RTTIMERLNXSTATE volatile *penmState, RTTIMERLNXSTATE enmNewState) +{ +#ifdef DEBUG_HACKING + RTTIMERLNX_LOG(("set %d -> %d\n", *penmState, enmNewState)); +#endif + ASMAtomicWriteU32((uint32_t volatile *)penmState, enmNewState); +} + + +/** + * Sets the state if it has a certain value. + * + * @return true if xchg was done. + * @return false if xchg wasn't done. + */ +#ifdef DEBUG_HACKING +#define rtTimerLnxCmpXchgState(penmState, enmNewState, enmCurState) rtTimerLnxCmpXchgStateDebug(penmState, enmNewState, enmCurState, __LINE__) +static bool rtTimerLnxCmpXchgStateDebug(RTTIMERLNXSTATE volatile *penmState, RTTIMERLNXSTATE enmNewState, + RTTIMERLNXSTATE enmCurState, uint32_t uLine) +{ + RTTIMERLNXSTATE enmOldState = enmCurState; + bool fRc = ASMAtomicCmpXchgExU32((uint32_t volatile *)penmState, enmNewState, enmCurState, (uint32_t *)&enmOldState); + RTTIMERLNX_LOG(("cxg %d -> %d - %d at %u\n", enmOldState, enmNewState, fRc, uLine)); + return fRc; +} +#else +DECLINLINE(bool) rtTimerLnxCmpXchgState(RTTIMERLNXSTATE volatile *penmState, RTTIMERLNXSTATE enmNewState, + RTTIMERLNXSTATE enmCurState) +{ + return ASMAtomicCmpXchgU32((uint32_t volatile *)penmState, enmNewState, enmCurState); +} +#endif + + +/** + * Gets the state. + */ +DECLINLINE(RTTIMERLNXSTATE) rtTimerLnxGetState(RTTIMERLNXSTATE volatile *penmState) +{ + return (RTTIMERLNXSTATE)ASMAtomicUoReadU32((uint32_t volatile *)penmState); +} + +#ifdef RTTIMER_LINUX_WITH_HRTIMER + +/** + * Converts a nano second time stamp to ktime_t. + * + * ASSUMES RTTimeSystemNanoTS() is implemented using ktime_get_ts(). + * + * @returns ktime_t. + * @param cNanoSecs Nanoseconds. + */ +DECLINLINE(ktime_t) rtTimerLnxNanoToKt(uint64_t cNanoSecs) +{ + /* With some luck the compiler optimizes the division out of this... (Bet it doesn't.) */ + return ktime_set(cNanoSecs / 1000000000, cNanoSecs % 1000000000); +} + +/** + * Converts ktime_t to a nano second time stamp. + * + * ASSUMES RTTimeSystemNanoTS() is implemented using ktime_get_ts(). + * + * @returns nano second time stamp. + * @param Kt ktime_t. + */ +DECLINLINE(uint64_t) rtTimerLnxKtToNano(ktime_t Kt) +{ + return ktime_to_ns(Kt); +} + +#endif /* RTTIMER_LINUX_WITH_HRTIMER */ + +/** + * Converts a nano second interval to jiffies. + * + * @returns Jiffies. + * @param cNanoSecs Nanoseconds. + */ +DECLINLINE(unsigned long) rtTimerLnxNanoToJiffies(uint64_t cNanoSecs) +{ + /* this can be made even better... */ + if (cNanoSecs > (uint64_t)TICK_NSEC * MAX_JIFFY_OFFSET) + return MAX_JIFFY_OFFSET; +# if ARCH_BITS == 32 + if (RT_LIKELY(cNanoSecs <= UINT32_MAX)) + return ((uint32_t)cNanoSecs + (TICK_NSEC-1)) / TICK_NSEC; +# endif + return (cNanoSecs + (TICK_NSEC-1)) / TICK_NSEC; +} + + +/** + * Starts a sub-timer (RTTimerStart). + * + * @param pSubTimer The sub-timer to start. + * @param u64Now The current timestamp (RTTimeSystemNanoTS()). + * @param u64First The interval from u64Now to the first time the timer should fire. + * @param fPinned true = timer pinned to a specific CPU, + * false = timer can migrate between CPUs + * @param fHighRes Whether the user requested a high resolution timer or not. + * @param enmOldState The old timer state. + */ +static void rtTimerLnxStartSubTimer(PRTTIMERLNXSUBTIMER pSubTimer, uint64_t u64Now, uint64_t u64First, + bool fPinned, bool fHighRes) +{ + /* + * Calc when it should start firing. + */ + uint64_t u64NextTS = u64Now + u64First; + if (!fHighRes) + { + pSubTimer->u.Std.u64NextTS = u64NextTS; + pSubTimer->u.Std.nsStartTS = u64NextTS; + } + RTTIMERLNX_LOG(("startsubtimer %p\n", pSubTimer->pParent)); + + pSubTimer->iTick = 0; + +#ifdef RTTIMER_LINUX_WITH_HRTIMER + if (fHighRes) + hrtimer_start(&pSubTimer->u.Hr.LnxTimer, rtTimerLnxNanoToKt(u64NextTS), + fPinned ? HRTIMER_MODE_ABS_PINNED : HRTIMER_MODE_ABS); + else +#endif + { + unsigned long cJiffies = !u64First ? 0 : rtTimerLnxNanoToJiffies(u64First); + pSubTimer->u.Std.ulNextJiffies = jiffies + cJiffies; + pSubTimer->u.Std.fFirstAfterChg = true; +#ifdef CONFIG_SMP + if (fPinned) + { +# if RTLNX_VER_MIN(4,8,0) + mod_timer(&pSubTimer->u.Std.LnxTimer, pSubTimer->u.Std.ulNextJiffies); +# else + mod_timer_pinned(&pSubTimer->u.Std.LnxTimer, pSubTimer->u.Std.ulNextJiffies); +# endif + } + else +#endif + mod_timer(&pSubTimer->u.Std.LnxTimer, pSubTimer->u.Std.ulNextJiffies); + } + + /* Be a bit careful here since we could be racing the callback. */ + if (!rtTimerLnxCmpXchgState(&pSubTimer->enmState, RTTIMERLNXSTATE_ACTIVE, RTTIMERLNXSTATE_STARTING)) + rtTimerLnxCmpXchgState(&pSubTimer->enmState, RTTIMERLNXSTATE_ACTIVE, RTTIMERLNXSTATE_MP_STARTING); +} + + +/** + * Stops a sub-timer (RTTimerStart and rtTimerLinuxMpEvent()). + * + * The caller has already changed the state, so we will not be in a callback + * situation wrt to the calling thread. + * + * @param pSubTimer The sub-timer. + * @param fHighRes Whether the user requested a high resolution timer or not. + */ +static void rtTimerLnxStopSubTimer(PRTTIMERLNXSUBTIMER pSubTimer, bool fHighRes) +{ + RTTIMERLNX_LOG(("stopsubtimer %p %d\n", pSubTimer->pParent, fHighRes)); +#ifdef RTTIMER_LINUX_WITH_HRTIMER + if (fHighRes) + { + /* There is no equivalent to del_timer in the hrtimer API, + hrtimer_cancel() == del_timer_sync(). Just like the WARN_ON in + del_timer_sync() asserts, waiting for a timer callback to complete + is deadlock prone, so don't do it. */ + int rc = hrtimer_try_to_cancel(&pSubTimer->u.Hr.LnxTimer); + if (rc < 0) + { + hrtimer_start(&pSubTimer->u.Hr.LnxTimer, ktime_set(KTIME_SEC_MAX, 0), HRTIMER_MODE_ABS); + hrtimer_try_to_cancel(&pSubTimer->u.Hr.LnxTimer); + } + } + else +#endif + del_timer(&pSubTimer->u.Std.LnxTimer); + + rtTimerLnxSetState(&pSubTimer->enmState, RTTIMERLNXSTATE_STOPPED); +} + + +/** + * Used by RTTimerDestroy and rtTimerLnxCallbackDestroy to do the actual work. + * + * @param pTimer The timer in question. + */ +static void rtTimerLnxDestroyIt(PRTTIMER pTimer) +{ + RTSPINLOCK hSpinlock = pTimer->hSpinlock; + RTCPUID iCpu; + Assert(pTimer->fSuspended); + RTTIMERLNX_LOG(("destroyit %p\n", pTimer)); + + /* + * Remove the MP notifications first because it'll reduce the risk of + * us overtaking any MP event that might theoretically be racing us here. + */ +#ifdef CONFIG_SMP + if ( pTimer->cCpus > 1 + && hSpinlock != NIL_RTSPINLOCK) + { + int rc = RTMpNotificationDeregister(rtTimerLinuxMpEvent, pTimer); + AssertRC(rc); + } +#endif /* CONFIG_SMP */ + + /* + * Invalidate the handle. + */ + ASMAtomicWriteU32(&pTimer->u32Magic, ~RTTIMER_MAGIC); + + /* + * Make sure all timers have stopped executing since we're stopping them in + * an asynchronous manner up in rtTimerLnxStopSubTimer. + */ + iCpu = pTimer->cCpus; + while (iCpu-- > 0) + { +#ifdef RTTIMER_LINUX_WITH_HRTIMER + if (pTimer->fHighRes) + hrtimer_cancel(&pTimer->aSubTimers[iCpu].u.Hr.LnxTimer); + else +#endif + del_timer_sync(&pTimer->aSubTimers[iCpu].u.Std.LnxTimer); + } + + /* + * Finally, free the resources. + */ + RTMemFreeEx(pTimer, RT_UOFFSETOF_DYN(RTTIMER, aSubTimers[pTimer->cCpus])); + if (hSpinlock != NIL_RTSPINLOCK) + RTSpinlockDestroy(hSpinlock); +} + + +/** + * Workqueue callback (no DECLCALLBACK!) for deferred destruction. + * + * @param pWork Pointer to the DtorWorkqueueItem member of our timer + * structure. + */ +static void rtTimerLnxDestroyDeferred(RTR0LNXWORKQUEUEITEM *pWork) +{ + PRTTIMER pTimer = RT_FROM_MEMBER(pWork, RTTIMER, DtorWorkqueueItem); + rtTimerLnxDestroyIt(pTimer); +} + + +/** + * Called when the timer was destroyed by the callback function. + * + * @param pTimer The timer. + * @param pSubTimer The sub-timer which we're handling, the state of this + * will be RTTIMERLNXSTATE_CALLBACK_DESTROYING. + */ +static void rtTimerLnxCallbackDestroy(PRTTIMER pTimer, PRTTIMERLNXSUBTIMER pSubTimer) +{ + /* + * If it's an omni timer, the last dude does the destroying. + */ + if (pTimer->cCpus > 1) + { + uint32_t iCpu = pTimer->cCpus; + RTSpinlockAcquire(pTimer->hSpinlock); + + Assert(pSubTimer->enmState == RTTIMERLNXSTATE_CB_DESTROYING); + rtTimerLnxSetState(&pSubTimer->enmState, RTTIMERLNXSTATE_STOPPED); + + while (iCpu-- > 0) + if (rtTimerLnxGetState(&pTimer->aSubTimers[iCpu].enmState) != RTTIMERLNXSTATE_STOPPED) + { + RTSpinlockRelease(pTimer->hSpinlock); + return; + } + + RTSpinlockRelease(pTimer->hSpinlock); + } + + /* + * Destroying a timer from the callback is unsafe since the callout code + * might be touching the timer structure upon return (hrtimer does!). So, + * we have to defer the actual destruction to the IRPT workqueue. + */ + rtR0LnxWorkqueuePush(&pTimer->DtorWorkqueueItem, rtTimerLnxDestroyDeferred); +} + + +#ifdef CONFIG_SMP +/** + * Deal with a sub-timer that has migrated. + * + * @param pTimer The timer. + * @param pSubTimer The sub-timer. + */ +static void rtTimerLnxCallbackHandleMigration(PRTTIMER pTimer, PRTTIMERLNXSUBTIMER pSubTimer) +{ + RTTIMERLNXSTATE enmState; + if (pTimer->cCpus > 1) + RTSpinlockAcquire(pTimer->hSpinlock); + + do + { + enmState = rtTimerLnxGetState(&pSubTimer->enmState); + switch (enmState) + { + case RTTIMERLNXSTATE_STOPPING: + case RTTIMERLNXSTATE_MP_STOPPING: + enmState = RTTIMERLNXSTATE_STOPPED; + case RTTIMERLNXSTATE_STOPPED: + break; + + default: + AssertMsgFailed(("%d\n", enmState)); RT_FALL_THRU(); + case RTTIMERLNXSTATE_STARTING: + case RTTIMERLNXSTATE_MP_STARTING: + case RTTIMERLNXSTATE_ACTIVE: + case RTTIMERLNXSTATE_CALLBACK: + case RTTIMERLNXSTATE_CB_STOPPING: + case RTTIMERLNXSTATE_CB_RESTARTING: + if (rtTimerLnxCmpXchgState(&pSubTimer->enmState, RTTIMERLNXSTATE_STOPPED, enmState)) + enmState = RTTIMERLNXSTATE_STOPPED; + break; + + case RTTIMERLNXSTATE_CB_DESTROYING: + { + if (pTimer->cCpus > 1) + RTSpinlockRelease(pTimer->hSpinlock); + + rtTimerLnxCallbackDestroy(pTimer, pSubTimer); + return; + } + } + } while (enmState != RTTIMERLNXSTATE_STOPPED); + + if (pTimer->cCpus > 1) + RTSpinlockRelease(pTimer->hSpinlock); +} +#endif /* CONFIG_SMP */ + + +/** + * The slow path of rtTimerLnxChangeToCallbackState. + * + * @returns true if changed successfully, false if not. + * @param pSubTimer The sub-timer. + */ +static bool rtTimerLnxChangeToCallbackStateSlow(PRTTIMERLNXSUBTIMER pSubTimer) +{ + for (;;) + { + RTTIMERLNXSTATE enmState = rtTimerLnxGetState(&pSubTimer->enmState); + switch (enmState) + { + case RTTIMERLNXSTATE_ACTIVE: + case RTTIMERLNXSTATE_STARTING: + case RTTIMERLNXSTATE_MP_STARTING: + if (rtTimerLnxCmpXchgState(&pSubTimer->enmState, RTTIMERLNXSTATE_CALLBACK, enmState)) + return true; + break; + + case RTTIMERLNXSTATE_CALLBACK: + case RTTIMERLNXSTATE_CB_STOPPING: + case RTTIMERLNXSTATE_CB_RESTARTING: + case RTTIMERLNXSTATE_CB_DESTROYING: + AssertMsgFailed(("%d\n", enmState)); RT_FALL_THRU(); + default: + return false; + } + ASMNopPause(); + } +} + + +/** + * Tries to change the sub-timer state to 'callback'. + * + * @returns true if changed successfully, false if not. + * @param pSubTimer The sub-timer. + */ +DECLINLINE(bool) rtTimerLnxChangeToCallbackState(PRTTIMERLNXSUBTIMER pSubTimer) +{ + if (RT_LIKELY(rtTimerLnxCmpXchgState(&pSubTimer->enmState, RTTIMERLNXSTATE_CALLBACK, RTTIMERLNXSTATE_ACTIVE))) + return true; + return rtTimerLnxChangeToCallbackStateSlow(pSubTimer); +} + + +#ifdef RTTIMER_LINUX_WITH_HRTIMER +/** + * Timer callback function for high resolution timers. + * + * @returns HRTIMER_NORESTART or HRTIMER_RESTART depending on whether it's a + * one-shot or interval timer. + * @param pHrTimer Pointer to the sub-timer structure. + */ +static enum hrtimer_restart rtTimerLinuxHrCallback(struct hrtimer *pHrTimer) +{ + PRTTIMERLNXSUBTIMER pSubTimer = RT_FROM_MEMBER(pHrTimer, RTTIMERLNXSUBTIMER, u.Hr.LnxTimer); + PRTTIMER pTimer = pSubTimer->pParent; + + + RTTIMERLNX_LOG(("hrcallback %p\n", pTimer)); + if (RT_UNLIKELY(!rtTimerLnxChangeToCallbackState(pSubTimer))) + return HRTIMER_NORESTART; + +#ifdef CONFIG_SMP + /* + * Check for unwanted migration. + */ + if (pTimer->fAllCpus || pTimer->fSpecificCpu) + { + RTCPUID idCpu = RTMpCpuId(); + if (RT_UNLIKELY( pTimer->fAllCpus + ? (RTCPUID)(pSubTimer - &pTimer->aSubTimers[0]) != idCpu + : pTimer->idCpu != idCpu)) + { + rtTimerLnxCallbackHandleMigration(pTimer, pSubTimer); + return HRTIMER_NORESTART; + } + } +#endif + + if (pTimer->u64NanoInterval) + { + /* + * Periodic timer, run it and update the native timer afterwards so + * we can handle RTTimerStop and RTTimerChangeInterval from the + * callback as well as a racing control thread. + */ + pTimer->pfnTimer(pTimer, pTimer->pvUser, ++pSubTimer->iTick); + hrtimer_add_expires_ns(&pSubTimer->u.Hr.LnxTimer, ASMAtomicReadU64(&pTimer->u64NanoInterval)); + if (RT_LIKELY(rtTimerLnxCmpXchgState(&pSubTimer->enmState, RTTIMERLNXSTATE_ACTIVE, RTTIMERLNXSTATE_CALLBACK))) + return HRTIMER_RESTART; + } + else + { + /* + * One shot timer (no omni), stop it before dispatching it. + * Allow RTTimerStart as well as RTTimerDestroy to be called from + * the callback. + */ + ASMAtomicWriteBool(&pTimer->fSuspended, true); + pTimer->pfnTimer(pTimer, pTimer->pvUser, ++pSubTimer->iTick); + if (RT_LIKELY(rtTimerLnxCmpXchgState(&pSubTimer->enmState, RTTIMERLNXSTATE_STOPPED, RTTIMERLNXSTATE_CALLBACK))) + return HRTIMER_NORESTART; + } + + /* + * Some state change occurred while we were in the callback routine. + */ + for (;;) + { + RTTIMERLNXSTATE enmState = rtTimerLnxGetState(&pSubTimer->enmState); + switch (enmState) + { + case RTTIMERLNXSTATE_CB_DESTROYING: + rtTimerLnxCallbackDestroy(pTimer, pSubTimer); + return HRTIMER_NORESTART; + + case RTTIMERLNXSTATE_CB_STOPPING: + if (rtTimerLnxCmpXchgState(&pSubTimer->enmState, RTTIMERLNXSTATE_STOPPED, RTTIMERLNXSTATE_CB_STOPPING)) + return HRTIMER_NORESTART; + break; + + case RTTIMERLNXSTATE_CB_RESTARTING: + if (rtTimerLnxCmpXchgState(&pSubTimer->enmState, RTTIMERLNXSTATE_ACTIVE, RTTIMERLNXSTATE_CB_RESTARTING)) + { + pSubTimer->iTick = 0; + hrtimer_set_expires(&pSubTimer->u.Hr.LnxTimer, rtTimerLnxNanoToKt(pSubTimer->uNsRestartAt)); + return HRTIMER_RESTART; + } + break; + + default: + AssertMsgFailed(("%d\n", enmState)); + return HRTIMER_NORESTART; + } + ASMNopPause(); + } +} +#endif /* RTTIMER_LINUX_WITH_HRTIMER */ + + +#if RTLNX_VER_MIN(4,15,0) +/** + * Timer callback function for standard timers. + * + * @param pLnxTimer Pointer to the Linux timer structure. + */ +static void rtTimerLinuxStdCallback(struct timer_list *pLnxTimer) +{ + PRTTIMERLNXSUBTIMER pSubTimer = from_timer(pSubTimer, pLnxTimer, u.Std.LnxTimer); +#else +/** + * Timer callback function for standard timers. + * + * @param ulUser Address of the sub-timer structure. + */ +static void rtTimerLinuxStdCallback(unsigned long ulUser) +{ + PRTTIMERLNXSUBTIMER pSubTimer = (PRTTIMERLNXSUBTIMER)ulUser; +#endif + PRTTIMER pTimer = pSubTimer->pParent; + + RTTIMERLNX_LOG(("stdcallback %p\n", pTimer)); + if (RT_UNLIKELY(!rtTimerLnxChangeToCallbackState(pSubTimer))) + return; + +#ifdef CONFIG_SMP + /* + * Check for unwanted migration. + */ + if (pTimer->fAllCpus || pTimer->fSpecificCpu) + { + RTCPUID idCpu = RTMpCpuId(); + if (RT_UNLIKELY( pTimer->fAllCpus + ? (RTCPUID)(pSubTimer - &pTimer->aSubTimers[0]) != idCpu + : pTimer->idCpu != idCpu)) + { + rtTimerLnxCallbackHandleMigration(pTimer, pSubTimer); + return; + } + } +#endif + + if (pTimer->u64NanoInterval) + { + /* + * Interval timer, calculate the next timeout. + * + * The first time around, we'll re-adjust the u.Std.u64NextTS to + * try prevent some jittering if we were started at a bad time. + */ + const uint64_t iTick = ++pSubTimer->iTick; + unsigned long uCurJiffies = jiffies; + unsigned long ulNextJiffies; + uint64_t u64NanoInterval; + unsigned long cJiffies; + unsigned long flFlags; + + spin_lock_irqsave(&pTimer->ChgIntLock, flFlags); + u64NanoInterval = pTimer->u64NanoInterval; + cJiffies = pTimer->cJiffies; + if (RT_UNLIKELY(pSubTimer->u.Std.fFirstAfterChg)) + { + pSubTimer->u.Std.fFirstAfterChg = false; + pSubTimer->u.Std.u64NextTS = RTTimeSystemNanoTS(); + pSubTimer->u.Std.nsStartTS = pSubTimer->u.Std.u64NextTS - u64NanoInterval * (iTick - 1); + pSubTimer->u.Std.ulNextJiffies = uCurJiffies = jiffies; + } + spin_unlock_irqrestore(&pTimer->ChgIntLock, flFlags); + + pSubTimer->u.Std.u64NextTS += u64NanoInterval; + if (cJiffies) + { + ulNextJiffies = pSubTimer->u.Std.ulNextJiffies + cJiffies; + pSubTimer->u.Std.ulNextJiffies = ulNextJiffies; + if (time_after_eq(ulNextJiffies, uCurJiffies)) + { /* likely */ } + else + { + unsigned long cJiffiesBehind = uCurJiffies - ulNextJiffies; + ulNextJiffies = uCurJiffies + cJiffies / 2; + if (cJiffiesBehind >= HZ / 4) /* Conside if we're lagging too far behind. Screw the u64NextTS member. */ + pSubTimer->u.Std.ulNextJiffies = ulNextJiffies; + /*else: Don't update u.Std.ulNextJiffies so we can continue catching up in the next tick. */ + } + } + else + { + const uint64_t u64NanoTS = RTTimeSystemNanoTS(); + const int64_t cNsBehind = u64NanoTS - pSubTimer->u.Std.u64NextTS; + if (cNsBehind <= 0) + ulNextJiffies = uCurJiffies + rtTimerLnxNanoToJiffies(pSubTimer->u.Std.u64NextTS - u64NanoTS); + else if (u64NanoInterval >= RT_NS_1SEC_64 * 2 / HZ) + { + ulNextJiffies = uCurJiffies + rtTimerLnxNanoToJiffies(u64NanoInterval / 2); + if (cNsBehind >= RT_NS_1SEC_64 / HZ / 4) /* Conside if we're lagging too far behind. */ + pSubTimer->u.Std.u64NextTS = u64NanoTS + u64NanoInterval / 2; + } + else + { + ulNextJiffies = uCurJiffies + 1; + if (cNsBehind >= RT_NS_1SEC_64 / HZ / 4) /* Conside if we're lagging too far behind. */ + pSubTimer->u.Std.u64NextTS = u64NanoTS + RT_NS_1SEC_64 / HZ; + } + pSubTimer->u.Std.ulNextJiffies = ulNextJiffies; + } + + /* + * Run the timer and re-arm it unless the state changed . + * . + * We must re-arm it afterwards as we're not in a position to undo this . + * operation if for instance someone stopped or destroyed us while we . + * were in the callback. (Linux takes care of any races here.) + */ + pTimer->pfnTimer(pTimer, pTimer->pvUser, iTick); + if (RT_LIKELY(rtTimerLnxCmpXchgState(&pSubTimer->enmState, RTTIMERLNXSTATE_ACTIVE, RTTIMERLNXSTATE_CALLBACK))) + { +#ifdef CONFIG_SMP + if (pTimer->fSpecificCpu || pTimer->fAllCpus) + { +# if RTLNX_VER_MIN(4,8,0) + mod_timer(&pSubTimer->u.Std.LnxTimer, ulNextJiffies); +# else + mod_timer_pinned(&pSubTimer->u.Std.LnxTimer, ulNextJiffies); +# endif + } + else +#endif + mod_timer(&pSubTimer->u.Std.LnxTimer, ulNextJiffies); + return; + } + } + else + { + /* + * One shot timer, stop it before dispatching it. + * Allow RTTimerStart as well as RTTimerDestroy to be called from + * the callback. + */ + ASMAtomicWriteBool(&pTimer->fSuspended, true); + pTimer->pfnTimer(pTimer, pTimer->pvUser, ++pSubTimer->iTick); + if (RT_LIKELY(rtTimerLnxCmpXchgState(&pSubTimer->enmState, RTTIMERLNXSTATE_STOPPED, RTTIMERLNXSTATE_CALLBACK))) + return; + } + + /* + * Some state change occurred while we were in the callback routine. + */ + for (;;) + { + RTTIMERLNXSTATE enmState = rtTimerLnxGetState(&pSubTimer->enmState); + switch (enmState) + { + case RTTIMERLNXSTATE_CB_DESTROYING: + rtTimerLnxCallbackDestroy(pTimer, pSubTimer); + return; + + case RTTIMERLNXSTATE_CB_STOPPING: + if (rtTimerLnxCmpXchgState(&pSubTimer->enmState, RTTIMERLNXSTATE_STOPPED, RTTIMERLNXSTATE_CB_STOPPING)) + return; + break; + + case RTTIMERLNXSTATE_CB_RESTARTING: + if (rtTimerLnxCmpXchgState(&pSubTimer->enmState, RTTIMERLNXSTATE_ACTIVE, RTTIMERLNXSTATE_CB_RESTARTING)) + { + uint64_t u64NanoTS; + uint64_t u64NextTS; + unsigned long flFlags; + + spin_lock_irqsave(&pTimer->ChgIntLock, flFlags); + u64NextTS = pSubTimer->uNsRestartAt; + u64NanoTS = RTTimeSystemNanoTS(); + pSubTimer->iTick = 0; + pSubTimer->u.Std.u64NextTS = u64NextTS; + pSubTimer->u.Std.fFirstAfterChg = true; + pSubTimer->u.Std.ulNextJiffies = u64NextTS > u64NanoTS + ? jiffies + rtTimerLnxNanoToJiffies(u64NextTS - u64NanoTS) + : jiffies; + spin_unlock_irqrestore(&pTimer->ChgIntLock, flFlags); + +#ifdef CONFIG_SMP + if (pTimer->fSpecificCpu || pTimer->fAllCpus) + { +# if RTLNX_VER_MIN(4,8,0) + mod_timer(&pSubTimer->u.Std.LnxTimer, pSubTimer->u.Std.ulNextJiffies); +# else + mod_timer_pinned(&pSubTimer->u.Std.LnxTimer, pSubTimer->u.Std.ulNextJiffies); +# endif + } + else +#endif + mod_timer(&pSubTimer->u.Std.LnxTimer, pSubTimer->u.Std.ulNextJiffies); + return; + } + break; + + default: + AssertMsgFailed(("%d\n", enmState)); + return; + } + ASMNopPause(); + } +} + + +#ifdef CONFIG_SMP + +/** + * Per-cpu callback function (RTMpOnAll/RTMpOnSpecific). + * + * @param idCpu The current CPU. + * @param pvUser1 Pointer to the timer. + * @param pvUser2 Pointer to the argument structure. + */ +static DECLCALLBACK(void) rtTimerLnxStartAllOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2) +{ + PRTTIMERLINUXSTARTONCPUARGS pArgs = (PRTTIMERLINUXSTARTONCPUARGS)pvUser2; + PRTTIMER pTimer = (PRTTIMER)pvUser1; + Assert(idCpu < pTimer->cCpus); + rtTimerLnxStartSubTimer(&pTimer->aSubTimers[idCpu], pArgs->u64Now, pArgs->u64First, true /*fPinned*/, pTimer->fHighRes); +} + + +/** + * Worker for RTTimerStart() that takes care of the ugly bits. + * + * @returns RTTimerStart() return value. + * @param pTimer The timer. + * @param pArgs The argument structure. + */ +static int rtTimerLnxOmniStart(PRTTIMER pTimer, PRTTIMERLINUXSTARTONCPUARGS pArgs) +{ + RTCPUID iCpu; + RTCPUSET OnlineSet; + RTCPUSET OnlineSet2; + int rc2; + + /* + * Prepare all the sub-timers for the startup and then flag the timer + * as a whole as non-suspended, make sure we get them all before + * clearing fSuspended as the MP handler will be waiting on this + * should something happen while we're looping. + */ + RTSpinlockAcquire(pTimer->hSpinlock); + + /* Just make it a omni timer restriction that no stop/start races are allowed. */ + for (iCpu = 0; iCpu < pTimer->cCpus; iCpu++) + if (rtTimerLnxGetState(&pTimer->aSubTimers[iCpu].enmState) != RTTIMERLNXSTATE_STOPPED) + { + RTSpinlockRelease(pTimer->hSpinlock); + return VERR_TIMER_BUSY; + } + + do + { + RTMpGetOnlineSet(&OnlineSet); + for (iCpu = 0; iCpu < pTimer->cCpus; iCpu++) + { + Assert(pTimer->aSubTimers[iCpu].enmState != RTTIMERLNXSTATE_MP_STOPPING); + rtTimerLnxSetState(&pTimer->aSubTimers[iCpu].enmState, + RTCpuSetIsMember(&OnlineSet, iCpu) + ? RTTIMERLNXSTATE_STARTING + : RTTIMERLNXSTATE_STOPPED); + } + } while (!RTCpuSetIsEqual(&OnlineSet, RTMpGetOnlineSet(&OnlineSet2))); + + ASMAtomicWriteBool(&pTimer->fSuspended, false); + + RTSpinlockRelease(pTimer->hSpinlock); + + /* + * Start them (can't find any exported function that allows me to + * do this without the cross calls). + */ + pArgs->u64Now = RTTimeSystemNanoTS(); + rc2 = RTMpOnAll(rtTimerLnxStartAllOnCpu, pTimer, pArgs); + AssertRC(rc2); /* screw this if it fails. */ + + /* + * Reset the sub-timers who didn't start up (ALL CPUs case). + */ + RTSpinlockAcquire(pTimer->hSpinlock); + + for (iCpu = 0; iCpu < pTimer->cCpus; iCpu++) + if (rtTimerLnxCmpXchgState(&pTimer->aSubTimers[iCpu].enmState, RTTIMERLNXSTATE_STOPPED, RTTIMERLNXSTATE_STARTING)) + { + /** @todo very odd case for a rainy day. Cpus that temporarily went offline while + * we were between calls needs to nudged as the MP handler will ignore events for + * them because of the STARTING state. This is an extremely unlikely case - not that + * that means anything in my experience... ;-) */ + RTTIMERLNX_LOG(("what!? iCpu=%u -> didn't start\n", iCpu)); + } + + RTSpinlockRelease(pTimer->hSpinlock); + + return VINF_SUCCESS; +} + + +/** + * Worker for RTTimerStop() that takes care of the ugly SMP bits. + * + * @returns true if there was any active callbacks, false if not. + * @param pTimer The timer (valid). + * @param fForDestroy Whether this is for RTTimerDestroy or not. + */ +static bool rtTimerLnxOmniStop(PRTTIMER pTimer, bool fForDestroy) +{ + bool fActiveCallbacks = false; + RTCPUID iCpu; + RTTIMERLNXSTATE enmState; + + + /* + * Mark the timer as suspended and flag all timers as stopping, except + * for those being stopped by an MP event. + */ + RTSpinlockAcquire(pTimer->hSpinlock); + + ASMAtomicWriteBool(&pTimer->fSuspended, true); + for (iCpu = 0; iCpu < pTimer->cCpus; iCpu++) + { + for (;;) + { + enmState = rtTimerLnxGetState(&pTimer->aSubTimers[iCpu].enmState); + if ( enmState == RTTIMERLNXSTATE_STOPPED + || enmState == RTTIMERLNXSTATE_MP_STOPPING) + break; + if ( enmState == RTTIMERLNXSTATE_CALLBACK + || enmState == RTTIMERLNXSTATE_CB_STOPPING + || enmState == RTTIMERLNXSTATE_CB_RESTARTING) + { + Assert(enmState != RTTIMERLNXSTATE_CB_STOPPING || fForDestroy); + if (rtTimerLnxCmpXchgState(&pTimer->aSubTimers[iCpu].enmState, + !fForDestroy ? RTTIMERLNXSTATE_CB_STOPPING : RTTIMERLNXSTATE_CB_DESTROYING, + enmState)) + { + fActiveCallbacks = true; + break; + } + } + else + { + Assert(enmState == RTTIMERLNXSTATE_ACTIVE); + if (rtTimerLnxCmpXchgState(&pTimer->aSubTimers[iCpu].enmState, RTTIMERLNXSTATE_STOPPING, enmState)) + break; + } + ASMNopPause(); + } + } + + RTSpinlockRelease(pTimer->hSpinlock); + + /* + * Do the actual stopping. Fortunately, this doesn't require any IPIs. + * Unfortunately it cannot be done synchronously. + */ + for (iCpu = 0; iCpu < pTimer->cCpus; iCpu++) + if (rtTimerLnxGetState(&pTimer->aSubTimers[iCpu].enmState) == RTTIMERLNXSTATE_STOPPING) + rtTimerLnxStopSubTimer(&pTimer->aSubTimers[iCpu], pTimer->fHighRes); + + return fActiveCallbacks; +} + + +/** + * Per-cpu callback function (RTMpOnSpecific) used by rtTimerLinuxMpEvent() + * to start a sub-timer on a cpu that just have come online. + * + * @param idCpu The current CPU. + * @param pvUser1 Pointer to the timer. + * @param pvUser2 Pointer to the argument structure. + */ +static DECLCALLBACK(void) rtTimerLinuxMpStartOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2) +{ + PRTTIMERLINUXSTARTONCPUARGS pArgs = (PRTTIMERLINUXSTARTONCPUARGS)pvUser2; + PRTTIMER pTimer = (PRTTIMER)pvUser1; + RTSPINLOCK hSpinlock; + Assert(idCpu < pTimer->cCpus); + + /* + * We have to be kind of careful here as we might be racing RTTimerStop + * (and/or RTTimerDestroy, thus the paranoia. + */ + hSpinlock = pTimer->hSpinlock; + if ( hSpinlock != NIL_RTSPINLOCK + && pTimer->u32Magic == RTTIMER_MAGIC) + { + RTSpinlockAcquire(hSpinlock); + + if ( !ASMAtomicUoReadBool(&pTimer->fSuspended) + && pTimer->u32Magic == RTTIMER_MAGIC) + { + /* We're sane and the timer is not suspended yet. */ + PRTTIMERLNXSUBTIMER pSubTimer = &pTimer->aSubTimers[idCpu]; + if (rtTimerLnxCmpXchgState(&pSubTimer->enmState, RTTIMERLNXSTATE_MP_STARTING, RTTIMERLNXSTATE_STOPPED)) + rtTimerLnxStartSubTimer(pSubTimer, pArgs->u64Now, pArgs->u64First, true /*fPinned*/, pTimer->fHighRes); + } + + RTSpinlockRelease(hSpinlock); + } +} + + +/** + * MP event notification callback. + * + * @param enmEvent The event. + * @param idCpu The cpu it applies to. + * @param pvUser The timer. + */ +static DECLCALLBACK(void) rtTimerLinuxMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser) +{ + PRTTIMER pTimer = (PRTTIMER)pvUser; + PRTTIMERLNXSUBTIMER pSubTimer = &pTimer->aSubTimers[idCpu]; + RTSPINLOCK hSpinlock; + + Assert(idCpu < pTimer->cCpus); + + /* + * Some initial paranoia. + */ + if (pTimer->u32Magic != RTTIMER_MAGIC) + return; + hSpinlock = pTimer->hSpinlock; + if (hSpinlock == NIL_RTSPINLOCK) + return; + + RTSpinlockAcquire(hSpinlock); + + /* Is it active? */ + if ( !ASMAtomicUoReadBool(&pTimer->fSuspended) + && pTimer->u32Magic == RTTIMER_MAGIC) + { + switch (enmEvent) + { + /* + * Try do it without leaving the spin lock, but if we have to, retake it + * when we're on the right cpu. + */ + case RTMPEVENT_ONLINE: + if (rtTimerLnxCmpXchgState(&pSubTimer->enmState, RTTIMERLNXSTATE_MP_STARTING, RTTIMERLNXSTATE_STOPPED)) + { + RTTIMERLINUXSTARTONCPUARGS Args; + Args.u64Now = RTTimeSystemNanoTS(); + Args.u64First = 0; + + if (RTMpCpuId() == idCpu) + rtTimerLnxStartSubTimer(pSubTimer, Args.u64Now, Args.u64First, true /*fPinned*/, pTimer->fHighRes); + else + { + rtTimerLnxSetState(&pSubTimer->enmState, RTTIMERLNXSTATE_STOPPED); /* we'll recheck it. */ + RTSpinlockRelease(hSpinlock); + + RTMpOnSpecific(idCpu, rtTimerLinuxMpStartOnCpu, pTimer, &Args); + return; /* we've left the spinlock */ + } + } + break; + + /* + * The CPU is (going) offline, make sure the sub-timer is stopped. + * + * Linux will migrate it to a different CPU, but we don't want this. The + * timer function is checking for this. + */ + case RTMPEVENT_OFFLINE: + { + RTTIMERLNXSTATE enmState; + while ( (enmState = rtTimerLnxGetState(&pSubTimer->enmState)) == RTTIMERLNXSTATE_ACTIVE + || enmState == RTTIMERLNXSTATE_CALLBACK + || enmState == RTTIMERLNXSTATE_CB_RESTARTING) + { + if (enmState == RTTIMERLNXSTATE_ACTIVE) + { + if (rtTimerLnxCmpXchgState(&pSubTimer->enmState, RTTIMERLNXSTATE_MP_STOPPING, RTTIMERLNXSTATE_ACTIVE)) + { + RTSpinlockRelease(hSpinlock); + + rtTimerLnxStopSubTimer(pSubTimer, pTimer->fHighRes); + return; /* we've left the spinlock */ + } + } + else if (rtTimerLnxCmpXchgState(&pSubTimer->enmState, RTTIMERLNXSTATE_CB_STOPPING, enmState)) + break; + + /* State not stable, try again. */ + ASMNopPause(); + } + break; + } + } + } + + RTSpinlockRelease(hSpinlock); +} + +#endif /* CONFIG_SMP */ + + +/** + * Callback function use by RTTimerStart via RTMpOnSpecific to start a timer + * running on a specific CPU. + * + * @param idCpu The current CPU. + * @param pvUser1 Pointer to the timer. + * @param pvUser2 Pointer to the argument structure. + */ +static DECLCALLBACK(void) rtTimerLnxStartOnSpecificCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2) +{ + PRTTIMERLINUXSTARTONCPUARGS pArgs = (PRTTIMERLINUXSTARTONCPUARGS)pvUser2; + PRTTIMER pTimer = (PRTTIMER)pvUser1; + RT_NOREF_PV(idCpu); + rtTimerLnxStartSubTimer(&pTimer->aSubTimers[0], pArgs->u64Now, pArgs->u64First, true /*fPinned*/, pTimer->fHighRes); +} + + +RTDECL(int) RTTimerStart(PRTTIMER pTimer, uint64_t u64First) +{ + RTTIMERLINUXSTARTONCPUARGS Args; + int rc2; + IPRT_LINUX_SAVE_EFL_AC(); + + /* + * Validate. + */ + AssertPtrReturn(pTimer, VERR_INVALID_HANDLE); + AssertReturn(pTimer->u32Magic == RTTIMER_MAGIC, VERR_INVALID_HANDLE); + + if (!ASMAtomicUoReadBool(&pTimer->fSuspended)) + return VERR_TIMER_ACTIVE; + RTTIMERLNX_LOG(("start %p cCpus=%d\n", pTimer, pTimer->cCpus)); + + Args.u64First = u64First; +#ifdef CONFIG_SMP + /* + * Omni timer? + */ + if (pTimer->fAllCpus) + { + rc2 = rtTimerLnxOmniStart(pTimer, &Args); + IPRT_LINUX_RESTORE_EFL_AC(); + return rc2; + } +#endif + + /* + * Simple timer - Pretty straight forward if it wasn't for restarting. + */ + Args.u64Now = RTTimeSystemNanoTS(); + ASMAtomicWriteU64(&pTimer->aSubTimers[0].uNsRestartAt, Args.u64Now + u64First); + for (;;) + { + RTTIMERLNXSTATE enmState = rtTimerLnxGetState(&pTimer->aSubTimers[0].enmState); + switch (enmState) + { + case RTTIMERLNXSTATE_STOPPED: + if (rtTimerLnxCmpXchgState(&pTimer->aSubTimers[0].enmState, RTTIMERLNXSTATE_STARTING, RTTIMERLNXSTATE_STOPPED)) + { + ASMAtomicWriteBool(&pTimer->fSuspended, false); + if (!pTimer->fSpecificCpu) + rtTimerLnxStartSubTimer(&pTimer->aSubTimers[0], Args.u64Now, Args.u64First, + false /*fPinned*/, pTimer->fHighRes); + else + { + rc2 = RTMpOnSpecific(pTimer->idCpu, rtTimerLnxStartOnSpecificCpu, pTimer, &Args); + if (RT_FAILURE(rc2)) + { + /* Suspend it, the cpu id is probably invalid or offline. */ + ASMAtomicWriteBool(&pTimer->fSuspended, true); + rtTimerLnxSetState(&pTimer->aSubTimers[0].enmState, RTTIMERLNXSTATE_STOPPED); + return rc2; + } + } + IPRT_LINUX_RESTORE_EFL_AC(); + return VINF_SUCCESS; + } + break; + + case RTTIMERLNXSTATE_CALLBACK: + case RTTIMERLNXSTATE_CB_STOPPING: + if (rtTimerLnxCmpXchgState(&pTimer->aSubTimers[0].enmState, RTTIMERLNXSTATE_CB_RESTARTING, enmState)) + { + ASMAtomicWriteBool(&pTimer->fSuspended, false); + IPRT_LINUX_RESTORE_EFL_AC(); + return VINF_SUCCESS; + } + break; + + default: + AssertMsgFailed(("%d\n", enmState)); + IPRT_LINUX_RESTORE_EFL_AC(); + return VERR_INTERNAL_ERROR_4; + } + ASMNopPause(); + } +} +RT_EXPORT_SYMBOL(RTTimerStart); + + +/** + * Common worker for RTTimerStop and RTTimerDestroy. + * + * @returns true if there was any active callbacks, false if not. + * @param pTimer The timer to stop. + * @param fForDestroy Whether it's RTTimerDestroy calling or not. + */ +static bool rtTimerLnxStop(PRTTIMER pTimer, bool fForDestroy) +{ + RTTIMERLNX_LOG(("lnxstop %p %d\n", pTimer, fForDestroy)); +#ifdef CONFIG_SMP + /* + * Omni timer? + */ + if (pTimer->fAllCpus) + return rtTimerLnxOmniStop(pTimer, fForDestroy); +#endif + + /* + * Simple timer. + */ + ASMAtomicWriteBool(&pTimer->fSuspended, true); + for (;;) + { + RTTIMERLNXSTATE enmState = rtTimerLnxGetState(&pTimer->aSubTimers[0].enmState); + switch (enmState) + { + case RTTIMERLNXSTATE_ACTIVE: + if (rtTimerLnxCmpXchgState(&pTimer->aSubTimers[0].enmState, RTTIMERLNXSTATE_STOPPING, RTTIMERLNXSTATE_ACTIVE)) + { + rtTimerLnxStopSubTimer(&pTimer->aSubTimers[0], pTimer->fHighRes); + return false; + } + break; + + case RTTIMERLNXSTATE_CALLBACK: + case RTTIMERLNXSTATE_CB_RESTARTING: + case RTTIMERLNXSTATE_CB_STOPPING: + Assert(enmState != RTTIMERLNXSTATE_CB_STOPPING || fForDestroy); + if (rtTimerLnxCmpXchgState(&pTimer->aSubTimers[0].enmState, + !fForDestroy ? RTTIMERLNXSTATE_CB_STOPPING : RTTIMERLNXSTATE_CB_DESTROYING, + enmState)) + return true; + break; + + case RTTIMERLNXSTATE_STOPPED: + return VINF_SUCCESS; + + case RTTIMERLNXSTATE_CB_DESTROYING: + AssertMsgFailed(("enmState=%d pTimer=%p\n", enmState, pTimer)); + return true; + + default: + case RTTIMERLNXSTATE_STARTING: + case RTTIMERLNXSTATE_MP_STARTING: + case RTTIMERLNXSTATE_STOPPING: + case RTTIMERLNXSTATE_MP_STOPPING: + AssertMsgFailed(("enmState=%d pTimer=%p\n", enmState, pTimer)); + return false; + } + + /* State not stable, try again. */ + ASMNopPause(); + } +} + + +RTDECL(int) RTTimerStop(PRTTIMER pTimer) +{ + /* + * Validate. + */ + IPRT_LINUX_SAVE_EFL_AC(); + AssertPtrReturn(pTimer, VERR_INVALID_HANDLE); + AssertReturn(pTimer->u32Magic == RTTIMER_MAGIC, VERR_INVALID_HANDLE); + RTTIMERLNX_LOG(("stop %p\n", pTimer)); + + if (ASMAtomicUoReadBool(&pTimer->fSuspended)) + return VERR_TIMER_SUSPENDED; + + rtTimerLnxStop(pTimer, false /*fForDestroy*/); + + IPRT_LINUX_RESTORE_EFL_AC(); + return VINF_SUCCESS; +} +RT_EXPORT_SYMBOL(RTTimerStop); + + +RTDECL(int) RTTimerChangeInterval(PRTTIMER pTimer, uint64_t u64NanoInterval) +{ + unsigned long cJiffies; + unsigned long flFlags; + IPRT_LINUX_SAVE_EFL_AC(); + + /* + * Validate. + */ + AssertPtrReturn(pTimer, VERR_INVALID_HANDLE); + AssertReturn(pTimer->u32Magic == RTTIMER_MAGIC, VERR_INVALID_HANDLE); + AssertReturn(u64NanoInterval, VERR_INVALID_PARAMETER); + AssertReturn(u64NanoInterval < UINT64_MAX / 8, VERR_INVALID_PARAMETER); + AssertReturn(pTimer->u64NanoInterval, VERR_INVALID_STATE); + RTTIMERLNX_LOG(("change %p %llu\n", pTimer, u64NanoInterval)); + +#ifdef RTTIMER_LINUX_WITH_HRTIMER + /* + * For the high resolution timers it is easy since we don't care so much + * about when it is applied to the sub-timers. + */ + if (pTimer->fHighRes) + { + ASMAtomicWriteU64(&pTimer->u64NanoInterval, u64NanoInterval); + IPRT_LINUX_RESTORE_EFL_AC(); + return VINF_SUCCESS; + } +#endif + + /* + * Standard timers have a bit more complicated way of calculating + * their interval and such. So, forget omni timers for now. + */ + if (pTimer->cCpus > 1) + return VERR_NOT_SUPPORTED; + + cJiffies = u64NanoInterval / (RT_NS_1SEC / HZ); + if (cJiffies * (RT_NS_1SEC / HZ) != u64NanoInterval) + cJiffies = 0; + + spin_lock_irqsave(&pTimer->ChgIntLock, flFlags); + pTimer->aSubTimers[0].u.Std.fFirstAfterChg = true; + pTimer->cJiffies = cJiffies; + ASMAtomicWriteU64(&pTimer->u64NanoInterval, u64NanoInterval); + spin_unlock_irqrestore(&pTimer->ChgIntLock, flFlags); + IPRT_LINUX_RESTORE_EFL_AC(); + return VINF_SUCCESS; +} +RT_EXPORT_SYMBOL(RTTimerChangeInterval); + + +RTDECL(int) RTTimerDestroy(PRTTIMER pTimer) +{ + bool fCanDestroy; + IPRT_LINUX_SAVE_EFL_AC(); + + /* + * Validate. It's ok to pass NULL pointer. + */ + if (pTimer == /*NIL_RTTIMER*/ NULL) + return VINF_SUCCESS; + AssertPtrReturn(pTimer, VERR_INVALID_HANDLE); + AssertReturn(pTimer->u32Magic == RTTIMER_MAGIC, VERR_INVALID_HANDLE); + RTTIMERLNX_LOG(("destroy %p\n", pTimer)); +/** @todo We should invalidate the magic here! */ + + /* + * Stop the timer if it's still active, then destroy it if we can. + */ + if (!ASMAtomicUoReadBool(&pTimer->fSuspended)) + fCanDestroy = rtTimerLnxStop(pTimer, true /*fForDestroy*/); + else + { + uint32_t iCpu = pTimer->cCpus; + if (pTimer->cCpus > 1) + RTSpinlockAcquire(pTimer->hSpinlock); + + fCanDestroy = true; + while (iCpu-- > 0) + { + for (;;) + { + RTTIMERLNXSTATE enmState = rtTimerLnxGetState(&pTimer->aSubTimers[iCpu].enmState); + switch (enmState) + { + case RTTIMERLNXSTATE_CALLBACK: + case RTTIMERLNXSTATE_CB_RESTARTING: + case RTTIMERLNXSTATE_CB_STOPPING: + if (!rtTimerLnxCmpXchgState(&pTimer->aSubTimers[iCpu].enmState, RTTIMERLNXSTATE_CB_DESTROYING, enmState)) + continue; + fCanDestroy = false; + break; + + case RTTIMERLNXSTATE_CB_DESTROYING: + AssertMsgFailed(("%d\n", enmState)); + fCanDestroy = false; + break; + default: + break; + } + break; + } + } + + if (pTimer->cCpus > 1) + RTSpinlockRelease(pTimer->hSpinlock); + } + + if (fCanDestroy) + { + /* For paranoid reasons, defer actually destroying the semaphore when + in atomic or interrupt context. */ +#if RTLNX_VER_MIN(2,5,32) + if (in_atomic() || in_interrupt()) +#else + if (in_interrupt()) +#endif + rtR0LnxWorkqueuePush(&pTimer->DtorWorkqueueItem, rtTimerLnxDestroyDeferred); + else + rtTimerLnxDestroyIt(pTimer); + } + + IPRT_LINUX_RESTORE_EFL_AC(); + return VINF_SUCCESS; +} +RT_EXPORT_SYMBOL(RTTimerDestroy); + + +RTDECL(int) RTTimerCreateEx(PRTTIMER *ppTimer, uint64_t u64NanoInterval, uint32_t fFlags, PFNRTTIMER pfnTimer, void *pvUser) +{ + PRTTIMER pTimer; + RTCPUID iCpu; + unsigned cCpus; + int rc; + IPRT_LINUX_SAVE_EFL_AC(); + + rtR0LnxWorkqueueFlush(); /* for 2.4 */ + *ppTimer = NULL; + + /* + * Validate flags. + */ + if (!RTTIMER_FLAGS_ARE_VALID(fFlags)) + { + IPRT_LINUX_RESTORE_EFL_AC(); + return VERR_INVALID_PARAMETER; + } + if ( (fFlags & RTTIMER_FLAGS_CPU_SPECIFIC) + && (fFlags & RTTIMER_FLAGS_CPU_ALL) != RTTIMER_FLAGS_CPU_ALL + && !RTMpIsCpuPossible(RTMpCpuIdFromSetIndex(fFlags & RTTIMER_FLAGS_CPU_MASK))) + { + IPRT_LINUX_RESTORE_EFL_AC(); + return VERR_CPU_NOT_FOUND; + } + + /* + * Allocate the timer handler. + */ + cCpus = 1; +#ifdef CONFIG_SMP + if ((fFlags & RTTIMER_FLAGS_CPU_ALL) == RTTIMER_FLAGS_CPU_ALL) + { + cCpus = RTMpGetMaxCpuId() + 1; + Assert(cCpus <= RTCPUSET_MAX_CPUS); /* On linux we have a 1:1 relationship between cpuid and set index. */ + AssertReturnStmt(u64NanoInterval, IPRT_LINUX_RESTORE_EFL_AC(), VERR_NOT_IMPLEMENTED); /* We don't implement single shot on all cpus, sorry. */ + } +#endif + + rc = RTMemAllocEx(RT_UOFFSETOF_DYN(RTTIMER, aSubTimers[cCpus]), 0, + RTMEMALLOCEX_FLAGS_ZEROED | RTMEMALLOCEX_FLAGS_ANY_CTX_FREE, (void **)&pTimer); + if (RT_FAILURE(rc)) + { + IPRT_LINUX_RESTORE_EFL_AC(); + return rc; + } + + /* + * Initialize it. + */ + pTimer->u32Magic = RTTIMER_MAGIC; + pTimer->hSpinlock = NIL_RTSPINLOCK; + pTimer->fSuspended = true; + pTimer->fHighRes = !!(fFlags & RTTIMER_FLAGS_HIGH_RES); +#ifdef CONFIG_SMP + pTimer->fSpecificCpu = (fFlags & RTTIMER_FLAGS_CPU_SPECIFIC) && (fFlags & RTTIMER_FLAGS_CPU_ALL) != RTTIMER_FLAGS_CPU_ALL; + pTimer->fAllCpus = (fFlags & RTTIMER_FLAGS_CPU_ALL) == RTTIMER_FLAGS_CPU_ALL; + pTimer->idCpu = pTimer->fSpecificCpu + ? RTMpCpuIdFromSetIndex(fFlags & RTTIMER_FLAGS_CPU_MASK) + : NIL_RTCPUID; +#else + pTimer->fSpecificCpu = !!(fFlags & RTTIMER_FLAGS_CPU_SPECIFIC); + pTimer->idCpu = RTMpCpuId(); +#endif + pTimer->cCpus = cCpus; + pTimer->pfnTimer = pfnTimer; + pTimer->pvUser = pvUser; + pTimer->u64NanoInterval = u64NanoInterval; + pTimer->cJiffies = u64NanoInterval / (RT_NS_1SEC / HZ); + if (pTimer->cJiffies * (RT_NS_1SEC / HZ) != u64NanoInterval) + pTimer->cJiffies = 0; + spin_lock_init(&pTimer->ChgIntLock); + + for (iCpu = 0; iCpu < cCpus; iCpu++) + { +#ifdef RTTIMER_LINUX_WITH_HRTIMER + if (pTimer->fHighRes) + { + hrtimer_init(&pTimer->aSubTimers[iCpu].u.Hr.LnxTimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + pTimer->aSubTimers[iCpu].u.Hr.LnxTimer.function = rtTimerLinuxHrCallback; + } + else +#endif + { +#if RTLNX_VER_MIN(4,15,0) + timer_setup(&pTimer->aSubTimers[iCpu].u.Std.LnxTimer, rtTimerLinuxStdCallback, TIMER_PINNED); +#elif RTLNX_VER_MIN(4,8,0) + init_timer_pinned(&pTimer->aSubTimers[iCpu].u.Std.LnxTimer); +#else + init_timer(&pTimer->aSubTimers[iCpu].u.Std.LnxTimer); +#endif +#if RTLNX_VER_MAX(4,15,0) + pTimer->aSubTimers[iCpu].u.Std.LnxTimer.data = (unsigned long)&pTimer->aSubTimers[iCpu]; + pTimer->aSubTimers[iCpu].u.Std.LnxTimer.function = rtTimerLinuxStdCallback; +#endif + pTimer->aSubTimers[iCpu].u.Std.LnxTimer.expires = jiffies; + pTimer->aSubTimers[iCpu].u.Std.u64NextTS = 0; + } + pTimer->aSubTimers[iCpu].iTick = 0; + pTimer->aSubTimers[iCpu].pParent = pTimer; + pTimer->aSubTimers[iCpu].enmState = RTTIMERLNXSTATE_STOPPED; + } + +#ifdef CONFIG_SMP + /* + * If this is running on ALL cpus, we'll have to register a callback + * for MP events (so timers can be started/stopped on cpus going + * online/offline). We also create the spinlock for synchronizing + * stop/start/mp-event. + */ + if (cCpus > 1) + { + int rc = RTSpinlockCreate(&pTimer->hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "RTTimerLnx"); + if (RT_SUCCESS(rc)) + rc = RTMpNotificationRegister(rtTimerLinuxMpEvent, pTimer); + else + pTimer->hSpinlock = NIL_RTSPINLOCK; + if (RT_FAILURE(rc)) + { + RTTimerDestroy(pTimer); + IPRT_LINUX_RESTORE_EFL_AC(); + return rc; + } + } +#endif /* CONFIG_SMP */ + + RTTIMERLNX_LOG(("create %p hires=%d fFlags=%#x cCpus=%u\n", pTimer, pTimer->fHighRes, fFlags, cCpus)); + *ppTimer = pTimer; + IPRT_LINUX_RESTORE_EFL_AC(); + return VINF_SUCCESS; +} +RT_EXPORT_SYMBOL(RTTimerCreateEx); + + +RTDECL(uint32_t) RTTimerGetSystemGranularity(void) +{ +#if 0 /** @todo Not sure if this is what we want or not... Add new API for + * querying the resolution of the high res timers? */ + struct timespec Ts; + int rc; + IPRT_LINUX_SAVE_EFL_AC(); + rc = hrtimer_get_res(CLOCK_MONOTONIC, &Ts); + IPRT_LINUX_RESTORE_EFL_AC(); + if (!rc) + { + Assert(!Ts.tv_sec); + return Ts.tv_nsec; + } +#endif + /* */ +#if RTLNX_VER_MAX(4,9,0) || RTLNX_VER_MIN(4,13,0) + /* On 4.9, 4.10 and 4.12 we've observed tstRTR0Timer failures of the omni timer tests + where we get about half of the ticks we want. The failing test is using this value + as interval. So, this is a very very crude hack to try make omni timers work + correctly without actually knowing what's going wrong... */ + return RT_NS_1SEC * 2 / HZ; /* ns */ +#else + return RT_NS_1SEC / HZ; /* ns */ +#endif +} +RT_EXPORT_SYMBOL(RTTimerGetSystemGranularity); + + +RTDECL(int) RTTimerRequestSystemGranularity(uint32_t u32Request, uint32_t *pu32Granted) +{ + RT_NOREF_PV(u32Request); RT_NOREF_PV(*pu32Granted); + return VERR_NOT_SUPPORTED; +} +RT_EXPORT_SYMBOL(RTTimerRequestSystemGranularity); + + +RTDECL(int) RTTimerReleaseSystemGranularity(uint32_t u32Granted) +{ + RT_NOREF_PV(u32Granted); + return VERR_NOT_SUPPORTED; +} +RT_EXPORT_SYMBOL(RTTimerReleaseSystemGranularity); + + +RTDECL(bool) RTTimerCanDoHighResolution(void) +{ +#ifdef RTTIMER_LINUX_WITH_HRTIMER + return true; +#else + return false; +#endif +} +RT_EXPORT_SYMBOL(RTTimerCanDoHighResolution); + |