diff options
Diffstat (limited to '')
-rw-r--r-- | src/VBox/Runtime/r0drv/nt/mp-r0drv-nt.cpp | 1979 |
1 files changed, 1979 insertions, 0 deletions
diff --git a/src/VBox/Runtime/r0drv/nt/mp-r0drv-nt.cpp b/src/VBox/Runtime/r0drv/nt/mp-r0drv-nt.cpp new file mode 100644 index 00000000..9419e41f --- /dev/null +++ b/src/VBox/Runtime/r0drv/nt/mp-r0drv-nt.cpp @@ -0,0 +1,1979 @@ +/* $Id: mp-r0drv-nt.cpp $ */ +/** @file + * IPRT - Multiprocessor, Ring-0 Driver, NT. + */ + +/* + * Copyright (C) 2008-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL) only, as it comes in the "COPYING.CDDL" file of the + * VirtualBox OSE distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#include "the-nt-kernel.h" + +#include <iprt/mp.h> +#include <iprt/cpuset.h> +#include <iprt/err.h> +#include <iprt/asm.h> +#include <iprt/log.h> +#include <iprt/mem.h> +#include <iprt/time.h> +#include "r0drv/mp-r0drv.h" +#include "symdb.h" +#include "internal-r0drv-nt.h" +#include "internal/mp.h" + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +typedef enum +{ + RT_NT_CPUID_SPECIFIC, + RT_NT_CPUID_PAIR, + RT_NT_CPUID_OTHERS, + RT_NT_CPUID_ALL +} RT_NT_CPUID; + + +/** + * Used by the RTMpOnSpecific. + */ +typedef struct RTMPNTONSPECIFICARGS +{ + /** Set if we're executing. */ + bool volatile fExecuting; + /** Set when done executing. */ + bool volatile fDone; + /** Number of references to this heap block. */ + uint32_t volatile cRefs; + /** Event that the calling thread is waiting on. */ + KEVENT DoneEvt; + /** The deferred procedure call object. */ + KDPC Dpc; + /** The callback argument package. */ + RTMPARGS CallbackArgs; +} RTMPNTONSPECIFICARGS; +/** Pointer to an argument/state structure for RTMpOnSpecific on NT. */ +typedef RTMPNTONSPECIFICARGS *PRTMPNTONSPECIFICARGS; + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ +/** Inactive bit for g_aidRtMpNtByCpuSetIdx. */ +#define RTMPNT_ID_F_INACTIVE RT_BIT_32(31) + + +/********************************************************************************************************************************* +* Global Variables * +*********************************************************************************************************************************/ +/** Maximum number of processor groups. */ +uint32_t g_cRtMpNtMaxGroups; +/** Maximum number of processors. */ +uint32_t g_cRtMpNtMaxCpus; +/** Number of active processors. */ +uint32_t volatile g_cRtMpNtActiveCpus; +/** The NT CPU set. + * KeQueryActiveProcssors() cannot be called at all IRQLs and therefore we'll + * have to cache it. Fortunately, NT doesn't really support taking CPUs offline, + * and taking them online was introduced with W2K8 where it is intended for virtual + * machines and not real HW. We update this, g_cRtMpNtActiveCpus and + * g_aidRtMpNtByCpuSetIdx from the rtR0NtMpProcessorChangeCallback. + */ +RTCPUSET g_rtMpNtCpuSet; + +/** Static per group info. + * @remarks With 256 groups this takes up 33KB. */ +static struct +{ + /** The max CPUs in the group. */ + uint16_t cMaxCpus; + /** The number of active CPUs at the time of initialization. */ + uint16_t cActiveCpus; + /** CPU set indexes for each CPU in the group. */ + int16_t aidxCpuSetMembers[64]; +} g_aRtMpNtCpuGroups[256]; +/** Maps CPU set indexes to RTCPUID. + * Inactive CPUs has bit 31 set (RTMPNT_ID_F_INACTIVE) so we can identify them + * and shuffle duplicates during CPU hotplugging. We assign temporary IDs to + * the inactive CPUs starting at g_cRtMpNtMaxCpus - 1, ASSUMING that active + * CPUs has IDs from 0 to g_cRtMpNtActiveCpus. */ +RTCPUID g_aidRtMpNtByCpuSetIdx[RTCPUSET_MAX_CPUS]; +/** The handle of the rtR0NtMpProcessorChangeCallback registration. */ +static PVOID g_pvMpCpuChangeCallback = NULL; +/** Size of the KAFFINITY_EX structure. + * This increased from 20 to 32 bitmap words in the 2020 H2 windows 10 release + * (i.e. 1280 to 2048 CPUs). We expect this to increase in the future. */ +static size_t g_cbRtMpNtKaffinityEx = RT_UOFFSETOF(KAFFINITY_EX, Bitmap) + + RT_SIZEOFMEMB(KAFFINITY_EX, Bitmap[0]) * 256; +/** The size value of the KAFFINITY_EX structure. */ +static uint16_t g_cRtMpNtKaffinityExEntries = 256; + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static VOID __stdcall rtR0NtMpProcessorChangeCallback(void *pvUser, PKE_PROCESSOR_CHANGE_NOTIFY_CONTEXT pChangeCtx, + PNTSTATUS prcOperationStatus); +static int rtR0NtInitQueryGroupRelations(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX **ppInfo); + + + +/** + * Initalizes multiprocessor globals (called by rtR0InitNative). + * + * @returns IPRT status code. + * @param pOsVerInfo Version information. + */ +DECLHIDDEN(int) rtR0MpNtInit(RTNTSDBOSVER const *pOsVerInfo) +{ +#define MY_CHECK_BREAK(a_Check, a_DbgPrintArgs) \ + AssertMsgBreakStmt(a_Check, a_DbgPrintArgs, DbgPrint a_DbgPrintArgs; rc = VERR_INTERNAL_ERROR_4 ) +#define MY_CHECK_RETURN(a_Check, a_DbgPrintArgs, a_rcRet) \ + AssertMsgReturnStmt(a_Check, a_DbgPrintArgs, DbgPrint a_DbgPrintArgs, a_rcRet) +#define MY_CHECK(a_Check, a_DbgPrintArgs) \ + AssertMsgStmt(a_Check, a_DbgPrintArgs, DbgPrint a_DbgPrintArgs; rc = VERR_INTERNAL_ERROR_4 ) + + /* + * API combination checks. + */ + MY_CHECK_RETURN(!g_pfnrtKeSetTargetProcessorDpcEx || g_pfnrtKeGetProcessorNumberFromIndex, + ("IPRT: Fatal: Missing KeSetTargetProcessorDpcEx without KeGetProcessorNumberFromIndex!\n"), + VERR_SYMBOL_NOT_FOUND); + + /* + * Get max number of processor groups. + * + * We may need to upadjust this number below, because windows likes to keep + * all options open when it comes to hotplugged CPU group assignments. A + * server advertising up to 64 CPUs in the ACPI table will get a result of + * 64 from KeQueryMaximumGroupCount. That makes sense. However, when windows + * server 2012 does a two processor group setup for it, the sum of the + * GroupInfo[*].MaximumProcessorCount members below is 128. This is probably + * because windows doesn't want to make decisions grouping of hotpluggable CPUs. + * So, we need to bump the maximum count to 128 below do deal with this as we + * want to have valid CPU set indexes for all potential CPUs - how could we + * otherwise use the RTMpGetSet() result and also RTCpuSetCount(RTMpGetSet()) + * should equal RTMpGetCount(). + */ + if (g_pfnrtKeQueryMaximumGroupCount) + { + g_cRtMpNtMaxGroups = g_pfnrtKeQueryMaximumGroupCount(); + MY_CHECK_RETURN(g_cRtMpNtMaxGroups <= RTCPUSET_MAX_CPUS && g_cRtMpNtMaxGroups > 0, + ("IPRT: Fatal: g_cRtMpNtMaxGroups=%u, max %u\n", g_cRtMpNtMaxGroups, RTCPUSET_MAX_CPUS), + VERR_MP_TOO_MANY_CPUS); + } + else + g_cRtMpNtMaxGroups = 1; + + /* + * Get max number CPUs. + * This also defines the range of NT CPU indexes, RTCPUID and index into RTCPUSET. + */ + if (g_pfnrtKeQueryMaximumProcessorCountEx) + { + g_cRtMpNtMaxCpus = g_pfnrtKeQueryMaximumProcessorCountEx(ALL_PROCESSOR_GROUPS); + MY_CHECK_RETURN(g_cRtMpNtMaxCpus <= RTCPUSET_MAX_CPUS && g_cRtMpNtMaxCpus > 0, + ("IPRT: Fatal: g_cRtMpNtMaxCpus=%u, max %u [KeQueryMaximumProcessorCountEx]\n", + g_cRtMpNtMaxGroups, RTCPUSET_MAX_CPUS), + VERR_MP_TOO_MANY_CPUS); + } + else if (g_pfnrtKeQueryMaximumProcessorCount) + { + g_cRtMpNtMaxCpus = g_pfnrtKeQueryMaximumProcessorCount(); + MY_CHECK_RETURN(g_cRtMpNtMaxCpus <= RTCPUSET_MAX_CPUS && g_cRtMpNtMaxCpus > 0, + ("IPRT: Fatal: g_cRtMpNtMaxCpus=%u, max %u [KeQueryMaximumProcessorCount]\n", + g_cRtMpNtMaxGroups, RTCPUSET_MAX_CPUS), + VERR_MP_TOO_MANY_CPUS); + } + else if (g_pfnrtKeQueryActiveProcessors) + { + KAFFINITY fActiveProcessors = g_pfnrtKeQueryActiveProcessors(); + MY_CHECK_RETURN(fActiveProcessors != 0, + ("IPRT: Fatal: KeQueryActiveProcessors returned 0!\n"), + VERR_INTERNAL_ERROR_2); + g_cRtMpNtMaxCpus = 0; + do + { + g_cRtMpNtMaxCpus++; + fActiveProcessors >>= 1; + } while (fActiveProcessors); + } + else + g_cRtMpNtMaxCpus = KeNumberProcessors; + + /* + * Just because we're a bit paranoid about getting something wrong wrt to the + * kernel interfaces, we try 16 times to get the KeQueryActiveProcessorCountEx + * and KeQueryLogicalProcessorRelationship information to match up. + */ + for (unsigned cTries = 0;; cTries++) + { + /* + * Get number of active CPUs. + */ + if (g_pfnrtKeQueryActiveProcessorCountEx) + { + g_cRtMpNtActiveCpus = g_pfnrtKeQueryActiveProcessorCountEx(ALL_PROCESSOR_GROUPS); + MY_CHECK_RETURN(g_cRtMpNtActiveCpus <= g_cRtMpNtMaxCpus && g_cRtMpNtActiveCpus > 0, + ("IPRT: Fatal: g_cRtMpNtMaxGroups=%u, max %u [KeQueryActiveProcessorCountEx]\n", + g_cRtMpNtMaxGroups, g_cRtMpNtMaxCpus), + VERR_MP_TOO_MANY_CPUS); + } + else if (g_pfnrtKeQueryActiveProcessorCount) + { + g_cRtMpNtActiveCpus = g_pfnrtKeQueryActiveProcessorCount(NULL); + MY_CHECK_RETURN(g_cRtMpNtActiveCpus <= g_cRtMpNtMaxCpus && g_cRtMpNtActiveCpus > 0, + ("IPRT: Fatal: g_cRtMpNtMaxGroups=%u, max %u [KeQueryActiveProcessorCount]\n", + g_cRtMpNtMaxGroups, g_cRtMpNtMaxCpus), + VERR_MP_TOO_MANY_CPUS); + } + else + g_cRtMpNtActiveCpus = g_cRtMpNtMaxCpus; + + /* + * Query the details for the groups to figure out which CPUs are online as + * well as the NT index limit. + */ + for (unsigned i = 0; i < RT_ELEMENTS(g_aidRtMpNtByCpuSetIdx); i++) +#ifdef IPRT_WITH_RTCPUID_AS_GROUP_AND_NUMBER + g_aidRtMpNtByCpuSetIdx[i] = NIL_RTCPUID; +#else + g_aidRtMpNtByCpuSetIdx[i] = i < g_cRtMpNtMaxCpus ? i : NIL_RTCPUID; +#endif + for (unsigned idxGroup = 0; idxGroup < RT_ELEMENTS(g_aRtMpNtCpuGroups); idxGroup++) + { + g_aRtMpNtCpuGroups[idxGroup].cMaxCpus = 0; + g_aRtMpNtCpuGroups[idxGroup].cActiveCpus = 0; + for (unsigned idxMember = 0; idxMember < RT_ELEMENTS(g_aRtMpNtCpuGroups[idxGroup].aidxCpuSetMembers); idxMember++) + g_aRtMpNtCpuGroups[idxGroup].aidxCpuSetMembers[idxMember] = -1; + } + + if (g_pfnrtKeQueryLogicalProcessorRelationship) + { + MY_CHECK_RETURN(g_pfnrtKeGetProcessorIndexFromNumber, + ("IPRT: Fatal: Found KeQueryLogicalProcessorRelationship but not KeGetProcessorIndexFromNumber!\n"), + VERR_SYMBOL_NOT_FOUND); + MY_CHECK_RETURN(g_pfnrtKeGetProcessorNumberFromIndex, + ("IPRT: Fatal: Found KeQueryLogicalProcessorRelationship but not KeGetProcessorIndexFromNumber!\n"), + VERR_SYMBOL_NOT_FOUND); + MY_CHECK_RETURN(g_pfnrtKeSetTargetProcessorDpcEx, + ("IPRT: Fatal: Found KeQueryLogicalProcessorRelationship but not KeSetTargetProcessorDpcEx!\n"), + VERR_SYMBOL_NOT_FOUND); + + SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *pInfo = NULL; + int rc = rtR0NtInitQueryGroupRelations(&pInfo); + if (RT_FAILURE(rc)) + return rc; + + MY_CHECK(pInfo->Group.MaximumGroupCount == g_cRtMpNtMaxGroups, + ("IPRT: Fatal: MaximumGroupCount=%u != g_cRtMpNtMaxGroups=%u!\n", + pInfo->Group.MaximumGroupCount, g_cRtMpNtMaxGroups)); + MY_CHECK(pInfo->Group.ActiveGroupCount > 0 && pInfo->Group.ActiveGroupCount <= g_cRtMpNtMaxGroups, + ("IPRT: Fatal: ActiveGroupCount=%u != g_cRtMpNtMaxGroups=%u!\n", + pInfo->Group.ActiveGroupCount, g_cRtMpNtMaxGroups)); + + /* + * First we need to recalc g_cRtMpNtMaxCpus (see above). + */ + uint32_t cMaxCpus = 0; + uint32_t idxGroup; + for (idxGroup = 0; RT_SUCCESS(rc) && idxGroup < pInfo->Group.ActiveGroupCount; idxGroup++) + { + const PROCESSOR_GROUP_INFO *pGrpInfo = &pInfo->Group.GroupInfo[idxGroup]; + MY_CHECK_BREAK(pGrpInfo->MaximumProcessorCount <= MAXIMUM_PROC_PER_GROUP, + ("IPRT: Fatal: MaximumProcessorCount=%u\n", pGrpInfo->MaximumProcessorCount)); + MY_CHECK_BREAK(pGrpInfo->ActiveProcessorCount <= pGrpInfo->MaximumProcessorCount, + ("IPRT: Fatal: ActiveProcessorCount=%u > MaximumProcessorCount=%u\n", + pGrpInfo->ActiveProcessorCount, pGrpInfo->MaximumProcessorCount)); + cMaxCpus += pGrpInfo->MaximumProcessorCount; + } + if (cMaxCpus > g_cRtMpNtMaxCpus && RT_SUCCESS(rc)) + { + DbgPrint("IPRT: g_cRtMpNtMaxCpus=%u -> %u\n", g_cRtMpNtMaxCpus, cMaxCpus); +#ifndef IPRT_WITH_RTCPUID_AS_GROUP_AND_NUMBER + uint32_t i = RT_MIN(cMaxCpus, RT_ELEMENTS(g_aidRtMpNtByCpuSetIdx)); + while (i-- > g_cRtMpNtMaxCpus) + g_aidRtMpNtByCpuSetIdx[i] = i; +#endif + g_cRtMpNtMaxCpus = cMaxCpus; + if (g_cRtMpNtMaxGroups > RTCPUSET_MAX_CPUS) + { + MY_CHECK(g_cRtMpNtMaxGroups <= RTCPUSET_MAX_CPUS && g_cRtMpNtMaxGroups > 0, + ("IPRT: Fatal: g_cRtMpNtMaxGroups=%u, max %u\n", g_cRtMpNtMaxGroups, RTCPUSET_MAX_CPUS)); + rc = VERR_MP_TOO_MANY_CPUS; + } + } + + /* + * Calc online mask, partition IDs and such. + * + * Also check ASSUMPTIONS: + * + * 1. Processor indexes going from 0 and up to + * KeQueryMaximumProcessorCountEx(ALL_PROCESSOR_GROUPS) - 1. + * + * 2. Currently valid processor indexes, i.e. accepted by + * KeGetProcessorIndexFromNumber & KeGetProcessorNumberFromIndex, goes + * from 0 thru KeQueryActiveProcessorCountEx(ALL_PROCESSOR_GROUPS) - 1. + * + * 3. PROCESSOR_GROUP_INFO::MaximumProcessorCount gives the number of + * relevant bits in the ActiveProcessorMask (from LSB). + * + * 4. Active processor count found in KeQueryLogicalProcessorRelationship + * output matches what KeQueryActiveProcessorCountEx(ALL) returns. + * + * 5. Active + inactive processor counts in same does not exceed + * KeQueryMaximumProcessorCountEx(ALL). + * + * Note! Processor indexes are assigned as CPUs come online and are not + * preallocated according to group maximums. Since CPUS are only taken + * online and never offlined, this means that internal CPU bitmaps are + * never sparse and no time is wasted scanning unused bits. + * + * Unfortunately, it means that ring-3 cannot easily guess the index + * assignments when hotswapping is used, and must use GIP when available. + */ + RTCpuSetEmpty(&g_rtMpNtCpuSet); + uint32_t cInactive = 0; + uint32_t cActive = 0; + uint32_t idxCpuMax = 0; + uint32_t idxCpuSetNextInactive = g_cRtMpNtMaxCpus - 1; + for (idxGroup = 0; RT_SUCCESS(rc) && idxGroup < pInfo->Group.ActiveGroupCount; idxGroup++) + { + const PROCESSOR_GROUP_INFO *pGrpInfo = &pInfo->Group.GroupInfo[idxGroup]; + MY_CHECK_BREAK(pGrpInfo->MaximumProcessorCount <= MAXIMUM_PROC_PER_GROUP, + ("IPRT: Fatal: MaximumProcessorCount=%u\n", pGrpInfo->MaximumProcessorCount)); + MY_CHECK_BREAK(pGrpInfo->ActiveProcessorCount <= pGrpInfo->MaximumProcessorCount, + ("IPRT: Fatal: ActiveProcessorCount=%u > MaximumProcessorCount=%u\n", + pGrpInfo->ActiveProcessorCount, pGrpInfo->MaximumProcessorCount)); + + g_aRtMpNtCpuGroups[idxGroup].cMaxCpus = pGrpInfo->MaximumProcessorCount; + g_aRtMpNtCpuGroups[idxGroup].cActiveCpus = pGrpInfo->ActiveProcessorCount; + + for (uint32_t idxMember = 0; idxMember < pGrpInfo->MaximumProcessorCount; idxMember++) + { + PROCESSOR_NUMBER ProcNum; + ProcNum.Group = (USHORT)idxGroup; + ProcNum.Number = (UCHAR)idxMember; + ProcNum.Reserved = 0; + ULONG idxCpu = g_pfnrtKeGetProcessorIndexFromNumber(&ProcNum); + if (idxCpu != INVALID_PROCESSOR_INDEX) + { + MY_CHECK_BREAK(idxCpu < g_cRtMpNtMaxCpus && idxCpu < RTCPUSET_MAX_CPUS, /* ASSUMPTION #1 */ + ("IPRT: Fatal: idxCpu=%u >= g_cRtMpNtMaxCpus=%u (RTCPUSET_MAX_CPUS=%u)\n", + idxCpu, g_cRtMpNtMaxCpus, RTCPUSET_MAX_CPUS)); + if (idxCpu > idxCpuMax) + idxCpuMax = idxCpu; + g_aRtMpNtCpuGroups[idxGroup].aidxCpuSetMembers[idxMember] = idxCpu; +#ifdef IPRT_WITH_RTCPUID_AS_GROUP_AND_NUMBER + g_aidRtMpNtByCpuSetIdx[idxCpu] = RTMPCPUID_FROM_GROUP_AND_NUMBER(idxGroup, idxMember); +#endif + + ProcNum.Group = UINT16_MAX; + ProcNum.Number = UINT8_MAX; + ProcNum.Reserved = UINT8_MAX; + NTSTATUS rcNt = g_pfnrtKeGetProcessorNumberFromIndex(idxCpu, &ProcNum); + MY_CHECK_BREAK(NT_SUCCESS(rcNt), + ("IPRT: Fatal: KeGetProcessorNumberFromIndex(%u,) -> %#x!\n", idxCpu, rcNt)); + MY_CHECK_BREAK(ProcNum.Group == idxGroup && ProcNum.Number == idxMember, + ("IPRT: Fatal: KeGetProcessorXxxxFromYyyy roundtrip error for %#x! Group: %u vs %u, Number: %u vs %u\n", + idxCpu, ProcNum.Group, idxGroup, ProcNum.Number, idxMember)); + + if (pGrpInfo->ActiveProcessorMask & RT_BIT_64(idxMember)) + { + RTCpuSetAddByIndex(&g_rtMpNtCpuSet, idxCpu); + cActive++; + } + else + cInactive++; /* (This is a little unexpected, but not important as long as things add up below.) */ + } + else + { + /* Must be not present / inactive when KeGetProcessorIndexFromNumber fails. */ + MY_CHECK_BREAK(!(pGrpInfo->ActiveProcessorMask & RT_BIT_64(idxMember)), + ("IPRT: Fatal: KeGetProcessorIndexFromNumber(%u/%u) failed but CPU is active! cMax=%u cActive=%u fActive=%p\n", + idxGroup, idxMember, pGrpInfo->MaximumProcessorCount, pGrpInfo->ActiveProcessorCount, + pGrpInfo->ActiveProcessorMask)); + cInactive++; + if (idxCpuSetNextInactive >= g_cRtMpNtActiveCpus) + { + g_aRtMpNtCpuGroups[idxGroup].aidxCpuSetMembers[idxMember] = idxCpuSetNextInactive; +#ifdef IPRT_WITH_RTCPUID_AS_GROUP_AND_NUMBER + g_aidRtMpNtByCpuSetIdx[idxCpuSetNextInactive] = RTMPCPUID_FROM_GROUP_AND_NUMBER(idxGroup, idxMember) + | RTMPNT_ID_F_INACTIVE; +#endif + idxCpuSetNextInactive--; + } + } + } + } + + MY_CHECK(cInactive + cActive <= g_cRtMpNtMaxCpus, /* ASSUMPTION #5 (not '==' because of inactive groups) */ + ("IPRT: Fatal: cInactive=%u + cActive=%u > g_cRtMpNtMaxCpus=%u\n", cInactive, cActive, g_cRtMpNtMaxCpus)); + + /* Deal with inactive groups using KeQueryMaximumProcessorCountEx or as + best as we can by as best we can by stipulating maximum member counts + from the previous group. */ + if ( RT_SUCCESS(rc) + && idxGroup < pInfo->Group.MaximumGroupCount) + { + uint16_t cInactiveLeft = g_cRtMpNtMaxCpus - (cInactive + cActive); + while (idxGroup < pInfo->Group.MaximumGroupCount) + { + uint32_t cMaxMembers = 0; + if (g_pfnrtKeQueryMaximumProcessorCountEx) + cMaxMembers = g_pfnrtKeQueryMaximumProcessorCountEx(idxGroup); + if (cMaxMembers != 0 || cInactiveLeft == 0) + AssertStmt(cMaxMembers <= cInactiveLeft, cMaxMembers = cInactiveLeft); + else + { + uint16_t cGroupsLeft = pInfo->Group.MaximumGroupCount - idxGroup; + cMaxMembers = pInfo->Group.GroupInfo[idxGroup - 1].MaximumProcessorCount; + while (cMaxMembers * cGroupsLeft < cInactiveLeft) + cMaxMembers++; + if (cMaxMembers > cInactiveLeft) + cMaxMembers = cInactiveLeft; + } + + g_aRtMpNtCpuGroups[idxGroup].cMaxCpus = (uint16_t)cMaxMembers; + g_aRtMpNtCpuGroups[idxGroup].cActiveCpus = 0; + for (uint16_t idxMember = 0; idxMember < cMaxMembers; idxMember++) + if (idxCpuSetNextInactive >= g_cRtMpNtActiveCpus) + { + g_aRtMpNtCpuGroups[idxGroup].aidxCpuSetMembers[idxMember] = idxCpuSetNextInactive; +#ifdef IPRT_WITH_RTCPUID_AS_GROUP_AND_NUMBER + g_aidRtMpNtByCpuSetIdx[idxCpuSetNextInactive] = RTMPCPUID_FROM_GROUP_AND_NUMBER(idxGroup, idxMember) + | RTMPNT_ID_F_INACTIVE; +#endif + idxCpuSetNextInactive--; + } + cInactiveLeft -= cMaxMembers; + idxGroup++; + } + } + + /* We're done with pInfo now, free it so we can start returning when assertions fail. */ + RTMemFree(pInfo); + if (RT_FAILURE(rc)) /* MY_CHECK_BREAK sets rc. */ + return rc; + MY_CHECK_RETURN(cActive >= g_cRtMpNtActiveCpus, + ("IPRT: Fatal: cActive=%u < g_cRtMpNtActiveCpus=%u - CPUs removed?\n", cActive, g_cRtMpNtActiveCpus), + VERR_INTERNAL_ERROR_3); + MY_CHECK_RETURN(idxCpuMax < cActive, /* ASSUMPTION #2 */ + ("IPRT: Fatal: idCpuMax=%u >= cActive=%u! Unexpected CPU index allocation. CPUs removed?\n", + idxCpuMax, cActive), + VERR_INTERNAL_ERROR_4); + + /* Retry if CPUs were added. */ + if ( cActive != g_cRtMpNtActiveCpus + && cTries < 16) + continue; + MY_CHECK_RETURN(cActive == g_cRtMpNtActiveCpus, /* ASSUMPTION #4 */ + ("IPRT: Fatal: cActive=%u != g_cRtMpNtActiveCpus=%u\n", cActive, g_cRtMpNtActiveCpus), + VERR_INTERNAL_ERROR_5); + } + else + { + /* Legacy: */ + MY_CHECK_RETURN(g_cRtMpNtMaxGroups == 1, ("IPRT: Fatal: Missing KeQueryLogicalProcessorRelationship!\n"), + VERR_SYMBOL_NOT_FOUND); + + /** @todo Is it possible that the affinity mask returned by + * KeQueryActiveProcessors is sparse? */ + if (g_pfnrtKeQueryActiveProcessors) + RTCpuSetFromU64(&g_rtMpNtCpuSet, g_pfnrtKeQueryActiveProcessors()); + else if (g_cRtMpNtMaxCpus < 64) + RTCpuSetFromU64(&g_rtMpNtCpuSet, (UINT64_C(1) << g_cRtMpNtMaxCpus) - 1); + else + { + MY_CHECK_RETURN(g_cRtMpNtMaxCpus == 64, ("IPRT: Fatal: g_cRtMpNtMaxCpus=%u, expect 64 or less\n", g_cRtMpNtMaxCpus), + VERR_MP_TOO_MANY_CPUS); + RTCpuSetFromU64(&g_rtMpNtCpuSet, UINT64_MAX); + } + + g_aRtMpNtCpuGroups[0].cMaxCpus = g_cRtMpNtMaxCpus; + g_aRtMpNtCpuGroups[0].cActiveCpus = g_cRtMpNtMaxCpus; + for (unsigned i = 0; i < g_cRtMpNtMaxCpus; i++) + { + g_aRtMpNtCpuGroups[0].aidxCpuSetMembers[i] = i; +#ifdef IPRT_WITH_RTCPUID_AS_GROUP_AND_NUMBER + g_aidRtMpNtByCpuSetIdx[i] = RTMPCPUID_FROM_GROUP_AND_NUMBER(0, i); +#endif + } + } + + /* + * Register CPU hot plugging callback (it also counts active CPUs). + */ + Assert(g_pvMpCpuChangeCallback == NULL); + if (g_pfnrtKeRegisterProcessorChangeCallback) + { + MY_CHECK_RETURN(g_pfnrtKeDeregisterProcessorChangeCallback, + ("IPRT: Fatal: KeRegisterProcessorChangeCallback without KeDeregisterProcessorChangeCallback!\n"), + VERR_SYMBOL_NOT_FOUND); + + RTCPUSET const ActiveSetCopy = g_rtMpNtCpuSet; + RTCpuSetEmpty(&g_rtMpNtCpuSet); + uint32_t const cActiveCpus = g_cRtMpNtActiveCpus; + g_cRtMpNtActiveCpus = 0; + + g_pvMpCpuChangeCallback = g_pfnrtKeRegisterProcessorChangeCallback(rtR0NtMpProcessorChangeCallback, NULL /*pvUser*/, + KE_PROCESSOR_CHANGE_ADD_EXISTING); + if (g_pvMpCpuChangeCallback) + { + if (cActiveCpus == g_cRtMpNtActiveCpus) + { /* likely */ } + else + { + g_pfnrtKeDeregisterProcessorChangeCallback(g_pvMpCpuChangeCallback); + if (cTries < 16) + { + /* Retry if CPUs were added. */ + MY_CHECK_RETURN(g_cRtMpNtActiveCpus >= cActiveCpus, + ("IPRT: Fatal: g_cRtMpNtActiveCpus=%u < cActiveCpus=%u! CPUs removed?\n", + g_cRtMpNtActiveCpus, cActiveCpus), + VERR_INTERNAL_ERROR_2); + MY_CHECK_RETURN(g_cRtMpNtActiveCpus <= g_cRtMpNtMaxCpus, + ("IPRT: Fatal: g_cRtMpNtActiveCpus=%u > g_cRtMpNtMaxCpus=%u!\n", + g_cRtMpNtActiveCpus, g_cRtMpNtMaxCpus), + VERR_INTERNAL_ERROR_2); + continue; + } + MY_CHECK_RETURN(0, ("IPRT: Fatal: g_cRtMpNtActiveCpus=%u cActiveCpus=%u\n", g_cRtMpNtActiveCpus, cActiveCpus), + VERR_INTERNAL_ERROR_3); + } + } + else + { + AssertFailed(); + g_rtMpNtCpuSet = ActiveSetCopy; + g_cRtMpNtActiveCpus = cActiveCpus; + } + } + break; + } /* Retry loop for stable active CPU count. */ + +#undef MY_CHECK_RETURN + + /* + * Special IPI fun for RTMpPokeCpu. + * + * On Vista and later the DPC method doesn't seem to reliably send IPIs, + * so we have to use alternative methods. + * + * On AMD64 We used to use the HalSendSoftwareInterrupt API (also x86 on + * W10+), it looks faster and more convenient to use, however we're either + * using it wrong or it doesn't reliably do what we want (see @bugref{8343}). + * + * The HalRequestIpip API is thus far the only alternative to KeInsertQueueDpc + * for doing targetted IPIs. Trouble with this API is that it changed + * fundamentally in Window 7 when they added support for lots of processors. + * + * If we really think we cannot use KeInsertQueueDpc, we use the broadcast IPI + * API KeIpiGenericCall. + */ + if ( pOsVerInfo->uMajorVer > 6 + || (pOsVerInfo->uMajorVer == 6 && pOsVerInfo->uMinorVer > 0)) + g_pfnrtHalRequestIpiPreW7 = NULL; + else + g_pfnrtHalRequestIpiW7Plus = NULL; + + if ( g_pfnrtHalRequestIpiW7Plus + && g_pfnrtKeInitializeAffinityEx + && g_pfnrtKeAddProcessorAffinityEx + && g_pfnrtKeGetProcessorIndexFromNumber) + { + /* Determine the real size of the KAFFINITY_EX structure. */ + size_t const cbAffinity = _8K; + PKAFFINITY_EX pAffinity = (PKAFFINITY_EX)RTMemAllocZ(cbAffinity); + AssertReturn(pAffinity, VERR_NO_MEMORY); + size_t const cMaxEntries = (cbAffinity - RT_UOFFSETOF(KAFFINITY_EX, Bitmap[0])) / sizeof(pAffinity->Bitmap[0]); + g_pfnrtKeInitializeAffinityEx(pAffinity); + if (pAffinity->Size > 1 && pAffinity->Size <= cMaxEntries) + { + g_cRtMpNtKaffinityExEntries = pAffinity->Size; + g_cbRtMpNtKaffinityEx = pAffinity->Size * sizeof(pAffinity->Bitmap[0]) + RT_UOFFSETOF(KAFFINITY_EX, Bitmap[0]); + g_pfnrtMpPokeCpuWorker = rtMpPokeCpuUsingHalRequestIpiW7Plus; + RTMemFree(pAffinity); + DbgPrint("IPRT: RTMpPoke => rtMpPokeCpuUsingHalRequestIpiW7Plus\n"); + return VINF_SUCCESS; + } + DbgPrint("IPRT: RTMpPoke can't use rtMpPokeCpuUsingHalRequestIpiW7Plus! pAffinity->Size=%u\n", pAffinity->Size); + AssertReleaseMsg(pAffinity->Size <= cMaxEntries, ("%#x\n", pAffinity->Size)); /* stack is toast if larger (32768 CPUs). */ + RTMemFree(pAffinity); + } + + if (pOsVerInfo->uMajorVer >= 6 && g_pfnrtKeIpiGenericCall) + { + DbgPrint("IPRT: RTMpPoke => rtMpPokeCpuUsingBroadcastIpi\n"); + g_pfnrtMpPokeCpuWorker = rtMpPokeCpuUsingBroadcastIpi; + } + else if (g_pfnrtKeSetTargetProcessorDpc) + { + DbgPrint("IPRT: RTMpPoke => rtMpPokeCpuUsingDpc\n"); + g_pfnrtMpPokeCpuWorker = rtMpPokeCpuUsingDpc; + /* Windows XP should send always send an IPI -> VERIFY */ + } + else + { + DbgPrint("IPRT: RTMpPoke => rtMpPokeCpuUsingFailureNotSupported\n"); + Assert(pOsVerInfo->uMajorVer == 3 && pOsVerInfo->uMinorVer <= 50); + g_pfnrtMpPokeCpuWorker = rtMpPokeCpuUsingFailureNotSupported; + } + + return VINF_SUCCESS; +} + + +/** + * Called by rtR0TermNative. + */ +DECLHIDDEN(void) rtR0MpNtTerm(void) +{ + /* + * Deregister the processor change callback. + */ + PVOID pvMpCpuChangeCallback = g_pvMpCpuChangeCallback; + g_pvMpCpuChangeCallback = NULL; + if (pvMpCpuChangeCallback) + { + AssertReturnVoid(g_pfnrtKeDeregisterProcessorChangeCallback); + g_pfnrtKeDeregisterProcessorChangeCallback(pvMpCpuChangeCallback); + } +} + + +DECLHIDDEN(int) rtR0MpNotificationNativeInit(void) +{ + return VINF_SUCCESS; +} + + +DECLHIDDEN(void) rtR0MpNotificationNativeTerm(void) +{ +} + + +/** + * Implements the NT PROCESSOR_CALLBACK_FUNCTION callback function. + * + * This maintains the g_rtMpNtCpuSet and works MP notification callbacks. When + * registered, it's called for each active CPU in the system, avoiding racing + * CPU hotplugging (as well as testing the callback). + * + * @param pvUser User context (not used). + * @param pChangeCtx Change context (in). + * @param prcOperationStatus Operation status (in/out). + * + * @remarks ASSUMES no concurrent execution of KeProcessorAddCompleteNotify + * notification callbacks. At least during callback registration + * callout, we're owning KiDynamicProcessorLock. + * + * @remarks When registering the handler, we first get KeProcessorAddStartNotify + * callbacks for all active CPUs, and after they all succeed we get the + * KeProcessorAddCompleteNotify callbacks. + */ +static VOID __stdcall rtR0NtMpProcessorChangeCallback(void *pvUser, PKE_PROCESSOR_CHANGE_NOTIFY_CONTEXT pChangeCtx, + PNTSTATUS prcOperationStatus) +{ + RT_NOREF(pvUser, prcOperationStatus); + switch (pChangeCtx->State) + { + /* + * Check whether we can deal with the CPU, failing the start operation if we + * can't. The checks we are doing here are to avoid complicated/impossible + * cases in KeProcessorAddCompleteNotify. They are really just verify specs. + */ + case KeProcessorAddStartNotify: + { + NTSTATUS rcNt = STATUS_SUCCESS; + if (pChangeCtx->NtNumber < RTCPUSET_MAX_CPUS) + { + if (pChangeCtx->NtNumber >= g_cRtMpNtMaxCpus) + { + DbgPrint("IPRT: KeProcessorAddStartNotify failure: NtNumber=%u is higher than the max CPU count (%u)!\n", + pChangeCtx->NtNumber, g_cRtMpNtMaxCpus); + rcNt = STATUS_INTERNAL_ERROR; + } + + /* The ProcessNumber field was introduced in Windows 7. */ + PROCESSOR_NUMBER ProcNum; + if (g_pfnrtKeGetProcessorIndexFromNumber) + { + ProcNum = pChangeCtx->ProcNumber; + KEPROCESSORINDEX idxCpu = g_pfnrtKeGetProcessorIndexFromNumber(&ProcNum); + if (idxCpu != pChangeCtx->NtNumber) + { + DbgPrint("IPRT: KeProcessorAddStartNotify failure: g_pfnrtKeGetProcessorIndexFromNumber(%u.%u) -> %u, expected %u!\n", + ProcNum.Group, ProcNum.Number, idxCpu, pChangeCtx->NtNumber); + rcNt = STATUS_INTERNAL_ERROR; + } + } + else + { + ProcNum.Group = 0; + ProcNum.Number = pChangeCtx->NtNumber; + } + + if ( ProcNum.Group < RT_ELEMENTS(g_aRtMpNtCpuGroups) + && ProcNum.Number < RT_ELEMENTS(g_aRtMpNtCpuGroups[0].aidxCpuSetMembers)) + { + if (ProcNum.Group >= g_cRtMpNtMaxGroups) + { + DbgPrint("IPRT: KeProcessorAddStartNotify failure: %u.%u is out of range - max groups: %u!\n", + ProcNum.Group, ProcNum.Number, g_cRtMpNtMaxGroups); + rcNt = STATUS_INTERNAL_ERROR; + } + + if (ProcNum.Number < g_aRtMpNtCpuGroups[ProcNum.Group].cMaxCpus) + { + Assert(g_aRtMpNtCpuGroups[ProcNum.Group].aidxCpuSetMembers[ProcNum.Number] != -1); + if (g_aRtMpNtCpuGroups[ProcNum.Group].aidxCpuSetMembers[ProcNum.Number] == -1) + { + DbgPrint("IPRT: KeProcessorAddStartNotify failure: Internal error! %u.%u was assigned -1 as set index!\n", + ProcNum.Group, ProcNum.Number); + rcNt = STATUS_INTERNAL_ERROR; + } + + Assert(g_aidRtMpNtByCpuSetIdx[pChangeCtx->NtNumber] != NIL_RTCPUID); + if (g_aidRtMpNtByCpuSetIdx[pChangeCtx->NtNumber] == NIL_RTCPUID) + { + DbgPrint("IPRT: KeProcessorAddStartNotify failure: Internal error! %u (%u.%u) translates to NIL_RTCPUID!\n", + pChangeCtx->NtNumber, ProcNum.Group, ProcNum.Number); + rcNt = STATUS_INTERNAL_ERROR; + } + } + else + { + DbgPrint("IPRT: KeProcessorAddStartNotify failure: max processors in group %u is %u, cannot add %u to it!\n", + ProcNum.Group, g_aRtMpNtCpuGroups[ProcNum.Group].cMaxCpus, ProcNum.Group, ProcNum.Number); + rcNt = STATUS_INTERNAL_ERROR; + } + } + else + { + DbgPrint("IPRT: KeProcessorAddStartNotify failure: %u.%u is out of range (max %u.%u)!\n", + ProcNum.Group, ProcNum.Number, RT_ELEMENTS(g_aRtMpNtCpuGroups), RT_ELEMENTS(g_aRtMpNtCpuGroups[0].aidxCpuSetMembers)); + rcNt = STATUS_INTERNAL_ERROR; + } + } + else + { + DbgPrint("IPRT: KeProcessorAddStartNotify failure: NtNumber=%u is outside RTCPUSET_MAX_CPUS (%u)!\n", + pChangeCtx->NtNumber, RTCPUSET_MAX_CPUS); + rcNt = STATUS_INTERNAL_ERROR; + } + if (!NT_SUCCESS(rcNt)) + *prcOperationStatus = rcNt; + break; + } + + /* + * Update the globals. Since we've checked out range limits and other + * limitations already we just AssertBreak here. + */ + case KeProcessorAddCompleteNotify: + { + /* + * Calc the processor number and assert conditions checked in KeProcessorAddStartNotify. + */ + AssertBreak(pChangeCtx->NtNumber < RTCPUSET_MAX_CPUS); + AssertBreak(pChangeCtx->NtNumber < g_cRtMpNtMaxCpus); + Assert(pChangeCtx->NtNumber == g_cRtMpNtActiveCpus); /* light assumption */ + PROCESSOR_NUMBER ProcNum; + if (g_pfnrtKeGetProcessorIndexFromNumber) + { + ProcNum = pChangeCtx->ProcNumber; + AssertBreak(g_pfnrtKeGetProcessorIndexFromNumber(&ProcNum) == pChangeCtx->NtNumber); + AssertBreak(ProcNum.Group < RT_ELEMENTS(g_aRtMpNtCpuGroups)); + AssertBreak(ProcNum.Group < g_cRtMpNtMaxGroups); + } + else + { + ProcNum.Group = 0; + ProcNum.Number = pChangeCtx->NtNumber; + } + AssertBreak(ProcNum.Number < RT_ELEMENTS(g_aRtMpNtCpuGroups[ProcNum.Group].aidxCpuSetMembers)); + AssertBreak(ProcNum.Number < g_aRtMpNtCpuGroups[ProcNum.Group].cMaxCpus); + AssertBreak(g_aRtMpNtCpuGroups[ProcNum.Group].aidxCpuSetMembers[ProcNum.Number] != -1); + AssertBreak(g_aidRtMpNtByCpuSetIdx[pChangeCtx->NtNumber] != NIL_RTCPUID); + + /* + * Add ourselves to the online CPU set and update the active CPU count. + */ + RTCpuSetAddByIndex(&g_rtMpNtCpuSet, pChangeCtx->NtNumber); + ASMAtomicIncU32(&g_cRtMpNtActiveCpus); + + /* + * Update the group info. + * + * If the index prediction failed (real hotplugging callbacks only) we + * have to switch it around. This is particularly annoying when we + * use the index as the ID. + */ +#ifdef IPRT_WITH_RTCPUID_AS_GROUP_AND_NUMBER + RTCPUID idCpu = RTMPCPUID_FROM_GROUP_AND_NUMBER(ProcNum.Group, ProcNum.Number); + RTCPUID idOld = g_aidRtMpNtByCpuSetIdx[pChangeCtx->NtNumber]; + if ((idOld & ~RTMPNT_ID_F_INACTIVE) != idCpu) + { + Assert(idOld & RTMPNT_ID_F_INACTIVE); + int idxDest = g_aRtMpNtCpuGroups[ProcNum.Group].aidxCpuSetMembers[ProcNum.Number]; + g_aRtMpNtCpuGroups[rtMpCpuIdGetGroup(idOld)].aidxCpuSetMembers[rtMpCpuIdGetGroupMember(idOld)] = idxDest; + g_aidRtMpNtByCpuSetIdx[idxDest] = idOld; + } + g_aidRtMpNtByCpuSetIdx[pChangeCtx->NtNumber] = idCpu; +#else + Assert(g_aidRtMpNtByCpuSetIdx[pChangeCtx->NtNumber] == pChangeCtx->NtNumber); + int idxDest = g_aRtMpNtCpuGroups[ProcNum.Group].aidxCpuSetMembers[ProcNum.Number]; + if ((ULONG)idxDest != pChangeCtx->NtNumber) + { + bool fFound = false; + uint32_t idxOldGroup = g_cRtMpNtMaxGroups; + while (idxOldGroup-- > 0 && !fFound) + { + uint32_t idxMember = g_aRtMpNtCpuGroups[idxOldGroup].cMaxCpus; + while (idxMember-- > 0) + if (g_aRtMpNtCpuGroups[idxOldGroup].aidxCpuSetMembers[idxMember] == (int)pChangeCtx->NtNumber) + { + g_aRtMpNtCpuGroups[idxOldGroup].aidxCpuSetMembers[idxMember] = idxDest; + fFound = true; + break; + } + } + Assert(fFound); + } +#endif + g_aRtMpNtCpuGroups[ProcNum.Group].aidxCpuSetMembers[ProcNum.Number] = pChangeCtx->NtNumber; + + /* + * Do MP notification callbacks. + */ + rtMpNotificationDoCallbacks(RTMPEVENT_ONLINE, pChangeCtx->NtNumber); + break; + } + + case KeProcessorAddFailureNotify: + /* ignore */ + break; + + default: + AssertMsgFailed(("State=%u\n", pChangeCtx->State)); + } +} + + +/** + * Wrapper around KeQueryLogicalProcessorRelationship. + * + * @returns IPRT status code. + * @param ppInfo Where to return the info. Pass to RTMemFree when done. + */ +static int rtR0NtInitQueryGroupRelations(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX **ppInfo) +{ + ULONG cbInfo = sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX) + + g_cRtMpNtMaxGroups * sizeof(GROUP_RELATIONSHIP); + NTSTATUS rcNt; + do + { + SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *pInfo = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)RTMemAlloc(cbInfo); + if (pInfo) + { + rcNt = g_pfnrtKeQueryLogicalProcessorRelationship(NULL /*pProcNumber*/, RelationGroup, pInfo, &cbInfo); + if (NT_SUCCESS(rcNt)) + { + *ppInfo = pInfo; + return VINF_SUCCESS; + } + + RTMemFree(pInfo); + pInfo = NULL; + } + else + rcNt = STATUS_NO_MEMORY; + } while (rcNt == STATUS_INFO_LENGTH_MISMATCH); + DbgPrint("IPRT: Fatal: KeQueryLogicalProcessorRelationship failed: %#x\n", rcNt); + AssertMsgFailed(("KeQueryLogicalProcessorRelationship failed: %#x\n", rcNt)); + return RTErrConvertFromNtStatus(rcNt); +} + + + + + +RTDECL(RTCPUID) RTMpCpuId(void) +{ + Assert(g_cRtMpNtMaxCpus > 0 && g_cRtMpNtMaxGroups > 0); /* init order */ + +#ifdef IPRT_WITH_RTCPUID_AS_GROUP_AND_NUMBER + PROCESSOR_NUMBER ProcNum; + ProcNum.Group = 0; + if (g_pfnrtKeGetCurrentProcessorNumberEx) + { + ProcNum.Number = 0; + g_pfnrtKeGetCurrentProcessorNumberEx(&ProcNum); + } + else + ProcNum.Number = KeGetCurrentProcessorNumber(); /* Number is 8-bit, so we're not subject to BYTE -> WORD upgrade in WDK. */ + return RTMPCPUID_FROM_GROUP_AND_NUMBER(ProcNum.Group, ProcNum.Number); + +#else + + if (g_pfnrtKeGetCurrentProcessorNumberEx) + { + KEPROCESSORINDEX idxCpu = g_pfnrtKeGetCurrentProcessorNumberEx(NULL); + Assert(idxCpu < RTCPUSET_MAX_CPUS); + return idxCpu; + } + + return (uint8_t)KeGetCurrentProcessorNumber(); /* PCR->Number was changed from BYTE to WORD in the WDK, thus the cast. */ +#endif +} + + +RTDECL(int) RTMpCurSetIndex(void) +{ +#ifdef IPRT_WITH_RTCPUID_AS_GROUP_AND_NUMBER + Assert(g_cRtMpNtMaxCpus > 0 && g_cRtMpNtMaxGroups > 0); /* init order */ + + if (g_pfnrtKeGetCurrentProcessorNumberEx) + { + KEPROCESSORINDEX idxCpu = g_pfnrtKeGetCurrentProcessorNumberEx(NULL); + Assert(idxCpu < RTCPUSET_MAX_CPUS); + return idxCpu; + } + return (uint8_t)KeGetCurrentProcessorNumber(); /* PCR->Number was changed from BYTE to WORD in the WDK, thus the cast. */ +#else + return (int)RTMpCpuId(); +#endif +} + + +RTDECL(int) RTMpCurSetIndexAndId(PRTCPUID pidCpu) +{ +#ifdef IPRT_WITH_RTCPUID_AS_GROUP_AND_NUMBER + Assert(g_cRtMpNtMaxCpus > 0 && g_cRtMpNtMaxGroups > 0); /* init order */ + + PROCESSOR_NUMBER ProcNum = { 0 , 0, 0 }; + KEPROCESSORINDEX idxCpu = g_pfnrtKeGetCurrentProcessorNumberEx(&ProcNum); + Assert(idxCpu < RTCPUSET_MAX_CPUS); + *pidCpu = RTMPCPUID_FROM_GROUP_AND_NUMBER(ProcNum.Group, ProcNum.Number); + return idxCpu; +#else + return *pidCpu = RTMpCpuId(); +#endif +} + + +RTDECL(int) RTMpCpuIdToSetIndex(RTCPUID idCpu) +{ +#ifdef IPRT_WITH_RTCPUID_AS_GROUP_AND_NUMBER + Assert(g_cRtMpNtMaxCpus > 0 && g_cRtMpNtMaxGroups > 0); /* init order */ + + if (idCpu != NIL_RTCPUID) + { + if (g_pfnrtKeGetProcessorIndexFromNumber) + { + PROCESSOR_NUMBER ProcNum; + ProcNum.Group = rtMpCpuIdGetGroup(idCpu); + ProcNum.Number = rtMpCpuIdGetGroupMember(idCpu); + ProcNum.Reserved = 0; + KEPROCESSORINDEX idxCpu = g_pfnrtKeGetProcessorIndexFromNumber(&ProcNum); + if (idxCpu != INVALID_PROCESSOR_INDEX) + { + Assert(idxCpu < g_cRtMpNtMaxCpus); + Assert((ULONG)g_aRtMpNtCpuGroups[ProcNum.Group].aidxCpuSetMembers[ProcNum.Number] == idxCpu); + return idxCpu; + } + + /* Since NT assigned indexes as the CPUs come online, we cannot produce an ID <-> index + mapping for not-yet-onlined CPUS that is consistent. We just have to do our best... */ + if ( ProcNum.Group < g_cRtMpNtMaxGroups + && ProcNum.Number < g_aRtMpNtCpuGroups[ProcNum.Group].cMaxCpus) + return g_aRtMpNtCpuGroups[ProcNum.Group].aidxCpuSetMembers[ProcNum.Number]; + } + else if (rtMpCpuIdGetGroup(idCpu) == 0) + return rtMpCpuIdGetGroupMember(idCpu); + } + return -1; +#else + /* 1:1 mapping, just do range checks. */ + return idCpu < RTCPUSET_MAX_CPUS ? (int)idCpu : -1; +#endif +} + + +RTDECL(RTCPUID) RTMpCpuIdFromSetIndex(int iCpu) +{ +#ifdef IPRT_WITH_RTCPUID_AS_GROUP_AND_NUMBER + Assert(g_cRtMpNtMaxCpus > 0 && g_cRtMpNtMaxGroups > 0); /* init order */ + + if ((unsigned)iCpu < g_cRtMpNtMaxCpus) + { + if (g_pfnrtKeGetProcessorIndexFromNumber) + { + PROCESSOR_NUMBER ProcNum = { 0, 0, 0 }; + NTSTATUS rcNt = g_pfnrtKeGetProcessorNumberFromIndex(iCpu, &ProcNum); + if (NT_SUCCESS(rcNt)) + { + Assert(ProcNum.Group <= g_cRtMpNtMaxGroups); + Assert( (g_aidRtMpNtByCpuSetIdx[iCpu] & ~RTMPNT_ID_F_INACTIVE) + == RTMPCPUID_FROM_GROUP_AND_NUMBER(ProcNum.Group, ProcNum.Number)); + return RTMPCPUID_FROM_GROUP_AND_NUMBER(ProcNum.Group, ProcNum.Number); + } + } + return g_aidRtMpNtByCpuSetIdx[iCpu]; + } + return NIL_RTCPUID; +#else + /* 1:1 mapping, just do range checks. */ + return (unsigned)iCpu < RTCPUSET_MAX_CPUS ? iCpu : NIL_RTCPUID; +#endif +} + + +RTDECL(int) RTMpSetIndexFromCpuGroupMember(uint32_t idxGroup, uint32_t idxMember) +{ + Assert(g_cRtMpNtMaxCpus > 0 && g_cRtMpNtMaxGroups > 0); /* init order */ + + if (idxGroup < g_cRtMpNtMaxGroups) + if (idxMember < g_aRtMpNtCpuGroups[idxGroup].cMaxCpus) + return g_aRtMpNtCpuGroups[idxGroup].aidxCpuSetMembers[idxMember]; + return -1; +} + + +RTDECL(uint32_t) RTMpGetCpuGroupCounts(uint32_t idxGroup, uint32_t *pcActive) +{ + if (idxGroup < g_cRtMpNtMaxGroups) + { + if (pcActive) + *pcActive = g_aRtMpNtCpuGroups[idxGroup].cActiveCpus; + return g_aRtMpNtCpuGroups[idxGroup].cMaxCpus; + } + if (pcActive) + *pcActive = 0; + return 0; +} + + +RTDECL(uint32_t) RTMpGetMaxCpuGroupCount(void) +{ + return g_cRtMpNtMaxGroups; +} + + +RTDECL(RTCPUID) RTMpGetMaxCpuId(void) +{ + Assert(g_cRtMpNtMaxCpus > 0 && g_cRtMpNtMaxGroups > 0); /* init order */ + +#ifdef IPRT_WITH_RTCPUID_AS_GROUP_AND_NUMBER + return RTMPCPUID_FROM_GROUP_AND_NUMBER(g_cRtMpNtMaxGroups - 1, g_aRtMpNtCpuGroups[g_cRtMpNtMaxGroups - 1].cMaxCpus - 1); +#else + /* According to MSDN the processor indexes goes from 0 to the maximum + number of CPUs in the system. We've check this in initterm-r0drv-nt.cpp. */ + return g_cRtMpNtMaxCpus - 1; +#endif +} + + +RTDECL(bool) RTMpIsCpuOnline(RTCPUID idCpu) +{ + Assert(g_cRtMpNtMaxCpus > 0 && g_cRtMpNtMaxGroups > 0); /* init order */ + return RTCpuSetIsMember(&g_rtMpNtCpuSet, idCpu); +} + + +RTDECL(bool) RTMpIsCpuPossible(RTCPUID idCpu) +{ + Assert(g_cRtMpNtMaxCpus > 0 && g_cRtMpNtMaxGroups > 0); /* init order */ + +#ifdef IPRT_WITH_RTCPUID_AS_GROUP_AND_NUMBER + if (idCpu != NIL_RTCPUID) + { + unsigned idxGroup = rtMpCpuIdGetGroup(idCpu); + if (idxGroup < g_cRtMpNtMaxGroups) + return rtMpCpuIdGetGroupMember(idCpu) < g_aRtMpNtCpuGroups[idxGroup].cMaxCpus; + } + return false; + +#else + /* A possible CPU ID is one with a value lower than g_cRtMpNtMaxCpus (see + comment in RTMpGetMaxCpuId). */ + return idCpu < g_cRtMpNtMaxCpus; +#endif +} + + + +RTDECL(PRTCPUSET) RTMpGetSet(PRTCPUSET pSet) +{ + Assert(g_cRtMpNtMaxCpus > 0 && g_cRtMpNtMaxGroups > 0); /* init order */ + + /* The set of possible CPU IDs(/indexes) are from 0 up to + g_cRtMpNtMaxCpus (see comment in RTMpGetMaxCpuId). */ + RTCpuSetEmpty(pSet); + int idxCpu = g_cRtMpNtMaxCpus; + while (idxCpu-- > 0) + RTCpuSetAddByIndex(pSet, idxCpu); + return pSet; +} + + +RTDECL(RTCPUID) RTMpGetCount(void) +{ + Assert(g_cRtMpNtMaxCpus > 0 && g_cRtMpNtMaxGroups > 0); /* init order */ + return g_cRtMpNtMaxCpus; +} + + +RTDECL(PRTCPUSET) RTMpGetOnlineSet(PRTCPUSET pSet) +{ + Assert(g_cRtMpNtMaxCpus > 0 && g_cRtMpNtMaxGroups > 0); /* init order */ + + *pSet = g_rtMpNtCpuSet; + return pSet; +} + + +RTDECL(RTCPUID) RTMpGetOnlineCount(void) +{ + RTCPUSET Set; + RTMpGetOnlineSet(&Set); + return RTCpuSetCount(&Set); +} + + +RTDECL(RTCPUID) RTMpGetOnlineCoreCount(void) +{ + /** @todo fix me */ + return RTMpGetOnlineCount(); +} + + + +#if 0 +/* Experiment with checking the undocumented KPRCB structure + * 'dt nt!_kprcb 0xaddress' shows the layout + */ +typedef struct +{ + LIST_ENTRY DpcListHead; + ULONG_PTR DpcLock; + volatile ULONG DpcQueueDepth; + ULONG DpcQueueCount; +} KDPC_DATA, *PKDPC_DATA; + +RTDECL(bool) RTMpIsCpuWorkPending(void) +{ + uint8_t *pkprcb; + PKDPC_DATA pDpcData; + + _asm { + mov eax, fs:0x20 + mov pkprcb, eax + } + pDpcData = (PKDPC_DATA)(pkprcb + 0x19e0); + if (pDpcData->DpcQueueDepth) + return true; + + pDpcData++; + if (pDpcData->DpcQueueDepth) + return true; + return false; +} +#else +RTDECL(bool) RTMpIsCpuWorkPending(void) +{ + /** @todo not implemented */ + return false; +} +#endif + + +/** + * Wrapper between the native KIPI_BROADCAST_WORKER and IPRT's PFNRTMPWORKER for + * the RTMpOnAll case. + * + * @param uUserCtx The user context argument (PRTMPARGS). + */ +static ULONG_PTR rtmpNtOnAllBroadcastIpiWrapper(ULONG_PTR uUserCtx) +{ + PRTMPARGS pArgs = (PRTMPARGS)uUserCtx; + /*ASMAtomicIncU32(&pArgs->cHits); - not needed */ + pArgs->pfnWorker(RTMpCpuId(), pArgs->pvUser1, pArgs->pvUser2); + return 0; +} + + +/** + * Wrapper between the native KIPI_BROADCAST_WORKER and IPRT's PFNRTMPWORKER for + * the RTMpOnOthers case. + * + * @param uUserCtx The user context argument (PRTMPARGS). + */ +static ULONG_PTR rtmpNtOnOthersBroadcastIpiWrapper(ULONG_PTR uUserCtx) +{ + PRTMPARGS pArgs = (PRTMPARGS)uUserCtx; + RTCPUID idCpu = RTMpCpuId(); + if (pArgs->idCpu != idCpu) + { + /*ASMAtomicIncU32(&pArgs->cHits); - not needed */ + pArgs->pfnWorker(idCpu, pArgs->pvUser1, pArgs->pvUser2); + } + return 0; +} + + +/** + * Wrapper between the native KIPI_BROADCAST_WORKER and IPRT's PFNRTMPWORKER for + * the RTMpOnPair case. + * + * @param uUserCtx The user context argument (PRTMPARGS). + */ +static ULONG_PTR rtmpNtOnPairBroadcastIpiWrapper(ULONG_PTR uUserCtx) +{ + PRTMPARGS pArgs = (PRTMPARGS)uUserCtx; + RTCPUID idCpu = RTMpCpuId(); + if ( pArgs->idCpu == idCpu + || pArgs->idCpu2 == idCpu) + { + ASMAtomicIncU32(&pArgs->cHits); + pArgs->pfnWorker(idCpu, pArgs->pvUser1, pArgs->pvUser2); + } + return 0; +} + + +/** + * Wrapper between the native KIPI_BROADCAST_WORKER and IPRT's PFNRTMPWORKER for + * the RTMpOnSpecific case. + * + * @param uUserCtx The user context argument (PRTMPARGS). + */ +static ULONG_PTR rtmpNtOnSpecificBroadcastIpiWrapper(ULONG_PTR uUserCtx) +{ + PRTMPARGS pArgs = (PRTMPARGS)uUserCtx; + RTCPUID idCpu = RTMpCpuId(); + if (pArgs->idCpu == idCpu) + { + ASMAtomicIncU32(&pArgs->cHits); + pArgs->pfnWorker(idCpu, pArgs->pvUser1, pArgs->pvUser2); + } + return 0; +} + + +/** + * Internal worker for the RTMpOn* APIs using KeIpiGenericCall. + * + * @returns VINF_SUCCESS. + * @param pfnWorker The callback. + * @param pvUser1 User argument 1. + * @param pvUser2 User argument 2. + * @param pfnNativeWrapper The wrapper between the NT and IPRT callbacks. + * @param idCpu First CPU to match, ultimately specific to the + * pfnNativeWrapper used. + * @param idCpu2 Second CPU to match, ultimately specific to the + * pfnNativeWrapper used. + * @param pcHits Where to return the number of this. Optional. + */ +static int rtMpCallUsingBroadcastIpi(PFNRTMPWORKER pfnWorker, void *pvUser1, void *pvUser2, + PKIPI_BROADCAST_WORKER pfnNativeWrapper, RTCPUID idCpu, RTCPUID idCpu2, + uint32_t *pcHits) +{ + RTMPARGS Args; + Args.pfnWorker = pfnWorker; + Args.pvUser1 = pvUser1; + Args.pvUser2 = pvUser2; + Args.idCpu = idCpu; + Args.idCpu2 = idCpu2; + Args.cRefs = 0; + Args.cHits = 0; + + AssertPtr(g_pfnrtKeIpiGenericCall); + g_pfnrtKeIpiGenericCall(pfnNativeWrapper, (uintptr_t)&Args); + if (pcHits) + *pcHits = Args.cHits; + return VINF_SUCCESS; +} + + +/** + * Wrapper between the native nt per-cpu callbacks and PFNRTWORKER + * + * @param Dpc DPC object + * @param DeferredContext Context argument specified by KeInitializeDpc + * @param SystemArgument1 Argument specified by KeInsertQueueDpc + * @param SystemArgument2 Argument specified by KeInsertQueueDpc + */ +static VOID rtmpNtDPCWrapper(IN PKDPC Dpc, IN PVOID DeferredContext, IN PVOID SystemArgument1, IN PVOID SystemArgument2) +{ + PRTMPARGS pArgs = (PRTMPARGS)DeferredContext; + RT_NOREF3(Dpc, SystemArgument1, SystemArgument2); + + ASMAtomicIncU32(&pArgs->cHits); + pArgs->pfnWorker(RTMpCpuId(), pArgs->pvUser1, pArgs->pvUser2); + + /* Dereference the argument structure. */ + int32_t cRefs = ASMAtomicDecS32(&pArgs->cRefs); + Assert(cRefs >= 0); + if (cRefs == 0) + RTMemFree(pArgs); +} + + +/** + * Wrapper around KeSetTargetProcessorDpcEx / KeSetTargetProcessorDpc. + * + * This is shared with the timer code. + * + * @returns IPRT status code (errors are asserted). + * @param pDpc The DPC. + * @param idCpu The ID of the new target CPU. + */ +DECLHIDDEN(int) rtMpNtSetTargetProcessorDpc(KDPC *pDpc, RTCPUID idCpu) +{ + if (g_pfnrtKeSetTargetProcessorDpcEx) + { + /* Convert to stupid process number (bet KeSetTargetProcessorDpcEx does + the reverse conversion internally). */ + PROCESSOR_NUMBER ProcNum; + NTSTATUS rcNt = g_pfnrtKeGetProcessorNumberFromIndex(RTMpCpuIdToSetIndex(idCpu), &ProcNum); + AssertMsgReturn(NT_SUCCESS(rcNt), + ("KeGetProcessorNumberFromIndex(%u) -> %#x\n", idCpu, rcNt), + RTErrConvertFromNtStatus(rcNt)); + + rcNt = g_pfnrtKeSetTargetProcessorDpcEx(pDpc, &ProcNum); + AssertMsgReturn(NT_SUCCESS(rcNt), + ("KeSetTargetProcessorDpcEx(,%u(%u/%u)) -> %#x\n", idCpu, ProcNum.Group, ProcNum.Number, rcNt), + RTErrConvertFromNtStatus(rcNt)); + } + else if (g_pfnrtKeSetTargetProcessorDpc) + g_pfnrtKeSetTargetProcessorDpc(pDpc, RTMpCpuIdToSetIndex(idCpu)); + else + return VERR_NOT_SUPPORTED; + return VINF_SUCCESS; +} + + +/** + * Internal worker for the RTMpOn* APIs. + * + * @returns IPRT status code. + * @param pfnWorker The callback. + * @param pvUser1 User argument 1. + * @param pvUser2 User argument 2. + * @param enmCpuid What to do / is idCpu valid. + * @param idCpu Used if enmCpuid is RT_NT_CPUID_SPECIFIC or + * RT_NT_CPUID_PAIR, otherwise ignored. + * @param idCpu2 Used if enmCpuid is RT_NT_CPUID_PAIR, otherwise ignored. + * @param pcHits Where to return the number of this. Optional. + */ +static int rtMpCallUsingDpcs(PFNRTMPWORKER pfnWorker, void *pvUser1, void *pvUser2, + RT_NT_CPUID enmCpuid, RTCPUID idCpu, RTCPUID idCpu2, uint32_t *pcHits) +{ +#if 0 + /* KeFlushQueuedDpcs must be run at IRQL PASSIVE_LEVEL according to MSDN, but the + * driver verifier doesn't complain... + */ + AssertMsg(KeGetCurrentIrql() == PASSIVE_LEVEL, ("%d != %d (PASSIVE_LEVEL)\n", KeGetCurrentIrql(), PASSIVE_LEVEL)); +#endif + /* KeFlushQueuedDpcs is not present in Windows 2000; import it dynamically so we can just fail this call. */ + if (!g_pfnrtNtKeFlushQueuedDpcs) + return VERR_NOT_SUPPORTED; + + /* + * Make a copy of the active CPU set and figure out how many KDPCs we really need. + * We must not try setup DPCs for CPUs which aren't there, because that may fail. + */ + RTCPUSET OnlineSet = g_rtMpNtCpuSet; + uint32_t cDpcsNeeded; + switch (enmCpuid) + { + case RT_NT_CPUID_SPECIFIC: + cDpcsNeeded = 1; + break; + case RT_NT_CPUID_PAIR: + cDpcsNeeded = 2; + break; + default: + do + { + cDpcsNeeded = g_cRtMpNtActiveCpus; + OnlineSet = g_rtMpNtCpuSet; + } while (cDpcsNeeded != g_cRtMpNtActiveCpus); + break; + } + + /* + * Allocate an RTMPARGS structure followed by cDpcsNeeded KDPCs + * and initialize them. + */ + PRTMPARGS pArgs = (PRTMPARGS)RTMemAllocZ(sizeof(RTMPARGS) + cDpcsNeeded * sizeof(KDPC)); + if (!pArgs) + return VERR_NO_MEMORY; + + pArgs->pfnWorker = pfnWorker; + pArgs->pvUser1 = pvUser1; + pArgs->pvUser2 = pvUser2; + pArgs->idCpu = NIL_RTCPUID; + pArgs->idCpu2 = NIL_RTCPUID; + pArgs->cHits = 0; + pArgs->cRefs = 1; + + int rc; + KDPC *paExecCpuDpcs = (KDPC *)(pArgs + 1); + if (enmCpuid == RT_NT_CPUID_SPECIFIC) + { + KeInitializeDpc(&paExecCpuDpcs[0], rtmpNtDPCWrapper, pArgs); + if (g_pfnrtKeSetImportanceDpc) + g_pfnrtKeSetImportanceDpc(&paExecCpuDpcs[0], HighImportance); + rc = rtMpNtSetTargetProcessorDpc(&paExecCpuDpcs[0], idCpu); + pArgs->idCpu = idCpu; + } + else if (enmCpuid == RT_NT_CPUID_PAIR) + { + KeInitializeDpc(&paExecCpuDpcs[0], rtmpNtDPCWrapper, pArgs); + if (g_pfnrtKeSetImportanceDpc) + g_pfnrtKeSetImportanceDpc(&paExecCpuDpcs[0], HighImportance); + rc = rtMpNtSetTargetProcessorDpc(&paExecCpuDpcs[0], idCpu); + pArgs->idCpu = idCpu; + + KeInitializeDpc(&paExecCpuDpcs[1], rtmpNtDPCWrapper, pArgs); + if (g_pfnrtKeSetImportanceDpc) + g_pfnrtKeSetImportanceDpc(&paExecCpuDpcs[1], HighImportance); + if (RT_SUCCESS(rc)) + rc = rtMpNtSetTargetProcessorDpc(&paExecCpuDpcs[1], (int)idCpu2); + pArgs->idCpu2 = idCpu2; + } + else + { + rc = VINF_SUCCESS; + for (uint32_t i = 0; i < cDpcsNeeded && RT_SUCCESS(rc); i++) + if (RTCpuSetIsMemberByIndex(&OnlineSet, i)) + { + KeInitializeDpc(&paExecCpuDpcs[i], rtmpNtDPCWrapper, pArgs); + if (g_pfnrtKeSetImportanceDpc) + g_pfnrtKeSetImportanceDpc(&paExecCpuDpcs[i], HighImportance); + rc = rtMpNtSetTargetProcessorDpc(&paExecCpuDpcs[i], RTMpCpuIdFromSetIndex(i)); + } + } + if (RT_FAILURE(rc)) + { + RTMemFree(pArgs); + return rc; + } + + /* + * Raise the IRQL to DISPATCH_LEVEL so we can't be rescheduled to another cpu. + * KeInsertQueueDpc must also be executed at IRQL >= DISPATCH_LEVEL. + */ + KIRQL oldIrql; + KeRaiseIrql(DISPATCH_LEVEL, &oldIrql); + + /* + * We cannot do other than assume a 1:1 relationship between the + * affinity mask and the process despite the warnings in the docs. + * If someone knows a better way to get this done, please let bird know. + */ + ASMCompilerBarrier(); /* paranoia */ + if (enmCpuid == RT_NT_CPUID_SPECIFIC) + { + ASMAtomicIncS32(&pArgs->cRefs); + BOOLEAN fRc = KeInsertQueueDpc(&paExecCpuDpcs[0], 0, 0); + Assert(fRc); NOREF(fRc); + } + else if (enmCpuid == RT_NT_CPUID_PAIR) + { + ASMAtomicIncS32(&pArgs->cRefs); + BOOLEAN fRc = KeInsertQueueDpc(&paExecCpuDpcs[0], 0, 0); + Assert(fRc); NOREF(fRc); + + ASMAtomicIncS32(&pArgs->cRefs); + fRc = KeInsertQueueDpc(&paExecCpuDpcs[1], 0, 0); + Assert(fRc); NOREF(fRc); + } + else + { + uint32_t iSelf = RTMpCurSetIndex(); + for (uint32_t i = 0; i < cDpcsNeeded; i++) + { + if ( (i != iSelf) + && RTCpuSetIsMemberByIndex(&OnlineSet, i)) + { + ASMAtomicIncS32(&pArgs->cRefs); + BOOLEAN fRc = KeInsertQueueDpc(&paExecCpuDpcs[i], 0, 0); + Assert(fRc); NOREF(fRc); + } + } + if (enmCpuid != RT_NT_CPUID_OTHERS) + pfnWorker(iSelf, pvUser1, pvUser2); + } + + KeLowerIrql(oldIrql); + + /* + * Flush all DPCs and wait for completion. (can take long!) + */ + /** @todo Consider changing this to an active wait using some atomic inc/dec + * stuff (and check for the current cpu above in the specific case). */ + /** @todo Seems KeFlushQueuedDpcs doesn't wait for the DPCs to be completely + * executed. Seen pArgs being freed while some CPU was using it before + * cRefs was added. */ + if (g_pfnrtNtKeFlushQueuedDpcs) + g_pfnrtNtKeFlushQueuedDpcs(); + + if (pcHits) + *pcHits = pArgs->cHits; + + /* Dereference the argument structure. */ + int32_t cRefs = ASMAtomicDecS32(&pArgs->cRefs); + Assert(cRefs >= 0); + if (cRefs == 0) + RTMemFree(pArgs); + + return VINF_SUCCESS; +} + + +RTDECL(int) RTMpOnAll(PFNRTMPWORKER pfnWorker, void *pvUser1, void *pvUser2) +{ + if (g_pfnrtKeIpiGenericCall) + return rtMpCallUsingBroadcastIpi(pfnWorker, pvUser1, pvUser2, rtmpNtOnAllBroadcastIpiWrapper, + NIL_RTCPUID, NIL_RTCPUID, NULL); + return rtMpCallUsingDpcs(pfnWorker, pvUser1, pvUser2, RT_NT_CPUID_ALL, NIL_RTCPUID, NIL_RTCPUID, NULL); +} + + +RTDECL(int) RTMpOnOthers(PFNRTMPWORKER pfnWorker, void *pvUser1, void *pvUser2) +{ + if (g_pfnrtKeIpiGenericCall) + return rtMpCallUsingBroadcastIpi(pfnWorker, pvUser1, pvUser2, rtmpNtOnOthersBroadcastIpiWrapper, + NIL_RTCPUID, NIL_RTCPUID, NULL); + return rtMpCallUsingDpcs(pfnWorker, pvUser1, pvUser2, RT_NT_CPUID_OTHERS, NIL_RTCPUID, NIL_RTCPUID, NULL); +} + + +RTDECL(int) RTMpOnPair(RTCPUID idCpu1, RTCPUID idCpu2, uint32_t fFlags, PFNRTMPWORKER pfnWorker, void *pvUser1, void *pvUser2) +{ + int rc; + AssertReturn(idCpu1 != idCpu2, VERR_INVALID_PARAMETER); + AssertReturn(!(fFlags & RTMPON_F_VALID_MASK), VERR_INVALID_FLAGS); + if ((fFlags & RTMPON_F_CONCURRENT_EXEC) && !g_pfnrtKeIpiGenericCall) + return VERR_NOT_SUPPORTED; + + /* + * Check that both CPUs are online before doing the broadcast call. + */ + if ( RTMpIsCpuOnline(idCpu1) + && RTMpIsCpuOnline(idCpu2)) + { + /* + * The broadcast IPI isn't quite as bad as it could have been, because + * it looks like windows doesn't synchronize CPUs on the way out, they + * seems to get back to normal work while the pair is still busy. + */ + uint32_t cHits = 0; + if (g_pfnrtKeIpiGenericCall) + rc = rtMpCallUsingBroadcastIpi(pfnWorker, pvUser1, pvUser2, rtmpNtOnPairBroadcastIpiWrapper, idCpu1, idCpu2, &cHits); + else + rc = rtMpCallUsingDpcs(pfnWorker, pvUser1, pvUser2, RT_NT_CPUID_PAIR, idCpu1, idCpu2, &cHits); + if (RT_SUCCESS(rc)) + { + Assert(cHits <= 2); + if (cHits == 2) + rc = VINF_SUCCESS; + else if (cHits == 1) + rc = VERR_NOT_ALL_CPUS_SHOWED; + else if (cHits == 0) + rc = VERR_CPU_OFFLINE; + else + rc = VERR_CPU_IPE_1; + } + } + /* + * A CPU must be present to be considered just offline. + */ + else if ( RTMpIsCpuPresent(idCpu1) + && RTMpIsCpuPresent(idCpu2)) + rc = VERR_CPU_OFFLINE; + else + rc = VERR_CPU_NOT_FOUND; + return rc; +} + + +RTDECL(bool) RTMpOnPairIsConcurrentExecSupported(void) +{ + return g_pfnrtKeIpiGenericCall != NULL; +} + + +/** + * Releases a reference to a RTMPNTONSPECIFICARGS heap allocation, freeing it + * when the last reference is released. + */ +DECLINLINE(void) rtMpNtOnSpecificRelease(PRTMPNTONSPECIFICARGS pArgs) +{ + uint32_t cRefs = ASMAtomicDecU32(&pArgs->cRefs); + AssertMsg(cRefs <= 1, ("cRefs=%#x\n", cRefs)); + if (cRefs == 0) + RTMemFree(pArgs); +} + + +/** + * Wrapper between the native nt per-cpu callbacks and PFNRTWORKER + * + * @param Dpc DPC object + * @param DeferredContext Context argument specified by KeInitializeDpc + * @param SystemArgument1 Argument specified by KeInsertQueueDpc + * @param SystemArgument2 Argument specified by KeInsertQueueDpc + */ +static VOID rtMpNtOnSpecificDpcWrapper(IN PKDPC Dpc, IN PVOID DeferredContext, + IN PVOID SystemArgument1, IN PVOID SystemArgument2) +{ + PRTMPNTONSPECIFICARGS pArgs = (PRTMPNTONSPECIFICARGS)DeferredContext; + RT_NOREF3(Dpc, SystemArgument1, SystemArgument2); + + ASMAtomicWriteBool(&pArgs->fExecuting, true); + + pArgs->CallbackArgs.pfnWorker(RTMpCpuId(), pArgs->CallbackArgs.pvUser1, pArgs->CallbackArgs.pvUser2); + + ASMAtomicWriteBool(&pArgs->fDone, true); + KeSetEvent(&pArgs->DoneEvt, 1 /*PriorityIncrement*/, FALSE /*Wait*/); + + rtMpNtOnSpecificRelease(pArgs); +} + + +RTDECL(int) RTMpOnSpecific(RTCPUID idCpu, PFNRTMPWORKER pfnWorker, void *pvUser1, void *pvUser2) +{ + /* + * Don't try mess with an offline CPU. + */ + if (!RTMpIsCpuOnline(idCpu)) + return !RTMpIsCpuPossible(idCpu) + ? VERR_CPU_NOT_FOUND + : VERR_CPU_OFFLINE; + + /* + * Use the broadcast IPI routine if there are no more than two CPUs online, + * or if the current IRQL is unsuitable for KeWaitForSingleObject. + */ + int rc; + uint32_t cHits = 0; + if ( g_pfnrtKeIpiGenericCall + && ( RTMpGetOnlineCount() <= 2 + || KeGetCurrentIrql() > APC_LEVEL) + ) + { + rc = rtMpCallUsingBroadcastIpi(pfnWorker, pvUser1, pvUser2, rtmpNtOnSpecificBroadcastIpiWrapper, + idCpu, NIL_RTCPUID, &cHits); + if (RT_SUCCESS(rc)) + { + if (cHits == 1) + return VINF_SUCCESS; + rc = cHits == 0 ? VERR_CPU_OFFLINE : VERR_CPU_IPE_1; + } + return rc; + } + +#if 0 + rc = rtMpCallUsingDpcs(pfnWorker, pvUser1, pvUser2, RT_NT_CPUID_SPECIFIC, idCpu, NIL_RTCPUID, &cHits); + if (RT_SUCCESS(rc)) + { + if (cHits == 1) + return VINF_SUCCESS; + rc = cHits == 0 ? VERR_CPU_OFFLINE : VERR_CPU_IPE_1; + } + return rc; + +#else + /* + * Initialize the argument package and the objects within it. + * The package is referenced counted to avoid unnecessary spinning to + * synchronize cleanup and prevent stack corruption. + */ + PRTMPNTONSPECIFICARGS pArgs = (PRTMPNTONSPECIFICARGS)RTMemAllocZ(sizeof(*pArgs)); + if (!pArgs) + return VERR_NO_MEMORY; + pArgs->cRefs = 2; + pArgs->fExecuting = false; + pArgs->fDone = false; + pArgs->CallbackArgs.pfnWorker = pfnWorker; + pArgs->CallbackArgs.pvUser1 = pvUser1; + pArgs->CallbackArgs.pvUser2 = pvUser2; + pArgs->CallbackArgs.idCpu = idCpu; + pArgs->CallbackArgs.cHits = 0; + pArgs->CallbackArgs.cRefs = 2; + KeInitializeEvent(&pArgs->DoneEvt, SynchronizationEvent, FALSE /* not signalled */); + KeInitializeDpc(&pArgs->Dpc, rtMpNtOnSpecificDpcWrapper, pArgs); + if (g_pfnrtKeSetImportanceDpc) + g_pfnrtKeSetImportanceDpc(&pArgs->Dpc, HighImportance); + rc = rtMpNtSetTargetProcessorDpc(&pArgs->Dpc, idCpu); + if (RT_FAILURE(rc)) + { + RTMemFree(pArgs); + return rc; + } + + /* + * Disable preemption while we check the current processor and inserts the DPC. + */ + KIRQL bOldIrql; + KeRaiseIrql(DISPATCH_LEVEL, &bOldIrql); + ASMCompilerBarrier(); /* paranoia */ + + if (RTMpCpuId() == idCpu) + { + /* Just execute the callback on the current CPU. */ + pfnWorker(idCpu, pvUser1, pvUser2); + KeLowerIrql(bOldIrql); + + RTMemFree(pArgs); + return VINF_SUCCESS; + } + + /* Different CPU, so queue it if the CPU is still online. */ + if (RTMpIsCpuOnline(idCpu)) + { + BOOLEAN fRc = KeInsertQueueDpc(&pArgs->Dpc, 0, 0); + Assert(fRc); NOREF(fRc); + KeLowerIrql(bOldIrql); + + uint64_t const nsRealWaitTS = RTTimeNanoTS(); + + /* + * Wait actively for a while in case the CPU/thread responds quickly. + */ + uint32_t cLoopsLeft = 0x20000; + while (cLoopsLeft-- > 0) + { + if (pArgs->fDone) + { + rtMpNtOnSpecificRelease(pArgs); + return VINF_SUCCESS; + } + ASMNopPause(); + } + + /* + * It didn't respond, so wait on the event object, poking the CPU if it's slow. + */ + LARGE_INTEGER Timeout; + Timeout.QuadPart = -10000; /* 1ms */ + NTSTATUS rcNt = KeWaitForSingleObject(&pArgs->DoneEvt, Executive, KernelMode, FALSE /* Alertable */, &Timeout); + if (rcNt == STATUS_SUCCESS) + { + rtMpNtOnSpecificRelease(pArgs); + return VINF_SUCCESS; + } + + /* If it hasn't respondend yet, maybe poke it and wait some more. */ + if (rcNt == STATUS_TIMEOUT) + { + if ( !pArgs->fExecuting + && ( g_pfnrtMpPokeCpuWorker == rtMpPokeCpuUsingHalRequestIpiW7Plus + || g_pfnrtMpPokeCpuWorker == rtMpPokeCpuUsingHalRequestIpiPreW7)) + RTMpPokeCpu(idCpu); + + Timeout.QuadPart = -1280000; /* 128ms */ + rcNt = KeWaitForSingleObject(&pArgs->DoneEvt, Executive, KernelMode, FALSE /* Alertable */, &Timeout); + if (rcNt == STATUS_SUCCESS) + { + rtMpNtOnSpecificRelease(pArgs); + return VINF_SUCCESS; + } + } + + /* + * Something weird is happening, try bail out. + */ + if (KeRemoveQueueDpc(&pArgs->Dpc)) + { + RTMemFree(pArgs); /* DPC was still queued, so we can return without further ado. */ + LogRel(("RTMpOnSpecific(%#x): Not processed after %llu ns: rcNt=%#x\n", idCpu, RTTimeNanoTS() - nsRealWaitTS, rcNt)); + } + else + { + /* DPC is running, wait a good while for it to complete. */ + LogRel(("RTMpOnSpecific(%#x): Still running after %llu ns: rcNt=%#x\n", idCpu, RTTimeNanoTS() - nsRealWaitTS, rcNt)); + + Timeout.QuadPart = -30*1000*1000*10; /* 30 seconds */ + rcNt = KeWaitForSingleObject(&pArgs->DoneEvt, Executive, KernelMode, FALSE /* Alertable */, &Timeout); + if (rcNt != STATUS_SUCCESS) + LogRel(("RTMpOnSpecific(%#x): Giving up on running worker after %llu ns: rcNt=%#x\n", idCpu, RTTimeNanoTS() - nsRealWaitTS, rcNt)); + } + rc = RTErrConvertFromNtStatus(rcNt); + } + else + { + /* CPU is offline.*/ + KeLowerIrql(bOldIrql); + rc = !RTMpIsCpuPossible(idCpu) ? VERR_CPU_NOT_FOUND : VERR_CPU_OFFLINE; + } + + rtMpNtOnSpecificRelease(pArgs); + return rc; +#endif +} + + + + +static VOID rtMpNtPokeCpuDummy(IN PKDPC Dpc, IN PVOID DeferredContext, IN PVOID SystemArgument1, IN PVOID SystemArgument2) +{ + NOREF(Dpc); + NOREF(DeferredContext); + NOREF(SystemArgument1); + NOREF(SystemArgument2); +} + + +/** Callback used by rtMpPokeCpuUsingBroadcastIpi. */ +static ULONG_PTR rtMpIpiGenericCall(ULONG_PTR Argument) +{ + NOREF(Argument); + return 0; +} + + +/** + * RTMpPokeCpu worker that uses broadcast IPIs for doing the work. + * + * @returns VINF_SUCCESS + * @param idCpu The CPU identifier. + */ +int rtMpPokeCpuUsingBroadcastIpi(RTCPUID idCpu) +{ + NOREF(idCpu); + g_pfnrtKeIpiGenericCall(rtMpIpiGenericCall, 0); + return VINF_SUCCESS; +} + + +/** + * RTMpPokeCpu worker that uses the Windows 7 and later version of + * HalRequestIpip to get the job done. + * + * @returns VINF_SUCCESS + * @param idCpu The CPU identifier. + */ +int rtMpPokeCpuUsingHalRequestIpiW7Plus(RTCPUID idCpu) +{ + /* idCpu is an HAL processor index, so we can use it directly. */ + PKAFFINITY_EX pTarget = (PKAFFINITY_EX)alloca(g_cbRtMpNtKaffinityEx); + pTarget->Size = g_cRtMpNtKaffinityExEntries; /* (just in case KeInitializeAffinityEx starts using it) */ + g_pfnrtKeInitializeAffinityEx(pTarget); + g_pfnrtKeAddProcessorAffinityEx(pTarget, idCpu); + + g_pfnrtHalRequestIpiW7Plus(0, pTarget); + return VINF_SUCCESS; +} + + +/** + * RTMpPokeCpu worker that uses the Vista and earlier version of HalRequestIpip + * to get the job done. + * + * @returns VINF_SUCCESS + * @param idCpu The CPU identifier. + */ +int rtMpPokeCpuUsingHalRequestIpiPreW7(RTCPUID idCpu) +{ + __debugbreak(); /** @todo this code needs testing!! */ + KAFFINITY Target = 1; + Target <<= idCpu; + g_pfnrtHalRequestIpiPreW7(Target); + return VINF_SUCCESS; +} + + +int rtMpPokeCpuUsingFailureNotSupported(RTCPUID idCpu) +{ + NOREF(idCpu); + return VERR_NOT_SUPPORTED; +} + + +int rtMpPokeCpuUsingDpc(RTCPUID idCpu) +{ + Assert(g_cRtMpNtMaxCpus > 0 && g_cRtMpNtMaxGroups > 0); /* init order */ + + /* + * APC fallback. + */ + static KDPC s_aPokeDpcs[RTCPUSET_MAX_CPUS] = {0}; + static bool s_fPokeDPCsInitialized = false; + + if (!s_fPokeDPCsInitialized) + { + for (unsigned i = 0; i < g_cRtMpNtMaxCpus; i++) + { + KeInitializeDpc(&s_aPokeDpcs[i], rtMpNtPokeCpuDummy, NULL); + if (g_pfnrtKeSetImportanceDpc) + g_pfnrtKeSetImportanceDpc(&s_aPokeDpcs[i], HighImportance); + int rc = rtMpNtSetTargetProcessorDpc(&s_aPokeDpcs[i], idCpu); + if (RT_FAILURE(rc)) + return rc; + } + + s_fPokeDPCsInitialized = true; + } + + /* Raise the IRQL to DISPATCH_LEVEL so we can't be rescheduled to another cpu. + KeInsertQueueDpc must also be executed at IRQL >= DISPATCH_LEVEL. */ + KIRQL oldIrql; + KeRaiseIrql(DISPATCH_LEVEL, &oldIrql); + + if (g_pfnrtKeSetImportanceDpc) + g_pfnrtKeSetImportanceDpc(&s_aPokeDpcs[idCpu], HighImportance); + g_pfnrtKeSetTargetProcessorDpc(&s_aPokeDpcs[idCpu], (int)idCpu); + + /* Assuming here that high importance DPCs will be delivered immediately; or at least an IPI will be sent immediately. + Note! Not true on at least Vista & Windows 7 */ + BOOLEAN fRet = KeInsertQueueDpc(&s_aPokeDpcs[idCpu], 0, 0); + + KeLowerIrql(oldIrql); + return fRet == TRUE ? VINF_SUCCESS : VERR_ACCESS_DENIED /* already queued */; +} + + +RTDECL(int) RTMpPokeCpu(RTCPUID idCpu) +{ + if (!RTMpIsCpuOnline(idCpu)) + return !RTMpIsCpuPossible(idCpu) + ? VERR_CPU_NOT_FOUND + : VERR_CPU_OFFLINE; + /* Calls rtMpPokeCpuUsingDpc, rtMpPokeCpuUsingHalRequestIpiW7Plus or rtMpPokeCpuUsingBroadcastIpi. */ + return g_pfnrtMpPokeCpuWorker(idCpu); +} + + +RTDECL(bool) RTMpOnAllIsConcurrentSafe(void) +{ + return false; +} + |