summaryrefslogtreecommitdiffstats
path: root/src/VBox/Devices/Network/SrvIntNetR0.cpp
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-11 08:17:27 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-11 08:17:27 +0000
commitf215e02bf85f68d3a6106c2a1f4f7f063f819064 (patch)
tree6bb5b92c046312c4e95ac2620b10ddf482d3fa8b /src/VBox/Devices/Network/SrvIntNetR0.cpp
parentInitial commit. (diff)
downloadvirtualbox-f215e02bf85f68d3a6106c2a1f4f7f063f819064.tar.xz
virtualbox-f215e02bf85f68d3a6106c2a1f4f7f063f819064.zip
Adding upstream version 7.0.14-dfsg.upstream/7.0.14-dfsg
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/VBox/Devices/Network/SrvIntNetR0.cpp')
-rw-r--r--src/VBox/Devices/Network/SrvIntNetR0.cpp6903
1 files changed, 6903 insertions, 0 deletions
diff --git a/src/VBox/Devices/Network/SrvIntNetR0.cpp b/src/VBox/Devices/Network/SrvIntNetR0.cpp
new file mode 100644
index 00000000..1a2b055d
--- /dev/null
+++ b/src/VBox/Devices/Network/SrvIntNetR0.cpp
@@ -0,0 +1,6903 @@
+/* $Id: SrvIntNetR0.cpp $ */
+/** @file
+ * Internal networking - The ring 0 service.
+ *
+ * @remarks No lazy code changes. If you don't understand exactly what you're
+ * doing, get an understanding or forget it.
+ * All changes shall be reviewed by bird before commit. If not around,
+ * email and let Frank and/or Klaus OK the changes before committing.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_SRV_INTNET
+#include <VBox/intnet.h>
+#include <VBox/intnetinline.h>
+#include <VBox/vmm/pdmnetinline.h>
+#include <VBox/sup.h>
+#include <VBox/vmm/pdm.h>
+#include <VBox/log.h>
+
+#include <iprt/asm.h>
+#include <iprt/assert.h>
+#include <iprt/handletable.h>
+#include <iprt/mp.h>
+#include <iprt/mem.h>
+#include <iprt/net.h>
+#include <iprt/semaphore.h>
+#include <iprt/spinlock.h>
+#include <iprt/string.h>
+#include <iprt/thread.h>
+#include <iprt/time.h>
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+/** @def INTNET_WITH_DHCP_SNOOPING
+ * Enabled DHCP snooping when in shared-mac-on-the-wire mode. */
+#define INTNET_WITH_DHCP_SNOOPING
+
+/** The maximum number of interface in a network. */
+#define INTNET_MAX_IFS (1023 + 1 + 16)
+
+/** The number of entries to grow the destination tables with. */
+#if 0
+# define INTNET_GROW_DSTTAB_SIZE 16
+#else
+# define INTNET_GROW_DSTTAB_SIZE 1
+#endif
+
+/** The wakeup bit in the INTNETIF::cBusy and INTNETRUNKIF::cBusy counters. */
+#define INTNET_BUSY_WAKEUP_MASK RT_BIT_32(30)
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+/**
+ * MAC address lookup table entry.
+ */
+typedef struct INTNETMACTABENTRY
+{
+ /** The MAC address of this entry. */
+ RTMAC MacAddr;
+ /** Is it is effectively promiscuous mode. */
+ bool fPromiscuousEff;
+ /** Is it promiscuous and should it see unrelated trunk traffic. */
+ bool fPromiscuousSeeTrunk;
+ /** Is it active.
+ * We ignore the entry if this is clear and may end up sending packets addressed
+ * to this interface onto the trunk. The reasoning for this is that this could
+ * be the interface of a VM that just has been teleported to a different host. */
+ bool fActive;
+ /** Pointer to the network interface. */
+ struct INTNETIF *pIf;
+} INTNETMACTABENTRY;
+/** Pointer to a MAC address lookup table entry. */
+typedef INTNETMACTABENTRY *PINTNETMACTABENTRY;
+
+/**
+ * MAC address lookup table.
+ *
+ * @todo Having this in a separate structure didn't work out as well as it
+ * should. Consider merging it into INTNETNETWORK.
+ */
+typedef struct INTNETMACTAB
+{
+ /** The current number of entries. */
+ uint32_t cEntries;
+ /** The number of entries we've allocated space for. */
+ uint32_t cEntriesAllocated;
+ /** Table entries. */
+ PINTNETMACTABENTRY paEntries;
+
+ /** The number of interface entries currently in promicuous mode. */
+ uint32_t cPromiscuousEntries;
+ /** The number of interface entries currently in promicuous mode that
+ * shall not see unrelated trunk traffic. */
+ uint32_t cPromiscuousNoTrunkEntries;
+
+ /** The host MAC address (reported). */
+ RTMAC HostMac;
+ /** The effective host promiscuous setting (reported). */
+ bool fHostPromiscuousEff;
+ /** The real host promiscuous setting (reported). */
+ bool fHostPromiscuousReal;
+ /** Whether the host is active. */
+ bool fHostActive;
+
+ /** Whether the wire is promiscuous (config). */
+ bool fWirePromiscuousEff;
+ /** Whether the wire is promiscuous (config).
+ * (Shadows INTNET_OPEN_FLAGS_TRUNK_WIRE_PROMISC_MODE in
+ * INTNETNETWORK::fFlags.) */
+ bool fWirePromiscuousReal;
+ /** Whether the wire is active. */
+ bool fWireActive;
+
+ /** Pointer to the trunk interface. */
+ struct INTNETTRUNKIF *pTrunk;
+} INTNETMACTAB;
+/** Pointer to a MAC address . */
+typedef INTNETMACTAB *PINTNETMACTAB;
+
+/**
+ * Destination table.
+ */
+typedef struct INTNETDSTTAB
+{
+ /** The trunk destinations. */
+ uint32_t fTrunkDst;
+ /** Pointer to the trunk interface (referenced) if fTrunkDst is non-zero. */
+ struct INTNETTRUNKIF *pTrunk;
+ /** The number of destination interfaces. */
+ uint32_t cIfs;
+ /** The interfaces (referenced). Variable sized array. */
+ struct
+ {
+ /** The destination interface. */
+ struct INTNETIF *pIf;
+ /** Whether to replace the destination MAC address.
+ * This is used when sharing MAC address with the host on the wire(less). */
+ bool fReplaceDstMac;
+ } aIfs[1];
+} INTNETDSTTAB;
+/** Pointer to a destination table. */
+typedef INTNETDSTTAB *PINTNETDSTTAB;
+/** Pointer to a const destination table. */
+typedef INTNETDSTTAB const *PCINTNETDSTTAB;
+
+/**
+ * Address and type.
+ */
+typedef struct INTNETADDR
+{
+ /** The address type. */
+ INTNETADDRTYPE enmType;
+ /** The address. */
+ RTNETADDRU Addr;
+} INTNETADDR;
+/** Pointer to an address. */
+typedef INTNETADDR *PINTNETADDR;
+/** Pointer to a const address. */
+typedef INTNETADDR const *PCINTNETADDR;
+
+
+/**
+ * Address cache for a specific network layer.
+ */
+typedef struct INTNETADDRCACHE
+{
+ /** Pointer to the table of addresses. */
+ uint8_t *pbEntries;
+ /** The number of valid address entries. */
+ uint8_t cEntries;
+ /** The number of allocated address entries. */
+ uint8_t cEntriesAlloc;
+ /** The address size. */
+ uint8_t cbAddress;
+ /** The size of an entry. */
+ uint8_t cbEntry;
+} INTNETADDRCACHE;
+/** Pointer to an address cache. */
+typedef INTNETADDRCACHE *PINTNETADDRCACHE;
+/** Pointer to a const address cache. */
+typedef INTNETADDRCACHE const *PCINTNETADDRCACHE;
+
+
+/**
+ * A network interface.
+ *
+ * Unless explicitly stated, all members are protect by the network semaphore.
+ */
+typedef struct INTNETIF
+{
+ /** The MAC address.
+ * This is shadowed by INTNETMACTABENTRY::MacAddr. */
+ RTMAC MacAddr;
+ /** Set if the INTNET::MacAddr member has been explicitly set. */
+ bool fMacSet;
+ /** Tracks the desired promiscuous setting of the interface. */
+ bool fPromiscuousReal;
+ /** Whether the interface is active or not.
+ * This is shadowed by INTNETMACTABENTRY::fActive. */
+ bool fActive;
+ /** Whether someone has indicated that the end is nigh by means of IntNetR0IfAbortWait. */
+ bool volatile fNoMoreWaits;
+ /** The flags specified when opening this interface. */
+ uint32_t fOpenFlags;
+ /** Number of yields done to try make the interface read pending data.
+ * We will stop yielding when this reaches a threshold assuming that the VM is
+ * paused or that it simply isn't worth all the delay. It is cleared when a
+ * successful send has been done. */
+ uint32_t cYields;
+ /** Pointer to the current exchange buffer (ring-0). */
+ PINTNETBUF pIntBuf;
+ /** Pointer to ring-3 mapping of the current exchange buffer. */
+ R3PTRTYPE(PINTNETBUF) pIntBufR3;
+ /** Pointer to the default exchange buffer for the interface. */
+ PINTNETBUF pIntBufDefault;
+ /** Pointer to ring-3 mapping of the default exchange buffer. */
+ R3PTRTYPE(PINTNETBUF) pIntBufDefaultR3;
+#if !defined(VBOX_WITH_INTNET_SERVICE_IN_R3) || !defined(IN_RING3)
+ /** Event semaphore which a receiver/consumer thread will sleep on while
+ * waiting for data to arrive. */
+ RTSEMEVENT volatile hRecvEvent;
+ /** Number of threads sleeping on the event semaphore. */
+ uint32_t volatile cSleepers;
+#else
+ /** The callback to call when there is something to receive/consume. */
+ PFNINTNETIFRECVAVAIL pfnRecvAvail;
+ /** Opaque user data to pass to the receive avail callback (pfnRecvAvail). */
+ void *pvUserRecvAvail;
+#endif
+ /** The interface handle.
+ * When this is INTNET_HANDLE_INVALID a sleeper which is waking up
+ * should return with the appropriate error condition. */
+ INTNETIFHANDLE volatile hIf;
+ /** The native handle of the destructor thread. This is NIL_RTNATIVETHREAD when
+ * the object is valid and set when intnetR0IfDestruct is in progress. This is
+ * used to cover an unlikely (impossible?) race between SUPDRVSESSION cleanup
+ * and lingering threads waiting for recv or similar. */
+ RTNATIVETHREAD volatile hDestructorThread;
+ /** Pointer to the network this interface is connected to.
+ * This is protected by the INTNET::hMtxCreateOpenDestroy. */
+ struct INTNETNETWORK *pNetwork;
+ /** The session this interface is associated with. */
+ PSUPDRVSESSION pSession;
+ /** The SUPR0 object id. */
+ void *pvObj;
+ /** The network layer address cache. (Indexed by type, 0 entry isn't used.)
+ * This is protected by the address spinlock of the network. */
+ INTNETADDRCACHE aAddrCache[kIntNetAddrType_End];
+ /** Spinlock protecting the input (producer) side of the receive ring. */
+ RTSPINLOCK hRecvInSpinlock;
+ /** Busy count for tracking destination table references and active sends.
+ * Usually incremented while owning the switch table spinlock. The 30th bit
+ * is used to indicate wakeup. */
+ uint32_t volatile cBusy;
+ /** The preallocated destination table.
+ * This is NULL when it's in use as a precaution against unserialized
+ * transmitting. This is grown when new interfaces are added to the network. */
+ PINTNETDSTTAB volatile pDstTab;
+ /** Pointer to the trunk's per interface data. Can be NULL. */
+ void *pvIfData;
+ /** Header buffer for when we're carving GSO frames. */
+ uint8_t abGsoHdrs[256];
+} INTNETIF;
+/** Pointer to an internal network interface. */
+typedef INTNETIF *PINTNETIF;
+
+
+/**
+ * A trunk interface.
+ */
+typedef struct INTNETTRUNKIF
+{
+ /** The port interface we present to the component. */
+ INTNETTRUNKSWPORT SwitchPort;
+ /** The port interface we get from the component. */
+ PINTNETTRUNKIFPORT pIfPort;
+ /** Pointer to the network we're connect to.
+ * This may be NULL if we're orphaned? */
+ struct INTNETNETWORK *pNetwork;
+ /** The current MAC address for the interface. (reported)
+ * Updated while owning the switch table spinlock. */
+ RTMAC MacAddr;
+ /** Whether to supply physical addresses with the outbound SGs. (reported) */
+ bool fPhysSG;
+ /** Explicit alignment. */
+ bool fUnused;
+ /** Busy count for tracking destination table references and active sends.
+ * Usually incremented while owning the switch table spinlock. The 30th bit
+ * is used to indicate wakeup. */
+ uint32_t volatile cBusy;
+ /** Mask of destinations that pfnXmit cope with disabled preemption for. */
+ uint32_t fNoPreemptDsts;
+ /** The GSO capabilities of the wire destination. (reported) */
+ uint32_t fWireGsoCapabilites;
+ /** The GSO capabilities of the host destination. (reported)
+ * This is as bit map where each bit represents the GSO type with the same
+ * number. */
+ uint32_t fHostGsoCapabilites;
+ /** The destination table spinlock, interrupt safe.
+ * Protects apTaskDstTabs and apIntDstTabs. */
+ RTSPINLOCK hDstTabSpinlock;
+ /** The number of entries in apIntDstTabs. */
+ uint32_t cIntDstTabs;
+ /** The task time destination tables.
+ * @remarks intnetR0NetworkEnsureTabSpace and others ASSUMES this immediately
+ * precedes apIntDstTabs so that these two tables can be used as one
+ * contiguous one. */
+ PINTNETDSTTAB apTaskDstTabs[2];
+ /** The interrupt / disabled-preemption time destination tables.
+ * This is a variable sized array. */
+ PINTNETDSTTAB apIntDstTabs[1];
+} INTNETTRUNKIF;
+/** Pointer to a trunk interface. */
+typedef INTNETTRUNKIF *PINTNETTRUNKIF;
+
+/** Converts a pointer to INTNETTRUNKIF::SwitchPort to a PINTNETTRUNKIF. */
+#define INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort) ((PINTNETTRUNKIF)(pSwitchPort))
+
+
+/**
+ * Internal representation of a network.
+ */
+typedef struct INTNETNETWORK
+{
+ /** The Next network in the chain.
+ * This is protected by the INTNET::hMtxCreateOpenDestroy. */
+ struct INTNETNETWORK *pNext;
+
+ /** The spinlock protecting MacTab, aAddrBlacklist and INTNETIF::aAddrCache.
+ * Interrupt safe. */
+ RTSPINLOCK hAddrSpinlock;
+ /** MAC address table.
+ * This doubles as interface collection. */
+ INTNETMACTAB MacTab;
+
+ /** The network layer address cache. (Indexed by type, 0 entry isn't used.
+ * Contains host addresses. We don't let guests spoof them. */
+ INTNETADDRCACHE aAddrBlacklist[kIntNetAddrType_End];
+
+ /** Wait for an interface to stop being busy so it can be removed or have its
+ * destination table replaced. We have to wait upon this while owning the
+ * network mutex. Will only ever have one waiter because of the big mutex. */
+ RTSEMEVENT hEvtBusyIf;
+ /** Pointer to the instance data. */
+ struct INTNET *pIntNet;
+ /** The SUPR0 object id. */
+ void *pvObj;
+ /** The trunk reconnection system thread. The thread gets started at trunk
+ * disconnection. It tries to reconnect the trunk to the bridged filter instance.
+ * The thread erases this handle right before it terminates.
+ */
+ RTTHREAD hTrunkReconnectThread;
+ /** Trunk reconnection thread termination flag. */
+ bool volatile fTerminateReconnectThread;
+ /** Pointer to the temporary buffer that is used when snooping fragmented packets.
+ * This is allocated after this structure if we're sharing the MAC address with
+ * the host. The buffer is INTNETNETWORK_TMP_SIZE big and aligned on a 64-byte boundary. */
+ uint8_t *pbTmp;
+ /** Network creation flags (INTNET_OPEN_FLAGS_*). */
+ uint32_t fFlags;
+ /** Any restrictive policies required as a minimum by some interface.
+ * (INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES) */
+ uint32_t fMinFlags;
+ /** The number of active interfaces (excluding the trunk). */
+ uint32_t cActiveIFs;
+ /** The length of the network name. */
+ uint8_t cchName;
+ /** The network name. */
+ char szName[INTNET_MAX_NETWORK_NAME];
+ /** The trunk type. */
+ INTNETTRUNKTYPE enmTrunkType;
+ /** The trunk name. */
+ char szTrunk[INTNET_MAX_TRUNK_NAME];
+} INTNETNETWORK;
+/** Pointer to an internal network. */
+typedef INTNETNETWORK *PINTNETNETWORK;
+/** Pointer to a const internal network. */
+typedef const INTNETNETWORK *PCINTNETNETWORK;
+
+/** The size of the buffer INTNETNETWORK::pbTmp points at. */
+#define INTNETNETWORK_TMP_SIZE 2048
+
+
+/**
+ * Internal networking instance.
+ */
+typedef struct INTNET
+{
+ /** Magic number (INTNET_MAGIC). */
+ uint32_t volatile u32Magic;
+ /** Mutex protecting the creation, opening and destruction of both networks and
+ * interfaces. (This means all operations affecting the pNetworks list.) */
+ RTSEMMUTEX hMtxCreateOpenDestroy;
+ /** List of networks. Protected by INTNET::Spinlock. */
+ PINTNETNETWORK volatile pNetworks;
+ /** Handle table for the interfaces. */
+ RTHANDLETABLE hHtIfs;
+} INTNET;
+/** Pointer to an internal network ring-0 instance. */
+typedef struct INTNET *PINTNET;
+
+/** Magic number for the internal network instance data (Hayao Miyazaki). */
+#define INTNET_MAGIC UINT32_C(0x19410105)
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+/** Pointer to the internal network instance data. */
+static PINTNET volatile g_pIntNet = NULL;
+
+static const struct INTNETOPENNETWORKFLAGS
+{
+ uint32_t fRestrictive; /**< The restrictive flag (deny/disabled). */
+ uint32_t fRelaxed; /**< The relaxed flag (allow/enabled). */
+ uint32_t fFixed; /**< The config-fixed flag. */
+ uint32_t fPair; /**< The pair of restrictive and relaxed flags. */
+}
+/** Open network policy flags relating to the network. */
+g_afIntNetOpenNetworkNetFlags[] =
+{
+ { INTNET_OPEN_FLAGS_ACCESS_RESTRICTED, INTNET_OPEN_FLAGS_ACCESS_PUBLIC, INTNET_OPEN_FLAGS_ACCESS_FIXED, INTNET_OPEN_FLAGS_ACCESS_RESTRICTED | INTNET_OPEN_FLAGS_ACCESS_PUBLIC },
+ { INTNET_OPEN_FLAGS_PROMISC_DENY_CLIENTS, INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS, INTNET_OPEN_FLAGS_PROMISC_FIXED, INTNET_OPEN_FLAGS_PROMISC_DENY_CLIENTS | INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS },
+ { INTNET_OPEN_FLAGS_PROMISC_DENY_TRUNK_HOST, INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST, INTNET_OPEN_FLAGS_PROMISC_FIXED, INTNET_OPEN_FLAGS_PROMISC_DENY_TRUNK_HOST | INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST },
+ { INTNET_OPEN_FLAGS_PROMISC_DENY_TRUNK_WIRE, INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_WIRE, INTNET_OPEN_FLAGS_PROMISC_FIXED, INTNET_OPEN_FLAGS_PROMISC_DENY_TRUNK_WIRE | INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_WIRE },
+ { INTNET_OPEN_FLAGS_TRUNK_HOST_DISABLED, INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED, INTNET_OPEN_FLAGS_TRUNK_FIXED, INTNET_OPEN_FLAGS_TRUNK_HOST_DISABLED | INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED },
+ { INTNET_OPEN_FLAGS_TRUNK_HOST_CHASTE_MODE, INTNET_OPEN_FLAGS_TRUNK_HOST_PROMISC_MODE, INTNET_OPEN_FLAGS_TRUNK_FIXED, INTNET_OPEN_FLAGS_TRUNK_HOST_CHASTE_MODE | INTNET_OPEN_FLAGS_TRUNK_HOST_PROMISC_MODE },
+ { INTNET_OPEN_FLAGS_TRUNK_WIRE_DISABLED, INTNET_OPEN_FLAGS_TRUNK_WIRE_ENABLED, INTNET_OPEN_FLAGS_TRUNK_FIXED, INTNET_OPEN_FLAGS_TRUNK_WIRE_DISABLED | INTNET_OPEN_FLAGS_TRUNK_WIRE_ENABLED },
+ { INTNET_OPEN_FLAGS_TRUNK_WIRE_CHASTE_MODE, INTNET_OPEN_FLAGS_TRUNK_WIRE_PROMISC_MODE, INTNET_OPEN_FLAGS_TRUNK_FIXED, INTNET_OPEN_FLAGS_TRUNK_WIRE_CHASTE_MODE | INTNET_OPEN_FLAGS_TRUNK_WIRE_PROMISC_MODE },
+},
+/** Open network policy flags relating to the new interface. */
+g_afIntNetOpenNetworkIfFlags[] =
+{
+ { INTNET_OPEN_FLAGS_IF_PROMISC_DENY, INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW, INTNET_OPEN_FLAGS_IF_FIXED, INTNET_OPEN_FLAGS_IF_PROMISC_DENY | INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW },
+ { INTNET_OPEN_FLAGS_IF_PROMISC_NO_TRUNK, INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK, INTNET_OPEN_FLAGS_IF_FIXED, INTNET_OPEN_FLAGS_IF_PROMISC_NO_TRUNK | INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK },
+};
+
+
+/*********************************************************************************************************************************
+* Forward Declarations *
+*********************************************************************************************************************************/
+static void intnetR0TrunkIfDestroy(PINTNETTRUNKIF pThis, PINTNETNETWORK pNetwork);
+
+
+/**
+ * Checks if a pointer belongs to the list of known networks without
+ * accessing memory it points to.
+ *
+ * @returns true, if such network is in the list.
+ * @param pIntNet The pointer to the internal network instance (global).
+ * @param pNetwork The pointer that must be validated.
+ */
+DECLINLINE(bool) intnetR0NetworkIsValid(PINTNET pIntNet, PINTNETNETWORK pNetwork)
+{
+ for (PINTNETNETWORK pCurr = pIntNet->pNetworks; pCurr; pCurr = pCurr->pNext)
+ if (pCurr == pNetwork)
+ return true;
+ return false;
+}
+
+
+/**
+ * Worker for intnetR0SgWritePart that deals with the case where the
+ * request doesn't fit into the first segment.
+ *
+ * @returns true, unless the request or SG invalid.
+ * @param pSG The SG list to write to.
+ * @param off Where to start writing (offset into the SG).
+ * @param cb How much to write.
+ * @param pvBuf The buffer to containing the bits to write.
+ */
+static bool intnetR0SgWritePartSlow(PCINTNETSG pSG, uint32_t off, uint32_t cb, void const *pvBuf)
+{
+ if (RT_UNLIKELY(off + cb > pSG->cbTotal))
+ return false;
+
+ /*
+ * Skip ahead to the segment where off starts.
+ */
+ unsigned const cSegs = pSG->cSegsUsed; Assert(cSegs == pSG->cSegsUsed);
+ unsigned iSeg = 0;
+ while (off > pSG->aSegs[iSeg].cb)
+ {
+ off -= pSG->aSegs[iSeg++].cb;
+ AssertReturn(iSeg < cSegs, false);
+ }
+
+ /*
+ * Copy the data, hoping that it's all from one segment...
+ */
+ uint32_t cbCanCopy = pSG->aSegs[iSeg].cb - off;
+ if (cbCanCopy >= cb)
+ memcpy((uint8_t *)pSG->aSegs[iSeg].pv + off, pvBuf, cb);
+ else
+ {
+ /* copy the portion in the current segment. */
+ memcpy((uint8_t *)pSG->aSegs[iSeg].pv + off, pvBuf, cbCanCopy);
+ cb -= cbCanCopy;
+
+ /* copy the portions in the other segments. */
+ do
+ {
+ pvBuf = (uint8_t const *)pvBuf + cbCanCopy;
+ iSeg++;
+ AssertReturn(iSeg < cSegs, false);
+
+ cbCanCopy = RT_MIN(cb, pSG->aSegs[iSeg].cb);
+ memcpy(pSG->aSegs[iSeg].pv, pvBuf, cbCanCopy);
+
+ cb -= cbCanCopy;
+ } while (cb > 0);
+ }
+
+ return true;
+}
+
+
+/**
+ * Writes to a part of an SG.
+ *
+ * @returns true on success, false on failure (out of bounds).
+ * @param pSG The SG list to write to.
+ * @param off Where to start writing (offset into the SG).
+ * @param cb How much to write.
+ * @param pvBuf The buffer to containing the bits to write.
+ */
+DECLINLINE(bool) intnetR0SgWritePart(PCINTNETSG pSG, uint32_t off, uint32_t cb, void const *pvBuf)
+{
+ Assert(off + cb > off);
+
+ /* The optimized case. */
+ if (RT_LIKELY( pSG->cSegsUsed == 1
+ || pSG->aSegs[0].cb >= off + cb))
+ {
+ Assert(pSG->cbTotal == pSG->aSegs[0].cb);
+ memcpy((uint8_t *)pSG->aSegs[0].pv + off, pvBuf, cb);
+ return true;
+ }
+ return intnetR0SgWritePartSlow(pSG, off, cb, pvBuf);
+}
+
+
+/**
+ * Reads a byte from a SG list.
+ *
+ * @returns The byte on success. 0xff on failure.
+ * @param pSG The SG list to read.
+ * @param off The offset (into the SG) off the byte.
+ */
+DECLINLINE(uint8_t) intnetR0SgReadByte(PCINTNETSG pSG, uint32_t off)
+{
+ if (RT_LIKELY(pSG->aSegs[0].cb > off))
+ return ((uint8_t const *)pSG->aSegs[0].pv)[off];
+
+ off -= pSG->aSegs[0].cb;
+ unsigned const cSegs = pSG->cSegsUsed; Assert(cSegs == pSG->cSegsUsed);
+ for (unsigned iSeg = 1; iSeg < cSegs; iSeg++)
+ {
+ if (pSG->aSegs[iSeg].cb > off)
+ return ((uint8_t const *)pSG->aSegs[iSeg].pv)[off];
+ off -= pSG->aSegs[iSeg].cb;
+ }
+ return false;
+}
+
+
+/**
+ * Worker for intnetR0SgReadPart that deals with the case where the
+ * requested data isn't in the first segment.
+ *
+ * @returns true, unless the SG is invalid.
+ * @param pSG The SG list to read.
+ * @param off Where to start reading (offset into the SG).
+ * @param cb How much to read.
+ * @param pvBuf The buffer to read into.
+ */
+static bool intnetR0SgReadPartSlow(PCINTNETSG pSG, uint32_t off, uint32_t cb, void *pvBuf)
+{
+ if (RT_UNLIKELY(off + cb > pSG->cbTotal))
+ return false;
+
+ /*
+ * Skip ahead to the segment where off starts.
+ */
+ unsigned const cSegs = pSG->cSegsUsed; Assert(cSegs == pSG->cSegsUsed);
+ unsigned iSeg = 0;
+ while (off > pSG->aSegs[iSeg].cb)
+ {
+ off -= pSG->aSegs[iSeg++].cb;
+ AssertReturn(iSeg < cSegs, false);
+ }
+
+ /*
+ * Copy the data, hoping that it's all from one segment...
+ */
+ uint32_t cbCanCopy = pSG->aSegs[iSeg].cb - off;
+ if (cbCanCopy >= cb)
+ memcpy(pvBuf, (uint8_t const *)pSG->aSegs[iSeg].pv + off, cb);
+ else
+ {
+ /* copy the portion in the current segment. */
+ memcpy(pvBuf, (uint8_t const *)pSG->aSegs[iSeg].pv + off, cbCanCopy);
+ cb -= cbCanCopy;
+
+ /* copy the portions in the other segments. */
+ do
+ {
+ pvBuf = (uint8_t *)pvBuf + cbCanCopy;
+ iSeg++;
+ AssertReturn(iSeg < cSegs, false);
+
+ cbCanCopy = RT_MIN(cb, pSG->aSegs[iSeg].cb);
+ memcpy(pvBuf, (uint8_t const *)pSG->aSegs[iSeg].pv, cbCanCopy);
+
+ cb -= cbCanCopy;
+ } while (cb > 0);
+ }
+
+ return true;
+}
+
+
+/**
+ * Reads a part of an SG into a buffer.
+ *
+ * @returns true on success, false on failure (out of bounds).
+ * @param pSG The SG list to read.
+ * @param off Where to start reading (offset into the SG).
+ * @param cb How much to read.
+ * @param pvBuf The buffer to read into.
+ */
+DECLINLINE(bool) intnetR0SgReadPart(PCINTNETSG pSG, uint32_t off, uint32_t cb, void *pvBuf)
+{
+ Assert(off + cb > off);
+
+ /* The optimized case. */
+ if (RT_LIKELY(pSG->aSegs[0].cb >= off + cb))
+ {
+ AssertMsg(pSG->cbTotal >= pSG->aSegs[0].cb, ("%#x vs %#x\n", pSG->cbTotal, pSG->aSegs[0].cb));
+ memcpy(pvBuf, (uint8_t const *)pSG->aSegs[0].pv + off, cb);
+ return true;
+ }
+ return intnetR0SgReadPartSlow(pSG, off, cb, pvBuf);
+}
+
+
+/**
+ * Wait for a busy counter to reach zero.
+ *
+ * @param pNetwork The network.
+ * @param pcBusy The busy counter.
+ */
+static void intnetR0BusyWait(PINTNETNETWORK pNetwork, uint32_t volatile *pcBusy)
+{
+ if (ASMAtomicReadU32(pcBusy) == 0)
+ return;
+
+ /*
+ * We have to be a bit cautious here so we don't destroy the network or the
+ * semaphore before intnetR0BusyDec has signalled us.
+ */
+
+ /* Reset the semaphore and flip the wakeup bit. */
+ RTSemEventWait(pNetwork->hEvtBusyIf, 0); /* clear it */
+ uint32_t cCurBusy = ASMAtomicReadU32(pcBusy);
+ do
+ {
+ if (cCurBusy == 0)
+ return;
+ AssertMsg(!(cCurBusy & INTNET_BUSY_WAKEUP_MASK), ("%#x\n", cCurBusy));
+ AssertMsg((cCurBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cCurBusy));
+ } while (!ASMAtomicCmpXchgExU32(pcBusy, cCurBusy | INTNET_BUSY_WAKEUP_MASK, cCurBusy, &cCurBusy));
+
+ /* Wait for the count to reach zero. */
+ do
+ {
+ int rc2 = RTSemEventWait(pNetwork->hEvtBusyIf, 30000); NOREF(rc2);
+ //AssertMsg(RT_SUCCESS(rc2), ("rc=%Rrc *pcBusy=%#x (%#x)\n", rc2, ASMAtomicReadU32(pcBusy), cCurBusy ));
+ cCurBusy = ASMAtomicReadU32(pcBusy);
+ AssertMsg((cCurBusy & INTNET_BUSY_WAKEUP_MASK), ("%#x\n", cCurBusy));
+ AssertMsg((cCurBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cCurBusy));
+ } while ( cCurBusy != INTNET_BUSY_WAKEUP_MASK
+ || !ASMAtomicCmpXchgU32(pcBusy, 0, INTNET_BUSY_WAKEUP_MASK));
+}
+
+
+/**
+ * Decrements the busy counter and maybe wakes up any threads waiting for it to
+ * reach zero.
+ *
+ * @param pNetwork The network.
+ * @param pcBusy The busy counter.
+ */
+DECLINLINE(void) intnetR0BusyDec(PINTNETNETWORK pNetwork, uint32_t volatile *pcBusy)
+{
+ uint32_t cNewBusy = ASMAtomicDecU32(pcBusy);
+ if (RT_UNLIKELY( cNewBusy == INTNET_BUSY_WAKEUP_MASK
+ && pNetwork))
+ RTSemEventSignal(pNetwork->hEvtBusyIf);
+ AssertMsg((cNewBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cNewBusy));
+}
+
+
+/**
+ * Increments the busy count of the specified interface.
+ *
+ * The caller must own the MAC address table spinlock.
+ *
+ * @param pIf The interface.
+ */
+DECLINLINE(void) intnetR0BusyDecIf(PINTNETIF pIf)
+{
+ intnetR0BusyDec(pIf->pNetwork, &pIf->cBusy);
+}
+
+
+/**
+ * Increments the busy count of the specified interface.
+ *
+ * The caller must own the MAC address table spinlock or an explicity reference.
+ *
+ * @param pTrunk The trunk.
+ */
+DECLINLINE(void) intnetR0BusyDecTrunk(PINTNETTRUNKIF pTrunk)
+{
+ if (pTrunk)
+ intnetR0BusyDec(pTrunk->pNetwork, &pTrunk->cBusy);
+}
+
+
+/**
+ * Increments the busy count of the specified interface.
+ *
+ * The caller must own the MAC address table spinlock or an explicity reference.
+ *
+ * @param pIf The interface.
+ */
+DECLINLINE(void) intnetR0BusyIncIf(PINTNETIF pIf)
+{
+ uint32_t cNewBusy = ASMAtomicIncU32(&pIf->cBusy);
+ AssertMsg((cNewBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cNewBusy));
+ NOREF(cNewBusy);
+}
+
+
+/**
+ * Increments the busy count of the specified interface.
+ *
+ * The caller must own the MAC address table spinlock or an explicity reference.
+ *
+ * @param pTrunk The trunk.
+ */
+DECLINLINE(void) intnetR0BusyIncTrunk(PINTNETTRUNKIF pTrunk)
+{
+ if (!pTrunk) return;
+ uint32_t cNewBusy = ASMAtomicIncU32(&pTrunk->cBusy);
+ AssertMsg((cNewBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cNewBusy));
+ NOREF(cNewBusy);
+}
+
+
+/**
+ * Retain an interface.
+ *
+ * @returns VBox status code, can assume success in most situations.
+ * @param pIf The interface instance.
+ * @param pSession The current session.
+ */
+DECLINLINE(int) intnetR0IfRetain(PINTNETIF pIf, PSUPDRVSESSION pSession)
+{
+ Assert(pIf->hDestructorThread == NIL_RTNATIVETHREAD);
+
+ int rc = SUPR0ObjAddRefEx(pIf->pvObj, pSession, true /* fNoBlocking */);
+ AssertRCReturn(rc, rc);
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Release an interface previously retained by intnetR0IfRetain or
+ * by handle lookup/freeing.
+ *
+ * @returns true if destroyed, false if not.
+ * @param pIf The interface instance.
+ * @param pSession The current session.
+ */
+DECLINLINE(bool) intnetR0IfRelease(PINTNETIF pIf, PSUPDRVSESSION pSession)
+{
+ Assert(pIf->hDestructorThread == NIL_RTNATIVETHREAD);
+
+ int rc = SUPR0ObjRelease(pIf->pvObj, pSession);
+ AssertRC(rc);
+
+ return rc == VINF_OBJECT_DESTROYED;
+}
+
+
+/**
+ * RTHandleCreateEx callback that retains an object in the
+ * handle table before returning it.
+ *
+ * (Avoids racing the freeing of the handle.)
+ *
+ * @returns VBox status code.
+ * @param hHandleTable The handle table (ignored).
+ * @param pvObj The object (INTNETIF).
+ * @param pvCtx The context (SUPDRVSESSION).
+ * @param pvUser The user context (ignored).
+ */
+static DECLCALLBACK(int) intnetR0IfRetainHandle(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser)
+{
+ NOREF(pvUser);
+ NOREF(hHandleTable);
+
+ PINTNETIF pIf = (PINTNETIF)pvObj;
+ RTNATIVETHREAD hDtorThrd;
+ ASMAtomicUoReadHandle(&pIf->hDestructorThread, &hDtorThrd);
+ if (hDtorThrd == NIL_RTNATIVETHREAD)
+ return intnetR0IfRetain(pIf, (PSUPDRVSESSION)pvCtx);
+
+ /* Allow intnetR0IfDestruct to call RTHandleTableFreeWithCtx to free
+ the handle, but not even think about retaining a referenceas we don't
+ want to confuse SUPDrv and risk having the destructor called twice. */
+ if (hDtorThrd == RTThreadNativeSelf())
+ return VINF_SUCCESS;
+
+ return VERR_SEM_DESTROYED;
+}
+
+
+
+/**
+ * Checks if the interface has a usable MAC address or not.
+ *
+ * @returns true if MacAddr is usable, false if not.
+ * @param pIf The interface.
+ */
+DECL_FORCE_INLINE(bool) intnetR0IfHasMacAddr(PINTNETIF pIf)
+{
+ return pIf->fMacSet || !(pIf->MacAddr.au8[0] & 1);
+}
+
+
+/**
+ * Locates the MAC address table entry for the given interface.
+ *
+ * The caller holds the MAC address table spinlock, obviously.
+ *
+ * @returns Pointer to the entry on if found, NULL if not.
+ * @param pNetwork The network.
+ * @param pIf The interface.
+ */
+DECLINLINE(PINTNETMACTABENTRY) intnetR0NetworkFindMacAddrEntry(PINTNETNETWORK pNetwork, PINTNETIF pIf)
+{
+ uint32_t iIf = pNetwork->MacTab.cEntries;
+ while (iIf-- > 0)
+ {
+ if (pNetwork->MacTab.paEntries[iIf].pIf == pIf)
+ return &pNetwork->MacTab.paEntries[iIf];
+ }
+ return NULL;
+}
+
+
+/**
+ * Checks if the IPv6 address is a good interface address.
+ * @returns true/false.
+ * @param addr The address, network endian.
+ */
+DECLINLINE(bool) intnetR0IPv6AddrIsGood(RTNETADDRIPV6 addr)
+{
+ return !( ( addr.QWords.qw0 == 0 && addr.QWords.qw1 == 0) /* :: */
+ || ( (addr.Words.w0 & RT_H2BE_U16(0xff00)) == RT_H2BE_U16(0xff00)) /* multicast */
+ || ( addr.Words.w0 == 0 && addr.Words.w1 == 0
+ && addr.Words.w2 == 0 && addr.Words.w3 == 0
+ && addr.Words.w4 == 0 && addr.Words.w5 == 0
+ && addr.Words.w6 == 0 && addr.Words.w7 == RT_H2BE_U16(0x0001))); /* ::1 */
+}
+
+
+#if 0 /* unused */
+/**
+ * Checks if the IPv4 address is a broadcast address.
+ * @returns true/false.
+ * @param Addr The address, network endian.
+ */
+DECLINLINE(bool) intnetR0IPv4AddrIsBroadcast(RTNETADDRIPV4 Addr)
+{
+ /* Just check for 255.255.255.255 atm. */
+ return Addr.u == UINT32_MAX;
+}
+#endif /* unused */
+
+
+/**
+ * Checks if the IPv4 address is a good interface address.
+ * @returns true/false.
+ * @param Addr The address, network endian.
+ */
+DECLINLINE(bool) intnetR0IPv4AddrIsGood(RTNETADDRIPV4 Addr)
+{
+ /* Usual suspects. */
+ if ( Addr.u == UINT32_MAX /* 255.255.255.255 - broadcast. */
+ || Addr.au8[0] == 0) /* Current network, can be used as source address. */
+ return false;
+
+ /* Unusual suspects. */
+ if (RT_UNLIKELY( Addr.au8[0] == 127 /* Loopback */
+ || (Addr.au8[0] & 0xf0) == 224 /* Multicast */
+ ))
+ return false;
+ return true;
+}
+
+
+/**
+ * Gets the address size of a network layer type.
+ *
+ * @returns size in bytes.
+ * @param enmType The type.
+ */
+DECLINLINE(uint8_t) intnetR0AddrSize(INTNETADDRTYPE enmType)
+{
+ switch (enmType)
+ {
+ case kIntNetAddrType_IPv4: return 4;
+ case kIntNetAddrType_IPv6: return 16;
+ case kIntNetAddrType_IPX: return 4 + 6;
+ default: AssertFailedReturn(0);
+ }
+}
+
+
+/**
+ * Compares two address to see if they are equal, assuming naturally align structures.
+ *
+ * @returns true if equal, false if not.
+ * @param pAddr1 The first address.
+ * @param pAddr2 The second address.
+ * @param cbAddr The address size.
+ */
+DECLINLINE(bool) intnetR0AddrUIsEqualEx(PCRTNETADDRU pAddr1, PCRTNETADDRU pAddr2, uint8_t const cbAddr)
+{
+ switch (cbAddr)
+ {
+ case 4: /* IPv4 */
+ return pAddr1->au32[0] == pAddr2->au32[0];
+ case 16: /* IPv6 */
+ return pAddr1->au64[0] == pAddr2->au64[0]
+ && pAddr1->au64[1] == pAddr2->au64[1];
+ case 10: /* IPX */
+ return pAddr1->au64[0] == pAddr2->au64[0]
+ && pAddr1->au16[4] == pAddr2->au16[4];
+ default:
+ AssertFailedReturn(false);
+ }
+}
+
+
+/**
+ * Worker for intnetR0IfAddrCacheLookup that performs the lookup
+ * in the remaining cache entries after the caller has check the
+ * most likely ones.
+ *
+ * @returns -1 if not found, the index of the cache entry if found.
+ * @param pCache The cache.
+ * @param pAddr The address.
+ * @param cbAddr The address size (optimization).
+ */
+static int intnetR0IfAddrCacheLookupSlow(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
+{
+ unsigned i = pCache->cEntries - 2;
+ uint8_t const *pbEntry = pCache->pbEntries + pCache->cbEntry * i;
+ while (i >= 1)
+ {
+ if (intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr))
+ return i;
+ pbEntry -= pCache->cbEntry;
+ i--;
+ }
+
+ return -1;
+}
+
+/**
+ * Lookup an address in a cache without any expectations.
+ *
+ * @returns -1 if not found, the index of the cache entry if found.
+ * @param pCache The cache.
+ * @param pAddr The address.
+ * @param cbAddr The address size (optimization).
+ */
+DECLINLINE(int) intnetR0IfAddrCacheLookup(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
+{
+ Assert(pCache->cbAddress == cbAddr);
+
+ /*
+ * The optimized case is when there is one cache entry and
+ * it doesn't match.
+ */
+ unsigned i = pCache->cEntries;
+ if ( i > 0
+ && intnetR0AddrUIsEqualEx((PCRTNETADDRU)pCache->pbEntries, pAddr, cbAddr))
+ return 0;
+ if (i <= 1)
+ return -1;
+
+ /*
+ * Check the last entry.
+ */
+ i--;
+ if (intnetR0AddrUIsEqualEx((PCRTNETADDRU)(pCache->pbEntries + pCache->cbEntry * i), pAddr, cbAddr))
+ return i;
+ if (i <= 1)
+ return -1;
+
+ return intnetR0IfAddrCacheLookupSlow(pCache, pAddr, cbAddr);
+}
+
+
+/** Same as intnetR0IfAddrCacheLookup except we expect the address to be present already. */
+DECLINLINE(int) intnetR0IfAddrCacheLookupLikely(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
+{
+ /** @todo implement this. */
+ return intnetR0IfAddrCacheLookup(pCache, pAddr, cbAddr);
+}
+
+#if 0 /* unused */
+
+/**
+ * Worker for intnetR0IfAddrCacheLookupUnlikely that performs
+ * the lookup in the remaining cache entries after the caller
+ * has check the most likely ones.
+ *
+ * The routine is expecting not to find the address.
+ *
+ * @returns -1 if not found, the index of the cache entry if found.
+ * @param pCache The cache.
+ * @param pAddr The address.
+ * @param cbAddr The address size (optimization).
+ */
+static int intnetR0IfAddrCacheInCacheUnlikelySlow(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
+{
+ /*
+ * Perform a full table lookup.
+ */
+ unsigned i = pCache->cEntries - 2;
+ uint8_t const *pbEntry = pCache->pbEntries + pCache->cbEntry * i;
+ while (i >= 1)
+ {
+ if (RT_UNLIKELY(intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr)))
+ return i;
+ pbEntry -= pCache->cbEntry;
+ i--;
+ }
+
+ return -1;
+}
+
+
+/**
+ * Lookup an address in a cache expecting not to find it.
+ *
+ * @returns -1 if not found, the index of the cache entry if found.
+ * @param pCache The cache.
+ * @param pAddr The address.
+ * @param cbAddr The address size (optimization).
+ */
+DECLINLINE(int) intnetR0IfAddrCacheLookupUnlikely(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
+{
+ Assert(pCache->cbAddress == cbAddr);
+
+ /*
+ * The optimized case is when there is one cache entry and
+ * it doesn't match.
+ */
+ unsigned i = pCache->cEntries;
+ if (RT_UNLIKELY( i > 0
+ && intnetR0AddrUIsEqualEx((PCRTNETADDRU)pCache->pbEntries, pAddr, cbAddr)))
+ return 0;
+ if (RT_LIKELY(i <= 1))
+ return -1;
+
+ /*
+ * Then check the last entry and return if there are just two cache entries.
+ */
+ i--;
+ if (RT_UNLIKELY(intnetR0AddrUIsEqualEx((PCRTNETADDRU)(pCache->pbEntries + pCache->cbEntry * i), pAddr, cbAddr)))
+ return i;
+ if (i <= 1)
+ return -1;
+
+ return intnetR0IfAddrCacheInCacheUnlikelySlow(pCache, pAddr, cbAddr);
+}
+
+#endif /* unused */
+
+
+/**
+ * Deletes a specific cache entry.
+ *
+ * Worker for intnetR0NetworkAddrCacheDelete and intnetR0NetworkAddrCacheDeleteMinusIf.
+ *
+ * @param pIf The interface (for logging).
+ * @param pCache The cache.
+ * @param iEntry The entry to delete.
+ * @param pszMsg Log message.
+ */
+static void intnetR0IfAddrCacheDeleteIt(PINTNETIF pIf, PINTNETADDRCACHE pCache, int iEntry, const char *pszMsg)
+{
+ AssertReturnVoid(iEntry < pCache->cEntries);
+ AssertReturnVoid(iEntry >= 0);
+#ifdef LOG_ENABLED
+ INTNETADDRTYPE enmAddrType = (INTNETADDRTYPE)(uintptr_t)(pCache - &pIf->aAddrCache[0]);
+ PCRTNETADDRU pAddr = (PCRTNETADDRU)(pCache->pbEntries + iEntry * pCache->cbEntry);
+ switch (enmAddrType)
+ {
+ case kIntNetAddrType_IPv4:
+ Log(("intnetR0IfAddrCacheDeleteIt: hIf=%#x MAC=%.6Rhxs IPv4 deleted #%d %RTnaipv4 %s\n",
+ pIf->hIf, &pIf->MacAddr, iEntry, pAddr->IPv4, pszMsg));
+ break;
+ case kIntNetAddrType_IPv6:
+ Log(("intnetR0IfAddrCacheDeleteIt: hIf=%#x MAC=%.6Rhxs IPv6 deleted #%d %RTnaipv6 %s\n",
+ pIf->hIf, &pIf->MacAddr, iEntry, &pAddr->IPv6, pszMsg));
+ break;
+ default:
+ Log(("intnetR0IfAddrCacheDeleteIt: hIf=%RX32 MAC=%.6Rhxs type=%d #%d %.*Rhxs %s\n",
+ pIf->hIf, &pIf->MacAddr, enmAddrType, iEntry, pCache->cbAddress, pAddr, pszMsg));
+ break;
+ }
+#else
+ RT_NOREF2(pIf, pszMsg);
+#endif
+
+ pCache->cEntries--;
+ if (iEntry < pCache->cEntries)
+ memmove(pCache->pbEntries + iEntry * pCache->cbEntry,
+ pCache->pbEntries + (iEntry + 1) * pCache->cbEntry,
+ (pCache->cEntries - iEntry) * pCache->cbEntry);
+}
+
+
+/**
+ * Deletes an address from the cache, assuming it isn't actually in the cache.
+ *
+ * May or may not own the spinlock when calling this.
+ *
+ * @param pIf The interface (for logging).
+ * @param pCache The cache.
+ * @param pAddr The address.
+ * @param cbAddr The address size (optimization).
+ */
+DECLINLINE(void) intnetR0IfAddrCacheDelete(PINTNETIF pIf, PINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr, const char *pszMsg)
+{
+ int i = intnetR0IfAddrCacheLookup(pCache, pAddr, cbAddr);
+ if (RT_UNLIKELY(i >= 0))
+ intnetR0IfAddrCacheDeleteIt(pIf, pCache, i, pszMsg);
+}
+
+
+/**
+ * Deletes the address from all the interface caches.
+ *
+ * This is used to remove stale entries that has been reassigned to
+ * other machines on the network.
+ *
+ * @param pNetwork The network.
+ * @param pAddr The address.
+ * @param enmType The address type.
+ * @param cbAddr The address size (optimization).
+ * @param pszMsg Log message.
+ */
+DECLINLINE(void) intnetR0NetworkAddrCacheDeleteLocked(PINTNETNETWORK pNetwork,
+ PCRTNETADDRU pAddr, INTNETADDRTYPE enmType,
+ uint8_t const cbAddr,
+ const char *pszMsg)
+{
+ uint32_t iIf = pNetwork->MacTab.cEntries;
+ while (iIf--)
+ {
+ PINTNETIF pIf = pNetwork->MacTab.paEntries[iIf].pIf;
+
+ int i = intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmType], pAddr, cbAddr);
+ if (RT_UNLIKELY(i >= 0))
+ intnetR0IfAddrCacheDeleteIt(pIf, &pIf->aAddrCache[enmType], i, pszMsg);
+ }
+}
+
+
+/**
+ * Deletes the address from all the interface caches.
+ *
+ * This is used to remove stale entries that has been reassigned to
+ * other machines on the network.
+ *
+ * @param pNetwork The network.
+ * @param pAddr The address.
+ * @param enmType The address type.
+ * @param cbAddr The address size (optimization).
+ * @param pszMsg Log message.
+ */
+DECLINLINE(void) intnetR0NetworkAddrCacheDelete(PINTNETNETWORK pNetwork, PCRTNETADDRU pAddr, INTNETADDRTYPE const enmType,
+ uint8_t const cbAddr, const char *pszMsg)
+{
+ RTSpinlockAcquire(pNetwork->hAddrSpinlock);
+
+ intnetR0NetworkAddrCacheDeleteLocked(pNetwork, pAddr, enmType, cbAddr, pszMsg);
+
+ RTSpinlockRelease(pNetwork->hAddrSpinlock);
+}
+
+
+#if 0 /* unused */
+/**
+ * Deletes the address from all the interface caches except the specified one.
+ *
+ * This is used to remove stale entries that has been reassigned to
+ * other machines on the network.
+ *
+ * @param pNetwork The network.
+ * @param pAddr The address.
+ * @param enmType The address type.
+ * @param cbAddr The address size (optimization).
+ * @param pszMsg Log message.
+ */
+DECLINLINE(void) intnetR0NetworkAddrCacheDeleteMinusIf(PINTNETNETWORK pNetwork, PINTNETIF pIfSender, PCRTNETADDRU pAddr,
+ INTNETADDRTYPE const enmType, uint8_t const cbAddr, const char *pszMsg)
+{
+ RTSpinlockAcquire(pNetwork->hAddrSpinlock);
+
+ uint32_t iIf = pNetwork->MacTab.cEntries;
+ while (iIf--)
+ {
+ PINTNETIF pIf = pNetwork->MacTab.paEntries[iIf].pIf;
+ if (pIf != pIfSender)
+ {
+ int i = intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmType], pAddr, cbAddr);
+ if (RT_UNLIKELY(i >= 0))
+ intnetR0IfAddrCacheDeleteIt(pIf, &pIf->aAddrCache[enmType], i, pszMsg);
+ }
+ }
+
+ RTSpinlockRelease(pNetwork->hAddrSpinlock);
+}
+#endif /* unused */
+
+
+/**
+ * Lookup an address on the network, returning the (first) interface having it
+ * in its address cache.
+ *
+ * @returns Pointer to the interface on success, NULL if not found. The caller
+ * must release the interface by calling intnetR0BusyDecIf.
+ * @param pNetwork The network.
+ * @param pAddr The address to lookup.
+ * @param enmType The address type.
+ * @param cbAddr The size of the address.
+ */
+DECLINLINE(PINTNETIF) intnetR0NetworkAddrCacheLookupIf(PINTNETNETWORK pNetwork, PCRTNETADDRU pAddr, INTNETADDRTYPE const enmType, uint8_t const cbAddr)
+{
+ RTSpinlockAcquire(pNetwork->hAddrSpinlock);
+
+ uint32_t iIf = pNetwork->MacTab.cEntries;
+ while (iIf--)
+ {
+ PINTNETIF pIf = pNetwork->MacTab.paEntries[iIf].pIf;
+ int i = intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmType], pAddr, cbAddr);
+ if (i >= 0)
+ {
+ intnetR0BusyIncIf(pIf);
+ RTSpinlockRelease(pNetwork->hAddrSpinlock);
+ return pIf;
+ }
+ }
+
+ RTSpinlockRelease(pNetwork->hAddrSpinlock);
+ return NULL;
+}
+
+
+/**
+ * Look up specified address in the network's blacklist.
+ *
+ * @param pNetwork The network.
+ * @param enmType The address type.
+ * @param pAddr The address.
+ */
+static bool intnetR0NetworkBlacklistLookup(PINTNETNETWORK pNetwork,
+ PCRTNETADDRU pAddr, INTNETADDRTYPE enmType)
+{
+ PINTNETADDRCACHE pCache = &pNetwork->aAddrBlacklist[enmType];
+
+ if (RT_UNLIKELY(pCache->cEntriesAlloc == 0))
+ return false;
+
+ const uint8_t cbAddr = pCache->cbAddress;
+ Assert(cbAddr == intnetR0AddrSize(enmType));
+
+ for (unsigned i = 0; i < pCache->cEntries; ++i)
+ {
+ uint8_t *pbEntry = pCache->pbEntries + pCache->cbEntry * i;
+ if (intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr))
+ return true;
+ }
+
+ return false;
+}
+
+
+/**
+ * Deletes specified address from network's blacklist.
+ *
+ * @param pNetwork The network.
+ * @param enmType The address type.
+ * @param pAddr The address.
+ */
+static void intnetR0NetworkBlacklistDelete(PINTNETNETWORK pNetwork,
+ PCRTNETADDRU pAddr, INTNETADDRTYPE enmType)
+{
+ PINTNETADDRCACHE pCache = &pNetwork->aAddrBlacklist[enmType];
+
+ if (RT_UNLIKELY(pCache->cEntriesAlloc == 0))
+ return;
+
+ const uint8_t cbAddr = pCache->cbAddress;
+ Assert(cbAddr == intnetR0AddrSize(enmType));
+
+ for (unsigned i = 0; i < pCache->cEntries; ++i)
+ {
+ uint8_t *pbEntry = pCache->pbEntries + pCache->cbEntry * i;
+ if (!intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr))
+ continue;
+
+ --pCache->cEntries;
+ memmove(pCache->pbEntries + i * pCache->cbEntry,
+ pCache->pbEntries + (i + 1) * pCache->cbEntry,
+ (pCache->cEntries - i) * pCache->cbEntry);
+ return;
+ }
+}
+
+
+/**
+ * Adds specified address from network's blacklist.
+ *
+ * @param pNetwork The network.
+ * @param enmType The address type.
+ * @param pAddr The address.
+ */
+static void intnetR0NetworkBlacklistAdd(PINTNETNETWORK pNetwork,
+ PCRTNETADDRU pAddr, INTNETADDRTYPE enmType)
+{
+ PINTNETADDRCACHE pCache = &pNetwork->aAddrBlacklist[enmType];
+
+ if (RT_UNLIKELY(pCache->cEntriesAlloc == 0))
+ return;
+
+ const uint8_t cbAddr = pCache->cbAddress;
+ Assert(cbAddr == intnetR0AddrSize(enmType));
+
+ /* lookup */
+ for (unsigned i = 0; i < pCache->cEntries; ++i)
+ {
+ uint8_t *pbEntry = pCache->pbEntries + pCache->cbEntry * i;
+ if (RT_UNLIKELY(intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr)))
+ return; /* already exists */
+ }
+
+ if (pCache->cEntries >= pCache->cEntriesAlloc)
+ {
+ /* shift */
+ memmove(pCache->pbEntries, pCache->pbEntries + pCache->cbEntry,
+ pCache->cbEntry * (pCache->cEntries - 1));
+ --pCache->cEntries;
+ }
+
+ Assert(pCache->cEntries < pCache->cEntriesAlloc);
+
+ /* push */
+ uint8_t *pbEntry = pCache->pbEntries + pCache->cEntries * pCache->cbEntry;
+ memcpy(pbEntry, pAddr, cbAddr);
+ memset(pbEntry + pCache->cbAddress, '\0', pCache->cbEntry - cbAddr);
+ ++pCache->cEntries;
+
+ Assert(pCache->cEntries <= pCache->cEntriesAlloc);
+}
+
+
+/**
+ * Adds an address to the cache, the caller is responsible for making sure it's
+ * not already in the cache.
+ *
+ * The caller must not
+ *
+ * @param pIf The interface (for logging).
+ * @param pCache The address cache.
+ * @param pAddr The address.
+ * @param pszMsg log message.
+ */
+static void intnetR0IfAddrCacheAddIt(PINTNETIF pIf, INTNETADDRTYPE enmAddrType, PCRTNETADDRU pAddr,
+ const char *pszMsg)
+{
+ PINTNETNETWORK pNetwork = pIf->pNetwork;
+ AssertReturnVoid(pNetwork);
+
+ PINTNETADDRCACHE pCache = &pIf->aAddrCache[enmAddrType];
+
+#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
+ const uint8_t cbAddr = pCache->cbAddress;
+ Assert(cbAddr == intnetR0AddrSize(enmAddrType));
+#endif
+
+ RTSpinlockAcquire(pNetwork->hAddrSpinlock);
+
+ bool fBlacklisted = intnetR0NetworkBlacklistLookup(pNetwork, pAddr, enmAddrType);
+ if (fBlacklisted)
+ {
+ RTSpinlockRelease(pNetwork->hAddrSpinlock);
+
+#ifdef LOG_ENABLED
+ switch (enmAddrType)
+ {
+ case kIntNetAddrType_IPv4:
+ Log(("%s: spoofing attempt for %RTnaipv4\n",
+ __FUNCTION__, pAddr->IPv4));
+ break;
+ case kIntNetAddrType_IPv6:
+ Log(("%s: spoofing attempt for %RTnaipv6\n",
+ __FUNCTION__, &pAddr->IPv6));
+ break;
+ default:
+ Log(("%s: spoofing attempt for %.*Rhxs (type %d)\n",
+ __FUNCTION__, cbAddr, pAddr, enmAddrType));
+ break;
+ }
+#endif
+ return;
+ }
+
+ if (RT_UNLIKELY(!pCache->cEntriesAlloc))
+ {
+ /* This shouldn't happen*/
+ RTSpinlockRelease(pNetwork->hAddrSpinlock);
+ return;
+ }
+
+ /* When the table is full, drop the older entry (FIFO). Do proper ageing? */
+ if (pCache->cEntries >= pCache->cEntriesAlloc)
+ {
+ Log(("intnetR0IfAddrCacheAddIt: type=%d replacing %.*Rhxs\n",
+ (int)(uintptr_t)(pCache - &pIf->aAddrCache[0]), pCache->cbAddress, pCache->pbEntries));
+ memmove(pCache->pbEntries, pCache->pbEntries + pCache->cbEntry, pCache->cbEntry * (pCache->cEntries - 1));
+ pCache->cEntries--;
+ Assert(pCache->cEntries < pCache->cEntriesAlloc);
+ }
+
+ /*
+ * Add the new entry to the end of the array.
+ */
+ uint8_t *pbEntry = pCache->pbEntries + pCache->cEntries * pCache->cbEntry;
+ memcpy(pbEntry, pAddr, pCache->cbAddress);
+ memset(pbEntry + pCache->cbAddress, '\0', pCache->cbEntry - pCache->cbAddress);
+
+#ifdef LOG_ENABLED
+ switch (enmAddrType)
+ {
+ case kIntNetAddrType_IPv4:
+ Log(("intnetR0IfAddrCacheAddIt: hIf=%#x MAC=%.6Rhxs IPv4 added #%d %RTnaipv4 %s\n",
+ pIf->hIf, &pIf->MacAddr, pCache->cEntries, pAddr->IPv4, pszMsg));
+ break;
+ case kIntNetAddrType_IPv6:
+ Log(("intnetR0IfAddrCacheAddIt: hIf=%#x MAC=%.6Rhxs IPv6 added #%d %RTnaipv6 %s\n",
+ pIf->hIf, &pIf->MacAddr, pCache->cEntries, &pAddr->IPv6, pszMsg));
+ break;
+ default:
+ Log(("intnetR0IfAddrCacheAddIt: hIf=%#x MAC=%.6Rhxs type=%d added #%d %.*Rhxs %s\n",
+ pIf->hIf, &pIf->MacAddr, enmAddrType, pCache->cEntries, pCache->cbAddress, pAddr, pszMsg));
+ break;
+ }
+#else
+ RT_NOREF1(pszMsg);
+#endif
+ pCache->cEntries++;
+ Assert(pCache->cEntries <= pCache->cEntriesAlloc);
+
+ RTSpinlockRelease(pNetwork->hAddrSpinlock);
+}
+
+
+/**
+ * A intnetR0IfAddrCacheAdd worker that performs the rest of the lookup.
+ *
+ * @param pIf The interface (for logging).
+ * @param pCache The address cache.
+ * @param pAddr The address.
+ * @param cbAddr The size of the address (optimization).
+ * @param pszMsg Log message.
+ */
+static void intnetR0IfAddrCacheAddSlow(PINTNETIF pIf, INTNETADDRTYPE enmAddrType, PCRTNETADDRU pAddr,
+ const char *pszMsg)
+{
+ PINTNETADDRCACHE pCache = &pIf->aAddrCache[enmAddrType];
+
+ const uint8_t cbAddr = pCache->cbAddress;
+ Assert(cbAddr == intnetR0AddrSize(enmAddrType));
+
+ /*
+ * Check all but the first and last entries, the caller
+ * has already checked those.
+ */
+ int i = pCache->cEntries - 2;
+ uint8_t const *pbEntry = pCache->pbEntries + pCache->cbEntry;
+ while (i >= 1)
+ {
+ if (RT_LIKELY(intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr)))
+ return;
+ pbEntry += pCache->cbEntry;
+ i--;
+ }
+
+ /*
+ * Not found, add it.
+ */
+ intnetR0IfAddrCacheAddIt(pIf, enmAddrType, pAddr, pszMsg);
+}
+
+
+/**
+ * Adds an address to the cache if it's not already there.
+ *
+ * Must not own any spinlocks when calling this function.
+ *
+ * @param pIf The interface (for logging).
+ * @param pCache The address cache.
+ * @param pAddr The address.
+ * @param cbAddr The size of the address (optimization).
+ * @param pszMsg Log message.
+ */
+DECLINLINE(void) intnetR0IfAddrCacheAdd(PINTNETIF pIf, INTNETADDRTYPE enmAddrType, PCRTNETADDRU pAddr,
+ const char *pszMsg)
+{
+ PINTNETADDRCACHE pCache = &pIf->aAddrCache[enmAddrType];
+
+ const uint8_t cbAddr = pCache->cbAddress;
+ Assert(cbAddr == intnetR0AddrSize(enmAddrType));
+
+ /*
+ * The optimized case is when the address the first or last cache entry.
+ */
+ unsigned i = pCache->cEntries;
+ if (RT_LIKELY( i > 0
+ && ( intnetR0AddrUIsEqualEx((PCRTNETADDRU)pCache->pbEntries, pAddr, cbAddr)
+ || (i > 1
+ && intnetR0AddrUIsEqualEx((PCRTNETADDRU)(pCache->pbEntries + pCache->cbEntry * (i-1)), pAddr, cbAddr))) ))
+ return;
+
+ intnetR0IfAddrCacheAddSlow(pIf, enmAddrType, pAddr, pszMsg);
+}
+
+
+/**
+ * Destroys the specified address cache.
+ * @param pCache The address cache.
+ */
+static void intnetR0IfAddrCacheDestroy(PINTNETADDRCACHE pCache)
+{
+ void *pvFree = pCache->pbEntries;
+ pCache->pbEntries = NULL;
+ pCache->cEntries = 0;
+ pCache->cEntriesAlloc = 0;
+ RTMemFree(pvFree);
+}
+
+
+/**
+ * Initialize the address cache for the specified address type.
+ *
+ * The cache storage is preallocated and fixed size so that we can handle
+ * inserts from problematic contexts.
+ *
+ * @returns VINF_SUCCESS or VERR_NO_MEMORY.
+ * @param pCache The cache to initialize.
+ * @param enmAddrType The address type.
+ * @param fEnabled Whether the address cache is enabled or not.
+ */
+static int intnetR0IfAddrCacheInit(PINTNETADDRCACHE pCache, INTNETADDRTYPE enmAddrType, bool fEnabled)
+{
+ pCache->cEntries = 0;
+ pCache->cbAddress = intnetR0AddrSize(enmAddrType);
+ pCache->cbEntry = RT_ALIGN(pCache->cbAddress, 4);
+ if (fEnabled)
+ {
+ pCache->cEntriesAlloc = 32;
+ pCache->pbEntries = (uint8_t *)RTMemAllocZ(pCache->cEntriesAlloc * pCache->cbEntry);
+ if (!pCache->pbEntries)
+ return VERR_NO_MEMORY;
+ }
+ else
+ {
+ pCache->cEntriesAlloc = 0;
+ pCache->pbEntries = NULL;
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Is it a multicast or broadcast MAC address?
+ *
+ * @returns true if multicast, false if not.
+ * @param pMacAddr The address to inspect.
+ */
+DECL_FORCE_INLINE(bool) intnetR0IsMacAddrMulticast(PCRTMAC pMacAddr)
+{
+ return !!(pMacAddr->au8[0] & 0x01);
+}
+
+
+/**
+ * Is it a dummy MAC address?
+ *
+ * We use dummy MAC addresses for interfaces which we don't know the MAC
+ * address of because they haven't sent anything (learning) or explicitly set
+ * it.
+ *
+ * @returns true if dummy, false if not.
+ * @param pMacAddr The address to inspect.
+ */
+DECL_FORCE_INLINE(bool) intnetR0IsMacAddrDummy(PCRTMAC pMacAddr)
+{
+ /* The dummy address are broadcast addresses, don't bother check it all. */
+ return pMacAddr->au16[0] == 0xffff;
+}
+
+
+/**
+ * Compares two MAC addresses.
+ *
+ * @returns true if equal, false if not.
+ * @param pDstAddr1 Address 1.
+ * @param pDstAddr2 Address 2.
+ */
+DECL_FORCE_INLINE(bool) intnetR0AreMacAddrsEqual(PCRTMAC pDstAddr1, PCRTMAC pDstAddr2)
+{
+ return pDstAddr1->au16[2] == pDstAddr2->au16[2]
+ && pDstAddr1->au16[1] == pDstAddr2->au16[1]
+ && pDstAddr1->au16[0] == pDstAddr2->au16[0];
+}
+
+
+/**
+ * Switch a unicast frame based on the network layer address (OSI level 3) and
+ * return a destination table.
+ *
+ * @returns INTNETSWDECISION_DROP, INTNETSWDECISION_TRUNK,
+ * INTNETSWDECISION_INTNET or INTNETSWDECISION_BROADCAST (misnomer).
+ * @param pNetwork The network to switch on.
+ * @param pDstMacAddr The destination MAC address.
+ * @param enmL3AddrType The level-3 destination address type.
+ * @param pL3Addr The level-3 destination address.
+ * @param cbL3Addr The size of the level-3 destination address.
+ * @param fSrc The frame source (INTNETTRUNKDIR_WIRE).
+ * @param pDstTab The destination output table.
+ */
+static INTNETSWDECISION intnetR0NetworkSwitchLevel3(PINTNETNETWORK pNetwork, PCRTMAC pDstMacAddr,
+ INTNETADDRTYPE enmL3AddrType, PCRTNETADDRU pL3Addr, uint8_t cbL3Addr,
+ uint32_t fSrc, PINTNETDSTTAB pDstTab)
+{
+ Assert(fSrc == INTNETTRUNKDIR_WIRE);
+
+ /*
+ * Grab the spinlock first and do the switching.
+ */
+ PINTNETMACTAB pTab = &pNetwork->MacTab;
+ RTSpinlockAcquire(pNetwork->hAddrSpinlock);
+
+ pDstTab->fTrunkDst = 0;
+ pDstTab->pTrunk = 0;
+ pDstTab->cIfs = 0;
+
+ /* Find exactly matching or promiscuous interfaces. */
+ uint32_t cExactHits = 0;
+ uint32_t iIfMac = pTab->cEntries;
+ while (iIfMac-- > 0)
+ {
+ if (pTab->paEntries[iIfMac].fActive)
+ {
+ PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
+ bool fExact = intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmL3AddrType], pL3Addr, cbL3Addr) >= 0;
+ if (fExact || pTab->paEntries[iIfMac].fPromiscuousSeeTrunk)
+ {
+ cExactHits += fExact;
+
+ uint32_t iIfDst = pDstTab->cIfs++;
+ pDstTab->aIfs[iIfDst].pIf = pIf;
+ pDstTab->aIfs[iIfDst].fReplaceDstMac = fExact;
+ intnetR0BusyIncIf(pIf);
+
+ if (fExact)
+ pDstMacAddr = &pIf->MacAddr; /* Avoids duplicates being sent to the host. */
+ }
+ }
+ }
+
+ /* Network only promicuous mode ifs should see related trunk traffic. */
+ if ( cExactHits
+ && fSrc
+ && pNetwork->MacTab.cPromiscuousNoTrunkEntries)
+ {
+ iIfMac = pTab->cEntries;
+ while (iIfMac-- > 0)
+ {
+ if ( pTab->paEntries[iIfMac].fActive
+ && pTab->paEntries[iIfMac].fPromiscuousEff
+ && !pTab->paEntries[iIfMac].fPromiscuousSeeTrunk)
+ {
+ PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
+ if (intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmL3AddrType], pL3Addr, cbL3Addr) < 0)
+ {
+ uint32_t iIfDst = pDstTab->cIfs++;
+ pDstTab->aIfs[iIfDst].pIf = pIf;
+ pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
+ intnetR0BusyIncIf(pIf);
+ }
+ }
+ }
+ }
+
+ /* Does it match the host, or is the host promiscuous? */
+ if (pTab->fHostActive)
+ {
+ bool fExact = intnetR0AreMacAddrsEqual(&pTab->HostMac, pDstMacAddr);
+ if ( fExact
+ || intnetR0IsMacAddrDummy(&pTab->HostMac)
+ || pTab->fHostPromiscuousEff)
+ {
+ cExactHits += fExact;
+ pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
+ }
+ }
+
+ /* Hit the wire if there are no exact matches or if it's in promiscuous mode. */
+ if (pTab->fWireActive && (!cExactHits || pTab->fWirePromiscuousEff))
+ pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
+ pDstTab->fTrunkDst &= ~fSrc;
+ if (pDstTab->fTrunkDst)
+ {
+ PINTNETTRUNKIF pTrunk = pTab->pTrunk;
+ pDstTab->pTrunk = pTrunk;
+ intnetR0BusyIncTrunk(pTrunk);
+ }
+
+ RTSpinlockRelease(pNetwork->hAddrSpinlock);
+ return pDstTab->cIfs
+ ? (!pDstTab->fTrunkDst ? INTNETSWDECISION_INTNET : INTNETSWDECISION_BROADCAST)
+ : (!pDstTab->fTrunkDst ? INTNETSWDECISION_DROP : INTNETSWDECISION_TRUNK);
+}
+
+
+/**
+ * Pre-switch a unicast MAC address.
+ *
+ * @returns INTNETSWDECISION_DROP, INTNETSWDECISION_TRUNK,
+ * INTNETSWDECISION_INTNET or INTNETSWDECISION_BROADCAST (misnomer).
+ * @param pNetwork The network to switch on.
+ * @param fSrc The frame source.
+ * @param pSrcAddr The source address of the frame.
+ * @param pDstAddr The destination address of the frame.
+ */
+static INTNETSWDECISION intnetR0NetworkPreSwitchUnicast(PINTNETNETWORK pNetwork, uint32_t fSrc, PCRTMAC pSrcAddr,
+ PCRTMAC pDstAddr)
+{
+ Assert(!intnetR0IsMacAddrMulticast(pDstAddr));
+ Assert(fSrc);
+
+ /*
+ * Grab the spinlock first and do the switching.
+ */
+ INTNETSWDECISION enmSwDecision = INTNETSWDECISION_BROADCAST;
+ PINTNETMACTAB pTab = &pNetwork->MacTab;
+ RTSpinlockAcquire(pNetwork->hAddrSpinlock);
+
+ /* Iterate the internal network interfaces and look for matching source and
+ destination addresses. */
+ uint32_t iIfMac = pTab->cEntries;
+ while (iIfMac-- > 0)
+ {
+ if (pTab->paEntries[iIfMac].fActive)
+ {
+ /* Unknown interface address? */
+ if (intnetR0IsMacAddrDummy(&pTab->paEntries[iIfMac].MacAddr))
+ break;
+
+ /* Paranoia - this shouldn't happen, right? */
+ if ( pSrcAddr
+ && intnetR0AreMacAddrsEqual(&pTab->paEntries[iIfMac].MacAddr, pSrcAddr))
+ break;
+
+ /* Exact match? */
+ if (intnetR0AreMacAddrsEqual(&pTab->paEntries[iIfMac].MacAddr, pDstAddr))
+ {
+ enmSwDecision = pTab->fHostPromiscuousEff && fSrc == INTNETTRUNKDIR_WIRE
+ ? INTNETSWDECISION_BROADCAST
+ : INTNETSWDECISION_INTNET;
+ break;
+ }
+ }
+ }
+
+ RTSpinlockRelease(pNetwork->hAddrSpinlock);
+ return enmSwDecision;
+}
+
+
+/**
+ * Switch a unicast MAC address and return a destination table.
+ *
+ * @returns INTNETSWDECISION_DROP, INTNETSWDECISION_TRUNK,
+ * INTNETSWDECISION_INTNET or INTNETSWDECISION_BROADCAST (misnomer).
+ * @param pNetwork The network to switch on.
+ * @param fSrc The frame source.
+ * @param pIfSender The sender interface, NULL if trunk. Used to
+ * prevent sending an echo to the sender.
+ * @param pDstAddr The destination address of the frame.
+ * @param pDstTab The destination output table.
+ */
+static INTNETSWDECISION intnetR0NetworkSwitchUnicast(PINTNETNETWORK pNetwork, uint32_t fSrc, PINTNETIF pIfSender,
+ PCRTMAC pDstAddr, PINTNETDSTTAB pDstTab)
+{
+ AssertPtr(pDstTab);
+ Assert(!intnetR0IsMacAddrMulticast(pDstAddr));
+
+ /*
+ * Grab the spinlock first and do the switching.
+ */
+ PINTNETMACTAB pTab = &pNetwork->MacTab;
+ RTSpinlockAcquire(pNetwork->hAddrSpinlock);
+
+ pDstTab->fTrunkDst = 0;
+ pDstTab->pTrunk = 0;
+ pDstTab->cIfs = 0;
+
+ /* Find exactly matching or promiscuous interfaces. */
+ uint32_t cExactHits = 0;
+ uint32_t iIfMac = pTab->cEntries;
+ while (iIfMac-- > 0)
+ {
+ if (pTab->paEntries[iIfMac].fActive)
+ {
+ bool fExact = intnetR0AreMacAddrsEqual(&pTab->paEntries[iIfMac].MacAddr, pDstAddr);
+ if ( fExact
+ || intnetR0IsMacAddrDummy(&pTab->paEntries[iIfMac].MacAddr)
+ || ( pTab->paEntries[iIfMac].fPromiscuousSeeTrunk
+ || (!fSrc && pTab->paEntries[iIfMac].fPromiscuousEff) )
+ )
+ {
+ cExactHits += fExact;
+
+ PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
+ if (RT_LIKELY(pIf != pIfSender)) /* paranoia */
+ {
+ uint32_t iIfDst = pDstTab->cIfs++;
+ pDstTab->aIfs[iIfDst].pIf = pIf;
+ pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
+ intnetR0BusyIncIf(pIf);
+ }
+ }
+ }
+ }
+
+ /* Network only promicuous mode ifs should see related trunk traffic. */
+ if ( cExactHits
+ && fSrc
+ && pNetwork->MacTab.cPromiscuousNoTrunkEntries)
+ {
+ iIfMac = pTab->cEntries;
+ while (iIfMac-- > 0)
+ {
+ if ( pTab->paEntries[iIfMac].fPromiscuousEff
+ && !pTab->paEntries[iIfMac].fPromiscuousSeeTrunk
+ && pTab->paEntries[iIfMac].fActive
+ && !intnetR0AreMacAddrsEqual(&pTab->paEntries[iIfMac].MacAddr, pDstAddr)
+ && !intnetR0IsMacAddrDummy(&pTab->paEntries[iIfMac].MacAddr) )
+ {
+ PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
+ uint32_t iIfDst = pDstTab->cIfs++;
+ pDstTab->aIfs[iIfDst].pIf = pIf;
+ pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
+ intnetR0BusyIncIf(pIf);
+ }
+ }
+ }
+
+ /* Does it match the host, or is the host promiscuous? */
+ if ( fSrc != INTNETTRUNKDIR_HOST
+ && pTab->fHostActive)
+ {
+ bool fExact = intnetR0AreMacAddrsEqual(&pTab->HostMac, pDstAddr);
+ if ( fExact
+ || intnetR0IsMacAddrDummy(&pTab->HostMac)
+ || pTab->fHostPromiscuousEff)
+ {
+ cExactHits += fExact;
+ pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
+ }
+ }
+
+ /* Hit the wire if there are no exact matches or if it's in promiscuous mode. */
+ if ( fSrc != INTNETTRUNKDIR_WIRE
+ && pTab->fWireActive
+ && (!cExactHits || pTab->fWirePromiscuousEff)
+ )
+ pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
+
+ /* Grab the trunk if we're sending to it. */
+ if (pDstTab->fTrunkDst)
+ {
+ PINTNETTRUNKIF pTrunk = pTab->pTrunk;
+ pDstTab->pTrunk = pTrunk;
+ intnetR0BusyIncTrunk(pTrunk);
+ }
+
+ RTSpinlockRelease(pNetwork->hAddrSpinlock);
+ return pDstTab->cIfs
+ ? (!pDstTab->fTrunkDst ? INTNETSWDECISION_INTNET : INTNETSWDECISION_BROADCAST)
+ : (!pDstTab->fTrunkDst ? INTNETSWDECISION_DROP : INTNETSWDECISION_TRUNK);
+}
+
+
+/**
+ * Create a destination table for a broadcast frame.
+ *
+ * @returns INTNETSWDECISION_BROADCAST.
+ * @param pNetwork The network to switch on.
+ * @param fSrc The frame source.
+ * @param pIfSender The sender interface, NULL if trunk. Used to
+ * prevent sending an echo to the sender.
+ * @param pDstTab The destination output table.
+ */
+static INTNETSWDECISION intnetR0NetworkSwitchBroadcast(PINTNETNETWORK pNetwork, uint32_t fSrc, PINTNETIF pIfSender,
+ PINTNETDSTTAB pDstTab)
+{
+ AssertPtr(pDstTab);
+
+ /*
+ * Grab the spinlock first and record all active interfaces.
+ */
+ PINTNETMACTAB pTab = &pNetwork->MacTab;
+ RTSpinlockAcquire(pNetwork->hAddrSpinlock);
+
+ pDstTab->fTrunkDst = 0;
+ pDstTab->pTrunk = 0;
+ pDstTab->cIfs = 0;
+
+ /* Regular interfaces. */
+ uint32_t iIfMac = pTab->cEntries;
+ while (iIfMac-- > 0)
+ {
+ if (pTab->paEntries[iIfMac].fActive)
+ {
+ PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
+ if (pIf != pIfSender)
+ {
+ uint32_t iIfDst = pDstTab->cIfs++;
+ pDstTab->aIfs[iIfDst].pIf = pIf;
+ pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
+ intnetR0BusyIncIf(pIf);
+ }
+ }
+ }
+
+ /* The trunk interface. */
+ if (pTab->fHostActive)
+ pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
+ if (pTab->fWireActive)
+ pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
+ pDstTab->fTrunkDst &= ~fSrc;
+ if (pDstTab->fTrunkDst)
+ {
+ PINTNETTRUNKIF pTrunk = pTab->pTrunk;
+ pDstTab->pTrunk = pTrunk;
+ intnetR0BusyIncTrunk(pTrunk);
+ }
+
+ RTSpinlockRelease(pNetwork->hAddrSpinlock);
+ return INTNETSWDECISION_BROADCAST;
+}
+
+
+/**
+ * Create a destination table with the trunk and any promiscuous interfaces.
+ *
+ * This is only used in a fallback case of the level-3 switching, so we can
+ * assume the wire as source and skip the sender interface filtering.
+ *
+ * @returns INTNETSWDECISION_DROP, INTNETSWDECISION_TRUNK,
+ * INTNETSWDECISION_INTNET or INTNETSWDECISION_BROADCAST (misnomer).
+ * @param pNetwork The network to switch on.
+ * @param fSrc The frame source.
+ * @param pDstTab The destination output table.
+ */
+static INTNETSWDECISION intnetR0NetworkSwitchTrunkAndPromisc(PINTNETNETWORK pNetwork, uint32_t fSrc, PINTNETDSTTAB pDstTab)
+{
+ Assert(fSrc == INTNETTRUNKDIR_WIRE);
+
+ /*
+ * Grab the spinlock first and do the switching.
+ */
+ PINTNETMACTAB pTab = &pNetwork->MacTab;
+ RTSpinlockAcquire(pNetwork->hAddrSpinlock);
+
+ pDstTab->fTrunkDst = 0;
+ pDstTab->pTrunk = 0;
+ pDstTab->cIfs = 0;
+
+ /* Find promiscuous interfaces. */
+ uint32_t iIfMac = pTab->cEntries;
+ while (iIfMac-- > 0)
+ {
+ if ( pTab->paEntries[iIfMac].fActive
+ && ( pTab->paEntries[iIfMac].fPromiscuousSeeTrunk
+ || (!fSrc && pTab->paEntries[iIfMac].fPromiscuousEff) )
+ )
+ {
+ PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
+ uint32_t iIfDst = pDstTab->cIfs++;
+ pDstTab->aIfs[iIfDst].pIf = pIf;
+ pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
+ intnetR0BusyIncIf(pIf);
+ }
+ }
+
+ /* The trunk interface. */
+ if (pTab->fHostActive)
+ pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
+ if (pTab->fWireActive)
+ pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
+ pDstTab->fTrunkDst &= ~fSrc;
+ if (pDstTab->fTrunkDst)
+ {
+ PINTNETTRUNKIF pTrunk = pTab->pTrunk;
+ pDstTab->pTrunk = pTrunk;
+ intnetR0BusyIncTrunk(pTrunk);
+ }
+
+ RTSpinlockRelease(pNetwork->hAddrSpinlock);
+ return !pDstTab->cIfs
+ ? (!pDstTab->fTrunkDst ? INTNETSWDECISION_DROP : INTNETSWDECISION_TRUNK)
+ : (!pDstTab->fTrunkDst ? INTNETSWDECISION_INTNET : INTNETSWDECISION_BROADCAST);
+}
+
+
+/**
+ * Create a destination table for a trunk frame.
+ *
+ * @returns INTNETSWDECISION_BROADCAST.
+ * @param pNetwork The network to switch on.
+ * @param fSrc The frame source.
+ * @param pDstTab The destination output table.
+ */
+static INTNETSWDECISION intnetR0NetworkSwitchTrunk(PINTNETNETWORK pNetwork, uint32_t fSrc, PINTNETDSTTAB pDstTab)
+{
+ AssertPtr(pDstTab);
+
+ /*
+ * Grab the spinlock first and record all active interfaces.
+ */
+ PINTNETMACTAB pTab= &pNetwork->MacTab;
+ RTSpinlockAcquire(pNetwork->hAddrSpinlock);
+
+ pDstTab->fTrunkDst = 0;
+ pDstTab->pTrunk = 0;
+ pDstTab->cIfs = 0;
+
+ /* The trunk interface. */
+ if (pTab->fHostActive)
+ pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
+ if (pTab->fWireActive)
+ pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
+ pDstTab->fTrunkDst &= ~fSrc;
+ if (pDstTab->fTrunkDst)
+ {
+ PINTNETTRUNKIF pTrunk = pTab->pTrunk;
+ pDstTab->pTrunk = pTrunk;
+ intnetR0BusyIncTrunk(pTrunk);
+ }
+
+ RTSpinlockRelease(pNetwork->hAddrSpinlock);
+ return pDstTab->fTrunkDst ? INTNETSWDECISION_TRUNK : INTNETSWDECISION_DROP;
+}
+
+
+/**
+ * Wrapper around RTMemAlloc for allocating a destination table.
+ *
+ * @returns VINF_SUCCESS or VERR_NO_MEMORY.
+ * @param cEntries The size given as an entry count.
+ * @param ppDstTab Where to store the pointer (always).
+ */
+DECLINLINE(int) intnetR0AllocDstTab(uint32_t cEntries, PINTNETDSTTAB *ppDstTab)
+{
+ PINTNETDSTTAB pDstTab;
+ *ppDstTab = pDstTab = (PINTNETDSTTAB)RTMemAlloc(RT_UOFFSETOF_DYN(INTNETDSTTAB, aIfs[cEntries]));
+ if (RT_UNLIKELY(!pDstTab))
+ return VERR_NO_MEMORY;
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Ensures that there is space for another interface in the MAC address lookup
+ * table as well as all the destination tables.
+ *
+ * The caller must own the create/open/destroy mutex.
+ *
+ * @returns VINF_SUCCESS, VERR_NO_MEMORY or VERR_OUT_OF_RANGE.
+ * @param pNetwork The network to operate on.
+ */
+static int intnetR0NetworkEnsureTabSpace(PINTNETNETWORK pNetwork)
+{
+ /*
+ * The cEntries and cEntriesAllocated members are only updated while
+ * owning the big mutex, so we only need the spinlock when doing the
+ * actual table replacing.
+ */
+ PINTNETMACTAB pTab = &pNetwork->MacTab;
+ int rc = VINF_SUCCESS;
+ AssertReturn(pTab->cEntries <= pTab->cEntriesAllocated, VERR_INTERNAL_ERROR_2);
+ if (pTab->cEntries + 1 > pTab->cEntriesAllocated)
+ {
+ uint32_t const cAllocated = pTab->cEntriesAllocated + INTNET_GROW_DSTTAB_SIZE;
+ if (cAllocated <= INTNET_MAX_IFS)
+ {
+ /*
+ * Resize the destination tables first, this can be kind of tedious.
+ */
+ for (uint32_t i = 0; i < pTab->cEntries; i++)
+ {
+ PINTNETIF pIf = pTab->paEntries[i].pIf; AssertPtr(pIf);
+ PINTNETDSTTAB pNew;
+ rc = intnetR0AllocDstTab(cAllocated, &pNew);
+ if (RT_FAILURE(rc))
+ break;
+
+ for (;;)
+ {
+ PINTNETDSTTAB pOld = pIf->pDstTab;
+ if ( pOld
+ && ASMAtomicCmpXchgPtr(&pIf->pDstTab, pNew, pOld))
+ {
+ RTMemFree(pOld);
+ break;
+ }
+ intnetR0BusyWait(pNetwork, &pIf->cBusy);
+ }
+ }
+
+ /*
+ * The trunk.
+ */
+ if ( RT_SUCCESS(rc)
+ && pNetwork->MacTab.pTrunk)
+ {
+ AssertCompileAdjacentMembers(INTNETTRUNKIF, apTaskDstTabs, apIntDstTabs);
+ PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;
+ PINTNETDSTTAB * const ppEndDstTab = &pTrunk->apIntDstTabs[pTrunk->cIntDstTabs];
+ for (PINTNETDSTTAB *ppDstTab = &pTrunk->apTaskDstTabs[0];
+ ppDstTab != ppEndDstTab && RT_SUCCESS(rc);
+ ppDstTab++)
+ {
+ PINTNETDSTTAB pNew;
+ rc = intnetR0AllocDstTab(cAllocated, &pNew);
+ if (RT_FAILURE(rc))
+ break;
+
+ for (;;)
+ {
+ RTSpinlockAcquire(pTrunk->hDstTabSpinlock);
+ void *pvOld = *ppDstTab;
+ if (pvOld)
+ *ppDstTab = pNew;
+ RTSpinlockRelease(pTrunk->hDstTabSpinlock);
+ if (pvOld)
+ {
+ RTMemFree(pvOld);
+ break;
+ }
+ intnetR0BusyWait(pNetwork, &pTrunk->cBusy);
+ }
+ }
+ }
+
+ /*
+ * The MAC Address table itself.
+ */
+ if (RT_SUCCESS(rc))
+ {
+ PINTNETMACTABENTRY paNew = (PINTNETMACTABENTRY)RTMemAlloc(sizeof(INTNETMACTABENTRY) * cAllocated);
+ if (paNew)
+ {
+ RTSpinlockAcquire(pNetwork->hAddrSpinlock);
+
+ PINTNETMACTABENTRY paOld = pTab->paEntries;
+ uint32_t i = pTab->cEntries;
+ while (i-- > 0)
+ {
+ paNew[i] = paOld[i];
+
+ paOld[i].fActive = false;
+ paOld[i].pIf = NULL;
+ }
+
+ pTab->paEntries = paNew;
+ pTab->cEntriesAllocated = cAllocated;
+
+ RTSpinlockRelease(pNetwork->hAddrSpinlock);
+
+ RTMemFree(paOld);
+ }
+ else
+ rc = VERR_NO_MEMORY;
+ }
+ }
+ else
+ rc = VERR_OUT_OF_RANGE;
+ }
+ return rc;
+}
+
+
+
+
+#ifdef INTNET_WITH_DHCP_SNOOPING
+
+/**
+ * Snoops IP assignments and releases from the DHCPv4 traffic.
+ *
+ * The caller is responsible for making sure this traffic between the
+ * BOOTPS and BOOTPC ports and validate the IP header. The UDP packet
+ * need not be validated beyond the ports.
+ *
+ * @param pNetwork The network this frame was seen on.
+ * @param pIpHdr Pointer to a valid IP header. This is for pseudo
+ * header validation, so only the minimum header size
+ * needs to be available and valid here.
+ * @param pUdpHdr Pointer to the UDP header in the frame.
+ * @param cbUdpPkt What's left of the frame when starting at the UDP header.
+ * @param fGso Set if this is a GSO frame, clear if regular.
+ */
+static void intnetR0NetworkSnoopDhcp(PINTNETNETWORK pNetwork, PCRTNETIPV4 pIpHdr, PCRTNETUDP pUdpHdr, uint32_t cbUdpPkt)
+{
+ /*
+ * Check if the DHCP message is valid and get the type.
+ */
+ if (!RTNetIPv4IsUDPValid(pIpHdr, pUdpHdr, pUdpHdr + 1, cbUdpPkt, true /*fCheckSum*/))
+ {
+ Log6(("Bad UDP packet\n"));
+ return;
+ }
+ PCRTNETBOOTP pDhcp = (PCRTNETBOOTP)(pUdpHdr + 1);
+ uint8_t MsgType;
+ if (!RTNetIPv4IsDHCPValid(pUdpHdr, pDhcp, cbUdpPkt - sizeof(*pUdpHdr), &MsgType))
+ {
+ Log6(("Bad DHCP packet\n"));
+ return;
+ }
+
+#ifdef LOG_ENABLED
+ /*
+ * Log it.
+ */
+ const char *pszType = "unknown";
+ switch (MsgType)
+ {
+ case RTNET_DHCP_MT_DISCOVER: pszType = "discover"; break;
+ case RTNET_DHCP_MT_OFFER: pszType = "offer"; break;
+ case RTNET_DHCP_MT_REQUEST: pszType = "request"; break;
+ case RTNET_DHCP_MT_DECLINE: pszType = "decline"; break;
+ case RTNET_DHCP_MT_ACK: pszType = "ack"; break;
+ case RTNET_DHCP_MT_NAC: pszType = "nac"; break;
+ case RTNET_DHCP_MT_RELEASE: pszType = "release"; break;
+ case RTNET_DHCP_MT_INFORM: pszType = "inform"; break;
+ }
+ Log6(("DHCP msg: %d (%s) client %.6Rhxs ciaddr=%d.%d.%d.%d yiaddr=%d.%d.%d.%d\n", MsgType, pszType, &pDhcp->bp_chaddr,
+ pDhcp->bp_ciaddr.au8[0], pDhcp->bp_ciaddr.au8[1], pDhcp->bp_ciaddr.au8[2], pDhcp->bp_ciaddr.au8[3],
+ pDhcp->bp_yiaddr.au8[0], pDhcp->bp_yiaddr.au8[1], pDhcp->bp_yiaddr.au8[2], pDhcp->bp_yiaddr.au8[3]));
+#endif /* LOG_EANBLED */
+
+ /*
+ * Act upon the message.
+ */
+ switch (MsgType)
+ {
+#if 0
+ case RTNET_DHCP_MT_REQUEST:
+ /** @todo Check for valid non-broadcast requests w/ IP for any of the MACs we
+ * know, and add the IP to the cache. */
+ break;
+#endif
+
+
+ /*
+ * Lookup the interface by its MAC address and insert the IPv4 address into the cache.
+ * Delete the old client address first, just in case it changed in a renewal.
+ */
+ case RTNET_DHCP_MT_ACK:
+ if (intnetR0IPv4AddrIsGood(pDhcp->bp_yiaddr))
+ {
+ PINTNETIF pMatchingIf = NULL;
+ RTSpinlockAcquire(pNetwork->hAddrSpinlock);
+
+ uint32_t iIf = pNetwork->MacTab.cEntries;
+ while (iIf-- > 0)
+ {
+ PINTNETIF pCur = pNetwork->MacTab.paEntries[iIf].pIf;
+ if ( intnetR0IfHasMacAddr(pCur)
+ && !memcmp(&pCur->MacAddr, &pDhcp->bp_chaddr, sizeof(RTMAC)))
+ {
+ intnetR0IfAddrCacheDelete(pCur, &pCur->aAddrCache[kIntNetAddrType_IPv4],
+ (PCRTNETADDRU)&pDhcp->bp_ciaddr, sizeof(RTNETADDRIPV4), "DHCP_MT_ACK");
+ if (!pMatchingIf)
+ {
+ pMatchingIf = pCur;
+ intnetR0BusyIncIf(pMatchingIf);
+ }
+ }
+ }
+
+ RTSpinlockRelease(pNetwork->hAddrSpinlock);
+
+ if (pMatchingIf)
+ {
+ intnetR0IfAddrCacheAdd(pMatchingIf, kIntNetAddrType_IPv4,
+ (PCRTNETADDRU)&pDhcp->bp_yiaddr, "DHCP_MT_ACK");
+ intnetR0BusyDecIf(pMatchingIf);
+ }
+ }
+ return;
+
+
+ /*
+ * Lookup the interface by its MAC address and remove the IPv4 address(es) from the cache.
+ */
+ case RTNET_DHCP_MT_RELEASE:
+ {
+ RTSpinlockAcquire(pNetwork->hAddrSpinlock);
+
+ uint32_t iIf = pNetwork->MacTab.cEntries;
+ while (iIf-- > 0)
+ {
+ PINTNETIF pCur = pNetwork->MacTab.paEntries[iIf].pIf;
+ if ( intnetR0IfHasMacAddr(pCur)
+ && !memcmp(&pCur->MacAddr, &pDhcp->bp_chaddr, sizeof(RTMAC)))
+ {
+ intnetR0IfAddrCacheDelete(pCur, &pCur->aAddrCache[kIntNetAddrType_IPv4],
+ (PCRTNETADDRU)&pDhcp->bp_ciaddr, sizeof(RTNETADDRIPV4), "DHCP_MT_RELEASE");
+ intnetR0IfAddrCacheDelete(pCur, &pCur->aAddrCache[kIntNetAddrType_IPv4],
+ (PCRTNETADDRU)&pDhcp->bp_yiaddr, sizeof(RTNETADDRIPV4), "DHCP_MT_RELEASE");
+ }
+ }
+
+ RTSpinlockRelease(pNetwork->hAddrSpinlock);
+ break;
+ }
+ }
+
+}
+
+
+/**
+ * Worker for intnetR0TrunkIfSnoopAddr that takes care of what
+ * is likely to be a DHCP message.
+ *
+ * The caller has already check that the UDP source and destination ports
+ * are BOOTPS or BOOTPC.
+ *
+ * @param pNetwork The network this frame was seen on.
+ * @param pSG The gather list for the frame.
+ */
+static void intnetR0TrunkIfSnoopDhcp(PINTNETNETWORK pNetwork, PCINTNETSG pSG)
+{
+ /*
+ * Get a pointer to a linear copy of the full packet, using the
+ * temporary buffer if necessary.
+ */
+ PCRTNETIPV4 pIpHdr = (PCRTNETIPV4)((PCRTNETETHERHDR)pSG->aSegs[0].pv + 1);
+ uint32_t cbPacket = pSG->cbTotal - sizeof(RTNETETHERHDR);
+ if (pSG->cSegsUsed > 1)
+ {
+ cbPacket = RT_MIN(cbPacket, INTNETNETWORK_TMP_SIZE);
+ Log6(("intnetR0TrunkIfSnoopDhcp: Copying IPv4/UDP/DHCP pkt %u\n", cbPacket));
+ if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), cbPacket, pNetwork->pbTmp))
+ return;
+ //pSG->fFlags |= INTNETSG_FLAGS_PKT_CP_IN_TMP;
+ pIpHdr = (PCRTNETIPV4)pNetwork->pbTmp;
+ }
+
+ /*
+ * Validate the IP header and find the UDP packet.
+ */
+ if (!RTNetIPv4IsHdrValid(pIpHdr, cbPacket, pSG->cbTotal - sizeof(RTNETETHERHDR), true /*fChecksum*/))
+ {
+ Log(("intnetR0TrunkIfSnoopDhcp: bad ip header\n"));
+ return;
+ }
+ uint32_t cbIpHdr = pIpHdr->ip_hl * 4;
+
+ /*
+ * Hand it over to the common DHCP snooper.
+ */
+ intnetR0NetworkSnoopDhcp(pNetwork, pIpHdr, (PCRTNETUDP)((uintptr_t)pIpHdr + cbIpHdr), cbPacket - cbIpHdr);
+}
+
+#endif /* INTNET_WITH_DHCP_SNOOPING */
+
+
+/**
+ * Snoops up source addresses from ARP requests and purge these from the address
+ * caches.
+ *
+ * The purpose of this purging is to get rid of stale addresses.
+ *
+ * @param pNetwork The network this frame was seen on.
+ * @param pSG The gather list for the frame.
+ */
+static void intnetR0TrunkIfSnoopArp(PINTNETNETWORK pNetwork, PCINTNETSG pSG)
+{
+ /*
+ * Check the minimum size first.
+ */
+ if (RT_UNLIKELY(pSG->cbTotal < sizeof(RTNETETHERHDR) + sizeof(RTNETARPIPV4)))
+ return;
+
+ /*
+ * Copy to temporary buffer if necessary.
+ */
+ uint32_t cbPacket = RT_MIN(pSG->cbTotal, sizeof(RTNETARPIPV4));
+ PCRTNETARPIPV4 pArpIPv4 = (PCRTNETARPIPV4)((uintptr_t)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR));
+ if ( pSG->cSegsUsed != 1
+ && pSG->aSegs[0].cb < cbPacket)
+ {
+ if ( (pSG->fFlags & (INTNETSG_FLAGS_ARP_IPV4 | INTNETSG_FLAGS_PKT_CP_IN_TMP))
+ != (INTNETSG_FLAGS_ARP_IPV4 | INTNETSG_FLAGS_PKT_CP_IN_TMP)
+ && !intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), cbPacket, pNetwork->pbTmp))
+ return;
+ pArpIPv4 = (PCRTNETARPIPV4)pNetwork->pbTmp;
+ }
+
+ /*
+ * Ignore packets which doesn't interest us or we perceive as malformed.
+ */
+ if (RT_UNLIKELY( pArpIPv4->Hdr.ar_hlen != sizeof(RTMAC)
+ || pArpIPv4->Hdr.ar_plen != sizeof(RTNETADDRIPV4)
+ || pArpIPv4->Hdr.ar_htype != RT_H2BE_U16(RTNET_ARP_ETHER)
+ || pArpIPv4->Hdr.ar_ptype != RT_H2BE_U16(RTNET_ETHERTYPE_IPV4)))
+ return;
+ uint16_t ar_oper = RT_H2BE_U16(pArpIPv4->Hdr.ar_oper);
+ if (RT_UNLIKELY( ar_oper != RTNET_ARPOP_REQUEST
+ && ar_oper != RTNET_ARPOP_REPLY))
+ {
+ Log6(("ts-ar: op=%#x\n", ar_oper));
+ return;
+ }
+
+ /*
+ * Delete the source address if it's OK.
+ */
+ if ( !intnetR0IsMacAddrMulticast(&pArpIPv4->ar_sha)
+ && ( pArpIPv4->ar_sha.au16[0]
+ || pArpIPv4->ar_sha.au16[1]
+ || pArpIPv4->ar_sha.au16[2])
+ && intnetR0IPv4AddrIsGood(pArpIPv4->ar_spa))
+ {
+ Log6(("ts-ar: %d.%d.%d.%d / %.6Rhxs\n", pArpIPv4->ar_spa.au8[0], pArpIPv4->ar_spa.au8[1],
+ pArpIPv4->ar_spa.au8[2], pArpIPv4->ar_spa.au8[3], &pArpIPv4->ar_sha));
+ intnetR0NetworkAddrCacheDelete(pNetwork, (PCRTNETADDRU)&pArpIPv4->ar_spa,
+ kIntNetAddrType_IPv4, sizeof(pArpIPv4->ar_spa), "tif/arp");
+ }
+}
+
+
+#ifdef INTNET_WITH_DHCP_SNOOPING
+/**
+ * Snoop up addresses from ARP and DHCP traffic from frames coming
+ * over the trunk connection.
+ *
+ * The caller is responsible for do some basic filtering before calling
+ * this function.
+ * For IPv4 this means checking against the minimum DHCPv4 frame size.
+ *
+ * @param pNetwork The network.
+ * @param pSG The SG list for the frame.
+ * @param EtherType The Ethertype of the frame.
+ */
+static void intnetR0TrunkIfSnoopAddr(PINTNETNETWORK pNetwork, PCINTNETSG pSG, uint16_t EtherType)
+{
+ switch (EtherType)
+ {
+ case RTNET_ETHERTYPE_IPV4:
+ {
+ uint32_t cbIpHdr;
+ uint8_t b;
+
+ Assert(pSG->cbTotal >= sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN);
+ if (pSG->aSegs[0].cb >= sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN)
+ {
+ /* check if the protocol is UDP */
+ PCRTNETIPV4 pIpHdr = (PCRTNETIPV4)((uint8_t const *)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR));
+ if (pIpHdr->ip_p != RTNETIPV4_PROT_UDP)
+ return;
+
+ /* get the TCP header length */
+ cbIpHdr = pIpHdr->ip_hl * 4;
+ }
+ else
+ {
+ /* check if the protocol is UDP */
+ if ( intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + RT_UOFFSETOF(RTNETIPV4, ip_p))
+ != RTNETIPV4_PROT_UDP)
+ return;
+
+ /* get the TCP header length */
+ b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + 0); /* (IPv4 first byte, a bitfield) */
+ cbIpHdr = (b & 0x0f) * 4;
+ }
+ if (cbIpHdr < RTNETIPV4_MIN_LEN)
+ return;
+
+ /* compare the ports. */
+ if (pSG->aSegs[0].cb >= sizeof(RTNETETHERHDR) + cbIpHdr + RTNETUDP_MIN_LEN)
+ {
+ PCRTNETUDP pUdpHdr = (PCRTNETUDP)((uint8_t const *)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR) + cbIpHdr);
+ if ( ( RT_BE2H_U16(pUdpHdr->uh_sport) != RTNETIPV4_PORT_BOOTPS
+ && RT_BE2H_U16(pUdpHdr->uh_dport) != RTNETIPV4_PORT_BOOTPS)
+ || ( RT_BE2H_U16(pUdpHdr->uh_dport) != RTNETIPV4_PORT_BOOTPC
+ && RT_BE2H_U16(pUdpHdr->uh_sport) != RTNETIPV4_PORT_BOOTPC))
+ return;
+ }
+ else
+ {
+ /* get the lower byte of the UDP source port number. */
+ b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + cbIpHdr + RT_UOFFSETOF(RTNETUDP, uh_sport) + 1);
+ if ( b != RTNETIPV4_PORT_BOOTPS
+ && b != RTNETIPV4_PORT_BOOTPC)
+ return;
+ uint8_t SrcPort = b;
+ b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + cbIpHdr + RT_UOFFSETOF(RTNETUDP, uh_sport));
+ if (b)
+ return;
+
+ /* get the lower byte of the UDP destination port number. */
+ b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + cbIpHdr + RT_UOFFSETOF(RTNETUDP, uh_dport) + 1);
+ if ( b != RTNETIPV4_PORT_BOOTPS
+ && b != RTNETIPV4_PORT_BOOTPC)
+ return;
+ if (b == SrcPort)
+ return;
+ b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + cbIpHdr + RT_UOFFSETOF(RTNETUDP, uh_dport));
+ if (b)
+ return;
+ }
+ intnetR0TrunkIfSnoopDhcp(pNetwork, pSG);
+ break;
+ }
+
+ case RTNET_ETHERTYPE_ARP:
+ intnetR0TrunkIfSnoopArp(pNetwork, pSG);
+ break;
+ }
+}
+#endif /* INTNET_WITH_DHCP_SNOOPING */
+
+/**
+ * Deals with an IPv6 packet.
+ *
+ * This will fish out the source IP address and add it to the cache.
+ * Then it will look for DHCPRELEASE requests (?) and anything else
+ * that we might find useful later.
+ *
+ * @param pIf The interface that's sending the frame.
+ * @param pIpHdr Pointer to the IPv4 header in the frame.
+ * @param cbPacket The size of the packet, or more correctly the
+ * size of the frame without the ethernet header.
+ * @param fGso Set if this is a GSO frame, clear if regular.
+ */
+static void intnetR0IfSnoopIPv6SourceAddr(PINTNETIF pIf, PCRTNETIPV6 pIpHdr, uint32_t cbPacket, bool fGso)
+{
+ NOREF(fGso);
+
+ /*
+ * Check the header size first to prevent access invalid data.
+ */
+ if (cbPacket < RTNETIPV6_MIN_LEN)
+ return;
+
+ /*
+ * If the source address is good (not multicast) and
+ * not already in the address cache of the sender, add it.
+ */
+ RTNETADDRU Addr;
+ Addr.IPv6 = pIpHdr->ip6_src;
+
+ if ( intnetR0IPv6AddrIsGood(Addr.IPv6) && (pIpHdr->ip6_hlim == 0xff)
+ && intnetR0IfAddrCacheLookupLikely(&pIf->aAddrCache[kIntNetAddrType_IPv6], &Addr, sizeof(Addr.IPv6)) < 0)
+ {
+ intnetR0IfAddrCacheAdd(pIf, kIntNetAddrType_IPv6, &Addr, "if/ipv6");
+ }
+}
+
+
+/**
+ * Deals with an IPv4 packet.
+ *
+ * This will fish out the source IP address and add it to the cache.
+ * Then it will look for DHCPRELEASE requests (?) and anything else
+ * that we might find useful later.
+ *
+ * @param pIf The interface that's sending the frame.
+ * @param pIpHdr Pointer to the IPv4 header in the frame.
+ * @param cbPacket The size of the packet, or more correctly the
+ * size of the frame without the ethernet header.
+ * @param fGso Set if this is a GSO frame, clear if regular.
+ */
+static void intnetR0IfSnoopIPv4SourceAddr(PINTNETIF pIf, PCRTNETIPV4 pIpHdr, uint32_t cbPacket, bool fGso)
+{
+ /*
+ * Check the header size first to prevent access invalid data.
+ */
+ if (cbPacket < RTNETIPV4_MIN_LEN)
+ return;
+ uint32_t cbHdr = (uint32_t)pIpHdr->ip_hl * 4;
+ if ( cbHdr < RTNETIPV4_MIN_LEN
+ || cbPacket < cbHdr)
+ return;
+
+ /*
+ * If the source address is good (not broadcast or my network) and
+ * not already in the address cache of the sender, add it. Validate
+ * the IP header before adding it.
+ */
+ bool fValidatedIpHdr = false;
+ RTNETADDRU Addr;
+ Addr.IPv4 = pIpHdr->ip_src;
+ if ( intnetR0IPv4AddrIsGood(Addr.IPv4)
+ && intnetR0IfAddrCacheLookupLikely(&pIf->aAddrCache[kIntNetAddrType_IPv4], &Addr, sizeof(Addr.IPv4)) < 0)
+ {
+ if (!RTNetIPv4IsHdrValid(pIpHdr, cbPacket, cbPacket, !fGso /*fChecksum*/))
+ {
+ Log(("intnetR0IfSnoopIPv4SourceAddr: bad ip header\n"));
+ return;
+ }
+
+ intnetR0IfAddrCacheAddIt(pIf, kIntNetAddrType_IPv4, &Addr, "if/ipv4");
+ fValidatedIpHdr = true;
+ }
+
+#ifdef INTNET_WITH_DHCP_SNOOPING
+ /*
+ * Check for potential DHCP packets.
+ */
+ if ( pIpHdr->ip_p == RTNETIPV4_PROT_UDP /* DHCP is UDP. */
+ && cbPacket >= cbHdr + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN /* Min DHCP packet len. */
+ && !fGso) /* GSO is not applicable to DHCP traffic. */
+ {
+ PCRTNETUDP pUdpHdr = (PCRTNETUDP)((uint8_t const *)pIpHdr + cbHdr);
+ if ( ( RT_BE2H_U16(pUdpHdr->uh_dport) == RTNETIPV4_PORT_BOOTPS
+ || RT_BE2H_U16(pUdpHdr->uh_sport) == RTNETIPV4_PORT_BOOTPS)
+ && ( RT_BE2H_U16(pUdpHdr->uh_sport) == RTNETIPV4_PORT_BOOTPC
+ || RT_BE2H_U16(pUdpHdr->uh_dport) == RTNETIPV4_PORT_BOOTPC))
+ {
+ if ( fValidatedIpHdr
+ || RTNetIPv4IsHdrValid(pIpHdr, cbPacket, cbPacket, !fGso /*fChecksum*/))
+ intnetR0NetworkSnoopDhcp(pIf->pNetwork, pIpHdr, pUdpHdr, cbPacket - cbHdr);
+ else
+ Log(("intnetR0IfSnoopIPv4SourceAddr: bad ip header (dhcp)\n"));
+ }
+ }
+#endif /* INTNET_WITH_DHCP_SNOOPING */
+}
+
+
+/**
+ * Snoop up source addresses from an ARP request or reply.
+ *
+ * @param pIf The interface that's sending the frame.
+ * @param pHdr The ARP header.
+ * @param cbPacket The size of the packet (might be larger than the ARP
+ * request 'cause of min ethernet frame size).
+ * @param pfSgFlags Pointer to the SG flags. This is used to tag the packet so we
+ * don't have to repeat the frame parsing in intnetR0TrunkIfSend.
+ */
+static void intnetR0IfSnoopArpAddr(PINTNETIF pIf, PCRTNETARPIPV4 pArpIPv4, uint32_t cbPacket, uint16_t *pfSgFlags)
+{
+ /*
+ * Ignore packets which doesn't interest us or we perceive as malformed.
+ */
+ if (RT_UNLIKELY(cbPacket < sizeof(RTNETARPIPV4)))
+ return;
+ if (RT_UNLIKELY( pArpIPv4->Hdr.ar_hlen != sizeof(RTMAC)
+ || pArpIPv4->Hdr.ar_plen != sizeof(RTNETADDRIPV4)
+ || pArpIPv4->Hdr.ar_htype != RT_H2BE_U16(RTNET_ARP_ETHER)
+ || pArpIPv4->Hdr.ar_ptype != RT_H2BE_U16(RTNET_ETHERTYPE_IPV4)))
+ return;
+ uint16_t ar_oper = RT_H2BE_U16(pArpIPv4->Hdr.ar_oper);
+ if (RT_UNLIKELY( ar_oper != RTNET_ARPOP_REQUEST
+ && ar_oper != RTNET_ARPOP_REPLY))
+ {
+ Log6(("ar_oper=%#x\n", ar_oper));
+ return;
+ }
+
+ /*
+ * Tag the SG as ARP IPv4 for later editing, then check for addresses
+ * which can be removed or added to the address cache of the sender.
+ */
+ *pfSgFlags |= INTNETSG_FLAGS_ARP_IPV4;
+
+ if ( ar_oper == RTNET_ARPOP_REPLY
+ && !intnetR0IsMacAddrMulticast(&pArpIPv4->ar_tha)
+ && ( pArpIPv4->ar_tha.au16[0]
+ || pArpIPv4->ar_tha.au16[1]
+ || pArpIPv4->ar_tha.au16[2])
+ && intnetR0IPv4AddrIsGood(pArpIPv4->ar_tpa))
+ intnetR0IfAddrCacheDelete(pIf, &pIf->aAddrCache[kIntNetAddrType_IPv4],
+ (PCRTNETADDRU)&pArpIPv4->ar_tpa, sizeof(RTNETADDRIPV4), "if/arp");
+
+ if ( !memcmp(&pArpIPv4->ar_sha, &pIf->MacAddr, sizeof(RTMAC))
+ && intnetR0IPv4AddrIsGood(pArpIPv4->ar_spa))
+ {
+ intnetR0IfAddrCacheAdd(pIf, kIntNetAddrType_IPv4, (PCRTNETADDRU)&pArpIPv4->ar_spa, "if/arp");
+ }
+}
+
+
+
+/**
+ * Checks packets send by a normal interface for new network
+ * layer addresses.
+ *
+ * @param pIf The interface that's sending the frame.
+ * @param pbFrame The frame.
+ * @param cbFrame The size of the frame.
+ * @param fGso Set if this is a GSO frame, clear if regular.
+ * @param pfSgFlags Pointer to the SG flags. This is used to tag the packet so we
+ * don't have to repeat the frame parsing in intnetR0TrunkIfSend.
+ */
+static void intnetR0IfSnoopAddr(PINTNETIF pIf, uint8_t const *pbFrame, uint32_t cbFrame, bool fGso, uint16_t *pfSgFlags)
+{
+ /*
+ * Fish out the ethertype and look for stuff we can handle.
+ */
+ if (cbFrame <= sizeof(RTNETETHERHDR))
+ return;
+ cbFrame -= sizeof(RTNETETHERHDR);
+
+ uint16_t EtherType = RT_H2BE_U16(((PCRTNETETHERHDR)pbFrame)->EtherType);
+ switch (EtherType)
+ {
+ case RTNET_ETHERTYPE_IPV4:
+ intnetR0IfSnoopIPv4SourceAddr(pIf, (PCRTNETIPV4)((PCRTNETETHERHDR)pbFrame + 1), cbFrame, fGso);
+ break;
+
+ case RTNET_ETHERTYPE_IPV6:
+ intnetR0IfSnoopIPv6SourceAddr(pIf, (PCRTNETIPV6)((PCRTNETETHERHDR)pbFrame + 1), cbFrame, fGso);
+ break;
+
+#if 0 /** @todo IntNet: implement IPX for wireless MAC sharing? */
+ case RTNET_ETHERTYPE_IPX_1:
+ case RTNET_ETHERTYPE_IPX_2:
+ case RTNET_ETHERTYPE_IPX_3:
+ intnetR0IfSnoopIpxSourceAddr(pIf, (PCINTNETIPX)((PCRTNETETHERHDR)pbFrame + 1), cbFrame, pfSgFlags);
+ break;
+#endif
+ case RTNET_ETHERTYPE_ARP:
+ intnetR0IfSnoopArpAddr(pIf, (PCRTNETARPIPV4)((PCRTNETETHERHDR)pbFrame + 1), cbFrame, pfSgFlags);
+ break;
+ }
+}
+
+
+/**
+ * Writes a frame packet to the ring buffer.
+ *
+ * @returns VBox status code.
+ * @param pBuf The buffer.
+ * @param pRingBuf The ring buffer to read from.
+ * @param pSG The gather list.
+ * @param pNewDstMac Set the destination MAC address to the address if specified.
+ */
+static int intnetR0RingWriteFrame(PINTNETRINGBUF pRingBuf, PCINTNETSG pSG, PCRTMAC pNewDstMac)
+{
+ PINTNETHDR pHdr = NULL; /* shut up gcc*/
+ void *pvDst = NULL; /* ditto */
+ int rc;
+ if (pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
+ rc = IntNetRingAllocateFrame(pRingBuf, pSG->cbTotal, &pHdr, &pvDst);
+ else
+ rc = IntNetRingAllocateGsoFrame(pRingBuf, pSG->cbTotal, &pSG->GsoCtx, &pHdr, &pvDst);
+ if (RT_SUCCESS(rc))
+ {
+ IntNetSgRead(pSG, pvDst);
+ if (pNewDstMac)
+ ((PRTNETETHERHDR)pvDst)->DstMac = *pNewDstMac;
+
+ IntNetRingCommitFrame(pRingBuf, pHdr);
+ return VINF_SUCCESS;
+ }
+ return rc;
+}
+
+
+/**
+ * Notifies consumers of incoming data from @a pIf that data is available.
+ */
+DECL_FORCE_INLINE(void) intnetR0IfNotifyRecv(PINTNETIF pIf)
+{
+#if !defined(VBOX_WITH_INTNET_SERVICE_IN_R3) || !defined(IN_RING3)
+ RTSemEventSignal(pIf->hRecvEvent);
+#else
+ pIf->pfnRecvAvail(pIf->hIf, pIf->pvUserRecvAvail);
+#endif
+}
+
+
+/**
+ * Sends a frame to a specific interface.
+ *
+ * @param pIf The interface.
+ * @param pIfSender The interface sending the frame. This is NULL if it's the trunk.
+ * @param pSG The gather buffer which data is being sent to the interface.
+ * @param pNewDstMac Set the destination MAC address to the address if specified.
+ */
+static void intnetR0IfSend(PINTNETIF pIf, PINTNETIF pIfSender, PINTNETSG pSG, PCRTMAC pNewDstMac)
+{
+ /*
+ * Grab the receive/producer lock and copy over the frame.
+ */
+ RTSpinlockAcquire(pIf->hRecvInSpinlock);
+ int rc = intnetR0RingWriteFrame(&pIf->pIntBuf->Recv, pSG, pNewDstMac);
+ RTSpinlockRelease(pIf->hRecvInSpinlock);
+ if (RT_SUCCESS(rc))
+ {
+ pIf->cYields = 0;
+ intnetR0IfNotifyRecv(pIf);
+ return;
+ }
+
+ Log(("intnetR0IfSend: overflow cb=%d hIf=%RX32\n", pSG->cbTotal, pIf->hIf));
+
+ /*
+ * Scheduling hack, for unicore machines primarily.
+ */
+ if ( pIf->fActive
+ && pIf->cYields < 4 /* just twice */
+ && pIfSender /* but not if it's from the trunk */
+ && RTThreadPreemptIsEnabled(NIL_RTTHREAD)
+ )
+ {
+ unsigned cYields = 2;
+ while (--cYields > 0)
+ {
+ intnetR0IfNotifyRecv(pIf);
+ RTThreadYield();
+
+ RTSpinlockAcquire(pIf->hRecvInSpinlock);
+ rc = intnetR0RingWriteFrame(&pIf->pIntBuf->Recv, pSG, pNewDstMac);
+ RTSpinlockRelease(pIf->hRecvInSpinlock);
+ if (RT_SUCCESS(rc))
+ {
+ STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatYieldsOk);
+ intnetR0IfNotifyRecv(pIf);
+ return;
+ }
+ pIf->cYields++;
+ }
+ STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatYieldsNok);
+ }
+
+ /* ok, the frame is lost. */
+ STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatLost);
+ intnetR0IfNotifyRecv(pIf);
+}
+
+
+/**
+ * Fallback path that does the GSO segmenting before passing the frame on to the
+ * trunk interface.
+ *
+ * The caller holds the trunk lock.
+ *
+ * @param pThis The trunk.
+ * @param pIfSender The IF sending the frame.
+ * @param pSG Pointer to the gather list.
+ * @param fDst The destination flags.
+ */
+static int intnetR0TrunkIfSendGsoFallback(PINTNETTRUNKIF pThis, PINTNETIF pIfSender, PINTNETSG pSG, uint32_t fDst)
+{
+ /*
+ * Since we're only using this for GSO frame coming from the internal
+ * network interfaces and never the trunk, we can assume there is only
+ * one segment. This simplifies the code quite a bit.
+ */
+ Assert(PDMNetGsoIsValid(&pSG->GsoCtx, sizeof(pSG->GsoCtx), pSG->cbTotal));
+ AssertReturn(pSG->cSegsUsed == 1, VERR_INTERNAL_ERROR_4);
+
+ union
+ {
+ uint8_t abBuf[sizeof(INTNETSG) + sizeof(INTNETSEG)];
+ INTNETSG SG;
+ } u;
+
+ /** @todo We have to adjust MSS so it does not exceed the value configured for
+ * the host's interface.
+ */
+
+ /*
+ * Carve out the frame segments with the header and frame in different
+ * scatter / gather segments.
+ */
+ uint32_t const cSegs = PDMNetGsoCalcSegmentCount(&pSG->GsoCtx, pSG->cbTotal);
+ for (uint32_t iSeg = 0; iSeg < cSegs; iSeg++)
+ {
+ uint32_t cbSegPayload, cbSegHdrs;
+ uint32_t offSegPayload = PDMNetGsoCarveSegment(&pSG->GsoCtx, (uint8_t *)pSG->aSegs[0].pv, pSG->cbTotal, iSeg, cSegs,
+ pIfSender->abGsoHdrs, &cbSegHdrs, &cbSegPayload);
+
+ IntNetSgInitTempSegs(&u.SG, cbSegHdrs + cbSegPayload, 2, 2);
+ u.SG.aSegs[0].Phys = NIL_RTHCPHYS;
+ u.SG.aSegs[0].pv = pIfSender->abGsoHdrs;
+ u.SG.aSegs[0].cb = cbSegHdrs;
+ u.SG.aSegs[1].Phys = NIL_RTHCPHYS;
+ u.SG.aSegs[1].pv = (uint8_t *)pSG->aSegs[0].pv + offSegPayload;
+ u.SG.aSegs[1].cb = (uint32_t)cbSegPayload;
+
+ int rc = pThis->pIfPort->pfnXmit(pThis->pIfPort, pIfSender->pvIfData, &u.SG, fDst);
+ if (RT_FAILURE(rc))
+ return rc;
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Checks if any of the given trunk destinations can handle this kind of GSO SG.
+ *
+ * @returns true if it can, false if it cannot.
+ * @param pThis The trunk.
+ * @param pSG The scatter / gather buffer.
+ * @param fDst The destination mask.
+ */
+DECLINLINE(bool) intnetR0TrunkIfCanHandleGsoFrame(PINTNETTRUNKIF pThis, PINTNETSG pSG, uint32_t fDst)
+{
+ uint8_t u8Type = pSG->GsoCtx.u8Type;
+ AssertReturn(u8Type < 32, false); /* paranoia */
+ uint32_t fMask = RT_BIT_32(u8Type);
+
+ if (fDst == INTNETTRUNKDIR_HOST)
+ return !!(pThis->fHostGsoCapabilites & fMask);
+ if (fDst == INTNETTRUNKDIR_WIRE)
+ return !!(pThis->fWireGsoCapabilites & fMask);
+ Assert(fDst == (INTNETTRUNKDIR_WIRE | INTNETTRUNKDIR_HOST));
+ return !!(pThis->fHostGsoCapabilites & pThis->fWireGsoCapabilites & fMask);
+}
+
+
+/**
+ * Calculates the checksum of a full ipv6 frame.
+ *
+ * @returns 16-bit hecksum value.
+ * @param pIpHdr The IPv6 header (network endian (big)).
+ * @param bProtocol The protocol number. This can be the same as the
+ * ip6_nxt field, but doesn't need to be.
+ * @param cbPkt The packet size (host endian of course). This can
+ * be the same as the ip6_plen field, but as with @a
+ * bProtocol it won't be when extension headers are
+ * present. For UDP this will be uh_ulen converted to
+ * host endian.
+ */
+static uint16_t computeIPv6FullChecksum(PCRTNETIPV6 pIpHdr)
+{
+ uint16_t const *data;
+ int len = RT_BE2H_U16(pIpHdr->ip6_plen);
+ uint32_t sum = RTNetIPv6PseudoChecksum(pIpHdr);
+
+ /* add the payload */
+ data = (uint16_t *) (pIpHdr + 1);
+ while(len > 1)
+ {
+ sum += *(data);
+ data++;
+ len -= 2;
+ }
+
+ if(len > 0)
+ sum += *((uint8_t *) data);
+
+ while(sum >> 16)
+ sum = (sum & 0xffff) + (sum >> 16);
+
+ return (uint16_t) ~sum;
+}
+
+
+/**
+ * Rewrite VM MAC address with shared host MAC address inside IPv6
+ * Neighbor Discovery datagrams.
+ */
+static void intnetR0TrunkSharedMacEditIPv6FromIntNet(PINTNETTRUNKIF pThis, PINTNETIF pIfSender,
+ PRTNETETHERHDR pEthHdr, uint32_t cb)
+{
+ if (RT_UNLIKELY(cb < sizeof(*pEthHdr)))
+ return;
+
+ /* have IPv6 header */
+ PRTNETIPV6 pIPv6 = (PRTNETIPV6)(pEthHdr + 1);
+ cb -= sizeof(*pEthHdr);
+ if (RT_UNLIKELY(cb < sizeof(*pIPv6)))
+ return;
+
+ if ( pIPv6->ip6_nxt != RTNETIPV6_PROT_ICMPV6
+ || pIPv6->ip6_hlim != 0xff)
+ return;
+
+ PRTNETICMPV6HDR pICMPv6 = (PRTNETICMPV6HDR)(pIPv6 + 1);
+ cb -= sizeof(*pIPv6);
+ if (RT_UNLIKELY(cb < sizeof(*pICMPv6)))
+ return;
+
+ uint32_t hdrlen = 0;
+ uint8_t llaopt = RTNETIPV6_ICMP_ND_SLLA_OPT;
+
+ uint8_t type = pICMPv6->icmp6_type;
+ switch (type)
+ {
+ case RTNETIPV6_ICMP_TYPE_RS:
+ hdrlen = 8;
+ break;
+
+ case RTNETIPV6_ICMP_TYPE_RA:
+ hdrlen = 16;
+ break;
+
+ case RTNETIPV6_ICMP_TYPE_NS:
+ hdrlen = 24;
+ break;
+
+ case RTNETIPV6_ICMP_TYPE_NA:
+ hdrlen = 24;
+ llaopt = RTNETIPV6_ICMP_ND_TLLA_OPT;
+ break;
+
+ default:
+ return;
+ }
+
+ AssertReturnVoid(hdrlen > 0);
+ if (RT_UNLIKELY(cb < hdrlen))
+ return;
+
+ if (RT_UNLIKELY(pICMPv6->icmp6_code != 0))
+ return;
+
+ PRTNETNDP_LLA_OPT pLLAOpt = NULL;
+ char *pOpt = (char *)pICMPv6 + hdrlen;
+ cb -= hdrlen;
+
+ while (cb >= 8)
+ {
+ uint8_t opt = ((uint8_t *)pOpt)[0];
+ uint32_t optlen = (uint32_t)((uint8_t *)pOpt)[1] * 8;
+ if (RT_UNLIKELY(cb < optlen))
+ return;
+
+ if (opt == llaopt)
+ {
+ if (RT_UNLIKELY(optlen != 8))
+ return;
+ pLLAOpt = (PRTNETNDP_LLA_OPT)pOpt;
+ break;
+ }
+
+ pOpt += optlen;
+ cb -= optlen;
+ }
+
+ if (pLLAOpt == NULL)
+ return;
+
+ if (memcmp(&pLLAOpt->lla, &pIfSender->MacAddr, sizeof(RTMAC)) != 0)
+ return;
+
+ /* overwrite VM's MAC with host's MAC */
+ pLLAOpt->lla = pThis->MacAddr;
+
+ /* recompute the checksum */
+ pICMPv6->icmp6_cksum = 0;
+ pICMPv6->icmp6_cksum = computeIPv6FullChecksum(pIPv6);
+}
+
+
+/**
+ * Sends a frame down the trunk.
+ *
+ * @param pThis The trunk.
+ * @param pNetwork The network the frame is being sent to.
+ * @param pIfSender The IF sending the frame. Used for MAC address
+ * checks in shared MAC mode.
+ * @param fDst The destination flags.
+ * @param pSG Pointer to the gather list.
+ */
+static void intnetR0TrunkIfSend(PINTNETTRUNKIF pThis, PINTNETNETWORK pNetwork, PINTNETIF pIfSender,
+ uint32_t fDst, PINTNETSG pSG)
+{
+ /*
+ * Quick sanity check.
+ */
+ AssertPtr(pThis);
+ AssertPtr(pNetwork);
+ AssertPtr(pIfSender);
+ AssertPtr(pSG);
+ Assert(fDst);
+ AssertReturnVoid(pThis->pIfPort);
+
+ /*
+ * Edit the frame if we're sharing the MAC address with the host on the wire.
+ *
+ * If the frame is headed for both the host and the wire, we'll have to send
+ * it to the host before making any modifications, and force the OS specific
+ * backend to copy it. We do this by marking it as TEMP (which is always the
+ * case right now).
+ */
+ if ( (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
+ && (fDst & INTNETTRUNKDIR_WIRE))
+ {
+ /*
+ * Dispatch it to the host before making changes.
+ */
+ if (fDst & INTNETTRUNKDIR_HOST)
+ {
+ Assert(pSG->fFlags & INTNETSG_FLAGS_TEMP); /* make sure copy is forced */
+ intnetR0TrunkIfSend(pThis, pNetwork, pIfSender, INTNETTRUNKDIR_HOST, pSG);
+ fDst &= ~INTNETTRUNKDIR_HOST;
+ }
+
+ /*
+ * Edit the source address so that it it's the same as the host.
+ */
+ /* ASSUME frame from IntNetR0IfSend! */
+ AssertReturnVoid(pSG->cSegsUsed == 1);
+ AssertReturnVoid(pSG->cbTotal >= sizeof(RTNETETHERHDR));
+ AssertReturnVoid(pIfSender);
+ PRTNETETHERHDR pEthHdr = (PRTNETETHERHDR)pSG->aSegs[0].pv;
+
+ pEthHdr->SrcMac = pThis->MacAddr;
+
+ /*
+ * Deal with tags from the snooping phase.
+ */
+ if (pSG->fFlags & INTNETSG_FLAGS_ARP_IPV4)
+ {
+ /*
+ * APR IPv4: replace hardware (MAC) addresses because these end up
+ * in ARP caches. So, if we don't the other machines will
+ * send the packets to the MAC address of the guest
+ * instead of the one of the host, which won't work on
+ * wireless of course...
+ */
+ PRTNETARPIPV4 pArp = (PRTNETARPIPV4)(pEthHdr + 1);
+ if (!memcmp(&pArp->ar_sha, &pIfSender->MacAddr, sizeof(RTMAC)))
+ {
+ Log6(("tw: ar_sha %.6Rhxs -> %.6Rhxs\n", &pArp->ar_sha, &pThis->MacAddr));
+ pArp->ar_sha = pThis->MacAddr;
+ }
+ if (!memcmp(&pArp->ar_tha, &pIfSender->MacAddr, sizeof(RTMAC))) /* just in case... */
+ {
+ Log6(("tw: ar_tha %.6Rhxs -> %.6Rhxs\n", &pArp->ar_tha, &pThis->MacAddr));
+ pArp->ar_tha = pThis->MacAddr;
+ }
+ }
+ else if (pEthHdr->EtherType == RT_H2N_U16_C(RTNET_ETHERTYPE_IPV6))
+ {
+ intnetR0TrunkSharedMacEditIPv6FromIntNet(pThis, pIfSender, pEthHdr, pSG->cbTotal);
+ }
+ }
+
+ /*
+ * Send the frame, handling the GSO fallback.
+ *
+ * Note! The trunk implementation will re-check that the trunk is active
+ * before sending, so we don't have to duplicate that effort here.
+ */
+ STAM_REL_PROFILE_START(&pIfSender->pIntBuf->StatSend2, a);
+ int rc;
+ if ( pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID
+ || intnetR0TrunkIfCanHandleGsoFrame(pThis, pSG, fDst) )
+ rc = pThis->pIfPort->pfnXmit(pThis->pIfPort, pIfSender->pvIfData, pSG, fDst);
+ else
+ rc = intnetR0TrunkIfSendGsoFallback(pThis, pIfSender, pSG, fDst);
+ STAM_REL_PROFILE_STOP(&pIfSender->pIntBuf->StatSend2, a);
+
+ /** @todo failure statistics? */
+ Log2(("intnetR0TrunkIfSend: %Rrc fDst=%d\n", rc, fDst)); NOREF(rc);
+}
+
+
+/**
+ * Detect broadcasts packaged as unicast and convert them back to broadcast.
+ *
+ * WiFi routers try to use ethernet unicast instead of broadcast or
+ * multicast when possible. Look inside the packet and fix up
+ * ethernet destination to be proper broadcast or multicast if
+ * necessary.
+ *
+ * @returns true broadcast (pEthHdr & pSG are modified), false if not.
+ * @param pNetwork The network the frame is being sent to.
+ * @param pSG Pointer to the gather list for the frame. The
+ * ethernet destination address is modified when
+ * returning true.
+ * @param pEthHdr Pointer to the ethernet header. The ethernet
+ * destination address is modified when returning true.
+ */
+static bool intnetR0NetworkSharedMacDetectAndFixBroadcast(PINTNETNETWORK pNetwork, PINTNETSG pSG, PRTNETETHERHDR pEthHdr)
+{
+ NOREF(pNetwork);
+
+ switch (pEthHdr->EtherType)
+ {
+ case RT_H2N_U16_C(RTNET_ETHERTYPE_ARP):
+ {
+ uint16_t ar_oper;
+ if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_UOFFSETOF(RTNETARPHDR, ar_oper),
+ sizeof(ar_oper), &ar_oper))
+ return false;
+
+ if (ar_oper == RT_H2N_U16_C(RTNET_ARPOP_REQUEST))
+ {
+ /* change to broadcast */
+ pEthHdr->DstMac.au16[0] = 0xffff;
+ pEthHdr->DstMac.au16[1] = 0xffff;
+ pEthHdr->DstMac.au16[2] = 0xffff;
+ }
+ else
+ return false;
+ break;
+ }
+
+ case RT_H2N_U16_C(RTNET_ETHERTYPE_IPV4):
+ {
+ RTNETADDRIPV4 ip_dst;
+ if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_UOFFSETOF(RTNETIPV4, ip_dst),
+ sizeof(ip_dst), &ip_dst))
+ return false;
+
+ if (ip_dst.u == 0xffffffff) /* 255.255.255.255? */
+ {
+ /* change to broadcast */
+ pEthHdr->DstMac.au16[0] = 0xffff;
+ pEthHdr->DstMac.au16[1] = 0xffff;
+ pEthHdr->DstMac.au16[2] = 0xffff;
+ }
+ else if ((ip_dst.au8[0] & 0xf0) == 0xe0) /* IPv4 multicast? */
+ {
+ /* change to 01:00:5e:xx:xx:xx multicast ... */
+ pEthHdr->DstMac.au8[0] = 0x01;
+ pEthHdr->DstMac.au8[1] = 0x00;
+ pEthHdr->DstMac.au8[2] = 0x5e;
+ /* ... with lower 23 bits from the multicast IP address */
+ pEthHdr->DstMac.au8[3] = ip_dst.au8[1] & 0x7f;
+ pEthHdr->DstMac.au8[4] = ip_dst.au8[2];
+ pEthHdr->DstMac.au8[5] = ip_dst.au8[3];
+ }
+ else
+ return false;
+ break;
+ }
+
+ case RT_H2N_U16_C(RTNET_ETHERTYPE_IPV6):
+ {
+ RTNETADDRIPV6 ip6_dst;
+ if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_UOFFSETOF(RTNETIPV6, ip6_dst),
+ sizeof(ip6_dst), &ip6_dst))
+ return false;
+
+ if (ip6_dst.au8[0] == 0xff) /* IPv6 multicast? */
+ {
+ pEthHdr->DstMac.au16[0] = 0x3333;
+ pEthHdr->DstMac.au16[1] = ip6_dst.au16[6];
+ pEthHdr->DstMac.au16[2] = ip6_dst.au16[7];
+ }
+ else
+ return false;
+ break;
+ }
+
+ default:
+ return false;
+ }
+
+
+ /*
+ * Update ethernet destination in the segment.
+ */
+ intnetR0SgWritePart(pSG, RT_UOFFSETOF(RTNETETHERHDR, DstMac), sizeof(pEthHdr->DstMac), &pEthHdr->DstMac);
+
+ return true;
+}
+
+
+/**
+ * Snoops a multicast ICMPv6 ND DAD from the wire via the trunk connection.
+ *
+ * @param pNetwork The network the frame is being sent to.
+ * @param pSG Pointer to the gather list for the frame.
+ * @param pEthHdr Pointer to the ethernet header.
+ */
+static void intnetR0NetworkSnoopNAFromWire(PINTNETNETWORK pNetwork, PINTNETSG pSG, PRTNETETHERHDR pEthHdr)
+{
+ NOREF(pEthHdr);
+
+ /*
+ * Check the minimum size and get a linear copy of the thing to work on,
+ * using the temporary buffer if necessary.
+ */
+ if (RT_UNLIKELY(pSG->cbTotal < sizeof(RTNETETHERHDR) + sizeof(RTNETIPV6) +
+ sizeof(RTNETNDP)))
+ return;
+ PRTNETIPV6 pIPv6 = (PRTNETIPV6)((uint8_t *)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR));
+ if ( pSG->cSegsUsed != 1
+ && pSG->aSegs[0].cb < sizeof(RTNETETHERHDR) + sizeof(RTNETIPV6) +
+ sizeof(RTNETNDP))
+ {
+ Log6(("fw: Copying IPv6 pkt %u\n", sizeof(RTNETIPV6)));
+ if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), sizeof(RTNETIPV6)
+ + sizeof(RTNETNDP), pNetwork->pbTmp))
+ return;
+ pSG->fFlags |= INTNETSG_FLAGS_PKT_CP_IN_TMP;
+ pIPv6 = (PRTNETIPV6)pNetwork->pbTmp;
+ }
+
+ PCRTNETNDP pNd = (PCRTNETNDP) (pIPv6 + 1);
+
+ /*
+ * a multicast NS with :: as source address means a DAD packet.
+ * if it comes from the wire and we have the DAD'd address in our cache,
+ * flush the entry as the address is being acquired by someone else on
+ * the network.
+ */
+ if ( pIPv6->ip6_hlim == 0xff
+ && pIPv6->ip6_nxt == RTNETIPV6_PROT_ICMPV6
+ && pNd->Hdr.icmp6_type == RTNETIPV6_ICMP_TYPE_NS
+ && pNd->Hdr.icmp6_code == 0
+ && pIPv6->ip6_src.QWords.qw0 == 0
+ && pIPv6->ip6_src.QWords.qw1 == 0)
+ {
+
+ intnetR0NetworkAddrCacheDelete(pNetwork, (PCRTNETADDRU) &pNd->target_address,
+ kIntNetAddrType_IPv6, sizeof(RTNETADDRIPV6), "tif/ip6");
+ }
+}
+/**
+ * Edits an ARP packet arriving from the wire via the trunk connection.
+ *
+ * @param pNetwork The network the frame is being sent to.
+ * @param pSG Pointer to the gather list for the frame.
+ * The flags and data content may be updated.
+ * @param pEthHdr Pointer to the ethernet header. This may also be
+ * updated if it's a unicast...
+ */
+static void intnetR0NetworkEditArpFromWire(PINTNETNETWORK pNetwork, PINTNETSG pSG, PRTNETETHERHDR pEthHdr)
+{
+ /*
+ * Check the minimum size and get a linear copy of the thing to work on,
+ * using the temporary buffer if necessary.
+ */
+ if (RT_UNLIKELY(pSG->cbTotal < sizeof(RTNETETHERHDR) + sizeof(RTNETARPIPV4)))
+ return;
+ PRTNETARPIPV4 pArpIPv4 = (PRTNETARPIPV4)((uint8_t *)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR));
+ if ( pSG->cSegsUsed != 1
+ && pSG->aSegs[0].cb < sizeof(RTNETETHERHDR) + sizeof(RTNETARPIPV4))
+ {
+ Log6(("fw: Copying ARP pkt %u\n", sizeof(RTNETARPIPV4)));
+ if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), sizeof(RTNETARPIPV4), pNetwork->pbTmp))
+ return;
+ pSG->fFlags |= INTNETSG_FLAGS_PKT_CP_IN_TMP;
+ pArpIPv4 = (PRTNETARPIPV4)pNetwork->pbTmp;
+ }
+
+ /*
+ * Ignore packets which doesn't interest us or we perceive as malformed.
+ */
+ if (RT_UNLIKELY( pArpIPv4->Hdr.ar_hlen != sizeof(RTMAC)
+ || pArpIPv4->Hdr.ar_plen != sizeof(RTNETADDRIPV4)
+ || pArpIPv4->Hdr.ar_htype != RT_H2BE_U16(RTNET_ARP_ETHER)
+ || pArpIPv4->Hdr.ar_ptype != RT_H2BE_U16(RTNET_ETHERTYPE_IPV4)))
+ return;
+ uint16_t ar_oper = RT_H2BE_U16(pArpIPv4->Hdr.ar_oper);
+ if (RT_UNLIKELY( ar_oper != RTNET_ARPOP_REQUEST
+ && ar_oper != RTNET_ARPOP_REPLY))
+ {
+ Log6(("ar_oper=%#x\n", ar_oper));
+ return;
+ }
+
+ /* Tag it as ARP IPv4. */
+ pSG->fFlags |= INTNETSG_FLAGS_ARP_IPV4;
+
+ /*
+ * The thing we're interested in here is a reply to a query made by a guest
+ * since we modified the MAC in the initial request the guest made.
+ */
+ RTSpinlockAcquire(pNetwork->hAddrSpinlock);
+ RTMAC MacAddrTrunk;
+ if (pNetwork->MacTab.pTrunk)
+ MacAddrTrunk = pNetwork->MacTab.pTrunk->MacAddr;
+ else
+ memset(&MacAddrTrunk, 0, sizeof(MacAddrTrunk));
+ RTSpinlockRelease(pNetwork->hAddrSpinlock);
+ if ( ar_oper == RTNET_ARPOP_REPLY
+ && !memcmp(&pArpIPv4->ar_tha, &MacAddrTrunk, sizeof(RTMAC)))
+ {
+ PINTNETIF pIf = intnetR0NetworkAddrCacheLookupIf(pNetwork, (PCRTNETADDRU)&pArpIPv4->ar_tpa,
+ kIntNetAddrType_IPv4, sizeof(pArpIPv4->ar_tpa));
+ if (pIf)
+ {
+ Log6(("fw: ar_tha %.6Rhxs -> %.6Rhxs\n", &pArpIPv4->ar_tha, &pIf->MacAddr));
+ pArpIPv4->ar_tha = pIf->MacAddr;
+ if (!memcmp(&pEthHdr->DstMac, &MacAddrTrunk, sizeof(RTMAC)))
+ {
+ Log6(("fw: DstMac %.6Rhxs -> %.6Rhxs\n", &pEthHdr->DstMac, &pIf->MacAddr));
+ pEthHdr->DstMac = pIf->MacAddr;
+ if ((void *)pEthHdr != pSG->aSegs[0].pv)
+ intnetR0SgWritePart(pSG, RT_UOFFSETOF(RTNETETHERHDR, DstMac), sizeof(RTMAC), &pIf->MacAddr);
+ }
+ intnetR0BusyDecIf(pIf);
+
+ /* Write back the packet if we've been making changes to a buffered copy. */
+ if (pSG->fFlags & INTNETSG_FLAGS_PKT_CP_IN_TMP)
+ intnetR0SgWritePart(pSG, sizeof(RTNETETHERHDR), sizeof(PRTNETARPIPV4), pArpIPv4);
+ }
+ }
+}
+
+
+/**
+ * Detects and edits an DHCP packet arriving from the internal net.
+ *
+ * @param pNetwork The network the frame is being sent to.
+ * @param pSG Pointer to the gather list for the frame.
+ * The flags and data content may be updated.
+ * @param pEthHdr Pointer to the ethernet header. This may also be
+ * updated if it's a unicast...
+ */
+static void intnetR0NetworkEditDhcpFromIntNet(PINTNETNETWORK pNetwork, PINTNETSG pSG, PRTNETETHERHDR pEthHdr)
+{
+ NOREF(pEthHdr);
+
+ /*
+ * Check the minimum size and get a linear copy of the thing to work on,
+ * using the temporary buffer if necessary.
+ */
+ if (RT_UNLIKELY(pSG->cbTotal < sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN))
+ return;
+ /*
+ * Get a pointer to a linear copy of the full packet, using the
+ * temporary buffer if necessary.
+ */
+ PCRTNETIPV4 pIpHdr = (PCRTNETIPV4)((PCRTNETETHERHDR)pSG->aSegs[0].pv + 1);
+ uint32_t cbPacket = pSG->cbTotal - sizeof(RTNETETHERHDR);
+ if (pSG->cSegsUsed > 1)
+ {
+ cbPacket = RT_MIN(cbPacket, INTNETNETWORK_TMP_SIZE);
+ Log6(("intnetR0NetworkEditDhcpFromIntNet: Copying IPv4/UDP/DHCP pkt %u\n", cbPacket));
+ if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), cbPacket, pNetwork->pbTmp))
+ return;
+ //pSG->fFlags |= INTNETSG_FLAGS_PKT_CP_IN_TMP;
+ pIpHdr = (PCRTNETIPV4)pNetwork->pbTmp;
+ }
+
+ /*
+ * Validate the IP header and find the UDP packet.
+ */
+ if (!RTNetIPv4IsHdrValid(pIpHdr, cbPacket, pSG->cbTotal - sizeof(RTNETETHERHDR), true /*fCheckSum*/))
+ {
+ Log6(("intnetR0NetworkEditDhcpFromIntNet: bad ip header\n"));
+ return;
+ }
+ size_t cbIpHdr = pIpHdr->ip_hl * 4;
+ if ( pIpHdr->ip_p != RTNETIPV4_PROT_UDP /* DHCP is UDP. */
+ || cbPacket < cbIpHdr + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN) /* Min DHCP packet len */
+ return;
+
+ size_t cbUdpPkt = cbPacket - cbIpHdr;
+ PCRTNETUDP pUdpHdr = (PCRTNETUDP)((uintptr_t)pIpHdr + cbIpHdr);
+ /* We are only interested in DHCP packets coming from client to server. */
+ if ( RT_BE2H_U16(pUdpHdr->uh_dport) != RTNETIPV4_PORT_BOOTPS
+ || RT_BE2H_U16(pUdpHdr->uh_sport) != RTNETIPV4_PORT_BOOTPC)
+ return;
+
+ /*
+ * Check if the DHCP message is valid and get the type.
+ */
+ if (!RTNetIPv4IsUDPValid(pIpHdr, pUdpHdr, pUdpHdr + 1, cbUdpPkt, true /*fCheckSum*/))
+ {
+ Log6(("intnetR0NetworkEditDhcpFromIntNet: Bad UDP packet\n"));
+ return;
+ }
+ PCRTNETBOOTP pDhcp = (PCRTNETBOOTP)(pUdpHdr + 1);
+ uint8_t bMsgType;
+ if (!RTNetIPv4IsDHCPValid(pUdpHdr, pDhcp, cbUdpPkt - sizeof(*pUdpHdr), &bMsgType))
+ {
+ Log6(("intnetR0NetworkEditDhcpFromIntNet: Bad DHCP packet\n"));
+ return;
+ }
+
+ switch (bMsgType)
+ {
+ case RTNET_DHCP_MT_DISCOVER:
+ case RTNET_DHCP_MT_REQUEST:
+ /*
+ * Must set the broadcast flag or we won't catch the respons.
+ */
+ if (!(pDhcp->bp_flags & RT_H2BE_U16_C(RTNET_DHCP_FLAG_BROADCAST)))
+ {
+ Log6(("intnetR0NetworkEditDhcpFromIntNet: Setting broadcast flag in DHCP %#x, previously %x\n",
+ bMsgType, pDhcp->bp_flags));
+
+ /* Patch flags */
+ uint16_t uFlags = pDhcp->bp_flags | RT_H2BE_U16_C(RTNET_DHCP_FLAG_BROADCAST);
+ intnetR0SgWritePart(pSG, (uintptr_t)&pDhcp->bp_flags - (uintptr_t)pIpHdr + sizeof(RTNETETHERHDR), sizeof(uFlags), &uFlags);
+
+ /* Patch UDP checksum */
+ if (pUdpHdr->uh_sum != 0)
+ {
+ uint32_t uChecksum = (uint32_t)~pUdpHdr->uh_sum + RT_H2BE_U16_C(RTNET_DHCP_FLAG_BROADCAST);
+ while (uChecksum >> 16)
+ uChecksum = (uChecksum >> 16) + (uChecksum & 0xFFFF);
+ uChecksum = ~uChecksum;
+ intnetR0SgWritePart(pSG,
+ (uintptr_t)&pUdpHdr->uh_sum - (uintptr_t)pIpHdr + sizeof(RTNETETHERHDR),
+ sizeof(pUdpHdr->uh_sum),
+ &uChecksum);
+ }
+ }
+
+#ifdef RT_OS_DARWIN
+ /*
+ * Work around little endian checksum issue in mac os x 10.7.0 GM.
+ */
+ if ( pIpHdr->ip_tos
+ && (pNetwork->fFlags & INTNET_OPEN_FLAGS_WORKAROUND_1))
+ {
+ /* Patch it. */
+ uint8_t uTos = pIpHdr->ip_tos;
+ uint8_t uZero = 0;
+ intnetR0SgWritePart(pSG, sizeof(RTNETETHERHDR) + 1, sizeof(uZero), &uZero);
+
+ /* Patch the IP header checksum. */
+ uint32_t uChecksum = (uint32_t)~pIpHdr->ip_sum - (uTos << 8);
+ while (uChecksum >> 16)
+ uChecksum = (uChecksum >> 16) + (uChecksum & 0xFFFF);
+ uChecksum = ~uChecksum;
+
+ Log(("intnetR0NetworkEditDhcpFromIntNet: cleared ip_tos (was %#04x); ip_sum=%#06x -> %#06x\n",
+ uTos, RT_BE2H_U16(pIpHdr->ip_sum), RT_BE2H_U16(uChecksum) ));
+ intnetR0SgWritePart(pSG, sizeof(RTNETETHERHDR) + RT_UOFFSETOF(RTNETIPV4, ip_sum),
+ sizeof(pIpHdr->ip_sum), &uChecksum);
+ }
+#endif
+ break;
+ }
+}
+
+
+/**
+ * Checks if the callers context is okay for sending to the specified
+ * destinations.
+ *
+ * @returns true if it's okay, false if it isn't.
+ * @param pNetwork The network.
+ * @param pIfSender The interface sending or NULL if it's the trunk.
+ * @param pDstTab The destination table.
+ */
+DECLINLINE(bool) intnetR0NetworkIsContextOk(PINTNETNETWORK pNetwork, PINTNETIF pIfSender, PCINTNETDSTTAB pDstTab)
+{
+ NOREF(pNetwork);
+
+ /* Sending to the trunk is the problematic path. If the trunk is the
+ sender we won't be sending to it, so no problem..
+ Note! fTrunkDst may be set event if if the trunk is the sender. */
+ if (!pIfSender)
+ return true;
+
+ uint32_t const fTrunkDst = pDstTab->fTrunkDst;
+ if (!fTrunkDst)
+ return true;
+
+ /* ASSUMES: that the trunk won't change its report while we're checking. */
+ PINTNETTRUNKIF pTrunk = pDstTab->pTrunk;
+ if (pTrunk && (fTrunkDst & pTrunk->fNoPreemptDsts) == fTrunkDst)
+ return true;
+
+ /* ASSUMES: That a preemption test detects HM contexts. (Will work on
+ non-preemptive systems as well.) */
+ if (RTThreadPreemptIsEnabled(NIL_RTTHREAD))
+ return true;
+ return false;
+}
+
+
+/**
+ * Checks if the callers context is okay for doing a broadcast given the
+ * specified source.
+ *
+ * @returns true if it's okay, false if it isn't.
+ * @param pNetwork The network.
+ * @param fSrc The source of the packet. (0 (intnet),
+ * INTNETTRUNKDIR_HOST or INTNETTRUNKDIR_WIRE).
+ */
+DECLINLINE(bool) intnetR0NetworkIsContextOkForBroadcast(PINTNETNETWORK pNetwork, uint32_t fSrc)
+{
+ /* Sending to the trunk is the problematic path. If the trunk is the
+ sender we won't be sending to it, so no problem. */
+ if (fSrc)
+ return true;
+
+ /* ASSUMES: That a preemption test detects HM contexts. (Will work on
+ non-preemptive systems as well.) */
+ if (RTThreadPreemptIsEnabled(NIL_RTTHREAD))
+ return true;
+
+ /* PARANOIA: Grab the spinlock to make sure the trunk structure cannot be
+ freed while we're touching it. */
+ RTSpinlockAcquire(pNetwork->hAddrSpinlock);
+ PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;
+
+ bool fRc = !pTrunk
+ || pTrunk->fNoPreemptDsts == (INTNETTRUNKDIR_HOST | INTNETTRUNKDIR_WIRE)
+ || ( (!pNetwork->MacTab.fHostActive || (pTrunk->fNoPreemptDsts & INTNETTRUNKDIR_HOST) )
+ && (!pNetwork->MacTab.fWireActive || (pTrunk->fNoPreemptDsts & INTNETTRUNKDIR_WIRE) ) );
+
+ RTSpinlockRelease(pNetwork->hAddrSpinlock);
+
+ return fRc;
+}
+
+
+/**
+ * Check context, edit, snoop and switch a broadcast frame when sharing MAC
+ * address on the wire.
+ *
+ * The caller must hold at least one interface on the network busy to prevent it
+ * from destructing beath us.
+ *
+ * @param pNetwork The network the frame is being sent to.
+ * @param fSrc The source of the packet. (0 (intnet),
+ * INTNETTRUNKDIR_HOST or INTNETTRUNKDIR_WIRE).
+ * @param pIfSender The sender interface, NULL if trunk. Used to
+ * prevent sending an echo to the sender.
+ * @param pSG Pointer to the gather list.
+ * @param pEthHdr Pointer to the ethernet header.
+ * @param pDstTab The destination output table.
+ */
+static INTNETSWDECISION intnetR0NetworkSharedMacFixAndSwitchBroadcast(PINTNETNETWORK pNetwork,
+ uint32_t fSrc, PINTNETIF pIfSender,
+ PINTNETSG pSG, PRTNETETHERHDR pEthHdr,
+ PINTNETDSTTAB pDstTab)
+{
+ /*
+ * Before doing any work here, we need to figure out if we can handle it
+ * in the current context. The restrictions are solely on the trunk.
+ *
+ * Note! Since at least one interface is busy, there won't be any changes
+ * to the parameters here (unless the trunk changes its capability
+ * report, which it shouldn't).
+ */
+ if (!intnetR0NetworkIsContextOkForBroadcast(pNetwork, fSrc))
+ return INTNETSWDECISION_BAD_CONTEXT;
+
+ /*
+ * Check for ICMPv6 Neighbor Advertisements coming from the trunk.
+ * If we see an advertisement for an IP in our cache, we can safely remove
+ * it as the IP has probably moved.
+ */
+ if ( (fSrc & INTNETTRUNKDIR_WIRE)
+ && RT_BE2H_U16(pEthHdr->EtherType) == RTNET_ETHERTYPE_IPV6
+ && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
+ intnetR0NetworkSnoopNAFromWire(pNetwork, pSG, pEthHdr);
+
+
+ /*
+ * Check for ARP packets from the wire since we'll have to make
+ * modification to them if we're sharing the MAC address with the host.
+ */
+ if ( (fSrc & INTNETTRUNKDIR_WIRE)
+ && RT_BE2H_U16(pEthHdr->EtherType) == RTNET_ETHERTYPE_ARP
+ && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
+ intnetR0NetworkEditArpFromWire(pNetwork, pSG, pEthHdr);
+
+ /*
+ * Check for DHCP packets from the internal net since we'll have to set
+ * broadcast flag in DHCP requests if we're sharing the MAC address with
+ * the host. GSO is not applicable to DHCP traffic.
+ */
+ if ( !fSrc
+ && RT_BE2H_U16(pEthHdr->EtherType) == RTNET_ETHERTYPE_IPV4
+ && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
+ intnetR0NetworkEditDhcpFromIntNet(pNetwork, pSG, pEthHdr);
+
+ /*
+ * Snoop address info from packet originating from the trunk connection.
+ */
+ if (fSrc)
+ {
+#ifdef INTNET_WITH_DHCP_SNOOPING
+ uint16_t EtherType = RT_BE2H_U16(pEthHdr->EtherType);
+ if ( ( EtherType == RTNET_ETHERTYPE_IPV4 /* for DHCP */
+ && pSG->cbTotal >= sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN
+ && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID )
+ || (pSG->fFlags & INTNETSG_FLAGS_ARP_IPV4) )
+ intnetR0TrunkIfSnoopAddr(pNetwork, pSG, EtherType);
+#else
+ if (pSG->fFlags & INTNETSG_FLAGS_ARP_IPV4)
+ intnetR0TrunkIfSnoopArp(pNetwork, pSG);
+#endif
+ }
+
+ /*
+ * Create the broadcast destination table.
+ */
+ return intnetR0NetworkSwitchBroadcast(pNetwork, fSrc, pIfSender, pDstTab);
+}
+
+
+/**
+ * Check context, snoop and switch a unicast frame using the network layer
+ * address of the link layer one (when sharing MAC address on the wire).
+ *
+ * This function is only used for frames coming from the wire (trunk).
+ *
+ * @returns true if it's addressed to someone on the network, otherwise false.
+ * @param pNetwork The network the frame is being sent to.
+ * @param pSG Pointer to the gather list.
+ * @param pEthHdr Pointer to the ethernet header.
+ * @param pDstTab The destination output table.
+ */
+static INTNETSWDECISION intnetR0NetworkSharedMacFixAndSwitchUnicast(PINTNETNETWORK pNetwork, PINTNETSG pSG,
+ PRTNETETHERHDR pEthHdr, PINTNETDSTTAB pDstTab)
+{
+ /*
+ * Extract the network address from the packet.
+ */
+ RTNETADDRU Addr;
+ INTNETADDRTYPE enmAddrType;
+ uint8_t cbAddr;
+ switch (RT_BE2H_U16(pEthHdr->EtherType))
+ {
+ case RTNET_ETHERTYPE_IPV4:
+ if (RT_UNLIKELY(!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_UOFFSETOF(RTNETIPV4, ip_dst), sizeof(Addr.IPv4), &Addr)))
+ {
+ Log(("intnetshareduni: failed to read ip_dst! cbTotal=%#x\n", pSG->cbTotal));
+ return intnetR0NetworkSwitchTrunk(pNetwork, INTNETTRUNKDIR_WIRE, pDstTab);
+ }
+ enmAddrType = kIntNetAddrType_IPv4;
+ cbAddr = sizeof(Addr.IPv4);
+ Log6(("intnetshareduni: IPv4 %d.%d.%d.%d\n", Addr.au8[0], Addr.au8[1], Addr.au8[2], Addr.au8[3]));
+ break;
+
+ case RTNET_ETHERTYPE_IPV6:
+ if (RT_UNLIKELY(!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_UOFFSETOF(RTNETIPV6, ip6_dst), sizeof(Addr.IPv6), &Addr)))
+ {
+ Log(("intnetshareduni: failed to read ip6_dst! cbTotal=%#x\n", pSG->cbTotal));
+ return intnetR0NetworkSwitchTrunk(pNetwork, INTNETTRUNKDIR_WIRE, pDstTab);
+ }
+ enmAddrType = kIntNetAddrType_IPv6;
+ cbAddr = sizeof(Addr.IPv6);
+ break;
+#if 0 /** @todo IntNet: implement IPX for wireless MAC sharing? */
+ case RTNET_ETHERTYPE_IPX_1:
+ case RTNET_ETHERTYPE_IPX_2:
+ case RTNET_ETHERTYPE_IPX_3:
+ if (RT_UNLIKELY(!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPX, ipx_dstnet), sizeof(Addr.IPX), &Addr)))
+ {
+ Log(("intnetshareduni: failed to read ipx_dstnet! cbTotal=%#x\n", pSG->cbTotal));
+ return intnetR0NetworkSwitchTrunk(pNetwork, INTNETTRUNKDIR_WIRE, pDstTab);
+ }
+ enmAddrType = kIntNetAddrType_IPX;
+ cbAddr = sizeof(Addr.IPX);
+ break;
+#endif
+
+ /*
+ * Treat ARP as broadcast (it shouldn't end up here normally,
+ * so it goes last in the switch).
+ */
+ case RTNET_ETHERTYPE_ARP:
+ Log6(("intnetshareduni: ARP\n"));
+ /** @todo revisit this broadcasting of unicast ARP frames! */
+ return intnetR0NetworkSharedMacFixAndSwitchBroadcast(pNetwork, INTNETTRUNKDIR_WIRE, NULL, pSG, pEthHdr, pDstTab);
+
+ /*
+ * Unknown packets are sent to the trunk and any promiscuous interfaces.
+ */
+ default:
+ {
+ Log6(("intnetshareduni: unknown ethertype=%#x\n", RT_BE2H_U16(pEthHdr->EtherType)));
+ return intnetR0NetworkSwitchTrunkAndPromisc(pNetwork, INTNETTRUNKDIR_WIRE, pDstTab);
+ }
+ }
+
+ /*
+ * Do level-3 switching.
+ */
+ INTNETSWDECISION enmSwDecision = intnetR0NetworkSwitchLevel3(pNetwork, &pEthHdr->DstMac,
+ enmAddrType, &Addr, cbAddr,
+ INTNETTRUNKDIR_WIRE, pDstTab);
+
+#ifdef INTNET_WITH_DHCP_SNOOPING
+ /*
+ * Perform DHCP snooping. GSO is not applicable to DHCP traffic
+ */
+ if ( enmAddrType == kIntNetAddrType_IPv4
+ && pSG->cbTotal >= sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN
+ && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
+ intnetR0TrunkIfSnoopAddr(pNetwork, pSG, RT_BE2H_U16(pEthHdr->EtherType));
+#endif /* INTNET_WITH_DHCP_SNOOPING */
+
+ return enmSwDecision;
+}
+
+
+/**
+ * Release all the interfaces in the destination table when we realize that
+ * we're in a context where we cannot get the job done.
+ *
+ * @param pNetwork The network.
+ * @param pDstTab The destination table.
+ */
+static void intnetR0NetworkReleaseDstTab(PINTNETNETWORK pNetwork, PINTNETDSTTAB pDstTab)
+{
+ /* The trunk interface. */
+ if (pDstTab->fTrunkDst)
+ {
+ PINTNETTRUNKIF pTrunk = pDstTab->pTrunk;
+ if (pTrunk)
+ intnetR0BusyDec(pNetwork, &pTrunk->cBusy);
+ pDstTab->pTrunk = NULL;
+ pDstTab->fTrunkDst = 0;
+ }
+
+ /* Regular interfaces. */
+ uint32_t iIf = pDstTab->cIfs;
+ while (iIf-- > 0)
+ {
+ PINTNETIF pIf = pDstTab->aIfs[iIf].pIf;
+ intnetR0BusyDecIf(pIf);
+ pDstTab->aIfs[iIf].pIf = NULL;
+ }
+ pDstTab->cIfs = 0;
+}
+
+
+/**
+ * Deliver the frame to the interfaces specified in the destination table.
+ *
+ * @param pNetwork The network.
+ * @param pDstTab The destination table.
+ * @param pSG The frame to send.
+ * @param pIfSender The sender interface. NULL if it originated via
+ * the trunk.
+ */
+static void intnetR0NetworkDeliver(PINTNETNETWORK pNetwork, PINTNETDSTTAB pDstTab, PINTNETSG pSG, PINTNETIF pIfSender)
+{
+ /*
+ * Do the interfaces first before sending it to the wire and risk having to
+ * modify it.
+ */
+ uint32_t iIf = pDstTab->cIfs;
+ while (iIf-- > 0)
+ {
+ PINTNETIF pIf = pDstTab->aIfs[iIf].pIf;
+ intnetR0IfSend(pIf, pIfSender, pSG,
+ pDstTab->aIfs[iIf].fReplaceDstMac ? &pIf->MacAddr: NULL);
+ intnetR0BusyDecIf(pIf);
+ pDstTab->aIfs[iIf].pIf = NULL;
+ }
+ pDstTab->cIfs = 0;
+
+ /*
+ * Send to the trunk.
+ *
+ * Note! The switching functions will include the trunk even when the frame
+ * source is the trunk. This is because we need it to figure out
+ * whether the other half of the trunk should see the frame or not
+ * and let the caller know.
+ *
+ * So, we'll ignore trunk sends here if the frame origin is
+ * INTNETTRUNKSWPORT::pfnRecv.
+ */
+ if (pDstTab->fTrunkDst)
+ {
+ PINTNETTRUNKIF pTrunk = pDstTab->pTrunk;
+ if (pTrunk)
+ {
+ if (pIfSender)
+ intnetR0TrunkIfSend(pTrunk, pNetwork, pIfSender, pDstTab->fTrunkDst, pSG);
+ intnetR0BusyDec(pNetwork, &pTrunk->cBusy);
+ }
+ pDstTab->pTrunk = NULL;
+ pDstTab->fTrunkDst = 0;
+ }
+}
+
+
+/**
+ * Sends a frame.
+ *
+ * This function will distribute the frame to the interfaces it is addressed to.
+ * It will also update the MAC address of the sender.
+ *
+ * The caller must own the network mutex.
+ *
+ * @returns The switching decision.
+ * @param pNetwork The network the frame is being sent to.
+ * @param pIfSender The interface sending the frame. This is NULL if it's the trunk.
+ * @param fSrc The source flags. This 0 if it's not from the trunk.
+ * @param pSG Pointer to the gather list.
+ * @param pDstTab The destination table to use.
+ */
+static INTNETSWDECISION intnetR0NetworkSend(PINTNETNETWORK pNetwork, PINTNETIF pIfSender, uint32_t fSrc,
+ PINTNETSG pSG, PINTNETDSTTAB pDstTab)
+{
+ /*
+ * Assert reality.
+ */
+ AssertPtr(pNetwork);
+ AssertPtrNull(pIfSender);
+ Assert(pIfSender ? fSrc == 0 : fSrc != 0);
+ Assert(!pIfSender || pNetwork == pIfSender->pNetwork);
+ AssertPtr(pSG);
+ Assert(pSG->cSegsUsed >= 1);
+ Assert(pSG->cSegsUsed <= pSG->cSegsAlloc);
+ if (pSG->cbTotal < sizeof(RTNETETHERHDR))
+ return INTNETSWDECISION_INVALID;
+
+ /*
+ * Get the ethernet header (might theoretically involve multiple segments).
+ */
+ RTNETETHERHDR EthHdr;
+ if (pSG->aSegs[0].cb >= sizeof(EthHdr))
+ EthHdr = *(PCRTNETETHERHDR)pSG->aSegs[0].pv;
+ else if (!intnetR0SgReadPart(pSG, 0, sizeof(EthHdr), &EthHdr))
+ return INTNETSWDECISION_INVALID;
+ if ( (EthHdr.DstMac.au8[0] == 0x08 && EthHdr.DstMac.au8[1] == 0x00 && EthHdr.DstMac.au8[2] == 0x27)
+ || (EthHdr.SrcMac.au8[0] == 0x08 && EthHdr.SrcMac.au8[1] == 0x00 && EthHdr.SrcMac.au8[2] == 0x27)
+ || (EthHdr.DstMac.au8[0] == 0x00 && EthHdr.DstMac.au8[1] == 0x16 && EthHdr.DstMac.au8[2] == 0xcb)
+ || (EthHdr.SrcMac.au8[0] == 0x00 && EthHdr.SrcMac.au8[1] == 0x16 && EthHdr.SrcMac.au8[2] == 0xcb)
+ || EthHdr.DstMac.au8[0] == 0xff
+ || EthHdr.SrcMac.au8[0] == 0xff)
+ Log2(("D=%.6Rhxs S=%.6Rhxs T=%04x f=%x z=%x\n",
+ &EthHdr.DstMac, &EthHdr.SrcMac, RT_BE2H_U16(EthHdr.EtherType), fSrc, pSG->cbTotal));
+
+ /*
+ * Learn the MAC address of the sender. No re-learning as the interface
+ * user will normally tell us the right MAC address.
+ *
+ * Note! We don't notify the trunk about these mainly because of the
+ * problematic contexts we might be called in.
+ */
+ if (RT_UNLIKELY( pIfSender
+ && !pIfSender->fMacSet
+ && memcmp(&EthHdr.SrcMac, &pIfSender->MacAddr, sizeof(pIfSender->MacAddr))
+ && !intnetR0IsMacAddrMulticast(&EthHdr.SrcMac)
+ ))
+ {
+ Log2(("IF MAC: %.6Rhxs -> %.6Rhxs\n", &pIfSender->MacAddr, &EthHdr.SrcMac));
+ RTSpinlockAcquire(pNetwork->hAddrSpinlock);
+
+ PINTNETMACTABENTRY pIfEntry = intnetR0NetworkFindMacAddrEntry(pNetwork, pIfSender);
+ if (pIfEntry)
+ pIfEntry->MacAddr = EthHdr.SrcMac;
+ pIfSender->MacAddr = EthHdr.SrcMac;
+
+ RTSpinlockRelease(pNetwork->hAddrSpinlock);
+ }
+
+ /*
+ * Deal with MAC address sharing as that may required editing of the
+ * packets before we dispatch them anywhere.
+ */
+ INTNETSWDECISION enmSwDecision;
+ if (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
+ {
+ if (intnetR0IsMacAddrMulticast(&EthHdr.DstMac))
+ enmSwDecision = intnetR0NetworkSharedMacFixAndSwitchBroadcast(pNetwork, fSrc, pIfSender, pSG, &EthHdr, pDstTab);
+ else if (fSrc & INTNETTRUNKDIR_WIRE)
+ {
+ if (intnetR0NetworkSharedMacDetectAndFixBroadcast(pNetwork, pSG, &EthHdr))
+ enmSwDecision = intnetR0NetworkSharedMacFixAndSwitchBroadcast(pNetwork, fSrc, pIfSender, pSG, &EthHdr, pDstTab);
+ else
+ enmSwDecision = intnetR0NetworkSharedMacFixAndSwitchUnicast(pNetwork, pSG, &EthHdr, pDstTab);
+ }
+ else
+ enmSwDecision = intnetR0NetworkSwitchUnicast(pNetwork, fSrc, pIfSender, &EthHdr.DstMac, pDstTab);
+ }
+ else if (intnetR0IsMacAddrMulticast(&EthHdr.DstMac))
+ enmSwDecision = intnetR0NetworkSwitchBroadcast(pNetwork, fSrc, pIfSender, pDstTab);
+ else
+ enmSwDecision = intnetR0NetworkSwitchUnicast(pNetwork, fSrc, pIfSender, &EthHdr.DstMac, pDstTab);
+
+ /*
+ * Deliver to the destinations if we can.
+ */
+ if (enmSwDecision != INTNETSWDECISION_BAD_CONTEXT)
+ {
+ if (intnetR0NetworkIsContextOk(pNetwork, pIfSender, pDstTab))
+ intnetR0NetworkDeliver(pNetwork, pDstTab, pSG, pIfSender);
+ else
+ {
+ intnetR0NetworkReleaseDstTab(pNetwork, pDstTab);
+ enmSwDecision = INTNETSWDECISION_BAD_CONTEXT;
+ }
+ }
+
+ return enmSwDecision;
+}
+
+
+/**
+ * Sends one or more frames.
+ *
+ * The function will first the frame which is passed as the optional arguments
+ * pvFrame and cbFrame. These are optional since it also possible to chain
+ * together one or more frames in the send buffer which the function will
+ * process after considering it's arguments.
+ *
+ * The caller is responsible for making sure that there are no concurrent calls
+ * to this method (with the same handle).
+ *
+ * @returns VBox status code.
+ * @param hIf The interface handle.
+ * @param pSession The caller's session.
+ */
+INTNETR0DECL(int) IntNetR0IfSend(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession)
+{
+ Log5(("IntNetR0IfSend: hIf=%RX32\n", hIf));
+
+ /*
+ * Validate input and translate the handle.
+ */
+ PINTNET pIntNet = g_pIntNet;
+ AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
+ AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
+
+ PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
+ if (!pIf)
+ return VERR_INVALID_HANDLE;
+ STAM_REL_PROFILE_START(&pIf->pIntBuf->StatSend1, a);
+
+ /*
+ * Make sure we've got a network.
+ */
+ int rc = VINF_SUCCESS;
+ intnetR0BusyIncIf(pIf);
+ PINTNETNETWORK pNetwork = pIf->pNetwork;
+ if (RT_LIKELY(pNetwork))
+ {
+ /*
+ * Grab the destination table.
+ */
+ PINTNETDSTTAB pDstTab = ASMAtomicXchgPtrT(&pIf->pDstTab, NULL, PINTNETDSTTAB);
+ if (RT_LIKELY(pDstTab))
+ {
+ /*
+ * Process the send buffer.
+ */
+ INTNETSWDECISION enmSwDecision = INTNETSWDECISION_BROADCAST;
+ INTNETSG Sg; /** @todo this will have to be changed if we're going to use async sending
+ * with buffer sharing for some OS or service. Darwin copies everything so
+ * I won't bother allocating and managing SGs right now. Sorry. */
+ PINTNETHDR pHdr;
+ while ((pHdr = IntNetRingGetNextFrameToRead(&pIf->pIntBuf->Send)) != NULL)
+ {
+ uint8_t const u8Type = pHdr->u8Type;
+ if (u8Type == INTNETHDR_TYPE_FRAME)
+ {
+ /* Send regular frame. */
+ void *pvCurFrame = IntNetHdrGetFramePtr(pHdr, pIf->pIntBuf);
+ IntNetSgInitTemp(&Sg, pvCurFrame, pHdr->cbFrame);
+ if (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
+ intnetR0IfSnoopAddr(pIf, (uint8_t *)pvCurFrame, pHdr->cbFrame, false /*fGso*/, (uint16_t *)&Sg.fFlags);
+ enmSwDecision = intnetR0NetworkSend(pNetwork, pIf, 0 /*fSrc*/, &Sg, pDstTab);
+ }
+ else if (u8Type == INTNETHDR_TYPE_GSO)
+ {
+ /* Send GSO frame if sane. */
+ PPDMNETWORKGSO pGso = IntNetHdrGetGsoContext(pHdr, pIf->pIntBuf);
+ uint32_t cbFrame = pHdr->cbFrame - sizeof(*pGso);
+ if (RT_LIKELY(PDMNetGsoIsValid(pGso, pHdr->cbFrame, cbFrame)))
+ {
+ void *pvCurFrame = pGso + 1;
+ IntNetSgInitTempGso(&Sg, pvCurFrame, cbFrame, pGso);
+ if (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
+ intnetR0IfSnoopAddr(pIf, (uint8_t *)pvCurFrame, cbFrame, true /*fGso*/, (uint16_t *)&Sg.fFlags);
+ enmSwDecision = intnetR0NetworkSend(pNetwork, pIf, 0 /*fSrc*/, &Sg, pDstTab);
+ }
+ else
+ {
+ STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatBadFrames); /* ignore */
+ enmSwDecision = INTNETSWDECISION_DROP;
+ }
+ }
+ /* Unless it's a padding frame, we're getting babble from the producer. */
+ else
+ {
+ if (u8Type != INTNETHDR_TYPE_PADDING)
+ STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatBadFrames); /* ignore */
+ enmSwDecision = INTNETSWDECISION_DROP;
+ }
+ if (enmSwDecision == INTNETSWDECISION_BAD_CONTEXT)
+ {
+ rc = VERR_TRY_AGAIN;
+ break;
+ }
+
+ /* Skip to the next frame. */
+ IntNetRingSkipFrame(&pIf->pIntBuf->Send);
+ }
+
+ /*
+ * Put back the destination table.
+ */
+ Assert(!pIf->pDstTab);
+ ASMAtomicWritePtr(&pIf->pDstTab, pDstTab);
+ }
+ else
+ rc = VERR_INTERNAL_ERROR_4;
+ }
+ else
+ rc = VERR_INTERNAL_ERROR_3;
+
+ /*
+ * Release the interface.
+ */
+ intnetR0BusyDecIf(pIf);
+ STAM_REL_PROFILE_STOP(&pIf->pIntBuf->StatSend1, a);
+ intnetR0IfRelease(pIf, pSession);
+ return rc;
+}
+
+
+/**
+ * VMMR0 request wrapper for IntNetR0IfSend.
+ *
+ * @returns see IntNetR0IfSend.
+ * @param pSession The caller's session.
+ * @param pReq The request packet.
+ */
+INTNETR0DECL(int) IntNetR0IfSendReq(PSUPDRVSESSION pSession, PINTNETIFSENDREQ pReq)
+{
+ if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
+ return VERR_INVALID_PARAMETER;
+ return IntNetR0IfSend(pReq->hIf, pSession);
+}
+
+
+/**
+ * Maps the default buffer into ring 3.
+ *
+ * @returns VBox status code.
+ * @param hIf The interface handle.
+ * @param pSession The caller's session.
+ * @param ppRing3Buf Where to store the address of the ring-3 mapping
+ * (optional).
+ * @param ppRing0Buf Where to store the address of the ring-0 mapping
+ * (optional).
+ */
+INTNETR0DECL(int) IntNetR0IfGetBufferPtrs(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession,
+ R3PTRTYPE(PINTNETBUF) *ppRing3Buf, R0PTRTYPE(PINTNETBUF) *ppRing0Buf)
+{
+ LogFlow(("IntNetR0IfGetBufferPtrs: hIf=%RX32 ppRing3Buf=%p ppRing0Buf=%p\n", hIf, ppRing3Buf, ppRing0Buf));
+
+ /*
+ * Validate input.
+ */
+ PINTNET pIntNet = g_pIntNet;
+ AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
+ AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
+
+ AssertPtrNullReturn(ppRing3Buf, VERR_INVALID_PARAMETER);
+ AssertPtrNullReturn(ppRing0Buf, VERR_INVALID_PARAMETER);
+ if (ppRing3Buf)
+ *ppRing3Buf = 0;
+ if (ppRing0Buf)
+ *ppRing0Buf = 0;
+
+ PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
+ if (!pIf)
+ return VERR_INVALID_HANDLE;
+
+ /*
+ * ASSUMES that only the process that created an interface can use it.
+ * ASSUMES that we created the ring-3 mapping when selecting or
+ * allocating the buffer.
+ */
+ int rc = RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
+ if (RT_SUCCESS(rc))
+ {
+ if (ppRing3Buf)
+ *ppRing3Buf = pIf->pIntBufR3;
+ if (ppRing0Buf)
+ *ppRing0Buf = (R0PTRTYPE(PINTNETBUF))pIf->pIntBuf; /* tstIntNetR0 mess */
+
+ rc = RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
+ }
+
+ intnetR0IfRelease(pIf, pSession);
+ LogFlow(("IntNetR0IfGetBufferPtrs: returns %Rrc *ppRing3Buf=%p *ppRing0Buf=%p\n",
+ rc, ppRing3Buf ? *ppRing3Buf : NIL_RTR3PTR, ppRing0Buf ? *ppRing0Buf : NIL_RTR0PTR));
+ return rc;
+}
+
+
+/**
+ * VMMR0 request wrapper for IntNetR0IfGetBufferPtrs.
+ *
+ * @returns see IntNetR0IfGetRing3Buffer.
+ * @param pSession The caller's session.
+ * @param pReq The request packet.
+ */
+INTNETR0DECL(int) IntNetR0IfGetBufferPtrsReq(PSUPDRVSESSION pSession, PINTNETIFGETBUFFERPTRSREQ pReq)
+{
+ if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
+ return VERR_INVALID_PARAMETER;
+ return IntNetR0IfGetBufferPtrs(pReq->hIf, pSession, &pReq->pRing3Buf, &pReq->pRing0Buf);
+}
+
+
+#if 0
+/**
+ * Gets the physical addresses of the default interface buffer.
+ *
+ * @returns VBox status code.
+ * @param hIF The interface handle.
+ * @param paPages Where to store the addresses. (The reserved fields will be set to zero.)
+ * @param cPages
+ */
+INTNETR0DECL(int) IntNetR0IfGetPhysBuffer(INTNETIFHANDLE hIf, PSUPPAGE paPages, unsigned cPages)
+{
+ /*
+ * Validate input.
+ */
+ PINTNET pIntNet = g_pIntNet;
+ AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
+ AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
+
+ AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
+ AssertPtrReturn((uint8_t *)&paPages[cPages] - 1, VERR_INVALID_PARAMETER);
+ PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
+ if (!pIf)
+ return VERR_INVALID_HANDLE;
+
+ /*
+ * Grab the lock and get the data.
+ * ASSUMES that the handle isn't closed while we're here.
+ */
+ int rc = RTSemFastMutexRequest(pIf->pNetwork->FastMutex);
+ if (RT_SUCCESS(rc))
+ {
+ /** @todo make a SUPR0 api for obtaining the array. SUPR0/IPRT is keeping track of everything, there
+ * is no need for any extra bookkeeping here.. */
+
+ rc = RTSemFastMutexRelease(pIf->pNetwork->FastMutex);
+ }
+ intnetR0IfRelease(pIf, pSession);
+ return VERR_NOT_IMPLEMENTED;
+}
+#endif
+
+
+/**
+ * Sets the promiscuous mode property of an interface.
+ *
+ * @returns VBox status code.
+ * @param hIf The interface handle.
+ * @param pSession The caller's session.
+ * @param fPromiscuous Set if the interface should be in promiscuous mode, clear if not.
+ */
+INTNETR0DECL(int) IntNetR0IfSetPromiscuousMode(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, bool fPromiscuous)
+{
+ LogFlow(("IntNetR0IfSetPromiscuousMode: hIf=%RX32 fPromiscuous=%d\n", hIf, fPromiscuous));
+
+ /*
+ * Validate & translate input.
+ */
+ PINTNET pIntNet = g_pIntNet;
+ AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
+ AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
+
+ PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
+ if (!pIf)
+ {
+ Log(("IntNetR0IfSetPromiscuousMode: returns VERR_INVALID_HANDLE\n"));
+ return VERR_INVALID_HANDLE;
+ }
+
+ /*
+ * Get the network, take the address spinlock, and make the change.
+ * Paranoia^2: Mark ourselves busy to prevent anything from being destroyed.
+ */
+ int rc = VINF_SUCCESS;
+ intnetR0BusyIncIf(pIf);
+ PINTNETNETWORK pNetwork = pIf->pNetwork;
+ if (pNetwork)
+ {
+ RTSpinlockAcquire(pNetwork->hAddrSpinlock);
+
+ if (pIf->fPromiscuousReal != fPromiscuous)
+ {
+ const bool fPromiscuousEff = fPromiscuous
+ && (pIf->fOpenFlags & INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW)
+ && (pNetwork->fFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS);
+ Log(("IntNetR0IfSetPromiscuousMode: hIf=%RX32: Changed from %d -> %d (%d)\n",
+ hIf, !fPromiscuous, !!fPromiscuous, fPromiscuousEff));
+
+ pIf->fPromiscuousReal = fPromiscuous;
+
+ PINTNETMACTABENTRY pEntry = intnetR0NetworkFindMacAddrEntry(pNetwork, pIf); Assert(pEntry);
+ if (RT_LIKELY(pEntry))
+ {
+ if (pEntry->fPromiscuousEff)
+ {
+ pNetwork->MacTab.cPromiscuousEntries--;
+ if (!pEntry->fPromiscuousSeeTrunk)
+ pNetwork->MacTab.cPromiscuousNoTrunkEntries--;
+ Assert(pNetwork->MacTab.cPromiscuousEntries < pNetwork->MacTab.cEntries);
+ Assert(pNetwork->MacTab.cPromiscuousNoTrunkEntries < pNetwork->MacTab.cEntries);
+ }
+
+ pEntry->fPromiscuousEff = fPromiscuousEff;
+ pEntry->fPromiscuousSeeTrunk = fPromiscuousEff
+ && (pIf->fOpenFlags & INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK);
+
+ if (pEntry->fPromiscuousEff)
+ {
+ pNetwork->MacTab.cPromiscuousEntries++;
+ if (!pEntry->fPromiscuousSeeTrunk)
+ pNetwork->MacTab.cPromiscuousNoTrunkEntries++;
+ }
+ Assert(pNetwork->MacTab.cPromiscuousEntries <= pNetwork->MacTab.cEntries);
+ Assert(pNetwork->MacTab.cPromiscuousNoTrunkEntries <= pNetwork->MacTab.cEntries);
+ }
+ }
+
+ RTSpinlockRelease(pNetwork->hAddrSpinlock);
+ }
+ else
+ rc = VERR_WRONG_ORDER;
+
+ intnetR0BusyDecIf(pIf);
+ intnetR0IfRelease(pIf, pSession);
+ return rc;
+}
+
+
+/**
+ * VMMR0 request wrapper for IntNetR0IfSetPromiscuousMode.
+ *
+ * @returns see IntNetR0IfSetPromiscuousMode.
+ * @param pSession The caller's session.
+ * @param pReq The request packet.
+ */
+INTNETR0DECL(int) IntNetR0IfSetPromiscuousModeReq(PSUPDRVSESSION pSession, PINTNETIFSETPROMISCUOUSMODEREQ pReq)
+{
+ if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
+ return VERR_INVALID_PARAMETER;
+ return IntNetR0IfSetPromiscuousMode(pReq->hIf, pSession, pReq->fPromiscuous);
+}
+
+
+/**
+ * Sets the MAC address of an interface.
+ *
+ * @returns VBox status code.
+ * @param hIf The interface handle.
+ * @param pSession The caller's session.
+ * @param pMAC The new MAC address.
+ */
+INTNETR0DECL(int) IntNetR0IfSetMacAddress(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, PCRTMAC pMac)
+{
+ LogFlow(("IntNetR0IfSetMacAddress: hIf=%RX32 pMac=%p:{%.6Rhxs}\n", hIf, pMac, pMac));
+
+ /*
+ * Validate & translate input.
+ */
+ PINTNET pIntNet = g_pIntNet;
+ AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
+ AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
+
+ AssertPtrReturn(pMac, VERR_INVALID_PARAMETER);
+ PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
+ if (!pIf)
+ {
+ Log(("IntNetR0IfSetMacAddress: returns VERR_INVALID_HANDLE\n"));
+ return VERR_INVALID_HANDLE;
+ }
+
+ /*
+ * Get the network, take the address spinlock, and make the change.
+ * Paranoia^2: Mark ourselves busy to prevent anything from being destroyed.
+ */
+ int rc = VINF_SUCCESS;
+ intnetR0BusyIncIf(pIf);
+ PINTNETNETWORK pNetwork = pIf->pNetwork;
+ if (pNetwork)
+ {
+ PINTNETTRUNKIF pTrunk = NULL;
+
+ RTSpinlockAcquire(pNetwork->hAddrSpinlock);
+
+ if (memcmp(&pIf->MacAddr, pMac, sizeof(pIf->MacAddr)))
+ {
+ Log(("IntNetR0IfSetMacAddress: hIf=%RX32: Changed from %.6Rhxs -> %.6Rhxs\n",
+ hIf, &pIf->MacAddr, pMac));
+
+ /* Update the two copies. */
+ PINTNETMACTABENTRY pEntry = intnetR0NetworkFindMacAddrEntry(pNetwork, pIf); Assert(pEntry);
+ if (RT_LIKELY(pEntry))
+ pEntry->MacAddr = *pMac;
+ pIf->MacAddr = *pMac;
+ pIf->fMacSet = true;
+
+ /* Grab a busy reference to the trunk so we release the lock before notifying it. */
+ pTrunk = pNetwork->MacTab.pTrunk;
+ if (pTrunk)
+ intnetR0BusyIncTrunk(pTrunk);
+ }
+
+ RTSpinlockRelease(pNetwork->hAddrSpinlock);
+
+ if (pTrunk)
+ {
+ Log(("IntNetR0IfSetMacAddress: pfnNotifyMacAddress hIf=%RX32\n", hIf));
+ PINTNETTRUNKIFPORT pIfPort = pTrunk->pIfPort;
+ if (pIfPort)
+ pIfPort->pfnNotifyMacAddress(pIfPort, pIf->pvIfData, pMac);
+ intnetR0BusyDecTrunk(pTrunk);
+ }
+ }
+ else
+ rc = VERR_WRONG_ORDER;
+
+ intnetR0BusyDecIf(pIf);
+ intnetR0IfRelease(pIf, pSession);
+ return rc;
+}
+
+
+/**
+ * VMMR0 request wrapper for IntNetR0IfSetMacAddress.
+ *
+ * @returns see IntNetR0IfSetMacAddress.
+ * @param pSession The caller's session.
+ * @param pReq The request packet.
+ */
+INTNETR0DECL(int) IntNetR0IfSetMacAddressReq(PSUPDRVSESSION pSession, PINTNETIFSETMACADDRESSREQ pReq)
+{
+ if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
+ return VERR_INVALID_PARAMETER;
+ return IntNetR0IfSetMacAddress(pReq->hIf, pSession, &pReq->Mac);
+}
+
+
+/**
+ * Worker for intnetR0IfSetActive and intnetR0IfDestruct.
+ *
+ * This function will update the active interface count on the network and
+ * activate or deactivate the trunk connection if necessary.
+ *
+ * The call must own the giant lock (we cannot take it here).
+ *
+ * @returns VBox status code.
+ * @param pNetwork The network.
+ * @param fIf The interface.
+ * @param fActive What to do.
+ */
+static int intnetR0NetworkSetIfActive(PINTNETNETWORK pNetwork, PINTNETIF pIf, bool fActive)
+{
+ /* quick sanity check */
+ AssertPtr(pNetwork);
+ AssertPtr(pIf);
+
+ /*
+ * The address spinlock of the network protects the variables, while the
+ * big lock protects the calling of pfnSetState. Grab both lock at once
+ * to save us the extra hassle.
+ */
+ PINTNETTRUNKIF pTrunk = NULL;
+ RTSpinlockAcquire(pNetwork->hAddrSpinlock);
+
+ /*
+ * Do the update.
+ */
+ if (pIf->fActive != fActive)
+ {
+ PINTNETMACTABENTRY pEntry = intnetR0NetworkFindMacAddrEntry(pNetwork, pIf); Assert(pEntry);
+ if (RT_LIKELY(pEntry))
+ {
+ pEntry->fActive = fActive;
+ pIf->fActive = fActive;
+
+ if (fActive)
+ {
+ pNetwork->cActiveIFs++;
+ if (pNetwork->cActiveIFs == 1)
+ {
+ pTrunk = pNetwork->MacTab.pTrunk;
+ if (pTrunk)
+ {
+ pNetwork->MacTab.fHostActive = RT_BOOL(pNetwork->fFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED);
+ pNetwork->MacTab.fWireActive = RT_BOOL(pNetwork->fFlags & INTNET_OPEN_FLAGS_TRUNK_WIRE_ENABLED);
+ }
+ }
+ }
+ else
+ {
+ pNetwork->cActiveIFs--;
+ if (pNetwork->cActiveIFs == 0)
+ {
+ pTrunk = pNetwork->MacTab.pTrunk;
+ pNetwork->MacTab.fHostActive = false;
+ pNetwork->MacTab.fWireActive = false;
+ }
+ }
+ }
+ }
+
+ RTSpinlockRelease(pNetwork->hAddrSpinlock);
+
+ /*
+ * Tell the trunk if necessary.
+ * The wait for !busy is for the Solaris streams trunk driver (mostly).
+ */
+ if (pTrunk && pTrunk->pIfPort)
+ {
+ if (!fActive)
+ intnetR0BusyWait(pNetwork, &pTrunk->cBusy);
+
+ pTrunk->pIfPort->pfnSetState(pTrunk->pIfPort, fActive ? INTNETTRUNKIFSTATE_ACTIVE : INTNETTRUNKIFSTATE_INACTIVE);
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Sets the active property of an interface.
+ *
+ * @returns VBox status code.
+ * @param hIf The interface handle.
+ * @param pSession The caller's session.
+ * @param fActive The new state.
+ */
+INTNETR0DECL(int) IntNetR0IfSetActive(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, bool fActive)
+{
+ LogFlow(("IntNetR0IfSetActive: hIf=%RX32 fActive=%RTbool\n", hIf, fActive));
+
+ /*
+ * Validate & translate input.
+ */
+ PINTNET pIntNet = g_pIntNet;
+ AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
+ AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
+
+ PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
+ if (!pIf)
+ {
+ Log(("IntNetR0IfSetActive: returns VERR_INVALID_HANDLE\n"));
+ return VERR_INVALID_HANDLE;
+ }
+
+ /*
+ * Hand it to the network since it might involve the trunk and things are
+ * tricky there wrt to locking order.
+ *
+ * 1. We take the giant lock here. This makes sure nobody is re-enabling
+ * the network while we're pausing it and vice versa. This also enables
+ * us to wait for the network to become idle before telling the trunk.
+ * (Important on Solaris.)
+ *
+ * 2. For paranoid reasons, we grab a busy reference to the calling
+ * interface. This is totally unnecessary but should hurt (when done
+ * after grabbing the giant lock).
+ */
+ int rc = RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
+ if (RT_SUCCESS(rc))
+ {
+ intnetR0BusyIncIf(pIf);
+
+ PINTNETNETWORK pNetwork = pIf->pNetwork;
+ if (pNetwork)
+ rc = intnetR0NetworkSetIfActive(pNetwork, pIf, fActive);
+ else
+ rc = VERR_WRONG_ORDER;
+
+ intnetR0BusyDecIf(pIf);
+ RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
+ }
+
+ intnetR0IfRelease(pIf, pSession);
+ LogFlow(("IntNetR0IfSetActive: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * VMMR0 request wrapper for IntNetR0IfSetActive.
+ *
+ * @returns see IntNetR0IfSetActive.
+ * @param pIntNet The internal networking instance.
+ * @param pSession The caller's session.
+ * @param pReq The request packet.
+ */
+INTNETR0DECL(int) IntNetR0IfSetActiveReq(PSUPDRVSESSION pSession, PINTNETIFSETACTIVEREQ pReq)
+{
+ if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
+ return VERR_INVALID_PARAMETER;
+ return IntNetR0IfSetActive(pReq->hIf, pSession, pReq->fActive);
+}
+
+
+/**
+ * Wait for the interface to get signaled.
+ * The interface will be signaled when is put into the receive buffer.
+ *
+ * @returns VBox status code.
+ * @param hIf The interface handle.
+ * @param pSession The caller's session.
+ * @param cMillies Number of milliseconds to wait. RT_INDEFINITE_WAIT should be
+ * used if indefinite wait is desired.
+ */
+INTNETR0DECL(int) IntNetR0IfWait(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, uint32_t cMillies)
+{
+ Log4(("IntNetR0IfWait: hIf=%RX32 cMillies=%u\n", hIf, cMillies));
+
+ /*
+ * Get and validate essential handles.
+ */
+ PINTNET pIntNet = g_pIntNet;
+ AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
+ AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
+
+ PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
+ if (!pIf)
+ {
+ Log(("IntNetR0IfWait: returns VERR_INVALID_HANDLE\n"));
+ return VERR_INVALID_HANDLE;
+ }
+
+#if defined(VBOX_WITH_INTNET_SERVICE_IN_R3) && defined(IN_RING3)
+ AssertReleaseFailed(); /* Should never be called. */
+ RT_NOREF(cMillies);
+ return VERR_NOT_SUPPORTED;
+#else
+ const RTSEMEVENT hRecvEvent = pIf->hRecvEvent;
+ const bool fNoMoreWaits = ASMAtomicUoReadBool(&pIf->fNoMoreWaits);
+ RTNATIVETHREAD hDtorThrd;
+ ASMAtomicReadHandle(&pIf->hDestructorThread, &hDtorThrd);
+ if (hDtorThrd != NIL_RTNATIVETHREAD)
+ {
+ /* See IntNetR0IfAbortWait for an explanation of hDestructorThread. */
+ Log(("IntNetR0IfWait: returns VERR_SEM_DESTROYED\n"));
+ return VERR_SEM_DESTROYED;
+ }
+
+ /* Check whether further waits have been barred by IntNetR0IfAbortWait. */
+ int rc;
+ if ( !fNoMoreWaits
+ && hRecvEvent != NIL_RTSEMEVENT)
+ {
+ /*
+ * It is tempting to check if there is data to be read here,
+ * but the problem with such an approach is that it will cause
+ * one unnecessary supervisor->user->supervisor trip. There is
+ * already a slight risk for such, so no need to increase it.
+ */
+
+ /*
+ * Increment the number of waiters before starting the wait.
+ * Upon wakeup we must assert reality, checking that we're not
+ * already destroyed or in the process of being destroyed. This
+ * code must be aligned with the waiting code in intnetR0IfDestruct.
+ */
+ ASMAtomicIncU32(&pIf->cSleepers);
+ rc = RTSemEventWaitNoResume(hRecvEvent, cMillies);
+ if (pIf->hRecvEvent == hRecvEvent)
+ {
+ ASMAtomicDecU32(&pIf->cSleepers);
+ ASMAtomicReadHandle(&pIf->hDestructorThread, &hDtorThrd);
+ if (hDtorThrd == NIL_RTNATIVETHREAD)
+ {
+ if (intnetR0IfRelease(pIf, pSession))
+ rc = VERR_SEM_DESTROYED;
+ }
+ else
+ rc = VERR_SEM_DESTROYED;
+ }
+ else
+ rc = VERR_SEM_DESTROYED;
+ }
+ else
+ {
+ rc = VERR_SEM_DESTROYED;
+ intnetR0IfRelease(pIf, pSession);
+ }
+
+ Log4(("IntNetR0IfWait: returns %Rrc\n", rc));
+ return rc;
+#endif
+}
+
+
+/**
+ * VMMR0 request wrapper for IntNetR0IfWait.
+ *
+ * @returns see IntNetR0IfWait.
+ * @param pSession The caller's session.
+ * @param pReq The request packet.
+ */
+INTNETR0DECL(int) IntNetR0IfWaitReq(PSUPDRVSESSION pSession, PINTNETIFWAITREQ pReq)
+{
+ if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
+ return VERR_INVALID_PARAMETER;
+ return IntNetR0IfWait(pReq->hIf, pSession, pReq->cMillies);
+}
+
+
+/**
+ * Wake up any threads waiting on the interface.
+ *
+ * @returns VBox status code.
+ * @param hIf The interface handle.
+ * @param pSession The caller's session.
+ * @param fNoMoreWaits When set, no more waits are permitted.
+ */
+INTNETR0DECL(int) IntNetR0IfAbortWait(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, bool fNoMoreWaits)
+{
+ Log4(("IntNetR0IfAbortWait: hIf=%RX32 fNoMoreWaits=%RTbool\n", hIf, fNoMoreWaits));
+
+ /*
+ * Get and validate essential handles.
+ */
+ PINTNET pIntNet = g_pIntNet;
+ AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
+ AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
+
+ PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
+ if (!pIf)
+ {
+ Log(("IntNetR0IfAbortWait: returns VERR_INVALID_HANDLE\n"));
+ return VERR_INVALID_HANDLE;
+ }
+
+#if defined(VBOX_WITH_INTNET_SERVICE_IN_R3) && defined(IN_RING3)
+ AssertReleaseFailed();
+ RT_NOREF(fNoMoreWaits);
+ return VERR_NOT_SUPPORTED;
+#else
+ const RTSEMEVENT hRecvEvent = pIf->hRecvEvent;
+ RTNATIVETHREAD hDtorThrd;
+ ASMAtomicReadHandle(&pIf->hDestructorThread, &hDtorThrd);
+ if (hDtorThrd != NIL_RTNATIVETHREAD)
+ {
+ /* This can only happen if we for some reason race SUPDRVSESSION cleanup,
+ i.e. the object count is set to zero without yet having removed it from
+ the object table, so we got a spurious "reference". We must drop that
+ reference and let the destructor get on with its work. (Not entirely sure
+ if this is practically possible on any of the platforms, i.e. whether it's
+ we can actually close a SUPDrv handle/descriptor with active threads still
+ in NtDeviceIoControlFile/ioctl, but better safe than sorry.) */
+ Log(("IntNetR0IfAbortWait: returns VERR_SEM_DESTROYED\n"));
+ return VERR_SEM_DESTROYED;
+ }
+
+ /* a bit of paranoia */
+ int rc = VINF_SUCCESS;
+ if (hRecvEvent != NIL_RTSEMEVENT)
+ {
+ /*
+ * Set fNoMoreWaits if requested to do so and then wake up all the sleeping
+ * threads (usually just one). We leave the semaphore in the signalled
+ * state so the next caller will return immediately.
+ */
+ if (fNoMoreWaits)
+ ASMAtomicWriteBool(&pIf->fNoMoreWaits, true);
+
+ uint32_t cSleepers = ASMAtomicReadU32(&pIf->cSleepers) + 1;
+ while (cSleepers-- > 0)
+ {
+ int rc2 = RTSemEventSignal(pIf->hRecvEvent);
+ AssertRC(rc2);
+ }
+ }
+ else
+ rc = VERR_SEM_DESTROYED;
+
+ intnetR0IfRelease(pIf, pSession);
+
+ Log4(("IntNetR0IfWait: returns %Rrc\n", VINF_SUCCESS));
+ return VINF_SUCCESS;
+#endif
+}
+
+
+/**
+ * VMMR0 request wrapper for IntNetR0IfAbortWait.
+ *
+ * @returns see IntNetR0IfWait.
+ * @param pSession The caller's session.
+ * @param pReq The request packet.
+ */
+INTNETR0DECL(int) IntNetR0IfAbortWaitReq(PSUPDRVSESSION pSession, PINTNETIFABORTWAITREQ pReq)
+{
+ if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
+ return VERR_INVALID_PARAMETER;
+ return IntNetR0IfAbortWait(pReq->hIf, pSession, pReq->fNoMoreWaits);
+}
+
+
+/**
+ * Close an interface.
+ *
+ * @returns VBox status code.
+ * @param pIntNet The instance handle.
+ * @param hIf The interface handle.
+ * @param pSession The caller's session.
+ */
+INTNETR0DECL(int) IntNetR0IfClose(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession)
+{
+ LogFlow(("IntNetR0IfClose: hIf=%RX32\n", hIf));
+
+ /*
+ * Validate and free the handle.
+ */
+ PINTNET pIntNet = g_pIntNet;
+ AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
+ AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
+
+ PINTNETIF pIf = (PINTNETIF)RTHandleTableFreeWithCtx(pIntNet->hHtIfs, hIf, pSession);
+ if (!pIf)
+ return VERR_INVALID_HANDLE;
+
+ /* Mark the handle as freed so intnetR0IfDestruct won't free it again. */
+ ASMAtomicWriteU32(&pIf->hIf, INTNET_HANDLE_INVALID);
+
+#if !defined(VBOX_WITH_INTNET_SERVICE_IN_R3) || !defined(IN_RING3)
+ /*
+ * Signal the event semaphore to wake up any threads in IntNetR0IfWait
+ * and give them a moment to get out and release the interface.
+ */
+ uint32_t i = pIf->cSleepers;
+ while (i-- > 0)
+ {
+ RTSemEventSignal(pIf->hRecvEvent);
+ RTThreadYield();
+ }
+ RTSemEventSignal(pIf->hRecvEvent);
+#endif
+
+ /*
+ * Release the references to the interface object (handle + free lookup).
+ */
+ void *pvObj = pIf->pvObj;
+ intnetR0IfRelease(pIf, pSession); /* (RTHandleTableFreeWithCtx) */
+
+ int rc = SUPR0ObjRelease(pvObj, pSession);
+ LogFlow(("IntNetR0IfClose: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * VMMR0 request wrapper for IntNetR0IfCloseReq.
+ *
+ * @returns see IntNetR0IfClose.
+ * @param pSession The caller's session.
+ * @param pReq The request packet.
+ */
+INTNETR0DECL(int) IntNetR0IfCloseReq(PSUPDRVSESSION pSession, PINTNETIFCLOSEREQ pReq)
+{
+ if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
+ return VERR_INVALID_PARAMETER;
+ return IntNetR0IfClose(pReq->hIf, pSession);
+}
+
+
+/**
+ * Interface destructor callback.
+ * This is called for reference counted objectes when the count reaches 0.
+ *
+ * @param pvObj The object pointer.
+ * @param pvUser1 Pointer to the interface.
+ * @param pvUser2 Pointer to the INTNET instance data.
+ */
+static DECLCALLBACK(void) intnetR0IfDestruct(void *pvObj, void *pvUser1, void *pvUser2)
+{
+ PINTNETIF pIf = (PINTNETIF)pvUser1;
+ PINTNET pIntNet = (PINTNET)pvUser2;
+ Log(("intnetR0IfDestruct: pvObj=%p pIf=%p pIntNet=%p hIf=%RX32\n", pvObj, pIf, pIntNet, pIf->hIf));
+ RT_NOREF1(pvObj);
+
+ /*
+ * For paranoid reasons we must now mark the interface as destroyed.
+ * This is so that any waiting threads can take evasive action (kind
+ * of theoretical case), and we can reject everyone else referencing
+ * the object via the handle table before we get around to removing it.
+ */
+ ASMAtomicWriteHandle(&pIf->hDestructorThread, RTThreadNativeSelf());
+
+ /*
+ * We grab the INTNET create/open/destroy semaphore to make sure nobody is
+ * adding or removing interfaces while we're in here.
+ */
+ RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
+
+ /*
+ * Delete the interface handle so the object no longer can be used.
+ * (Can happen if the client didn't close its session.)
+ */
+ INTNETIFHANDLE hIf = ASMAtomicXchgU32(&pIf->hIf, INTNET_HANDLE_INVALID);
+ if (hIf != INTNET_HANDLE_INVALID)
+ {
+ void *pvObj2 = RTHandleTableFreeWithCtx(pIntNet->hHtIfs, hIf, pIf->pSession); NOREF(pvObj2);
+ AssertMsg(pvObj2 == pIf, ("%p, %p, hIf=%RX32 pSession=%p\n", pvObj2, pIf, hIf, pIf->pSession));
+ }
+
+ /*
+ * If we've got a network deactivate and detach ourselves from it. Because
+ * of cleanup order we might have been orphaned by the network destructor.
+ */
+ PINTNETNETWORK pNetwork = pIf->pNetwork;
+ if (pNetwork)
+ {
+ /* set inactive. */
+ intnetR0NetworkSetIfActive(pNetwork, pIf, false /*fActive*/);
+
+ /* remove ourselves from the switch table. */
+ RTSpinlockAcquire(pNetwork->hAddrSpinlock);
+
+ uint32_t iIf = pNetwork->MacTab.cEntries;
+ while (iIf-- > 0)
+ if (pNetwork->MacTab.paEntries[iIf].pIf == pIf)
+ {
+ if (pNetwork->MacTab.paEntries[iIf].fPromiscuousEff)
+ {
+ pNetwork->MacTab.cPromiscuousEntries--;
+ if (!pNetwork->MacTab.paEntries[iIf].fPromiscuousSeeTrunk)
+ pNetwork->MacTab.cPromiscuousNoTrunkEntries--;
+ }
+ Assert(pNetwork->MacTab.cPromiscuousEntries < pNetwork->MacTab.cEntries);
+ Assert(pNetwork->MacTab.cPromiscuousNoTrunkEntries < pNetwork->MacTab.cEntries);
+
+ if (iIf + 1 < pNetwork->MacTab.cEntries)
+ memmove(&pNetwork->MacTab.paEntries[iIf],
+ &pNetwork->MacTab.paEntries[iIf + 1],
+ (pNetwork->MacTab.cEntries - iIf - 1) * sizeof(pNetwork->MacTab.paEntries[0]));
+ pNetwork->MacTab.cEntries--;
+ break;
+ }
+
+ /* recalc the min flags. */
+ if (pIf->fOpenFlags & INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES)
+ {
+ uint32_t fMinFlags = 0;
+ iIf = pNetwork->MacTab.cEntries;
+ while (iIf-- > 0)
+ {
+ PINTNETIF pIf2 = pNetwork->MacTab.paEntries[iIf].pIf;
+ if ( pIf2 /* paranoia */
+ && (pIf2->fOpenFlags & INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES))
+ fMinFlags |= pIf2->fOpenFlags & INTNET_OPEN_FLAGS_STRICT_MASK;
+ }
+ pNetwork->fMinFlags = fMinFlags;
+ }
+
+ PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;
+
+ RTSpinlockRelease(pNetwork->hAddrSpinlock);
+
+ /* Notify the trunk about the interface being destroyed. */
+ if (pTrunk && pTrunk->pIfPort)
+ pTrunk->pIfPort->pfnDisconnectInterface(pTrunk->pIfPort, pIf->pvIfData);
+
+ /* Wait for the interface to quiesce while we still can. */
+ intnetR0BusyWait(pNetwork, &pIf->cBusy);
+
+ /* Release our reference to the network. */
+ RTSpinlockAcquire(pNetwork->hAddrSpinlock);
+ pIf->pNetwork = NULL;
+ RTSpinlockRelease(pNetwork->hAddrSpinlock);
+
+ SUPR0ObjRelease(pNetwork->pvObj, pIf->pSession);
+ }
+
+ RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
+
+#if !defined(VBOX_WITH_INTNET_SERVICE_IN_R3) || !defined(IN_RING3)
+ /*
+ * Wakeup anyone waiting on this interface. (Kind of unlikely, but perhaps
+ * not quite impossible.)
+ *
+ * We *must* make sure they have woken up properly and realized
+ * that the interface is no longer valid.
+ */
+ if (pIf->hRecvEvent != NIL_RTSEMEVENT)
+ {
+ RTSEMEVENT hRecvEvent = pIf->hRecvEvent;
+ unsigned cMaxWait = 0x1000;
+ while (pIf->cSleepers && cMaxWait-- > 0)
+ {
+ RTSemEventSignal(hRecvEvent);
+ RTThreadYield();
+ }
+ if (pIf->cSleepers)
+ {
+ RTThreadSleep(1);
+
+ cMaxWait = pIf->cSleepers;
+ while (pIf->cSleepers && cMaxWait-- > 0)
+ {
+ RTSemEventSignal(hRecvEvent);
+ RTThreadSleep(10);
+ }
+ }
+
+ RTSemEventDestroy(hRecvEvent);
+ pIf->hRecvEvent = NIL_RTSEMEVENT;
+ }
+#endif
+
+ /*
+ * Unmap user buffer.
+ */
+ if (pIf->pIntBuf != pIf->pIntBufDefault)
+ {
+ /** @todo user buffer */
+ }
+
+ /*
+ * Unmap and Free the default buffer.
+ */
+ if (pIf->pIntBufDefault)
+ {
+ SUPR0MemFree(pIf->pSession, (RTHCUINTPTR)pIf->pIntBufDefault);
+ pIf->pIntBufDefault = NULL;
+ pIf->pIntBufDefaultR3 = 0;
+ pIf->pIntBuf = NULL;
+ pIf->pIntBufR3 = 0;
+ }
+
+ /*
+ * Free remaining resources
+ */
+ RTSpinlockDestroy(pIf->hRecvInSpinlock);
+ pIf->hRecvInSpinlock = NIL_RTSPINLOCK;
+
+ RTMemFree(pIf->pDstTab);
+ pIf->pDstTab = NULL;
+
+ for (int i = kIntNetAddrType_Invalid + 1; i < kIntNetAddrType_End; i++)
+ intnetR0IfAddrCacheDestroy(&pIf->aAddrCache[i]);
+
+ pIf->pvObj = NULL;
+ RTMemFree(pIf);
+}
+
+
+/* Forward declaration of trunk reconnection thread function. */
+static DECLCALLBACK(int) intnetR0TrunkReconnectThread(RTTHREAD hThread, void *pvUser);
+
+/**
+ * Creates a new network interface.
+ *
+ * The call must have opened the network for the new interface and is
+ * responsible for closing it on failure. On success it must leave the network
+ * opened so the interface destructor can close it.
+ *
+ * @returns VBox status code.
+ * @param pNetwork The network, referenced. The reference is consumed
+ * on success.
+ * @param pSession The session handle.
+ * @param cbSend The size of the send buffer.
+ * @param cbRecv The size of the receive buffer.
+ * @param fFlags The open network flags.
+ * @param pfnRecvAvail The receive available callback to call instead of
+ * signalling the semaphore (R3 service only).
+ * @param pvUser The opaque user data to pass to the callback.
+ * @param phIf Where to store the interface handle.
+ */
+static int intnetR0NetworkCreateIf(PINTNETNETWORK pNetwork, PSUPDRVSESSION pSession, unsigned cbSend, unsigned cbRecv,
+ uint32_t fFlags, PFNINTNETIFRECVAVAIL pfnRecvAvail, void *pvUser, PINTNETIFHANDLE phIf)
+{
+ LogFlow(("intnetR0NetworkCreateIf: pNetwork=%p pSession=%p cbSend=%u cbRecv=%u fFlags=%#x phIf=%p\n",
+ pNetwork, pSession, cbSend, cbRecv, fFlags, phIf));
+
+ /*
+ * Assert input.
+ */
+ AssertPtr(pNetwork);
+ AssertPtr(phIf);
+#if !defined(VBOX_WITH_INTNET_SERVICE_IN_R3) || !defined(IN_RING3)
+ Assert(pfnRecvAvail == NULL);
+ Assert(pvUser == NULL);
+ RT_NOREF(pfnRecvAvail, pvUser);
+#endif
+
+ /*
+ * Adjust the flags with defaults for the interface policies.
+ * Note: Main restricts promiscuous mode per interface.
+ */
+ uint32_t const fDefFlags = INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW
+ | INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK;
+ for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkIfFlags); i++)
+ if (!(fFlags & g_afIntNetOpenNetworkIfFlags[i].fPair))
+ fFlags |= g_afIntNetOpenNetworkIfFlags[i].fPair & fDefFlags;
+
+ /*
+ * Make sure that all destination tables as well as the have space of
+ */
+ int rc = intnetR0NetworkEnsureTabSpace(pNetwork);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ /*
+ * Allocate the interface and initialize it.
+ */
+ PINTNETIF pIf = (PINTNETIF)RTMemAllocZ(sizeof(*pIf));
+ if (!pIf)
+ return VERR_NO_MEMORY;
+
+ memset(&pIf->MacAddr, 0xff, sizeof(pIf->MacAddr)); /* broadcast */
+ //pIf->fMacSet = false;
+ //pIf->fPromiscuousReal = false;
+ //pIf->fActive = false;
+ //pIf->fNoMoreWaits = false;
+ pIf->fOpenFlags = fFlags;
+ //pIf->cYields = 0;
+ //pIf->pIntBuf = 0;
+ //pIf->pIntBufR3 = NIL_RTR3PTR;
+ //pIf->pIntBufDefault = 0;
+ //pIf->pIntBufDefaultR3 = NIL_RTR3PTR;
+#if !defined(VBOX_WITH_INTNET_SERVICE_IN_R3) || !defined(IN_RING3)
+ pIf->hRecvEvent = NIL_RTSEMEVENT;
+#else
+ pIf->pfnRecvAvail = pfnRecvAvail;
+ pIf->pvUserRecvAvail = pvUser;
+#endif
+ //pIf->cSleepers = 0;
+ pIf->hIf = INTNET_HANDLE_INVALID;
+ pIf->hDestructorThread = NIL_RTNATIVETHREAD;
+ pIf->pNetwork = pNetwork;
+ pIf->pSession = pSession;
+ //pIf->pvObj = NULL;
+ //pIf->aAddrCache = {0};
+ pIf->hRecvInSpinlock = NIL_RTSPINLOCK;
+ pIf->cBusy = 0;
+ //pIf->pDstTab = NULL;
+ //pIf->pvIfData = NULL;
+
+ for (int i = kIntNetAddrType_Invalid + 1; i < kIntNetAddrType_End && RT_SUCCESS(rc); i++)
+ rc = intnetR0IfAddrCacheInit(&pIf->aAddrCache[i], (INTNETADDRTYPE)i,
+ !!(pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE));
+ if (RT_SUCCESS(rc))
+ rc = intnetR0AllocDstTab(pNetwork->MacTab.cEntriesAllocated, (PINTNETDSTTAB *)&pIf->pDstTab);
+#if !defined(VBOX_WITH_INTNET_SERVICE_IN_R3) || !defined(IN_RING3)
+ if (RT_SUCCESS(rc))
+ rc = RTSemEventCreate((PRTSEMEVENT)&pIf->hRecvEvent);
+#endif
+ if (RT_SUCCESS(rc))
+ rc = RTSpinlockCreate(&pIf->hRecvInSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "hRecvInSpinlock");
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Create the default buffer.
+ */
+ /** @todo adjust with minimums and apply defaults here. */
+ cbRecv = RT_ALIGN(RT_MAX(cbRecv, sizeof(INTNETHDR) * 4), INTNETRINGBUF_ALIGNMENT);
+ cbSend = RT_ALIGN(RT_MAX(cbSend, sizeof(INTNETHDR) * 4), INTNETRINGBUF_ALIGNMENT);
+ const unsigned cbBuf = RT_ALIGN(sizeof(*pIf->pIntBuf), INTNETRINGBUF_ALIGNMENT) + cbRecv + cbSend;
+ rc = SUPR0MemAlloc(pIf->pSession, cbBuf, (PRTR0PTR)&pIf->pIntBufDefault, (PRTR3PTR)&pIf->pIntBufDefaultR3);
+ if (RT_SUCCESS(rc))
+ {
+ ASMMemZero32(pIf->pIntBufDefault, cbBuf); /** @todo I thought I specified these buggers as clearing the memory... */
+
+ pIf->pIntBuf = pIf->pIntBufDefault;
+ pIf->pIntBufR3 = pIf->pIntBufDefaultR3;
+ IntNetBufInit(pIf->pIntBuf, cbBuf, cbRecv, cbSend);
+
+ /*
+ * Register the interface with the session and create a handle for it.
+ */
+ pIf->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_INTERNAL_NETWORK_INTERFACE,
+ intnetR0IfDestruct, pIf, pNetwork->pIntNet);
+ if (pIf->pvObj)
+ {
+ rc = RTHandleTableAllocWithCtx(pNetwork->pIntNet->hHtIfs, pIf, pSession, (uint32_t *)&pIf->hIf);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Finally add the interface to the network, consuming the
+ * network reference of the caller.
+ */
+ RTSpinlockAcquire(pNetwork->hAddrSpinlock);
+
+ uint32_t iIf = pNetwork->MacTab.cEntries;
+ Assert(iIf + 1 <= pNetwork->MacTab.cEntriesAllocated);
+
+ pNetwork->MacTab.paEntries[iIf].MacAddr = pIf->MacAddr;
+ pNetwork->MacTab.paEntries[iIf].fActive = false;
+ pNetwork->MacTab.paEntries[iIf].fPromiscuousEff = false;
+ pNetwork->MacTab.paEntries[iIf].fPromiscuousSeeTrunk = false;
+ pNetwork->MacTab.paEntries[iIf].pIf = pIf;
+
+ pNetwork->MacTab.cEntries = iIf + 1;
+ pIf->pNetwork = pNetwork;
+
+ /*
+ * Grab a busy reference (paranoia) to the trunk before releasing
+ * the spinlock and then notify it about the new interface.
+ */
+ PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;
+ if (pTrunk)
+ intnetR0BusyIncTrunk(pTrunk);
+
+ RTSpinlockRelease(pNetwork->hAddrSpinlock);
+
+ if (pTrunk)
+ {
+ Log(("intnetR0NetworkCreateIf: pfnConnectInterface hIf=%RX32\n", pIf->hIf));
+ if (pTrunk->pIfPort)
+ rc = pTrunk->pIfPort->pfnConnectInterface(pTrunk->pIfPort, pIf, &pIf->pvIfData);
+ intnetR0BusyDecTrunk(pTrunk);
+ }
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * We're good!
+ */
+ *phIf = pIf->hIf;
+ Log(("intnetR0NetworkCreateIf: returns VINF_SUCCESS *phIf=%RX32 cbSend=%u cbRecv=%u cbBuf=%u\n",
+ *phIf, pIf->pIntBufDefault->cbSend, pIf->pIntBufDefault->cbRecv, pIf->pIntBufDefault->cbBuf));
+ return VINF_SUCCESS;
+ }
+ }
+
+ SUPR0ObjAddRef(pNetwork->pvObj, pSession);
+ SUPR0ObjRelease(pIf->pvObj, pSession);
+ LogFlow(("intnetR0NetworkCreateIf: returns %Rrc\n", rc));
+ return rc;
+ }
+
+ /* clean up */
+ SUPR0MemFree(pIf->pSession, (RTHCUINTPTR)pIf->pIntBufDefault);
+ pIf->pIntBufDefault = NULL;
+ pIf->pIntBuf = NULL;
+ }
+ }
+
+ RTSpinlockDestroy(pIf->hRecvInSpinlock);
+ pIf->hRecvInSpinlock = NIL_RTSPINLOCK;
+#if !defined(VBOX_WITH_INTNET_SERVICE_IN_R3) || !defined(IN_RING3)
+ RTSemEventDestroy(pIf->hRecvEvent);
+ pIf->hRecvEvent = NIL_RTSEMEVENT;
+#else
+ pIf->pfnRecvAvail = NULL;
+ pIf->pvUserRecvAvail = NULL;
+#endif
+ RTMemFree(pIf->pDstTab);
+ for (int i = kIntNetAddrType_Invalid + 1; i < kIntNetAddrType_End; i++)
+ intnetR0IfAddrCacheDestroy(&pIf->aAddrCache[i]);
+ RTMemFree(pIf);
+ LogFlow(("intnetR0NetworkCreateIf: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+/** @interface_method_impl{INTNETTRUNKSWPORT,pfnSetSGPhys} */
+static DECLCALLBACK(bool) intnetR0TrunkIfPortSetSGPhys(PINTNETTRUNKSWPORT pSwitchPort, bool fEnable)
+{
+ PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
+ AssertMsgFailed(("Not implemented because it wasn't required on Darwin\n"));
+ return ASMAtomicXchgBool(&pThis->fPhysSG, fEnable);
+}
+
+
+/** @interface_method_impl{INTNETTRUNKSWPORT,pfnReportMacAddress} */
+static DECLCALLBACK(void) intnetR0TrunkIfPortReportMacAddress(PINTNETTRUNKSWPORT pSwitchPort, PCRTMAC pMacAddr)
+{
+ PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
+
+ /*
+ * Get the network instance and grab the address spinlock before making
+ * any changes.
+ */
+ intnetR0BusyIncTrunk(pThis);
+ PINTNETNETWORK pNetwork = pThis->pNetwork;
+ if (pNetwork)
+ {
+ RTSpinlockAcquire(pNetwork->hAddrSpinlock);
+
+ pNetwork->MacTab.HostMac = *pMacAddr;
+ pThis->MacAddr = *pMacAddr;
+
+ RTSpinlockRelease(pNetwork->hAddrSpinlock);
+ }
+ else
+ pThis->MacAddr = *pMacAddr;
+ intnetR0BusyDecTrunk(pThis);
+}
+
+
+/** @interface_method_impl{INTNETTRUNKSWPORT,pfnReportPromiscuousMode} */
+static DECLCALLBACK(void) intnetR0TrunkIfPortReportPromiscuousMode(PINTNETTRUNKSWPORT pSwitchPort, bool fPromiscuous)
+{
+ PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
+
+ /*
+ * Get the network instance and grab the address spinlock before making
+ * any changes.
+ */
+ intnetR0BusyIncTrunk(pThis);
+ PINTNETNETWORK pNetwork = pThis->pNetwork;
+ if (pNetwork)
+ {
+ RTSpinlockAcquire(pNetwork->hAddrSpinlock);
+
+ pNetwork->MacTab.fHostPromiscuousReal = fPromiscuous
+ || (pNetwork->fFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_PROMISC_MODE);
+ pNetwork->MacTab.fHostPromiscuousEff = pNetwork->MacTab.fHostPromiscuousReal
+ && (pNetwork->fFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST);
+
+ RTSpinlockRelease(pNetwork->hAddrSpinlock);
+ }
+ intnetR0BusyDecTrunk(pThis);
+}
+
+
+/** @interface_method_impl{INTNETTRUNKSWPORT,pfnReportGsoCapabilities} */
+static DECLCALLBACK(void) intnetR0TrunkIfPortReportGsoCapabilities(PINTNETTRUNKSWPORT pSwitchPort,
+ uint32_t fGsoCapabilities, uint32_t fDst)
+{
+ PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
+
+ for (unsigned iBit = PDMNETWORKGSOTYPE_END; iBit < 32; iBit++)
+ Assert(!(fGsoCapabilities & RT_BIT_32(iBit)));
+ Assert(!(fDst & ~INTNETTRUNKDIR_VALID_MASK));
+ Assert(fDst);
+
+ if (fDst & INTNETTRUNKDIR_HOST)
+ pThis->fHostGsoCapabilites = fGsoCapabilities;
+
+ if (fDst & INTNETTRUNKDIR_WIRE)
+ pThis->fWireGsoCapabilites = fGsoCapabilities;
+}
+
+
+/** @interface_method_impl{INTNETTRUNKSWPORT,pfnReportNoPreemptDsts} */
+static DECLCALLBACK(void) intnetR0TrunkIfPortReportNoPreemptDsts(PINTNETTRUNKSWPORT pSwitchPort, uint32_t fNoPreemptDsts)
+{
+ PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
+ Assert(!(fNoPreemptDsts & ~INTNETTRUNKDIR_VALID_MASK));
+
+ pThis->fNoPreemptDsts = fNoPreemptDsts;
+}
+
+
+/** @interface_method_impl{INTNETTRUNKSWPORT,pfnDisconnect} */
+static DECLCALLBACK(void) intnetR0TrunkIfPortDisconnect(PINTNETTRUNKSWPORT pSwitchPort, PINTNETTRUNKIFPORT pIfPort,
+ PFNINTNETTRUNKIFPORTRELEASEBUSY pfnReleaseBusy)
+{
+ PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
+
+ /*
+ * The caller has marked the trunk instance busy on his side before making
+ * the call (see method docs) to let us safely grab the network and internal
+ * network instance pointers without racing the network destruction code
+ * (intnetR0TrunkIfDestroy (called by intnetR0TrunkIfDestroy) will wait for
+ * the interface to stop being busy before setting pNetwork to NULL and
+ * freeing up the resources).
+ */
+ PINTNETNETWORK pNetwork = pThis->pNetwork;
+ if (pNetwork)
+ {
+ PINTNET pIntNet = pNetwork->pIntNet;
+ Assert(pNetwork->pIntNet);
+
+ /*
+ * We must decrease the callers busy count here to prevent deadlocking
+ * when requesting the big mutex ownership. This will of course
+ * unblock anyone stuck in intnetR0TrunkIfDestroy doing pfnWaitForIdle
+ * (the other deadlock party), so we have to revalidate the network
+ * pointer after taking ownership of the big mutex.
+ */
+ if (pfnReleaseBusy)
+ pfnReleaseBusy(pIfPort);
+
+ RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
+
+ if (intnetR0NetworkIsValid(pIntNet, pNetwork))
+ {
+ Assert(pNetwork->MacTab.pTrunk == pThis); /* Must be valid as long as tehre are no concurrent calls to this method. */
+ Assert(pThis->pIfPort == pIfPort); /* Ditto */
+
+ /*
+ * Disconnect the trunk and destroy it, similar to what is done int
+ * intnetR0NetworkDestruct.
+ */
+ pIfPort->pfnSetState(pIfPort, INTNETTRUNKIFSTATE_DISCONNECTING);
+
+ RTSpinlockAcquire(pNetwork->hAddrSpinlock);
+ pNetwork->MacTab.pTrunk = NULL;
+ RTSpinlockRelease(pNetwork->hAddrSpinlock);
+
+ /*
+ * Create a system thread that will attempt to re-connect this trunk periodically
+ * hoping that the corresponding filter module reappears in the system. The thread
+ * will go away if it succeeds in re-connecting the trunk or if it is signalled.
+ */
+ int rc = RTThreadCreate(&pNetwork->hTrunkReconnectThread, intnetR0TrunkReconnectThread, pNetwork,
+ 0, RTTHREADTYPE_INFREQUENT_POLLER, RTTHREADFLAGS_WAITABLE, "TRNKRECON");
+ AssertRC(rc);
+
+ intnetR0TrunkIfDestroy(pThis, pNetwork);
+ }
+
+ RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
+ }
+ /*
+ * We must always release the busy reference.
+ */
+ else if (pfnReleaseBusy)
+ pfnReleaseBusy(pIfPort);
+}
+
+
+/** @interface_method_impl{INTNETTRUNKSWPORT,pfnPreRecv} */
+static DECLCALLBACK(INTNETSWDECISION) intnetR0TrunkIfPortPreRecv(PINTNETTRUNKSWPORT pSwitchPort,
+ void const *pvSrc, size_t cbSrc, uint32_t fSrc)
+{
+ PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
+
+ /* assert some sanity */
+ AssertPtr(pvSrc);
+ AssertReturn(cbSrc >= 6, INTNETSWDECISION_BROADCAST);
+ Assert(fSrc);
+
+ /*
+ * Mark the trunk as busy, make sure we've got a network and that there are
+ * some active interfaces around.
+ */
+ INTNETSWDECISION enmSwDecision = INTNETSWDECISION_TRUNK;
+ intnetR0BusyIncTrunk(pThis);
+ PINTNETNETWORK pNetwork = pThis->pNetwork;
+ if (RT_LIKELY( pNetwork
+ && pNetwork->cActiveIFs > 0 ))
+ {
+ /*
+ * Lazy bird! No pre-switching of multicast and shared-MAC-on-wire.
+ */
+ PCRTNETETHERHDR pEthHdr = (PCRTNETETHERHDR)pvSrc;
+ if (intnetR0IsMacAddrMulticast(&pEthHdr->DstMac))
+ enmSwDecision = INTNETSWDECISION_BROADCAST;
+ else if ( fSrc == INTNETTRUNKDIR_WIRE
+ && (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE))
+ enmSwDecision = INTNETSWDECISION_BROADCAST;
+ else
+ enmSwDecision = intnetR0NetworkPreSwitchUnicast(pNetwork,
+ fSrc,
+ cbSrc >= 12 ? &pEthHdr->SrcMac : NULL,
+ &pEthHdr->DstMac);
+ }
+
+ intnetR0BusyDecTrunk(pThis);
+ return enmSwDecision;
+}
+
+
+/** @interface_method_impl{INTNETTRUNKSWPORT,pfnRecv} */
+static DECLCALLBACK(bool) intnetR0TrunkIfPortRecv(PINTNETTRUNKSWPORT pSwitchPort, void *pvIf, PINTNETSG pSG, uint32_t fSrc)
+{
+ PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
+
+ /* assert some sanity */
+ AssertPtr(pSG);
+ Assert(fSrc);
+ NOREF(pvIf); /* later */
+
+ /*
+ * Mark the trunk as busy, make sure we've got a network and that there are
+ * some active interfaces around.
+ */
+ bool fRc = false /* don't drop it */;
+ intnetR0BusyIncTrunk(pThis);
+ PINTNETNETWORK pNetwork = pThis->pNetwork;
+ if (RT_LIKELY( pNetwork
+ && pNetwork->cActiveIFs > 0 ))
+ {
+ /*
+ * Grab or allocate a destination table.
+ */
+ bool const fIntCtx = RTThreadPreemptIsEnabled(NIL_RTTHREAD) || RTThreadIsInInterrupt(NIL_RTTHREAD);
+ unsigned iDstTab = 0;
+ PINTNETDSTTAB pDstTab = NULL;
+ RTSpinlockAcquire(pThis->hDstTabSpinlock);
+ if (fIntCtx)
+ {
+ /* Interrupt or restricted context. */
+ iDstTab = RTMpCpuIdToSetIndex(RTMpCpuId());
+ iDstTab %= pThis->cIntDstTabs;
+ pDstTab = pThis->apIntDstTabs[iDstTab];
+ if (RT_LIKELY(pDstTab))
+ pThis->apIntDstTabs[iDstTab] = NULL;
+ else
+ {
+ iDstTab = pThis->cIntDstTabs;
+ while (iDstTab-- > 0)
+ {
+ pDstTab = pThis->apIntDstTabs[iDstTab];
+ if (pDstTab)
+ {
+ pThis->apIntDstTabs[iDstTab] = NULL;
+ break;
+ }
+ }
+ }
+ RTSpinlockRelease(pThis->hDstTabSpinlock);
+ Assert(!pDstTab || iDstTab < pThis->cIntDstTabs);
+ }
+ else
+ {
+ /* Task context, fallback is to allocate a table. */
+ AssertCompile(RT_ELEMENTS(pThis->apTaskDstTabs) == 2); /* for loop rollout */
+ pDstTab = pThis->apIntDstTabs[iDstTab = 0];
+ if (!pDstTab)
+ pDstTab = pThis->apIntDstTabs[iDstTab = 1];
+ if (pDstTab)
+ {
+ pThis->apIntDstTabs[iDstTab] = NULL;
+ RTSpinlockRelease(pThis->hDstTabSpinlock);
+ Assert(iDstTab < RT_ELEMENTS(pThis->apTaskDstTabs));
+ }
+ else
+ {
+ RTSpinlockRelease(pThis->hDstTabSpinlock);
+ intnetR0AllocDstTab(pNetwork->MacTab.cEntriesAllocated, &pDstTab);
+ iDstTab = 65535;
+ }
+ }
+ if (RT_LIKELY(pDstTab))
+ {
+ /*
+ * Finally, get down to business of sending the frame.
+ */
+ INTNETSWDECISION enmSwDecision = intnetR0NetworkSend(pNetwork, NULL, fSrc, pSG, pDstTab);
+ AssertMsg(enmSwDecision != INTNETSWDECISION_BAD_CONTEXT, ("fSrc=%#x fTrunkDst=%#x hdr=%.14Rhxs\n", fSrc, pDstTab->fTrunkDst, pSG->aSegs[0].pv));
+ if (enmSwDecision == INTNETSWDECISION_INTNET)
+ fRc = true; /* drop it */
+
+ /*
+ * Free the destination table.
+ */
+ if (iDstTab == 65535)
+ RTMemFree(pDstTab);
+ else
+ {
+ RTSpinlockAcquire(pThis->hDstTabSpinlock);
+ if (fIntCtx && !pThis->apIntDstTabs[iDstTab])
+ pThis->apIntDstTabs[iDstTab] = pDstTab;
+ else if (!fIntCtx && !pThis->apTaskDstTabs[iDstTab])
+ pThis->apTaskDstTabs[iDstTab] = pDstTab;
+ else
+ {
+ /* this shouldn't happen! */
+ PINTNETDSTTAB *papDstTabs = fIntCtx ? &pThis->apIntDstTabs[0] : &pThis->apTaskDstTabs[0];
+ iDstTab = fIntCtx ? pThis->cIntDstTabs : RT_ELEMENTS(pThis->apTaskDstTabs);
+ while (iDstTab-- > 0)
+ if (!papDstTabs[iDstTab])
+ {
+ papDstTabs[iDstTab] = pDstTab;
+ break;
+ }
+ }
+ RTSpinlockRelease(pThis->hDstTabSpinlock);
+ Assert(iDstTab < RT_MAX(RT_ELEMENTS(pThis->apTaskDstTabs), pThis->cIntDstTabs));
+ }
+ }
+ }
+
+ intnetR0BusyDecTrunk(pThis);
+ return fRc;
+}
+
+
+/** @interface_method_impl{INTNETTRUNKSWPORT,pfnSGRetain} */
+static DECLCALLBACK(void) intnetR0TrunkIfPortSGRetain(PINTNETTRUNKSWPORT pSwitchPort, PINTNETSG pSG)
+{
+ PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
+ PINTNETNETWORK pNetwork = pThis->pNetwork;
+
+ /* assert some sanity */
+ AssertPtrReturnVoid(pNetwork);
+ AssertReturnVoid(pNetwork->hEvtBusyIf != NIL_RTSEMEVENT);
+ AssertPtr(pSG);
+ Assert(pSG->cUsers > 0 && pSG->cUsers < 256);
+
+ /* do it. */
+ ++pSG->cUsers;
+}
+
+
+/** @interface_method_impl{INTNETTRUNKSWPORT,pfnSGRelease} */
+static DECLCALLBACK(void) intnetR0TrunkIfPortSGRelease(PINTNETTRUNKSWPORT pSwitchPort, PINTNETSG pSG)
+{
+ PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
+ PINTNETNETWORK pNetwork = pThis->pNetwork;
+
+ /* assert some sanity */
+ AssertPtrReturnVoid(pNetwork);
+ AssertReturnVoid(pNetwork->hEvtBusyIf != NIL_RTSEMEVENT);
+ AssertPtr(pSG);
+ Assert(pSG->cUsers > 0);
+
+ /*
+ * Free it?
+ */
+ if (!--pSG->cUsers)
+ {
+ /** @todo later */
+ }
+}
+
+
+/** @interface_method_impl{INTNETTRUNKSWPORT,pfnNotifyHostAddress} */
+static DECLCALLBACK(void) intnetR0NetworkNotifyHostAddress(PINTNETTRUNKSWPORT pSwitchPort,
+ bool fAdded,
+ INTNETADDRTYPE enmType, const void *pvAddr)
+{
+ PINTNETTRUNKIF pTrunkIf = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
+ PINTNETNETWORK pNetwork = pTrunkIf->pNetwork;
+ PCRTNETADDRU pAddr = (PCRTNETADDRU)pvAddr;
+ uint8_t cbAddr;
+
+ if (enmType == kIntNetAddrType_IPv4)
+ {
+ Log(("%s: %s %RTnaipv4\n",
+ __FUNCTION__, (fAdded ? "add" : "del"),
+ pAddr->IPv4));
+ cbAddr = 4;
+ }
+ else if (enmType == kIntNetAddrType_IPv6)
+ {
+ Log(("%s: %s %RTnaipv6\n",
+ __FUNCTION__, (fAdded ? "add" : "del"),
+ pAddr));
+ cbAddr = 16;
+ }
+ else
+ {
+ Log(("%s: unexpected address type %d\n", __FUNCTION__, enmType));
+ return;
+ }
+
+ RTSpinlockAcquire(pNetwork->hAddrSpinlock);
+ if (fAdded) /* one of host interfaces got a new address */
+ {
+ /* blacklist it to prevent spoofing by guests */
+ intnetR0NetworkBlacklistAdd(pNetwork, pAddr, enmType);
+
+ /* kick out any guest that uses it */
+ intnetR0NetworkAddrCacheDeleteLocked(pNetwork, pAddr, enmType, cbAddr, "tif/host");
+ }
+ else /* address deleted from one of host interfaces */
+ {
+ /* stop blacklisting it, guests may use it now */
+ intnetR0NetworkBlacklistDelete(pNetwork, pAddr, enmType);
+ }
+ RTSpinlockRelease(pNetwork->hAddrSpinlock);
+}
+
+
+/**
+ * Shutdown the trunk interface.
+ *
+ * @param pThis The trunk.
+ * @param pNetworks The network.
+ *
+ * @remarks The caller must hold the global lock.
+ */
+static void intnetR0TrunkIfDestroy(PINTNETTRUNKIF pThis, PINTNETNETWORK pNetwork)
+{
+ /* assert sanity */
+ if (!pThis)
+ return;
+ AssertPtr(pThis);
+ Assert(pThis->pNetwork == pNetwork);
+ AssertPtrNull(pThis->pIfPort);
+
+ /*
+ * The interface has already been deactivated, we just to wait for
+ * it to become idle before we can disconnect and release it.
+ */
+ PINTNETTRUNKIFPORT pIfPort = pThis->pIfPort;
+ if (pIfPort)
+ {
+ /* unset it */
+ pThis->pIfPort = NULL;
+
+ /* wait in portions so we can complain every now an then. */
+ uint64_t StartTS = RTTimeSystemNanoTS();
+ int rc = pIfPort->pfnWaitForIdle(pIfPort, 10*1000);
+ if (RT_FAILURE(rc))
+ {
+ LogRel(("intnet: '%s' didn't become idle in %RU64 ns (%Rrc).\n",
+ pNetwork->szName, RTTimeSystemNanoTS() - StartTS, rc));
+ Assert(rc == VERR_TIMEOUT);
+ while ( RT_FAILURE(rc)
+ && RTTimeSystemNanoTS() - StartTS < UINT64_C(30000000000)) /* 30 sec */
+ rc = pIfPort->pfnWaitForIdle(pIfPort, 10*1000);
+ if (rc == VERR_TIMEOUT)
+ {
+ LogRel(("intnet: '%s' didn't become idle in %RU64 ns (%Rrc).\n",
+ pNetwork->szName, RTTimeSystemNanoTS() - StartTS, rc));
+ while ( rc == VERR_TIMEOUT
+ && RTTimeSystemNanoTS() - StartTS < UINT64_C(360000000000)) /* 360 sec */
+ rc = pIfPort->pfnWaitForIdle(pIfPort, 30*1000);
+ if (RT_FAILURE(rc))
+ {
+ LogRel(("intnet: '%s' didn't become idle in %RU64 ns (%Rrc), giving up.\n",
+ pNetwork->szName, RTTimeSystemNanoTS() - StartTS, rc));
+ AssertRC(rc);
+ }
+ }
+ }
+
+ /* disconnect & release it. */
+ pIfPort->pfnDisconnectAndRelease(pIfPort);
+ }
+
+ /*
+ * Free up the resources.
+ */
+ pThis->pNetwork = NULL; /* Must not be cleared while busy, see intnetR0TrunkIfPortDisconnect. */
+ RTSpinlockDestroy(pThis->hDstTabSpinlock);
+ for (unsigned i = 0; i < RT_ELEMENTS(pThis->apTaskDstTabs); i++)
+ {
+ Assert(pThis->apTaskDstTabs[i]);
+ RTMemFree(pThis->apTaskDstTabs[i]);
+ pThis->apTaskDstTabs[i] = NULL;
+ }
+ for (unsigned i = 0; i < pThis->cIntDstTabs; i++)
+ {
+ Assert(pThis->apIntDstTabs[i]);
+ RTMemFree(pThis->apIntDstTabs[i]);
+ pThis->apIntDstTabs[i] = NULL;
+ }
+ RTMemFree(pThis);
+}
+
+
+/**
+ * Creates the trunk connection (if any).
+ *
+ * @returns VBox status code.
+ *
+ * @param pNetwork The newly created network.
+ * @param pSession The session handle.
+ */
+static int intnetR0NetworkCreateTrunkIf(PINTNETNETWORK pNetwork, PSUPDRVSESSION pSession)
+{
+ const char *pszName;
+ switch (pNetwork->enmTrunkType)
+ {
+ /*
+ * The 'None' case, simple.
+ */
+ case kIntNetTrunkType_None:
+ case kIntNetTrunkType_WhateverNone:
+#ifdef VBOX_WITH_NAT_SERVICE
+ /*
+ * Well, here we don't want load anything special,
+ * just communicate between processes via internal network.
+ */
+ case kIntNetTrunkType_SrvNat:
+#endif
+ return VINF_SUCCESS;
+
+ /* Can't happen, but makes GCC happy. */
+ default:
+ return VERR_NOT_IMPLEMENTED;
+
+ /*
+ * Translate enum to component factory name.
+ */
+ case kIntNetTrunkType_NetFlt:
+ pszName = "VBoxNetFlt";
+ break;
+ case kIntNetTrunkType_NetAdp:
+#if defined(RT_OS_DARWIN) && !defined(VBOXNETADP_DO_NOT_USE_NETFLT)
+ pszName = "VBoxNetFlt";
+#else /* VBOXNETADP_DO_NOT_USE_NETFLT */
+ pszName = "VBoxNetAdp";
+#endif /* VBOXNETADP_DO_NOT_USE_NETFLT */
+ break;
+#ifndef VBOX_WITH_NAT_SERVICE
+ case kIntNetTrunkType_SrvNat:
+ pszName = "VBoxSrvNat";
+ break;
+#endif
+ }
+
+ /*
+ * Allocate the trunk interface and associated destination tables.
+ *
+ * We take a very optimistic view on the parallelism of the host
+ * network stack and NIC driver. So, we allocate one table for each
+ * possible CPU to deal with interrupt time requests and one for task
+ * time calls.
+ */
+ RTCPUID cCpus = RTMpGetCount(); Assert(cCpus > 0);
+ PINTNETTRUNKIF pTrunk = (PINTNETTRUNKIF)RTMemAllocZ(RT_UOFFSETOF_DYN(INTNETTRUNKIF, apIntDstTabs[cCpus]));
+ if (!pTrunk)
+ return VERR_NO_MEMORY;
+
+ Assert(pNetwork->MacTab.cEntriesAllocated > 0);
+ int rc = VINF_SUCCESS;
+ pTrunk->cIntDstTabs = cCpus;
+ for (unsigned i = 0; i < cCpus && RT_SUCCESS(rc); i++)
+ rc = intnetR0AllocDstTab(pNetwork->MacTab.cEntriesAllocated, &pTrunk->apIntDstTabs[i]);
+ for (unsigned i = 0; i < RT_ELEMENTS(pTrunk->apTaskDstTabs) && RT_SUCCESS(rc); i++)
+ rc = intnetR0AllocDstTab(pNetwork->MacTab.cEntriesAllocated, &pTrunk->apTaskDstTabs[i]);
+
+ if (RT_SUCCESS(rc))
+ {
+ pTrunk->SwitchPort.u32Version = INTNETTRUNKSWPORT_VERSION;
+ pTrunk->SwitchPort.pfnPreRecv = intnetR0TrunkIfPortPreRecv;
+ pTrunk->SwitchPort.pfnRecv = intnetR0TrunkIfPortRecv;
+ pTrunk->SwitchPort.pfnSGRetain = intnetR0TrunkIfPortSGRetain;
+ pTrunk->SwitchPort.pfnSGRelease = intnetR0TrunkIfPortSGRelease;
+ pTrunk->SwitchPort.pfnSetSGPhys = intnetR0TrunkIfPortSetSGPhys;
+ pTrunk->SwitchPort.pfnReportMacAddress = intnetR0TrunkIfPortReportMacAddress;
+ pTrunk->SwitchPort.pfnReportPromiscuousMode = intnetR0TrunkIfPortReportPromiscuousMode;
+ pTrunk->SwitchPort.pfnReportGsoCapabilities = intnetR0TrunkIfPortReportGsoCapabilities;
+ pTrunk->SwitchPort.pfnReportNoPreemptDsts = intnetR0TrunkIfPortReportNoPreemptDsts;
+ if (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
+ pTrunk->SwitchPort.pfnNotifyHostAddress = intnetR0NetworkNotifyHostAddress;
+ pTrunk->SwitchPort.pfnDisconnect = intnetR0TrunkIfPortDisconnect;
+ pTrunk->SwitchPort.u32VersionEnd = INTNETTRUNKSWPORT_VERSION;
+ //pTrunk->pIfPort = NULL;
+ pTrunk->pNetwork = pNetwork;
+ pTrunk->MacAddr.au8[0] = 0xff;
+ pTrunk->MacAddr.au8[1] = 0xff;
+ pTrunk->MacAddr.au8[2] = 0xff;
+ pTrunk->MacAddr.au8[3] = 0xff;
+ pTrunk->MacAddr.au8[4] = 0xff;
+ pTrunk->MacAddr.au8[5] = 0xff;
+ //pTrunk->fPhysSG = false;
+ //pTrunk->fUnused = false;
+ //pTrunk->cBusy = 0;
+ //pTrunk->fNoPreemptDsts = 0;
+ //pTrunk->fWireGsoCapabilites = 0;
+ //pTrunk->fHostGsoCapabilites = 0;
+ //pTrunk->abGsoHdrs = {0};
+ pTrunk->hDstTabSpinlock = NIL_RTSPINLOCK;
+ //pTrunk->apTaskDstTabs = above;
+ //pTrunk->cIntDstTabs = above;
+ //pTrunk->apIntDstTabs = above;
+
+ /*
+ * Create the lock (we've NIL'ed the members above to simplify cleanup).
+ */
+ rc = RTSpinlockCreate(&pTrunk->hDstTabSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "hDstTabSpinlock");
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * There are a couple of bits in MacTab as well pertaining to the
+ * trunk. We have to set this before it's reported.
+ *
+ * Note! We don't need to lock the MacTab here - creation time.
+ */
+ pNetwork->MacTab.pTrunk = pTrunk;
+ pNetwork->MacTab.HostMac = pTrunk->MacAddr;
+ pNetwork->MacTab.fHostPromiscuousReal = false;
+ pNetwork->MacTab.fHostPromiscuousEff = (pNetwork->fFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_PROMISC_MODE)
+ && (pNetwork->fFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST);
+ pNetwork->MacTab.fHostActive = false;
+ pNetwork->MacTab.fWirePromiscuousReal = RT_BOOL(pNetwork->fFlags & INTNET_OPEN_FLAGS_TRUNK_WIRE_PROMISC_MODE);
+ pNetwork->MacTab.fWirePromiscuousEff = pNetwork->MacTab.fWirePromiscuousReal
+ && (pNetwork->fFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_WIRE);
+ pNetwork->MacTab.fWireActive = false;
+
+#ifdef IN_RING0 /* (testcase is ring-3) */
+ /*
+ * Query the factory we want, then use it create and connect the trunk.
+ */
+ PINTNETTRUNKFACTORY pTrunkFactory = NULL;
+ rc = SUPR0ComponentQueryFactory(pSession, pszName, INTNETTRUNKFACTORY_UUID_STR, (void **)&pTrunkFactory);
+ if (RT_SUCCESS(rc))
+ {
+ rc = pTrunkFactory->pfnCreateAndConnect(pTrunkFactory,
+ pNetwork->szTrunk,
+ &pTrunk->SwitchPort,
+ pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE
+ ? INTNETTRUNKFACTORY_FLAG_NO_PROMISC
+ : 0,
+ &pTrunk->pIfPort);
+ pTrunkFactory->pfnRelease(pTrunkFactory);
+ if (RT_SUCCESS(rc))
+ {
+ Assert(pTrunk->pIfPort);
+
+ Log(("intnetR0NetworkCreateTrunkIf: VINF_SUCCESS - pszName=%s szTrunk=%s%s Network=%s\n",
+ pszName, pNetwork->szTrunk, pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE ? " shared-mac" : "", pNetwork->szName));
+ return VINF_SUCCESS;
+ }
+ }
+#else /* IN_RING3 */
+ NOREF(pSession);
+ rc = VERR_NOT_SUPPORTED;
+#endif /* IN_RING3 */
+
+ pNetwork->MacTab.pTrunk = NULL;
+ }
+
+ /* bail out and clean up. */
+ RTSpinlockDestroy(pTrunk->hDstTabSpinlock);
+ }
+
+ for (unsigned i = 0; i < RT_ELEMENTS(pTrunk->apTaskDstTabs); i++)
+ RTMemFree(pTrunk->apTaskDstTabs[i]);
+ for (unsigned i = 0; i < pTrunk->cIntDstTabs; i++)
+ RTMemFree(pTrunk->apIntDstTabs[i]);
+ RTMemFree(pTrunk);
+
+ LogFlow(("intnetR0NetworkCreateTrunkIf: %Rrc - pszName=%s szTrunk=%s Network=%s\n",
+ rc, pszName, pNetwork->szTrunk, pNetwork->szName));
+ return rc;
+}
+
+
+/**
+ * Trunk reconnection thread function. It runs until signalled by another thread or by itself (upon
+ * successful trunk re-connection).
+ *
+ * Note that this function erases pNetwork->hTrunkReconnectThread right before it terminates!
+ */
+static DECLCALLBACK(int) intnetR0TrunkReconnectThread(RTTHREAD hThread, void *pvUser)
+{
+ RT_NOREF1(hThread);
+ PINTNETNETWORK pNetwork = (PINTNETNETWORK)pvUser;
+ PINTNET pIntNet = pNetwork->pIntNet;
+ Assert(pNetwork->pIntNet);
+
+ /*
+ * We attempt to reconnect the trunk every 5 seconds until somebody signals us.
+ */
+ while (!pNetwork->fTerminateReconnectThread && RTThreadUserWait(hThread, 5 * RT_MS_1SEC) == VERR_TIMEOUT)
+ {
+ /*
+ * Make sure nobody else is modifying networks.
+ * It is essential we give up on waiting for the big mutex much earlier than intnetR0NetworkDestruct
+ * gives up on waiting for us to terminate! This is why we wait for 1 second while network destruction
+ * code waits for 5 seconds. Otherwise the network may be already gone by the time we get the mutex.
+ */
+ if (RT_FAILURE(RTSemMutexRequestNoResume(pIntNet->hMtxCreateOpenDestroy, RT_MS_1SEC)))
+ continue;
+#if 0
+ /*
+ * This thread should be long gone by the time the network has been destroyed, but if we are
+ * really paranoid we should include the following code.
+ */
+ /*
+ * The network could have been destroyed while we were waiting on the big mutex, let us verify
+ * it is still valid by going over the list of existing networks.
+ */
+ PINTNETNETWORK pExistingNetwork = pIntNet->pNetworks;
+ for (; pExistingNetwork; pExistingNetwork = pExistingNetwork->pNext)
+ if (pExistingNetwork == pNetwork)
+ break;
+ /* We need the network to exist and to have at least one interface. */
+ if (pExistingNetwork && pNetwork->MacTab.cEntries)
+#else
+ /* We need the network to have at least one interface. */
+ if (pNetwork->MacTab.cEntries)
+#endif
+ {
+ PINTNETIF pAnyIf = pNetwork->MacTab.paEntries[0].pIf;
+ PSUPDRVSESSION pAnySession = pAnyIf ? pAnyIf->pSession : NULL;
+ if (pAnySession)
+ {
+ /* Attempt to re-connect trunk and if successful, terminate thread. */
+ if (RT_SUCCESS(intnetR0NetworkCreateTrunkIf(pNetwork, pAnySession)))
+ {
+ /* The network has active interfaces, we need to activate the trunk. */
+ if (pNetwork->cActiveIFs)
+ {
+ PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;
+ /* The intnetR0NetworkCreateTrunkIf call resets fHostActive and fWireActive. */
+ RTSpinlockAcquire(pNetwork->hAddrSpinlock);
+ pNetwork->MacTab.fHostActive = RT_BOOL(pNetwork->fFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED);
+ pNetwork->MacTab.fWireActive = RT_BOOL(pNetwork->fFlags & INTNET_OPEN_FLAGS_TRUNK_WIRE_ENABLED);
+ RTSpinlockRelease(pNetwork->hAddrSpinlock);
+ pTrunk->pIfPort->pfnSetState(pTrunk->pIfPort, INTNETTRUNKIFSTATE_ACTIVE);
+ }
+ pNetwork->fTerminateReconnectThread = true;
+ RTThreadUserSignal(hThread); /* Signal ourselves, so we break the loop after releasing the mutex */
+ }
+ }
+ }
+ RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
+ }
+
+ /*
+ * Destroy our handle in INTNETNETWORK so everyone knows we are gone.
+ * Note that this is the only place where this handle gets wiped out.
+ */
+ pNetwork->hTrunkReconnectThread = NIL_RTTHREAD;
+
+ return VINF_SUCCESS;
+}
+
+
+
+/**
+ * Object destructor callback.
+ * This is called for reference counted objectes when the count reaches 0.
+ *
+ * @param pvObj The object pointer.
+ * @param pvUser1 Pointer to the network.
+ * @param pvUser2 Pointer to the INTNET instance data.
+ */
+static DECLCALLBACK(void) intnetR0NetworkDestruct(void *pvObj, void *pvUser1, void *pvUser2)
+{
+ PINTNETNETWORK pNetwork = (PINTNETNETWORK)pvUser1;
+ PINTNET pIntNet = (PINTNET)pvUser2;
+ Log(("intnetR0NetworkDestruct: pvObj=%p pNetwork=%p pIntNet=%p %s\n", pvObj, pNetwork, pIntNet, pNetwork->szName));
+ Assert(pNetwork->pIntNet == pIntNet);
+ RT_NOREF1(pvObj);
+
+ /* Take the big create/open/destroy sem. */
+ RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
+
+ /*
+ * Tell the trunk, if present, that we're about to disconnect it and wish
+ * no further calls from it.
+ */
+ PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;
+ if (pTrunk)
+ pTrunk->pIfPort->pfnSetState(pTrunk->pIfPort, INTNETTRUNKIFSTATE_DISCONNECTING);
+
+ /*
+ * Deactivate and orphan any remaining interfaces and wait for them to idle.
+ *
+ * Note! Normally there are no more interfaces at this point, however, when
+ * supdrvCloseSession / supdrvCleanupSession release the objects the
+ * order is undefined. So, it's quite possible that the network will
+ * be dereference and destroyed before the interfaces.
+ */
+ RTSpinlockAcquire(pNetwork->hAddrSpinlock);
+
+ uint32_t iIf = pNetwork->MacTab.cEntries;
+ while (iIf-- > 0)
+ {
+ pNetwork->MacTab.paEntries[iIf].fActive = false;
+ pNetwork->MacTab.paEntries[iIf].pIf->fActive = false;
+ }
+
+ pNetwork->MacTab.fHostActive = false;
+ pNetwork->MacTab.fWireActive = false;
+
+ RTSpinlockRelease(pNetwork->hAddrSpinlock);
+
+ /* Wait for all the interfaces to quiesce. (Interfaces cannot be
+ removed / added since we're holding the big lock.) */
+ if (pTrunk)
+ intnetR0BusyWait(pNetwork, &pTrunk->cBusy);
+ else if (pNetwork->hTrunkReconnectThread != NIL_RTTHREAD)
+ {
+ /*
+ * There is no trunk and we have the trunk reconnection thread running.
+ * Signal the thread and wait for it to terminate.
+ */
+ pNetwork->fTerminateReconnectThread = true;
+ RTThreadUserSignal(pNetwork->hTrunkReconnectThread);
+ /*
+ * The tread cannot be re-connecting the trunk at the moment since we hold the big
+ * mutex, thus 5 second wait is definitely enough. Note that the wait time must
+ * exceed the time the reconnection thread waits on acquiring the big mutex, otherwise
+ * we will give up waiting for thread termination prematurely. Unfortunately it seems
+ * we have no way to terminate the thread if it failed to stop gracefully.
+ *
+ * Note that it is ok if the thread has already wiped out hTrunkReconnectThread by now,
+ * this means we no longer need to wait for it.
+ */
+ RTThreadWait(pNetwork->hTrunkReconnectThread, 5 * RT_MS_1SEC, NULL);
+ }
+
+ iIf = pNetwork->MacTab.cEntries;
+ while (iIf-- > 0)
+ intnetR0BusyWait(pNetwork, &pNetwork->MacTab.paEntries[iIf].pIf->cBusy);
+
+ /* Orphan the interfaces (not trunk). Don't bother with calling
+ pfnDisconnectInterface here since the networking is going away. */
+ RTSpinlockAcquire(pNetwork->hAddrSpinlock);
+ while ((iIf = pNetwork->MacTab.cEntries) > 0)
+ {
+ PINTNETIF pIf = pNetwork->MacTab.paEntries[iIf - 1].pIf;
+ RTSpinlockRelease(pNetwork->hAddrSpinlock);
+
+ intnetR0BusyWait(pNetwork, &pIf->cBusy);
+
+ RTSpinlockAcquire(pNetwork->hAddrSpinlock);
+ if ( iIf == pNetwork->MacTab.cEntries /* paranoia */
+ && pIf->cBusy)
+ {
+ pIf->pNetwork = NULL;
+ pNetwork->MacTab.cEntries--;
+ }
+ }
+
+ /*
+ * Zap the trunk pointer while we still own the spinlock, destroy the
+ * trunk after we've left it. Note that this might take a while...
+ */
+ pNetwork->MacTab.pTrunk = NULL;
+
+ RTSpinlockRelease(pNetwork->hAddrSpinlock);
+
+ if (pTrunk)
+ intnetR0TrunkIfDestroy(pTrunk, pNetwork);
+
+ /*
+ * Unlink the network.
+ * Note that it needn't be in the list if we failed during creation.
+ */
+ PINTNETNETWORK pPrev = pIntNet->pNetworks;
+ if (pPrev == pNetwork)
+ pIntNet->pNetworks = pNetwork->pNext;
+ else
+ {
+ for (; pPrev; pPrev = pPrev->pNext)
+ if (pPrev->pNext == pNetwork)
+ {
+ pPrev->pNext = pNetwork->pNext;
+ break;
+ }
+ }
+ pNetwork->pNext = NULL;
+ pNetwork->pvObj = NULL;
+
+ /*
+ * Free resources.
+ */
+ RTSemEventDestroy(pNetwork->hEvtBusyIf);
+ pNetwork->hEvtBusyIf = NIL_RTSEMEVENT;
+ RTSpinlockDestroy(pNetwork->hAddrSpinlock);
+ pNetwork->hAddrSpinlock = NIL_RTSPINLOCK;
+ RTMemFree(pNetwork->MacTab.paEntries);
+ pNetwork->MacTab.paEntries = NULL;
+ for (int i = kIntNetAddrType_Invalid + 1; i < kIntNetAddrType_End; i++)
+ intnetR0IfAddrCacheDestroy(&pNetwork->aAddrBlacklist[i]);
+ RTMemFree(pNetwork);
+
+ /* Release the create/destroy sem. */
+ RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
+}
+
+
+/**
+ * Checks if the open network flags are compatible.
+ *
+ * @returns VBox status code.
+ * @param pNetwork The network.
+ * @param fFlags The open network flags.
+ */
+static int intnetR0CheckOpenNetworkFlags(PINTNETNETWORK pNetwork, uint32_t fFlags)
+{
+ uint32_t const fNetFlags = pNetwork->fFlags;
+
+ if ( (fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
+ ^ (fNetFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE))
+ return VERR_INTNET_INCOMPATIBLE_FLAGS;
+
+ if (fFlags & INTNET_OPEN_FLAGS_REQUIRE_EXACT)
+ {
+ for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkNetFlags); i++)
+ if ( (fFlags & g_afIntNetOpenNetworkNetFlags[i].fPair)
+ && (fFlags & g_afIntNetOpenNetworkNetFlags[i].fPair)
+ != (fNetFlags & g_afIntNetOpenNetworkNetFlags[i].fPair) )
+ return VERR_INTNET_INCOMPATIBLE_FLAGS;
+ }
+
+ if (fFlags & INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES)
+ {
+ for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkNetFlags); i++)
+ if ( (fFlags & g_afIntNetOpenNetworkNetFlags[i].fRestrictive)
+ && !(fNetFlags & g_afIntNetOpenNetworkNetFlags[i].fRestrictive)
+ && (fNetFlags & g_afIntNetOpenNetworkNetFlags[i].fFixed) )
+ return VERR_INTNET_INCOMPATIBLE_FLAGS;
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Adapts flag changes on network opening.
+ *
+ * @returns VBox status code.
+ * @param pNetwork The network.
+ * @param fFlags The open network flags.
+ */
+static int intnetR0AdaptOpenNetworkFlags(PINTNETNETWORK pNetwork, uint32_t fFlags)
+{
+ /*
+ * Upgrade the minimum policy flags.
+ */
+ uint32_t fNetMinFlags = pNetwork->fMinFlags;
+ Assert(!(fNetMinFlags & INTNET_OPEN_FLAGS_RELAXED_MASK));
+ if (fFlags & INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES)
+ {
+ fNetMinFlags |= fFlags & INTNET_OPEN_FLAGS_STRICT_MASK;
+ if (fNetMinFlags != pNetwork->fMinFlags)
+ {
+ LogRel(("INTNET: %s - min flags changed %#x -> %#x\n", pNetwork->szName, pNetwork->fMinFlags, fNetMinFlags));
+ pNetwork->fMinFlags = fNetMinFlags;
+ }
+ }
+
+ /*
+ * Calculate the new network flags.
+ * (Depends on fNetMinFlags being recalculated first.)
+ */
+ uint32_t fNetFlags = pNetwork->fFlags;
+
+ for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkNetFlags); i++)
+ {
+ Assert(fNetFlags & g_afIntNetOpenNetworkNetFlags[i].fPair);
+ Assert(!(fNetMinFlags & g_afIntNetOpenNetworkNetFlags[i].fRelaxed));
+
+ if (!(fFlags & g_afIntNetOpenNetworkNetFlags[i].fPair))
+ continue;
+ if (fNetFlags & g_afIntNetOpenNetworkNetFlags[i].fFixed)
+ continue;
+
+ if ( (fNetMinFlags & g_afIntNetOpenNetworkNetFlags[i].fRestrictive)
+ || (fFlags & g_afIntNetOpenNetworkNetFlags[i].fRestrictive) )
+ {
+ fNetFlags &= ~g_afIntNetOpenNetworkNetFlags[i].fPair;
+ fNetFlags |= g_afIntNetOpenNetworkNetFlags[i].fRestrictive;
+ }
+ else if (!(fFlags & INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES))
+ {
+ fNetFlags &= ~g_afIntNetOpenNetworkNetFlags[i].fPair;
+ fNetFlags |= g_afIntNetOpenNetworkNetFlags[i].fRelaxed;
+ }
+ }
+
+ for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkNetFlags); i++)
+ {
+ Assert(fNetFlags & g_afIntNetOpenNetworkNetFlags[i].fPair);
+ fNetFlags |= fFlags & g_afIntNetOpenNetworkNetFlags[i].fFixed;
+ }
+
+ /*
+ * Apply the flags if they changed.
+ */
+ uint32_t const fOldNetFlags = pNetwork->fFlags;
+ if (fOldNetFlags != fNetFlags)
+ {
+ LogRel(("INTNET: %s - flags changed %#x -> %#x\n", pNetwork->szName, fOldNetFlags, fNetFlags));
+
+ RTSpinlockAcquire(pNetwork->hAddrSpinlock);
+
+ pNetwork->fFlags = fNetFlags;
+
+ /* Recalculate some derived switcher variables. */
+ bool fActiveTrunk = pNetwork->MacTab.pTrunk
+ && pNetwork->cActiveIFs > 0;
+ pNetwork->MacTab.fHostActive = fActiveTrunk
+ && (fNetFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED);
+ pNetwork->MacTab.fHostPromiscuousEff = ( pNetwork->MacTab.fHostPromiscuousReal
+ || (fNetFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_PROMISC_MODE))
+ && (fNetFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST);
+
+ pNetwork->MacTab.fWireActive = fActiveTrunk
+ && (fNetFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED);
+ pNetwork->MacTab.fWirePromiscuousReal= RT_BOOL(fNetFlags & INTNET_OPEN_FLAGS_TRUNK_WIRE_PROMISC_MODE);
+ pNetwork->MacTab.fWirePromiscuousEff = pNetwork->MacTab.fWirePromiscuousReal
+ && (fNetFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_WIRE);
+
+ if ((fOldNetFlags ^ fNetFlags) & INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS)
+ {
+ pNetwork->MacTab.cPromiscuousEntries = 0;
+ pNetwork->MacTab.cPromiscuousNoTrunkEntries = 0;
+
+ uint32_t iIf = pNetwork->MacTab.cEntries;
+ while (iIf-- > 0)
+ {
+ PINTNETMACTABENTRY pEntry = &pNetwork->MacTab.paEntries[iIf];
+ PINTNETIF pIf2 = pEntry->pIf;
+ if ( pIf2 /* paranoia */
+ && pIf2->fPromiscuousReal)
+ {
+ bool fPromiscuousEff = (fNetFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS)
+ && (pIf2->fOpenFlags & INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW);
+ pEntry->fPromiscuousEff = fPromiscuousEff;
+ pEntry->fPromiscuousSeeTrunk = fPromiscuousEff
+ && (pIf2->fOpenFlags & INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK);
+
+ if (pEntry->fPromiscuousEff)
+ {
+ pNetwork->MacTab.cPromiscuousEntries++;
+ if (!pEntry->fPromiscuousSeeTrunk)
+ pNetwork->MacTab.cPromiscuousNoTrunkEntries++;
+ }
+ }
+ }
+ }
+
+ RTSpinlockRelease(pNetwork->hAddrSpinlock);
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Opens an existing network.
+ *
+ * The call must own the INTNET::hMtxCreateOpenDestroy.
+ *
+ * @returns VBox status code.
+ * @param pIntNet The instance data.
+ * @param pSession The current session.
+ * @param pszNetwork The network name. This has a valid length.
+ * @param enmTrunkType The trunk type.
+ * @param pszTrunk The trunk name. Its meaning is specific to the type.
+ * @param fFlags Flags, see INTNET_OPEN_FLAGS_*.
+ * @param ppNetwork Where to store the pointer to the network on success.
+ */
+static int intnetR0OpenNetwork(PINTNET pIntNet, PSUPDRVSESSION pSession, const char *pszNetwork, INTNETTRUNKTYPE enmTrunkType,
+ const char *pszTrunk, uint32_t fFlags, PINTNETNETWORK *ppNetwork)
+{
+ LogFlow(("intnetR0OpenNetwork: pIntNet=%p pSession=%p pszNetwork=%p:{%s} enmTrunkType=%d pszTrunk=%p:{%s} fFlags=%#x ppNetwork=%p\n",
+ pIntNet, pSession, pszNetwork, pszNetwork, enmTrunkType, pszTrunk, pszTrunk, fFlags, ppNetwork));
+
+ /* just pro forma validation, the caller is internal. */
+ AssertPtr(pIntNet);
+ AssertPtr(pSession);
+ AssertPtr(pszNetwork);
+ Assert(enmTrunkType > kIntNetTrunkType_Invalid && enmTrunkType < kIntNetTrunkType_End);
+ AssertPtr(pszTrunk);
+ Assert(!(fFlags & ~INTNET_OPEN_FLAGS_MASK));
+ AssertPtr(ppNetwork);
+ *ppNetwork = NULL;
+
+ /*
+ * Search networks by name.
+ */
+ PINTNETNETWORK pCur;
+ uint8_t cchName = (uint8_t)strlen(pszNetwork);
+ Assert(cchName && cchName < sizeof(pCur->szName)); /* caller ensures this */
+
+ pCur = pIntNet->pNetworks;
+ while (pCur)
+ {
+ if ( pCur->cchName == cchName
+ && !memcmp(pCur->szName, pszNetwork, cchName))
+ {
+ /*
+ * Found the network, now check that we have the same ideas
+ * about the trunk setup and security.
+ */
+ int rc;
+ if ( enmTrunkType == kIntNetTrunkType_WhateverNone
+#ifdef VBOX_WITH_NAT_SERVICE
+ || enmTrunkType == kIntNetTrunkType_SrvNat /** @todo what does it mean */
+#endif
+ || ( pCur->enmTrunkType == enmTrunkType
+ && !strcmp(pCur->szTrunk, pszTrunk)))
+ {
+ rc = intnetR0CheckOpenNetworkFlags(pCur, fFlags);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Increment the reference and check that the session
+ * can access this network.
+ */
+ rc = SUPR0ObjAddRef(pCur->pvObj, pSession);
+ if (RT_SUCCESS(rc))
+ {
+ if (pCur->fFlags & INTNET_OPEN_FLAGS_ACCESS_RESTRICTED)
+ rc = SUPR0ObjVerifyAccess(pCur->pvObj, pSession, pCur->szName);
+ if (RT_SUCCESS(rc))
+ *ppNetwork = pCur;
+ else
+ SUPR0ObjRelease(pCur->pvObj, pSession);
+ }
+ else if (rc == VERR_WRONG_ORDER)
+ rc = VERR_NOT_FOUND; /* destruction race, pretend the other isn't there. */
+ }
+ }
+ else
+ {
+ rc = VERR_INTNET_INCOMPATIBLE_TRUNK;
+ LogRel(("intnetR0OpenNetwork failed. rc=%Rrc pCur->szTrunk=%s pszTrunk=%s pCur->enmTrunkType=%d enmTrunkType=%d\n",
+ rc, pCur->szTrunk, pszTrunk, pCur->enmTrunkType, enmTrunkType));
+ }
+
+ LogFlow(("intnetR0OpenNetwork: returns %Rrc *ppNetwork=%p\n", rc, *ppNetwork));
+ return rc;
+ }
+
+ pCur = pCur->pNext;
+ }
+
+ LogFlow(("intnetR0OpenNetwork: returns VERR_NOT_FOUND\n"));
+ return VERR_NOT_FOUND;
+}
+
+
+/**
+ * Creates a new network.
+ *
+ * The call must own the INTNET::hMtxCreateOpenDestroy and has already attempted
+ * opening the network and found it to be non-existing.
+ *
+ * @returns VBox status code.
+ * @param pIntNet The instance data.
+ * @param pSession The session handle.
+ * @param pszNetwork The name of the network. This must be at least one character long and no longer
+ * than the INTNETNETWORK::szName.
+ * @param enmTrunkType The trunk type.
+ * @param pszTrunk The trunk name. Its meaning is specific to the type.
+ * @param fFlags Flags, see INTNET_OPEN_FLAGS_*.
+ * @param ppNetwork Where to store the network. In the case of failure
+ * whatever is returned here should be dereferenced
+ * outside the INTNET::hMtxCreateOpenDestroy.
+ */
+static int intnetR0CreateNetwork(PINTNET pIntNet, PSUPDRVSESSION pSession, const char *pszNetwork, INTNETTRUNKTYPE enmTrunkType,
+ const char *pszTrunk, uint32_t fFlags, PINTNETNETWORK *ppNetwork)
+{
+ LogFlow(("intnetR0CreateNetwork: pIntNet=%p pSession=%p pszNetwork=%p:{%s} enmTrunkType=%d pszTrunk=%p:{%s} fFlags=%#x ppNetwork=%p\n",
+ pIntNet, pSession, pszNetwork, pszNetwork, enmTrunkType, pszTrunk, pszTrunk, fFlags, ppNetwork));
+
+ /* just pro forma validation, the caller is internal. */
+ AssertPtr(pIntNet);
+ AssertPtr(pSession);
+ AssertPtr(pszNetwork);
+ Assert(enmTrunkType > kIntNetTrunkType_Invalid && enmTrunkType < kIntNetTrunkType_End);
+ AssertPtr(pszTrunk);
+ Assert(!(fFlags & ~INTNET_OPEN_FLAGS_MASK));
+ AssertPtr(ppNetwork);
+
+ *ppNetwork = NULL;
+
+ /*
+ * Adjust the flags with defaults for the network policies.
+ * Note: Main restricts promiscuous mode on the per interface level.
+ */
+ fFlags &= ~( INTNET_OPEN_FLAGS_IF_FIXED
+ | INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW
+ | INTNET_OPEN_FLAGS_IF_PROMISC_DENY
+ | INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK
+ | INTNET_OPEN_FLAGS_IF_PROMISC_NO_TRUNK
+ | INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES
+ | INTNET_OPEN_FLAGS_REQUIRE_EXACT);
+ uint32_t fDefFlags = INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS
+ | INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST
+ | INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_WIRE
+ | INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED
+ | INTNET_OPEN_FLAGS_TRUNK_HOST_CHASTE_MODE
+ | INTNET_OPEN_FLAGS_TRUNK_WIRE_ENABLED
+ | INTNET_OPEN_FLAGS_TRUNK_WIRE_CHASTE_MODE;
+ if ( enmTrunkType == kIntNetTrunkType_WhateverNone
+#ifdef VBOX_WITH_NAT_SERVICE
+ || enmTrunkType == kIntNetTrunkType_SrvNat /* simialar security */
+#endif
+ || enmTrunkType == kIntNetTrunkType_None)
+ fDefFlags |= INTNET_OPEN_FLAGS_ACCESS_RESTRICTED;
+ else
+ fDefFlags |= INTNET_OPEN_FLAGS_ACCESS_PUBLIC;
+ for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkNetFlags); i++)
+ if (!(fFlags & g_afIntNetOpenNetworkNetFlags[i].fPair))
+ fFlags |= g_afIntNetOpenNetworkNetFlags[i].fPair & fDefFlags;
+
+ /*
+ * Allocate and initialize.
+ */
+ size_t cb = sizeof(INTNETNETWORK);
+ if (fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
+ cb += INTNETNETWORK_TMP_SIZE + 64;
+ PINTNETNETWORK pNetwork = (PINTNETNETWORK)RTMemAllocZ(cb);
+ if (!pNetwork)
+ return VERR_NO_MEMORY;
+ //pNetwork->pNext = NULL;
+ //pNetwork->pIfs = NULL;
+ //pNetwork->fTerminateReconnectThread = false;
+ pNetwork->hTrunkReconnectThread = NIL_RTTHREAD;
+ pNetwork->hAddrSpinlock = NIL_RTSPINLOCK;
+ pNetwork->MacTab.cEntries = 0;
+ pNetwork->MacTab.cEntriesAllocated = INTNET_GROW_DSTTAB_SIZE;
+ //pNetwork->MacTab.cPromiscuousEntries = 0;
+ //pNetwork->MacTab.cPromiscuousNoTrunkEntries = 0;
+ pNetwork->MacTab.paEntries = NULL;
+ pNetwork->MacTab.fHostPromiscuousReal = false;
+ pNetwork->MacTab.fHostPromiscuousEff = false;
+ pNetwork->MacTab.fHostActive = false;
+ pNetwork->MacTab.fWirePromiscuousReal = false;
+ pNetwork->MacTab.fWirePromiscuousEff = false;
+ pNetwork->MacTab.fWireActive = false;
+ pNetwork->MacTab.pTrunk = NULL;
+ pNetwork->hEvtBusyIf = NIL_RTSEMEVENT;
+ pNetwork->pIntNet = pIntNet;
+ //pNetwork->pvObj = NULL;
+ if (fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
+ pNetwork->pbTmp = RT_ALIGN_PT(pNetwork + 1, 64, uint8_t *);
+ //else
+ // pNetwork->pbTmp = NULL;
+ pNetwork->fFlags = fFlags;
+ //pNetwork->fMinFlags = 0;
+ //pNetwork->cActiveIFs = 0;
+ size_t cchName = strlen(pszNetwork);
+ pNetwork->cchName = (uint8_t)cchName;
+ Assert(cchName && cchName < sizeof(pNetwork->szName)); /* caller's responsibility. */
+ memcpy(pNetwork->szName, pszNetwork, cchName); /* '\0' at courtesy of alloc. */
+ pNetwork->enmTrunkType = enmTrunkType;
+ Assert(strlen(pszTrunk) < sizeof(pNetwork->szTrunk)); /* caller's responsibility. */
+ strcpy(pNetwork->szTrunk, pszTrunk);
+
+ /*
+ * Create the semaphore, spinlock and allocate the interface table.
+ */
+ int rc = RTSemEventCreate(&pNetwork->hEvtBusyIf);
+ if (RT_SUCCESS(rc))
+ rc = RTSpinlockCreate(&pNetwork->hAddrSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "hAddrSpinlock");
+ if (RT_SUCCESS(rc))
+ {
+ pNetwork->MacTab.paEntries = (PINTNETMACTABENTRY)RTMemAlloc(sizeof(INTNETMACTABENTRY) * pNetwork->MacTab.cEntriesAllocated);
+ if (!pNetwork->MacTab.paEntries)
+ rc = VERR_NO_MEMORY;
+ }
+ if (RT_SUCCESS(rc))
+ {
+ for (int i = kIntNetAddrType_Invalid + 1; i < kIntNetAddrType_End && RT_SUCCESS(rc); i++)
+ rc = intnetR0IfAddrCacheInit(&pNetwork->aAddrBlacklist[i], (INTNETADDRTYPE)i,
+ !!(pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE));
+ }
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Register the object in the current session and link it into the network list.
+ */
+ pNetwork->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_INTERNAL_NETWORK, intnetR0NetworkDestruct, pNetwork, pIntNet);
+ if (pNetwork->pvObj)
+ {
+ pNetwork->pNext = pIntNet->pNetworks;
+ pIntNet->pNetworks = pNetwork;
+
+ /*
+ * Check if the current session is actually allowed to create and
+ * open the network. It is possible to implement network name
+ * based policies and these must be checked now. SUPR0ObjRegister
+ * does no such checks.
+ */
+ rc = SUPR0ObjVerifyAccess(pNetwork->pvObj, pSession, pNetwork->szName);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Connect the trunk.
+ */
+ rc = intnetR0NetworkCreateTrunkIf(pNetwork, pSession);
+ if (RT_SUCCESS(rc))
+ {
+ *ppNetwork = pNetwork;
+ LogFlow(("intnetR0CreateNetwork: returns VINF_SUCCESS *ppNetwork=%p\n", pNetwork));
+ return VINF_SUCCESS;
+ }
+ }
+
+ SUPR0ObjRelease(pNetwork->pvObj, pSession);
+ LogFlow(("intnetR0CreateNetwork: returns %Rrc\n", rc));
+ return rc;
+ }
+
+ /* cleanup */
+ rc = VERR_NO_MEMORY;
+ }
+
+ RTSemEventDestroy(pNetwork->hEvtBusyIf);
+ pNetwork->hEvtBusyIf = NIL_RTSEMEVENT;
+ RTSpinlockDestroy(pNetwork->hAddrSpinlock);
+ pNetwork->hAddrSpinlock = NIL_RTSPINLOCK;
+ RTMemFree(pNetwork->MacTab.paEntries);
+ pNetwork->MacTab.paEntries = NULL;
+ RTMemFree(pNetwork);
+
+ LogFlow(("intnetR0CreateNetwork: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * Opens a network interface and connects it to the specified network.
+ *
+ * @returns VBox status code.
+ * @param pSession The session handle.
+ * @param pszNetwork The network name.
+ * @param enmTrunkType The trunk type.
+ * @param pszTrunk The trunk name. Its meaning is specific to the type.
+ * @param fFlags Flags, see INTNET_OPEN_FLAGS_*.
+ * @param fRestrictAccess Whether new participants should be subjected to
+ * access check or not.
+ * @param cbSend The send buffer size.
+ * @param cbRecv The receive buffer size.
+ * @param pfnRecvAvail The receive available callback to call instead of
+ * signalling the semaphore (R3 service only).
+ * @param pvUser The opaque user data to pass to the callback.
+ * @param phIf Where to store the handle to the network interface.
+ */
+INTNETR0DECL(int) IntNetR0Open(PSUPDRVSESSION pSession, const char *pszNetwork,
+ INTNETTRUNKTYPE enmTrunkType, const char *pszTrunk, uint32_t fFlags,
+ uint32_t cbSend, uint32_t cbRecv, PFNINTNETIFRECVAVAIL pfnRecvAvail, void *pvUser,
+ PINTNETIFHANDLE phIf)
+{
+ LogFlow(("IntNetR0Open: pSession=%p pszNetwork=%p:{%s} enmTrunkType=%d pszTrunk=%p:{%s} fFlags=%#x cbSend=%u cbRecv=%u phIf=%p\n",
+ pSession, pszNetwork, pszNetwork, enmTrunkType, pszTrunk, pszTrunk, fFlags, cbSend, cbRecv, phIf));
+
+ /*
+ * Validate input.
+ */
+ PINTNET pIntNet = g_pIntNet;
+ AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
+ AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
+
+ AssertPtrReturn(pszNetwork, VERR_INVALID_PARAMETER);
+ const char *pszNetworkEnd = RTStrEnd(pszNetwork, INTNET_MAX_NETWORK_NAME);
+ AssertReturn(pszNetworkEnd, VERR_INVALID_PARAMETER);
+ size_t cchNetwork = pszNetworkEnd - pszNetwork;
+ AssertReturn(cchNetwork, VERR_INVALID_PARAMETER);
+
+ if (pszTrunk)
+ {
+ AssertPtrReturn(pszTrunk, VERR_INVALID_PARAMETER);
+ const char *pszTrunkEnd = RTStrEnd(pszTrunk, INTNET_MAX_TRUNK_NAME);
+ AssertReturn(pszTrunkEnd, VERR_INVALID_PARAMETER);
+ }
+ else
+ pszTrunk = "";
+
+ AssertMsgReturn(enmTrunkType > kIntNetTrunkType_Invalid && enmTrunkType < kIntNetTrunkType_End,
+ ("%d\n", enmTrunkType), VERR_INVALID_PARAMETER);
+ switch (enmTrunkType)
+ {
+ case kIntNetTrunkType_None:
+ case kIntNetTrunkType_WhateverNone:
+#ifdef VBOX_WITH_NAT_SERVICE
+ case kIntNetTrunkType_SrvNat:
+#endif
+ if (*pszTrunk)
+ return VERR_INVALID_PARAMETER;
+ break;
+
+ case kIntNetTrunkType_NetFlt:
+ case kIntNetTrunkType_NetAdp:
+ if (!*pszTrunk)
+ return VERR_INVALID_PARAMETER;
+ break;
+
+ default:
+ return VERR_NOT_IMPLEMENTED;
+ }
+
+ AssertMsgReturn(!(fFlags & ~INTNET_OPEN_FLAGS_MASK), ("%#x\n", fFlags), VERR_INVALID_PARAMETER);
+ for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkNetFlags); i++)
+ AssertMsgReturn((fFlags & g_afIntNetOpenNetworkNetFlags[i].fPair) != g_afIntNetOpenNetworkNetFlags[i].fPair,
+ ("%#x (%#x)\n", fFlags, g_afIntNetOpenNetworkNetFlags[i].fPair), VERR_INVALID_PARAMETER);
+ for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkIfFlags); i++)
+ AssertMsgReturn((fFlags & g_afIntNetOpenNetworkIfFlags[i].fPair) != g_afIntNetOpenNetworkIfFlags[i].fPair,
+ ("%#x (%#x)\n", fFlags, g_afIntNetOpenNetworkIfFlags[i].fPair), VERR_INVALID_PARAMETER);
+ AssertPtrReturn(phIf, VERR_INVALID_PARAMETER);
+
+ /*
+ * Acquire the mutex to serialize open/create/close.
+ */
+ int rc = RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ /*
+ * Try open / create the network and create an interface on it for the
+ * caller to use.
+ */
+ PINTNETNETWORK pNetwork = NULL;
+ rc = intnetR0OpenNetwork(pIntNet, pSession, pszNetwork, enmTrunkType, pszTrunk, fFlags, &pNetwork);
+ if (RT_SUCCESS(rc))
+ {
+ rc = intnetR0NetworkCreateIf(pNetwork, pSession, cbSend, cbRecv, fFlags, pfnRecvAvail, pvUser, phIf);
+ if (RT_SUCCESS(rc))
+ {
+ intnetR0AdaptOpenNetworkFlags(pNetwork, fFlags);
+ rc = VINF_ALREADY_INITIALIZED;
+ }
+ else
+ SUPR0ObjRelease(pNetwork->pvObj, pSession);
+ }
+ else if (rc == VERR_NOT_FOUND)
+ {
+ rc = intnetR0CreateNetwork(pIntNet, pSession, pszNetwork, enmTrunkType, pszTrunk, fFlags, &pNetwork);
+ if (RT_SUCCESS(rc))
+ {
+ rc = intnetR0NetworkCreateIf(pNetwork, pSession, cbSend, cbRecv, fFlags, pfnRecvAvail, pvUser, phIf);
+ if (RT_FAILURE(rc))
+ SUPR0ObjRelease(pNetwork->pvObj, pSession);
+ }
+ }
+
+ RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
+ LogFlow(("IntNetR0Open: return %Rrc *phIf=%RX32\n", rc, *phIf));
+ return rc;
+}
+
+
+/**
+ * VMMR0 request wrapper for IntNetR0Open.
+ *
+ * @returns see GMMR0MapUnmapChunk.
+ * @param pSession The caller's session.
+ * @param pReq The request packet.
+ */
+INTNETR0DECL(int) IntNetR0OpenReq(PSUPDRVSESSION pSession, PINTNETOPENREQ pReq)
+{
+ if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
+ return VERR_INVALID_PARAMETER;
+ return IntNetR0Open(pSession, &pReq->szNetwork[0], pReq->enmTrunkType, pReq->szTrunk,
+ pReq->fFlags, pReq->cbSend, pReq->cbRecv, NULL /*pfnRecvAvail*/, NULL /*pvUser*/, &pReq->hIf);
+}
+
+
+#if defined(VBOX_WITH_INTNET_SERVICE_IN_R3) && defined(IN_RING3)
+INTNETR3DECL(int) IntNetR3Open(PSUPDRVSESSION pSession, const char *pszNetwork,
+ INTNETTRUNKTYPE enmTrunkType, const char *pszTrunk, uint32_t fFlags,
+ uint32_t cbSend, uint32_t cbRecv, PFNINTNETIFRECVAVAIL pfnRecvAvail, void *pvUser,
+ PINTNETIFHANDLE phIf)
+{
+ return IntNetR0Open(pSession, pszNetwork, enmTrunkType, pszTrunk, fFlags, cbSend, cbRecv, pfnRecvAvail, pvUser, phIf);
+}
+#endif
+
+
+/**
+ * Count the internal networks.
+ *
+ * This is mainly for providing the testcase with some introspection to validate
+ * behavior when closing interfaces.
+ *
+ * @returns The number of networks.
+ */
+INTNETR0DECL(uint32_t) IntNetR0GetNetworkCount(void)
+{
+ /*
+ * Grab the instance.
+ */
+ PINTNET pIntNet = g_pIntNet;
+ if (!pIntNet)
+ return 0;
+ AssertPtrReturn(pIntNet, 0);
+ AssertReturn(pIntNet->u32Magic == INTNET_MAGIC, 0);
+
+ /*
+ * Grab the mutex and count the networks.
+ */
+ int rc = RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
+ if (RT_FAILURE(rc))
+ return 0;
+
+ uint32_t cNetworks = 0;
+ for (PINTNETNETWORK pCur = pIntNet->pNetworks; pCur; pCur = pCur->pNext)
+ cNetworks++;
+
+ RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
+
+ return cNetworks;
+}
+
+
+
+/**
+ * Destroys an instance of the Ring-0 internal networking service.
+ */
+INTNETR0DECL(void) IntNetR0Term(void)
+{
+ LogFlow(("IntNetR0Term:\n"));
+
+ /*
+ * Zap the global pointer and validate it.
+ */
+ PINTNET pIntNet = g_pIntNet;
+ g_pIntNet = NULL;
+ if (!pIntNet)
+ return;
+ AssertPtrReturnVoid(pIntNet);
+ AssertReturnVoid(pIntNet->u32Magic == INTNET_MAGIC);
+
+ /*
+ * There is not supposed to be any networks hanging around at this time.
+ */
+ AssertReturnVoid(ASMAtomicCmpXchgU32(&pIntNet->u32Magic, ~INTNET_MAGIC, INTNET_MAGIC));
+ Assert(pIntNet->pNetworks == NULL);
+ /*
+ * @todo Do we really need to be paranoid enough to go over the list of networks here,
+ * trying to terminate trunk re-connection threads here?
+ */
+ if (pIntNet->hMtxCreateOpenDestroy != NIL_RTSEMMUTEX)
+ {
+ RTSemMutexDestroy(pIntNet->hMtxCreateOpenDestroy);
+ pIntNet->hMtxCreateOpenDestroy = NIL_RTSEMMUTEX;
+ }
+ if (pIntNet->hHtIfs != NIL_RTHANDLETABLE)
+ {
+ /** @todo does it make sense to have a deleter here? */
+ RTHandleTableDestroy(pIntNet->hHtIfs, NULL, NULL);
+ pIntNet->hHtIfs = NIL_RTHANDLETABLE;
+ }
+
+ RTMemFree(pIntNet);
+}
+
+
+/**
+ * Initializes the internal network ring-0 service.
+ *
+ * @returns VBox status code.
+ */
+INTNETR0DECL(int) IntNetR0Init(void)
+{
+ LogFlow(("IntNetR0Init:\n"));
+ int rc = VERR_NO_MEMORY;
+ PINTNET pIntNet = (PINTNET)RTMemAllocZ(sizeof(*pIntNet));
+ if (pIntNet)
+ {
+ //pIntNet->pNetworks = NULL;
+
+ rc = RTSemMutexCreate(&pIntNet->hMtxCreateOpenDestroy);
+ if (RT_SUCCESS(rc))
+ {
+ rc = RTHandleTableCreateEx(&pIntNet->hHtIfs, RTHANDLETABLE_FLAGS_LOCKED | RTHANDLETABLE_FLAGS_CONTEXT,
+ UINT32_C(0x8ffe0000), 4096, intnetR0IfRetainHandle, NULL);
+ if (RT_SUCCESS(rc))
+ {
+ pIntNet->u32Magic = INTNET_MAGIC;
+ g_pIntNet = pIntNet;
+ LogFlow(("IntNetR0Init: returns VINF_SUCCESS pIntNet=%p\n", pIntNet));
+ return VINF_SUCCESS;
+ }
+
+ RTSemMutexDestroy(pIntNet->hMtxCreateOpenDestroy);
+ }
+ RTMemFree(pIntNet);
+ }
+ LogFlow(("IntNetR0Init: returns %Rrc\n", rc));
+ return rc;
+}
+